diff options
| author | Martin Matuska <mm@FreeBSD.org> | 2012-07-18 08:12:04 +0000 |
|---|---|---|
| committer | Martin Matuska <mm@FreeBSD.org> | 2012-07-18 08:12:04 +0000 |
| commit | af56e8c4b416d774961b41eee1eb349d657ebb8c (patch) | |
| tree | e332d1e6089905f45302dedddb9967a87ade136a | |
| parent | 93a00b0821525e25814cd720fafd04d600811c28 (diff) | |
Update vendor-sys/opensolaris to last OpenSolaris state (13149:b23a4dab3d50) (refs: vendor/opensolaris/20100818, vendor/opensolaris)
Add ZFS bits to vendor-sys/opensolaris
Obtained from: https://hg.openindiana.org/upstream/oracle/onnv-gate
Notes
Notes:
svn path=/vendor-sys/opensolaris/dist/; revision=238567
svn path=/vendor-sys/opensolaris/20100818/; revision=238568; tag=vendor/opensolaris/20100818
234 files changed, 177353 insertions, 110 deletions
diff --git a/common/acl/acl_common.c b/common/acl/acl_common.c new file mode 100644 index 000000000000..eafc47d10f2d --- /dev/null +++ b/common/acl/acl_common.c @@ -0,0 +1,1755 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/avl.h> +#if defined(_KERNEL) +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <acl/acl_common.h> +#else +#include <errno.h> +#include <stdlib.h> +#include <stddef.h> +#include <strings.h> +#include <unistd.h> +#include <assert.h> +#include <grp.h> +#include <pwd.h> +#include <acl_common.h> +#define ASSERT assert +#endif + +#define ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \ + ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \ + ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL) + + +#define ACL_SYNCHRONIZE_SET_DENY 0x0000001 +#define ACL_SYNCHRONIZE_SET_ALLOW 0x0000002 +#define ACL_SYNCHRONIZE_ERR_DENY 0x0000004 +#define ACL_SYNCHRONIZE_ERR_ALLOW 0x0000008 + +#define ACL_WRITE_OWNER_SET_DENY 0x0000010 +#define ACL_WRITE_OWNER_SET_ALLOW 0x0000020 +#define ACL_WRITE_OWNER_ERR_DENY 0x0000040 +#define ACL_WRITE_OWNER_ERR_ALLOW 0x0000080 + +#define ACL_DELETE_SET_DENY 0x0000100 +#define ACL_DELETE_SET_ALLOW 0x0000200 +#define ACL_DELETE_ERR_DENY 0x0000400 +#define ACL_DELETE_ERR_ALLOW 0x0000800 + +#define ACL_WRITE_ATTRS_OWNER_SET_DENY 0x0001000 +#define ACL_WRITE_ATTRS_OWNER_SET_ALLOW 0x0002000 +#define ACL_WRITE_ATTRS_OWNER_ERR_DENY 0x0004000 +#define ACL_WRITE_ATTRS_OWNER_ERR_ALLOW 0x0008000 + +#define ACL_WRITE_ATTRS_WRITER_SET_DENY 0x0010000 +#define ACL_WRITE_ATTRS_WRITER_SET_ALLOW 0x0020000 +#define ACL_WRITE_ATTRS_WRITER_ERR_DENY 0x0040000 +#define ACL_WRITE_ATTRS_WRITER_ERR_ALLOW 0x0080000 + +#define ACL_WRITE_NAMED_WRITER_SET_DENY 0x0100000 +#define ACL_WRITE_NAMED_WRITER_SET_ALLOW 0x0200000 +#define ACL_WRITE_NAMED_WRITER_ERR_DENY 0x0400000 +#define ACL_WRITE_NAMED_WRITER_ERR_ALLOW 0x0800000 + +#define ACL_READ_NAMED_READER_SET_DENY 0x1000000 +#define ACL_READ_NAMED_READER_SET_ALLOW 0x2000000 +#define ACL_READ_NAMED_READER_ERR_DENY 0x4000000 +#define ACL_READ_NAMED_READER_ERR_ALLOW 0x8000000 + + +#define ACE_VALID_MASK_BITS (\ + ACE_READ_DATA | \ + ACE_LIST_DIRECTORY | \ + 
ACE_WRITE_DATA | \ + ACE_ADD_FILE | \ + ACE_APPEND_DATA | \ + ACE_ADD_SUBDIRECTORY | \ + ACE_READ_NAMED_ATTRS | \ + ACE_WRITE_NAMED_ATTRS | \ + ACE_EXECUTE | \ + ACE_DELETE_CHILD | \ + ACE_READ_ATTRIBUTES | \ + ACE_WRITE_ATTRIBUTES | \ + ACE_DELETE | \ + ACE_READ_ACL | \ + ACE_WRITE_ACL | \ + ACE_WRITE_OWNER | \ + ACE_SYNCHRONIZE) + +#define ACE_MASK_UNDEFINED 0x80000000 + +#define ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \ + ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \ + ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \ + ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE) + +/* + * ACL conversion helpers + */ + +typedef enum { + ace_unused, + ace_user_obj, + ace_user, + ace_group, /* includes GROUP and GROUP_OBJ */ + ace_other_obj +} ace_to_aent_state_t; + +typedef struct acevals { + uid_t key; + avl_node_t avl; + uint32_t mask; + uint32_t allowed; + uint32_t denied; + int aent_type; +} acevals_t; + +typedef struct ace_list { + acevals_t user_obj; + avl_tree_t user; + int numusers; + acevals_t group_obj; + avl_tree_t group; + int numgroups; + acevals_t other_obj; + uint32_t acl_mask; + int hasmask; + int dfacl_flag; + ace_to_aent_state_t state; + int seen; /* bitmask of all aclent_t a_type values seen */ +} ace_list_t; + +/* + * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified. 
+ * v = Ptr to array/vector of objs + * n = # objs in the array + * s = size of each obj (must be multiples of a word size) + * f = ptr to function to compare two objs + * returns (-1 = less than, 0 = equal, 1 = greater than + */ +void +ksort(caddr_t v, int n, int s, int (*f)()) +{ + int g, i, j, ii; + unsigned int *p1, *p2; + unsigned int tmp; + + /* No work to do */ + if (v == NULL || n <= 1) + return; + + /* Sanity check on arguments */ + ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0); + ASSERT(s > 0); + for (g = n / 2; g > 0; g /= 2) { + for (i = g; i < n; i++) { + for (j = i - g; j >= 0 && + (*f)(v + j * s, v + (j + g) * s) == 1; + j -= g) { + p1 = (void *)(v + j * s); + p2 = (void *)(v + (j + g) * s); + for (ii = 0; ii < s / 4; ii++) { + tmp = *p1; + *p1++ = *p2; + *p2++ = tmp; + } + } + } + } +} + +/* + * Compare two acls, all fields. Returns: + * -1 (less than) + * 0 (equal) + * +1 (greater than) + */ +int +cmp2acls(void *a, void *b) +{ + aclent_t *x = (aclent_t *)a; + aclent_t *y = (aclent_t *)b; + + /* Compare types */ + if (x->a_type < y->a_type) + return (-1); + if (x->a_type > y->a_type) + return (1); + /* Equal types; compare id's */ + if (x->a_id < y->a_id) + return (-1); + if (x->a_id > y->a_id) + return (1); + /* Equal ids; compare perms */ + if (x->a_perm < y->a_perm) + return (-1); + if (x->a_perm > y->a_perm) + return (1); + /* Totally equal */ + return (0); +} + +/*ARGSUSED*/ +static void * +cacl_realloc(void *ptr, size_t size, size_t new_size) +{ +#if defined(_KERNEL) + void *tmp; + + tmp = kmem_alloc(new_size, KM_SLEEP); + (void) memcpy(tmp, ptr, (size < new_size) ? 
size : new_size); + kmem_free(ptr, size); + return (tmp); +#else + return (realloc(ptr, new_size)); +#endif +} + +static int +cacl_malloc(void **ptr, size_t size) +{ +#if defined(_KERNEL) + *ptr = kmem_zalloc(size, KM_SLEEP); + return (0); +#else + *ptr = calloc(1, size); + if (*ptr == NULL) + return (errno); + + return (0); +#endif +} + +/*ARGSUSED*/ +static void +cacl_free(void *ptr, size_t size) +{ +#if defined(_KERNEL) + kmem_free(ptr, size); +#else + free(ptr); +#endif +} + +acl_t * +acl_alloc(enum acl_type type) +{ + acl_t *aclp; + + if (cacl_malloc((void **)&aclp, sizeof (acl_t)) != 0) + return (NULL); + + aclp->acl_aclp = NULL; + aclp->acl_cnt = 0; + + switch (type) { + case ACE_T: + aclp->acl_type = ACE_T; + aclp->acl_entry_size = sizeof (ace_t); + break; + case ACLENT_T: + aclp->acl_type = ACLENT_T; + aclp->acl_entry_size = sizeof (aclent_t); + break; + default: + acl_free(aclp); + aclp = NULL; + } + return (aclp); +} + +/* + * Free acl_t structure + */ +void +acl_free(acl_t *aclp) +{ + int acl_size; + + if (aclp == NULL) + return; + + if (aclp->acl_aclp) { + acl_size = aclp->acl_cnt * aclp->acl_entry_size; + cacl_free(aclp->acl_aclp, acl_size); + } + + cacl_free(aclp, sizeof (acl_t)); +} + +static uint32_t +access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow) +{ + uint32_t access_mask = 0; + int acl_produce; + int synchronize_set = 0, write_owner_set = 0; + int delete_set = 0, write_attrs_set = 0; + int read_named_set = 0, write_named_set = 0; + + acl_produce = (ACL_SYNCHRONIZE_SET_ALLOW | + ACL_WRITE_ATTRS_OWNER_SET_ALLOW | + ACL_WRITE_ATTRS_WRITER_SET_DENY); + + if (isallow) { + synchronize_set = ACL_SYNCHRONIZE_SET_ALLOW; + write_owner_set = ACL_WRITE_OWNER_SET_ALLOW; + delete_set = ACL_DELETE_SET_ALLOW; + if (hasreadperm) + read_named_set = ACL_READ_NAMED_READER_SET_ALLOW; + if (haswriteperm) + write_named_set = ACL_WRITE_NAMED_WRITER_SET_ALLOW; + if (isowner) + write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; + else if 
(haswriteperm) + write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; + } else { + + synchronize_set = ACL_SYNCHRONIZE_SET_DENY; + write_owner_set = ACL_WRITE_OWNER_SET_DENY; + delete_set = ACL_DELETE_SET_DENY; + if (hasreadperm) + read_named_set = ACL_READ_NAMED_READER_SET_DENY; + if (haswriteperm) + write_named_set = ACL_WRITE_NAMED_WRITER_SET_DENY; + if (isowner) + write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_DENY; + else if (haswriteperm) + write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_DENY; + else + /* + * If the entity is not the owner and does not + * have write permissions ACE_WRITE_ATTRIBUTES will + * always go in the DENY ACE. + */ + access_mask |= ACE_WRITE_ATTRIBUTES; + } + + if (acl_produce & synchronize_set) + access_mask |= ACE_SYNCHRONIZE; + if (acl_produce & write_owner_set) + access_mask |= ACE_WRITE_OWNER; + if (acl_produce & delete_set) + access_mask |= ACE_DELETE; + if (acl_produce & write_attrs_set) + access_mask |= ACE_WRITE_ATTRIBUTES; + if (acl_produce & read_named_set) + access_mask |= ACE_READ_NAMED_ATTRS; + if (acl_produce & write_named_set) + access_mask |= ACE_WRITE_NAMED_ATTRS; + + return (access_mask); +} + +/* + * Given an mode_t, convert it into an access_mask as used + * by nfsace, assuming aclent_t -> nfsace semantics. 
+ */ +static uint32_t +mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow) +{ + uint32_t access = 0; + int haswriteperm = 0; + int hasreadperm = 0; + + if (isallow) { + haswriteperm = (mode & S_IWOTH); + hasreadperm = (mode & S_IROTH); + } else { + haswriteperm = !(mode & S_IWOTH); + hasreadperm = !(mode & S_IROTH); + } + + /* + * The following call takes care of correctly setting the following + * mask bits in the access_mask: + * ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE, + * ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS, ACE_READ_NAMED_ATTRS + */ + access = access_mask_set(haswriteperm, hasreadperm, isowner, isallow); + + if (isallow) { + access |= ACE_READ_ACL | ACE_READ_ATTRIBUTES; + if (isowner) + access |= ACE_WRITE_ACL; + } else { + if (! isowner) + access |= ACE_WRITE_ACL; + } + + /* read */ + if (mode & S_IROTH) { + access |= ACE_READ_DATA; + } + /* write */ + if (mode & S_IWOTH) { + access |= ACE_WRITE_DATA | + ACE_APPEND_DATA; + if (isdir) + access |= ACE_DELETE_CHILD; + } + /* exec */ + if (mode & 01) { + access |= ACE_EXECUTE; + } + + return (access); +} + +/* + * Given an nfsace (presumably an ALLOW entry), make a + * corresponding DENY entry at the address given. + */ +static void +ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner) +{ + (void) memcpy(deny, allow, sizeof (ace_t)); + + deny->a_who = allow->a_who; + + deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + deny->a_access_mask ^= ACE_POSIX_SUPPORTED_BITS; + if (isdir) + deny->a_access_mask ^= ACE_DELETE_CHILD; + + deny->a_access_mask &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER | + ACE_DELETE | ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS | + ACE_WRITE_NAMED_ATTRS); + deny->a_access_mask |= access_mask_set((allow->a_access_mask & + ACE_WRITE_DATA), (allow->a_access_mask & ACE_READ_DATA), isowner, + B_FALSE); +} +/* + * Make an initial pass over an array of aclent_t's. 
Gather + * information such as an ACL_MASK (if any), number of users, + * number of groups, and whether the array needs to be sorted. + */ +static int +ln_aent_preprocess(aclent_t *aclent, int n, + int *hasmask, mode_t *mask, + int *numuser, int *numgroup, int *needsort) +{ + int error = 0; + int i; + int curtype = 0; + + *hasmask = 0; + *mask = 07; + *needsort = 0; + *numuser = 0; + *numgroup = 0; + + for (i = 0; i < n; i++) { + if (aclent[i].a_type < curtype) + *needsort = 1; + else if (aclent[i].a_type > curtype) + curtype = aclent[i].a_type; + if (aclent[i].a_type & USER) + (*numuser)++; + if (aclent[i].a_type & (GROUP | GROUP_OBJ)) + (*numgroup)++; + if (aclent[i].a_type & CLASS_OBJ) { + if (*hasmask) { + error = EINVAL; + goto out; + } else { + *hasmask = 1; + *mask = aclent[i].a_perm; + } + } + } + + if ((! *hasmask) && (*numuser + *numgroup > 1)) { + error = EINVAL; + goto out; + } + +out: + return (error); +} + +/* + * Convert an array of aclent_t into an array of nfsace entries, + * following POSIX draft -> nfsv4 conversion semantics as outlined in + * the IETF draft. + */ +static int +ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir) +{ + int error = 0; + mode_t mask; + int numuser, numgroup, needsort; + int resultsize = 0; + int i, groupi = 0, skip; + ace_t *acep, *result = NULL; + int hasmask; + + error = ln_aent_preprocess(aclent, n, &hasmask, &mask, + &numuser, &numgroup, &needsort); + if (error != 0) + goto out; + + /* allow + deny for each aclent */ + resultsize = n * 2; + if (hasmask) { + /* + * stick extra deny on the group_obj and on each + * user|group for the mask (the group_obj was added + * into the count for numgroup) + */ + resultsize += numuser + numgroup; + /* ... 
and don't count the mask itself */ + resultsize -= 2; + } + + /* sort the source if necessary */ + if (needsort) + ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls); + + if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0) + goto out; + + acep = result; + + for (i = 0; i < n; i++) { + /* + * don't process CLASS_OBJ (mask); mask was grabbed in + * ln_aent_preprocess() + */ + if (aclent[i].a_type & CLASS_OBJ) + continue; + + /* If we need an ACL_MASK emulator, prepend it now */ + if ((hasmask) && + (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) { + acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + acep->a_flags = 0; + if (aclent[i].a_type & GROUP_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= + (ACE_IDENTIFIER_GROUP|ACE_GROUP); + } else if (aclent[i].a_type & USER) { + acep->a_who = aclent[i].a_id; + } else { + acep->a_who = aclent[i].a_id; + acep->a_flags |= ACE_IDENTIFIER_GROUP; + } + if (aclent[i].a_type & ACL_DEFAULT) { + acep->a_flags |= ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE; + } + /* + * Set the access mask for the prepended deny + * ace. To do this, we invert the mask (found + * in ln_aent_preprocess()) then convert it to an + * DENY ace access_mask. + */ + acep->a_access_mask = mode_to_ace_access((mask ^ 07), + isdir, 0, 0); + acep += 1; + } + + /* handle a_perm -> access_mask */ + acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm, + isdir, aclent[i].a_type & USER_OBJ, 1); + + /* emulate a default aclent */ + if (aclent[i].a_type & ACL_DEFAULT) { + acep->a_flags |= ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE; + } + + /* + * handle a_perm and a_id + * + * this must be done last, since it involves the + * corresponding deny aces, which are handled + * differently for each different a_type. 
+ */ + if (aclent[i].a_type & USER_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_OWNER; + ace_make_deny(acep, acep + 1, isdir, B_TRUE); + acep += 2; + } else if (aclent[i].a_type & USER) { + acep->a_who = aclent[i].a_id; + ace_make_deny(acep, acep + 1, isdir, B_FALSE); + acep += 2; + } else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) { + if (aclent[i].a_type & GROUP_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_GROUP; + } else { + acep->a_who = aclent[i].a_id; + } + acep->a_flags |= ACE_IDENTIFIER_GROUP; + /* + * Set the corresponding deny for the group ace. + * + * The deny aces go after all of the groups, unlike + * everything else, where they immediately follow + * the allow ace. + * + * We calculate "skip", the number of slots to + * skip ahead for the deny ace, here. + * + * The pattern is: + * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3 + * thus, skip is + * (2 * numgroup) - 1 - groupi + * (2 * numgroup) to account for MD + A + * - 1 to account for the fact that we're on the + * access (A), not the mask (MD) + * - groupi to account for the fact that we have + * passed up groupi number of MD's. + */ + skip = (2 * numgroup) - 1 - groupi; + ace_make_deny(acep, acep + skip, isdir, B_FALSE); + /* + * If we just did the last group, skip acep past + * all of the denies; else, just move ahead one. 
+ */ + if (++groupi >= numgroup) + acep += numgroup + 1; + else + acep += 1; + } else if (aclent[i].a_type & OTHER_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_EVERYONE; + ace_make_deny(acep, acep + 1, isdir, B_FALSE); + acep += 2; + } else { + error = EINVAL; + goto out; + } + } + + *acepp = result; + *rescount = resultsize; + +out: + if (error != 0) { + if ((result != NULL) && (resultsize > 0)) { + cacl_free(result, resultsize * sizeof (ace_t)); + } + } + + return (error); +} + +static int +convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir, + ace_t **retacep, int *retacecnt) +{ + ace_t *acep; + ace_t *dfacep; + int acecnt = 0; + int dfacecnt = 0; + int dfaclstart = 0; + int dfaclcnt = 0; + aclent_t *aclp; + int i; + int error; + int acesz, dfacesz; + + ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls); + + for (i = 0, aclp = aclentp; i < aclcnt; aclp++, i++) { + if (aclp->a_type & ACL_DEFAULT) + break; + } + + if (i < aclcnt) { + dfaclstart = i; + dfaclcnt = aclcnt - i; + } + + if (dfaclcnt && isdir == 0) { + return (EINVAL); + } + + error = ln_aent_to_ace(aclentp, i, &acep, &acecnt, isdir); + if (error) + return (error); + + if (dfaclcnt) { + error = ln_aent_to_ace(&aclentp[dfaclstart], dfaclcnt, + &dfacep, &dfacecnt, isdir); + if (error) { + if (acep) { + cacl_free(acep, acecnt * sizeof (ace_t)); + } + return (error); + } + } + + if (dfacecnt != 0) { + acesz = sizeof (ace_t) * acecnt; + dfacesz = sizeof (ace_t) * dfacecnt; + acep = cacl_realloc(acep, acesz, acesz + dfacesz); + if (acep == NULL) + return (ENOMEM); + if (dfaclcnt) { + (void) memcpy(acep + acecnt, dfacep, dfacesz); + } + } + if (dfaclcnt) + cacl_free(dfacep, dfacecnt * sizeof (ace_t)); + + *retacecnt = acecnt + dfacecnt; + *retacep = acep; + return (0); +} + +static int +ace_mask_to_mode(uint32_t mask, o_mode_t *modep, int isdir) +{ + int error = 0; + o_mode_t mode = 0; + uint32_t bits, wantbits; + + /* read */ + if (mask & ACE_READ_DATA) + mode |= S_IROTH; + + /* 
write */ + wantbits = (ACE_WRITE_DATA | ACE_APPEND_DATA); + if (isdir) + wantbits |= ACE_DELETE_CHILD; + bits = mask & wantbits; + if (bits != 0) { + if (bits != wantbits) { + error = ENOTSUP; + goto out; + } + mode |= S_IWOTH; + } + + /* exec */ + if (mask & ACE_EXECUTE) { + mode |= S_IXOTH; + } + + *modep = mode; + +out: + return (error); +} + +static void +acevals_init(acevals_t *vals, uid_t key) +{ + bzero(vals, sizeof (*vals)); + vals->allowed = ACE_MASK_UNDEFINED; + vals->denied = ACE_MASK_UNDEFINED; + vals->mask = ACE_MASK_UNDEFINED; + vals->key = key; +} + +static void +ace_list_init(ace_list_t *al, int dfacl_flag) +{ + acevals_init(&al->user_obj, NULL); + acevals_init(&al->group_obj, NULL); + acevals_init(&al->other_obj, NULL); + al->numusers = 0; + al->numgroups = 0; + al->acl_mask = 0; + al->hasmask = 0; + al->state = ace_unused; + al->seen = 0; + al->dfacl_flag = dfacl_flag; +} + +/* + * Find or create an acevals holder for a given id and avl tree. + * + * Note that only one thread will ever touch these avl trees, so + * there is no need for locking. + */ +static acevals_t * +acevals_find(ace_t *ace, avl_tree_t *avl, int *num) +{ + acevals_t key, *rc; + avl_index_t where; + + key.key = ace->a_who; + rc = avl_find(avl, &key, &where); + if (rc != NULL) + return (rc); + + /* this memory is freed by ln_ace_to_aent()->ace_list_free() */ + if (cacl_malloc((void **)&rc, sizeof (acevals_t)) != 0) + return (NULL); + + acevals_init(rc, ace->a_who); + avl_insert(avl, rc, where); + (*num)++; + + return (rc); +} + +static int +access_mask_check(ace_t *acep, int mask_bit, int isowner) +{ + int set_deny, err_deny; + int set_allow, err_allow; + int acl_consume; + int haswriteperm, hasreadperm; + + if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { + haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1; + hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1; + } else { + haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 
1 : 0; + hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0; + } + + acl_consume = (ACL_SYNCHRONIZE_ERR_DENY | + ACL_DELETE_ERR_DENY | + ACL_WRITE_OWNER_ERR_DENY | + ACL_WRITE_OWNER_ERR_ALLOW | + ACL_WRITE_ATTRS_OWNER_SET_ALLOW | + ACL_WRITE_ATTRS_OWNER_ERR_DENY | + ACL_WRITE_ATTRS_WRITER_SET_DENY | + ACL_WRITE_ATTRS_WRITER_ERR_ALLOW | + ACL_WRITE_NAMED_WRITER_ERR_DENY | + ACL_READ_NAMED_READER_ERR_DENY); + + if (mask_bit == ACE_SYNCHRONIZE) { + set_deny = ACL_SYNCHRONIZE_SET_DENY; + err_deny = ACL_SYNCHRONIZE_ERR_DENY; + set_allow = ACL_SYNCHRONIZE_SET_ALLOW; + err_allow = ACL_SYNCHRONIZE_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_OWNER) { + set_deny = ACL_WRITE_OWNER_SET_DENY; + err_deny = ACL_WRITE_OWNER_ERR_DENY; + set_allow = ACL_WRITE_OWNER_SET_ALLOW; + err_allow = ACL_WRITE_OWNER_ERR_ALLOW; + } else if (mask_bit == ACE_DELETE) { + set_deny = ACL_DELETE_SET_DENY; + err_deny = ACL_DELETE_ERR_DENY; + set_allow = ACL_DELETE_SET_ALLOW; + err_allow = ACL_DELETE_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_ATTRIBUTES) { + if (isowner) { + set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY; + err_deny = ACL_WRITE_ATTRS_OWNER_ERR_DENY; + set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; + err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW; + } else if (haswriteperm) { + set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY; + err_deny = ACL_WRITE_ATTRS_WRITER_ERR_DENY; + set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; + err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW; + } else { + if ((acep->a_access_mask & mask_bit) && + (acep->a_type & ACE_ACCESS_ALLOWED_ACE_TYPE)) { + return (ENOTSUP); + } + return (0); + } + } else if (mask_bit == ACE_READ_NAMED_ATTRS) { + if (!hasreadperm) + return (0); + + set_deny = ACL_READ_NAMED_READER_SET_DENY; + err_deny = ACL_READ_NAMED_READER_ERR_DENY; + set_allow = ACL_READ_NAMED_READER_SET_ALLOW; + err_allow = ACL_READ_NAMED_READER_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_NAMED_ATTRS) { + if (!haswriteperm) + return (0); + + set_deny = 
ACL_WRITE_NAMED_WRITER_SET_DENY; + err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY; + set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW; + err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW; + } else { + return (EINVAL); + } + + if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { + if (acl_consume & set_deny) { + if (!(acep->a_access_mask & mask_bit)) { + return (ENOTSUP); + } + } else if (acl_consume & err_deny) { + if (acep->a_access_mask & mask_bit) { + return (ENOTSUP); + } + } + } else { + /* ACE_ACCESS_ALLOWED_ACE_TYPE */ + if (acl_consume & set_allow) { + if (!(acep->a_access_mask & mask_bit)) { + return (ENOTSUP); + } + } else if (acl_consume & err_allow) { + if (acep->a_access_mask & mask_bit) { + return (ENOTSUP); + } + } + } + return (0); +} + +static int +ace_to_aent_legal(ace_t *acep) +{ + int error = 0; + int isowner; + + /* only ALLOW or DENY */ + if ((acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE) && + (acep->a_type != ACE_ACCESS_DENIED_ACE_TYPE)) { + error = ENOTSUP; + goto out; + } + + /* check for invalid flags */ + if (acep->a_flags & ~(ACE_VALID_FLAG_BITS)) { + error = EINVAL; + goto out; + } + + /* some flags are illegal */ + if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG | + ACE_FAILED_ACCESS_ACE_FLAG | + ACE_NO_PROPAGATE_INHERIT_ACE)) { + error = ENOTSUP; + goto out; + } + + /* check for invalid masks */ + if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS)) { + error = EINVAL; + goto out; + } + + if ((acep->a_flags & ACE_OWNER)) { + isowner = 1; + } else { + isowner = 0; + } + + error = access_mask_check(acep, ACE_SYNCHRONIZE, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_OWNER, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_DELETE, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_ATTRIBUTES, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_READ_NAMED_ATTRS, isowner); + if (error) + goto out; + + error = access_mask_check(acep, 
ACE_WRITE_NAMED_ATTRS, isowner); + if (error) + goto out; + + /* more detailed checking of masks */ + if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { + if (! (acep->a_access_mask & ACE_READ_ATTRIBUTES)) { + error = ENOTSUP; + goto out; + } + if ((acep->a_access_mask & ACE_WRITE_DATA) && + (! (acep->a_access_mask & ACE_APPEND_DATA))) { + error = ENOTSUP; + goto out; + } + if ((! (acep->a_access_mask & ACE_WRITE_DATA)) && + (acep->a_access_mask & ACE_APPEND_DATA)) { + error = ENOTSUP; + goto out; + } + } + + /* ACL enforcement */ + if ((acep->a_access_mask & ACE_READ_ACL) && + (acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE)) { + error = ENOTSUP; + goto out; + } + if (acep->a_access_mask & ACE_WRITE_ACL) { + if ((acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) && + (isowner)) { + error = ENOTSUP; + goto out; + } + if ((acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) && + (! isowner)) { + error = ENOTSUP; + goto out; + } + } + +out: + return (error); +} + +static int +ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir) +{ + /* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */ + if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) != + (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) { + return (ENOTSUP); + } + + return (ace_mask_to_mode(mask, modep, isdir)); +} + +static int +acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list, + uid_t owner, gid_t group, int isdir) +{ + int error; + uint32_t flips = ACE_POSIX_SUPPORTED_BITS; + + if (isdir) + flips |= ACE_DELETE_CHILD; + if (vals->allowed != (vals->denied ^ flips)) { + error = ENOTSUP; + goto out; + } + if ((list->hasmask) && (list->acl_mask != vals->mask) && + (vals->aent_type & (USER | GROUP | GROUP_OBJ))) { + error = ENOTSUP; + goto out; + } + error = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir); + if (error != 0) + goto out; + dest->a_type = vals->aent_type; + if (dest->a_type & (USER | GROUP)) { + dest->a_id = vals->key; + } else if (dest->a_type & USER_OBJ) { + dest->a_id = owner; + } else if 
(dest->a_type & GROUP_OBJ) { + dest->a_id = group; + } else if (dest->a_type & OTHER_OBJ) { + dest->a_id = 0; + } else { + error = EINVAL; + goto out; + } + +out: + return (error); +} + + +static int +ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt, + uid_t owner, gid_t group, int isdir) +{ + int error = 0; + aclent_t *aent, *result = NULL; + acevals_t *vals; + int resultcount; + + if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) != + (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) { + error = ENOTSUP; + goto out; + } + if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) { + error = ENOTSUP; + goto out; + } + + resultcount = 3 + list->numusers + list->numgroups; + /* + * This must be the same condition as below, when we add the CLASS_OBJ + * (aka ACL mask) + */ + if ((list->hasmask) || (! list->dfacl_flag)) + resultcount += 1; + + if (cacl_malloc((void **)&result, + resultcount * sizeof (aclent_t)) != 0) { + error = ENOMEM; + goto out; + } + aent = result; + + /* USER_OBJ */ + if (!(list->user_obj.aent_type & USER_OBJ)) { + error = EINVAL; + goto out; + } + + error = acevals_to_aent(&list->user_obj, aent, list, owner, group, + isdir); + + if (error != 0) + goto out; + ++aent; + /* USER */ + vals = NULL; + for (vals = avl_first(&list->user); vals != NULL; + vals = AVL_NEXT(&list->user, vals)) { + if (!(vals->aent_type & USER)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(vals, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + } + /* GROUP_OBJ */ + if (!(list->group_obj.aent_type & GROUP_OBJ)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(&list->group_obj, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + /* GROUP */ + vals = NULL; + for (vals = avl_first(&list->group); vals != NULL; + vals = AVL_NEXT(&list->group, vals)) { + if (!(vals->aent_type & GROUP)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(vals, aent, list, owner, group, + 
isdir); + if (error != 0) + goto out; + ++aent; + } + /* + * CLASS_OBJ (aka ACL_MASK) + * + * An ACL_MASK is not fabricated if the ACL is a default ACL. + * This is to follow UFS's behavior. + */ + if ((list->hasmask) || (! list->dfacl_flag)) { + if (list->hasmask) { + uint32_t flips = ACE_POSIX_SUPPORTED_BITS; + if (isdir) + flips |= ACE_DELETE_CHILD; + error = ace_mask_to_mode(list->acl_mask ^ flips, + &aent->a_perm, isdir); + if (error != 0) + goto out; + } else { + /* fabricate the ACL_MASK from the group permissions */ + error = ace_mask_to_mode(list->group_obj.allowed, + &aent->a_perm, isdir); + if (error != 0) + goto out; + } + aent->a_id = 0; + aent->a_type = CLASS_OBJ | list->dfacl_flag; + ++aent; + } + /* OTHER_OBJ */ + if (!(list->other_obj.aent_type & OTHER_OBJ)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(&list->other_obj, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + + *aclentp = result; + *aclcnt = resultcount; + +out: + if (error != 0) { + if (result != NULL) + cacl_free(result, resultcount * sizeof (aclent_t)); + } + + return (error); +} + + +/* + * free all data associated with an ace_list + */ +static void +ace_list_free(ace_list_t *al) +{ + acevals_t *node; + void *cookie; + + if (al == NULL) + return; + + cookie = NULL; + while ((node = avl_destroy_nodes(&al->user, &cookie)) != NULL) + cacl_free(node, sizeof (acevals_t)); + cookie = NULL; + while ((node = avl_destroy_nodes(&al->group, &cookie)) != NULL) + cacl_free(node, sizeof (acevals_t)); + + avl_destroy(&al->user); + avl_destroy(&al->group); + + /* free the container itself */ + cacl_free(al, sizeof (ace_list_t)); +} + +static int +acevals_compare(const void *va, const void *vb) +{ + const acevals_t *a = va, *b = vb; + + if (a->key == b->key) + return (0); + + if (a->key > b->key) + return (1); + + else + return (-1); +} + +/* + * Convert a list of ace_t entries to equivalent regular and default + * aclent_t lists. 
Return error (ENOTSUP) when conversion is not possible. + */ +static int +ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group, + aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt, + int isdir) +{ + int error = 0; + ace_t *acep; + uint32_t bits; + int i; + ace_list_t *normacl = NULL, *dfacl = NULL, *acl; + acevals_t *vals; + + *aclentp = NULL; + *aclcnt = 0; + *dfaclentp = NULL; + *dfaclcnt = 0; + + /* we need at least user_obj, group_obj, and other_obj */ + if (n < 6) { + error = ENOTSUP; + goto out; + } + if (ace == NULL) { + error = EINVAL; + goto out; + } + + error = cacl_malloc((void **)&normacl, sizeof (ace_list_t)); + if (error != 0) + goto out; + + avl_create(&normacl->user, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + avl_create(&normacl->group, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + + ace_list_init(normacl, 0); + + error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t)); + if (error != 0) + goto out; + + avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + ace_list_init(dfacl, ACL_DEFAULT); + + /* process every ace_t... */ + for (i = 0; i < n; i++) { + acep = &ace[i]; + + /* rule out certain cases quickly */ + error = ace_to_aent_legal(acep); + if (error != 0) + goto out; + + /* + * Turn off these bits in order to not have to worry about + * them when doing the checks for compliments. 
+ */ + acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE | + ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES | + ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS); + + /* see if this should be a regular or default acl */ + bits = acep->a_flags & + (ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE); + if (bits != 0) { + /* all or nothing on these inherit bits */ + if (bits != (ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE)) { + error = ENOTSUP; + goto out; + } + acl = dfacl; + } else { + acl = normacl; + } + + if ((acep->a_flags & ACE_OWNER)) { + if (acl->state > ace_user_obj) { + error = ENOTSUP; + goto out; + } + acl->state = ace_user_obj; + acl->seen |= USER_OBJ; + vals = &acl->user_obj; + vals->aent_type = USER_OBJ | acl->dfacl_flag; + } else if ((acep->a_flags & ACE_EVERYONE)) { + acl->state = ace_other_obj; + acl->seen |= OTHER_OBJ; + vals = &acl->other_obj; + vals->aent_type = OTHER_OBJ | acl->dfacl_flag; + } else if (acep->a_flags & ACE_IDENTIFIER_GROUP) { + if (acl->state > ace_group) { + error = ENOTSUP; + goto out; + } + if ((acep->a_flags & ACE_GROUP)) { + acl->seen |= GROUP_OBJ; + vals = &acl->group_obj; + vals->aent_type = GROUP_OBJ | acl->dfacl_flag; + } else { + acl->seen |= GROUP; + vals = acevals_find(acep, &acl->group, + &acl->numgroups); + if (vals == NULL) { + error = ENOMEM; + goto out; + } + vals->aent_type = GROUP | acl->dfacl_flag; + } + acl->state = ace_group; + } else { + if (acl->state > ace_user) { + error = ENOTSUP; + goto out; + } + acl->state = ace_user; + acl->seen |= USER; + vals = acevals_find(acep, &acl->user, + &acl->numusers); + if (vals == NULL) { + error = ENOMEM; + goto out; + } + vals->aent_type = USER | acl->dfacl_flag; + } + + if (!(acl->state > ace_unused)) { + error = EINVAL; + goto out; + } + + if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { + /* no more than one allowed per aclent_t */ + if (vals->allowed != ACE_MASK_UNDEFINED) { + error = ENOTSUP; + goto out; + } + 
vals->allowed = acep->a_access_mask; + } else { + /* + * it's a DENY; if there was a previous DENY, it + * must have been an ACL_MASK. + */ + if (vals->denied != ACE_MASK_UNDEFINED) { + /* ACL_MASK is for USER and GROUP only */ + if ((acl->state != ace_user) && + (acl->state != ace_group)) { + error = ENOTSUP; + goto out; + } + + if (! acl->hasmask) { + acl->hasmask = 1; + acl->acl_mask = vals->denied; + /* check for mismatched ACL_MASK emulations */ + } else if (acl->acl_mask != vals->denied) { + error = ENOTSUP; + goto out; + } + vals->mask = vals->denied; + } + vals->denied = acep->a_access_mask; + } + } + + /* done collating; produce the aclent_t lists */ + if (normacl->state != ace_unused) { + error = ace_list_to_aent(normacl, aclentp, aclcnt, + owner, group, isdir); + if (error != 0) { + goto out; + } + } + if (dfacl->state != ace_unused) { + error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt, + owner, group, isdir); + if (error != 0) { + goto out; + } + } + +out: + if (normacl != NULL) + ace_list_free(normacl); + if (dfacl != NULL) + ace_list_free(dfacl); + + return (error); +} + +static int +convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir, + uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt) +{ + int error = 0; + aclent_t *aclentp, *dfaclentp; + int aclcnt, dfaclcnt; + int aclsz, dfaclsz; + + error = ln_ace_to_aent(acebufp, acecnt, owner, group, + &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir); + + if (error) + return (error); + + + if (dfaclcnt != 0) { + /* + * Slap aclentp and dfaclentp into a single array. 
+ */ + aclsz = sizeof (aclent_t) * aclcnt; + dfaclsz = sizeof (aclent_t) * dfaclcnt; + aclentp = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz); + if (aclentp != NULL) { + (void) memcpy(aclentp + aclcnt, dfaclentp, dfaclsz); + } else { + error = ENOMEM; + } + } + + if (aclentp) { + *retaclentp = aclentp; + *retaclcnt = aclcnt + dfaclcnt; + } + + if (dfaclentp) + cacl_free(dfaclentp, dfaclsz); + + return (error); +} + + +int +acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner, + gid_t group) +{ + int aclcnt; + void *acldata; + int error; + + /* + * See if we need to translate + */ + if ((target_flavor == _ACL_ACE_ENABLED && aclp->acl_type == ACE_T) || + (target_flavor == _ACL_ACLENT_ENABLED && + aclp->acl_type == ACLENT_T)) + return (0); + + if (target_flavor == -1) { + error = EINVAL; + goto out; + } + + if (target_flavor == _ACL_ACE_ENABLED && + aclp->acl_type == ACLENT_T) { + error = convert_aent_to_ace(aclp->acl_aclp, + aclp->acl_cnt, isdir, (ace_t **)&acldata, &aclcnt); + if (error) + goto out; + + } else if (target_flavor == _ACL_ACLENT_ENABLED && + aclp->acl_type == ACE_T) { + error = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt, + isdir, owner, group, (aclent_t **)&acldata, &aclcnt); + if (error) + goto out; + } else { + error = ENOTSUP; + goto out; + } + + /* + * replace old acl with newly translated acl + */ + cacl_free(aclp->acl_aclp, aclp->acl_cnt * aclp->acl_entry_size); + aclp->acl_aclp = acldata; + aclp->acl_cnt = aclcnt; + if (target_flavor == _ACL_ACE_ENABLED) { + aclp->acl_type = ACE_T; + aclp->acl_entry_size = sizeof (ace_t); + } else { + aclp->acl_type = ACLENT_T; + aclp->acl_entry_size = sizeof (aclent_t); + } + return (0); + +out: + +#if !defined(_KERNEL) + errno = error; + return (-1); +#else + return (error); +#endif +} + +#define SET_ACE(acl, index, who, mask, type, flags) { \ + acl[0][index].a_who = (uint32_t)who; \ + acl[0][index].a_type = type; \ + acl[0][index].a_flags = flags; \ + acl[0][index++].a_access_mask = 
mask; \ +} + +void +acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone) +{ + *deny1 = *deny2 = *allow0 = *group = 0; + + if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) + *deny1 |= ACE_READ_DATA; + if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) + *deny1 |= ACE_WRITE_DATA; + if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) + *deny1 |= ACE_EXECUTE; + + if (!(mode & S_IRGRP) && (mode & S_IROTH)) + *deny2 = ACE_READ_DATA; + if (!(mode & S_IWGRP) && (mode & S_IWOTH)) + *deny2 |= ACE_WRITE_DATA; + if (!(mode & S_IXGRP) && (mode & S_IXOTH)) + *deny2 |= ACE_EXECUTE; + + if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) + *allow0 |= ACE_READ_DATA; + if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) + *allow0 |= ACE_WRITE_DATA; + if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) + *allow0 |= ACE_EXECUTE; + + *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; + if (mode & S_IRUSR) + *owner |= ACE_READ_DATA; + if (mode & S_IWUSR) + *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXUSR) + *owner |= ACE_EXECUTE; + + *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IRGRP) + *group |= ACE_READ_DATA; + if (mode & S_IWGRP) + *group |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXGRP) + *group |= ACE_EXECUTE; + + *everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IROTH) + *everyone |= ACE_READ_DATA; + if (mode & S_IWOTH) + *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXOTH) + *everyone |= ACE_EXECUTE; +} + +int +acl_trivial_create(mode_t mode, ace_t **acl, int *count) +{ + uint32_t deny1, deny2; + uint32_t allow0; + uint32_t owner, group, everyone; + int index = 0; + int error; + + *count = 3; + 
acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group, + &everyone); + + if (allow0) + (*count)++; + if (deny1) + (*count)++; + if (deny2) + (*count)++; + + if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0) + return (error); + + if (allow0) { + SET_ACE(acl, index, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_OWNER); + } + if (deny1) { + SET_ACE(acl, index, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE, + ACE_OWNER); + } + if (deny2) { + SET_ACE(acl, index, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE, + ACE_GROUP|ACE_IDENTIFIER_GROUP); + } + + SET_ACE(acl, index, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER); + SET_ACE(acl, index, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_IDENTIFIER_GROUP|ACE_GROUP); + SET_ACE(acl, index, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_EVERYONE); + + return (0); +} + +/* + * ace_trivial: + * determine whether an ace_t acl is trivial + * + * Trivialness implies that the acl is composed of only + * owner, group, everyone entries. ACL can't + * have read_acl denied, and write_owner/write_acl/write_attributes + * can only be owner@ entry. + */ +int +ace_trivial_common(void *acep, int aclcnt, + uint64_t (*walk)(void *, uint64_t, int aclcnt, + uint16_t *, uint16_t *, uint32_t *)) +{ + uint16_t flags; + uint32_t mask; + uint16_t type; + uint64_t cookie = 0; + + while (cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask)) { + switch (flags & ACE_TYPE_FLAGS) { + case ACE_OWNER: + case ACE_GROUP|ACE_IDENTIFIER_GROUP: + case ACE_EVERYONE: + break; + default: + return (1); + + } + + if (flags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| + ACE_INHERIT_ONLY_ACE)) + return (1); + + /* + * Special check for some special bits + * + * Don't allow anybody to deny reading basic + * attributes or a files ACL. 
+ */ + if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + (type == ACE_ACCESS_DENIED_ACE_TYPE)) + return (1); + + /* + * Delete permissions are never set by default + */ + if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) + return (1); + /* + * only allow owner@ to have + * write_acl/write_owner/write_attributes/write_xattr/ + */ + if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && + (!(flags & ACE_OWNER) && (mask & + (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| + ACE_WRITE_NAMED_ATTRS)))) + return (1); + + } + return (0); +} + +uint64_t +ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags, + uint16_t *type, uint32_t *mask) +{ + ace_t *acep = datap; + + if (cookie >= aclcnt) + return (0); + + *flags = acep[cookie].a_flags; + *type = acep[cookie].a_type; + *mask = acep[cookie++].a_access_mask; + + return (cookie); +} + +int +ace_trivial(ace_t *acep, int aclcnt) +{ + return (ace_trivial_common(acep, aclcnt, ace_walk)); +} diff --git a/common/acl/acl_common.h b/common/acl/acl_common.h new file mode 100644 index 000000000000..f76cbd3b450f --- /dev/null +++ b/common/acl/acl_common.h @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ACL_COMMON_H +#define _ACL_COMMON_H + +#include <sys/types.h> +#include <sys/acl.h> +#include <sys/stat.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern ace_t trivial_acl[6]; + +extern int acltrivial(const char *); +extern void adjust_ace_pair(ace_t *pair, mode_t mode); +extern void adjust_ace_pair_common(void *, size_t, size_t, mode_t); +extern int ace_trivial(ace_t *acep, int aclcnt); +extern int ace_trivial_common(void *, int, + uint64_t (*walk)(void *, uint64_t, int aclcnt, uint16_t *, uint16_t *, + uint32_t *mask)); +extern acl_t *acl_alloc(acl_type_t); +extern void acl_free(acl_t *aclp); +extern int acl_translate(acl_t *aclp, int target_flavor, + int isdir, uid_t owner, gid_t group); +void ksort(caddr_t v, int n, int s, int (*f)()); +int cmp2acls(void *a, void *b); +int acl_trivial_create(mode_t mode, ace_t **acl, int *count); +void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone); + +#ifdef __cplusplus +} +#endif + +#endif /* _ACL_COMMON_H */ diff --git a/common/atomic/amd64/atomic.s b/common/atomic/amd64/atomic.s new file mode 100644 index 000000000000..4b0d66e4db20 --- /dev/null +++ b/common/atomic/amd64/atomic.s @@ -0,0 +1,573 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + + .file "atomic.s" + +#include <sys/asm_linkage.h> + +#if defined(_KERNEL) + /* + * Legacy kernel interfaces; they will go away (eventually). + */ + ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function) + ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function) + ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function) + ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function) + ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function) + ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function) + ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function) +#endif + + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_uchar) + lock + incb (%rdi) + ret + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8) + + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_ushort) + lock + incw (%rdi) + ret + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16) + + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_uint) + lock + incl (%rdi) + ret + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32) + + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_ulong) + lock + incq (%rdi) + ret + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_64) + + ENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar_nv) + xorl %eax, %eax / clear upper bits of %eax return register + incb %al / %al = 1 + lock + xaddb %al, (%rdi) / %al = old value, (%rdi) = new value + incb %al / return new value + ret + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_8_nv) + + ENTRY(atomic_inc_16_nv) + 
ALTENTRY(atomic_inc_ushort_nv) + xorl %eax, %eax / clear upper bits of %eax return register + incw %ax / %ax = 1 + lock + xaddw %ax, (%rdi) / %ax = old value, (%rdi) = new value + incw %ax / return new value + ret + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_16_nv) + + ENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint_nv) + xorl %eax, %eax / %eax = 0 + incl %eax / %eax = 1 + lock + xaddl %eax, (%rdi) / %eax = old value, (%rdi) = new value + incl %eax / return new value + ret + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_32_nv) + + ENTRY(atomic_inc_64_nv) + ALTENTRY(atomic_inc_ulong_nv) + xorq %rax, %rax / %rax = 0 + incq %rax / %rax = 1 + lock + xaddq %rax, (%rdi) / %rax = old value, (%rdi) = new value + incq %rax / return new value + ret + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_64_nv) + + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_uchar) + lock + decb (%rdi) + ret + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8) + + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_ushort) + lock + decw (%rdi) + ret + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16) + + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_uint) + lock + decl (%rdi) + ret + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32) + + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_ulong) + lock + decq (%rdi) + ret + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_64) + + ENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar_nv) + xorl %eax, %eax / clear upper bits of %eax return register + decb %al / %al = -1 + lock + xaddb %al, (%rdi) / %al = old value, (%rdi) = new value + decb %al / return new value + ret + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_8_nv) + + ENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort_nv) + xorl %eax, %eax / clear upper bits of %eax return register + decw %ax / %ax = -1 + lock + xaddw %ax, (%rdi) / %ax = old value, (%rdi) = new value + decw %ax / return new value + ret + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_16_nv) + + 
	/*
	 * amd64 atomic routines: atomic_dec_{32,64}_nv, atomic_add_{8..64},
	 * atomic_or_{8..64}, atomic_and_{8,16}.
	 *
	 * Every routine here operates on the memory addressed by %rdi; the
	 * add/or/and routines take their operand from the %rsi register at
	 * the matching width (%sil, %si, %esi, %rsi).  The _nv ("new value")
	 * variants leave the post-operation value in %eax/%rax; the others
	 * set no return register.  Each read-modify-write instruction carries
	 * a lock prefix.
	 *
	 * NOTE(review): ENTRY/ALTENTRY/SET_SIZE come from <sys/asm_linkage.h>,
	 * which is not visible here; ALTENTRY presumably publishes an
	 * additional name for the same code -- confirm.
	 */
	ENTRY(atomic_dec_32_nv)
	ALTENTRY(atomic_dec_uint_nv)
	xorl	%eax, %eax	/ %eax = 0
	decl	%eax		/ %eax = -1
	lock
	xaddl	%eax, (%rdi)	/ %eax = old value, (%rdi) = new value
	decl	%eax		/ return new value
	ret
	SET_SIZE(atomic_dec_uint_nv)
	SET_SIZE(atomic_dec_32_nv)

	ENTRY(atomic_dec_64_nv)
	ALTENTRY(atomic_dec_ulong_nv)
	xorq	%rax, %rax	/ %rax = 0
	decq	%rax		/ %rax = -1
	lock
	xaddq	%rax, (%rdi)	/ %rax = old value, (%rdi) = new value
	decq	%rax		/ return new value
	ret
	SET_SIZE(atomic_dec_ulong_nv)
	SET_SIZE(atomic_dec_64_nv)

	ENTRY(atomic_add_8)
	ALTENTRY(atomic_add_char)
	lock
	addb	%sil, (%rdi)
	ret
	SET_SIZE(atomic_add_char)
	SET_SIZE(atomic_add_8)

	ENTRY(atomic_add_16)
	ALTENTRY(atomic_add_short)
	lock
	addw	%si, (%rdi)
	ret
	SET_SIZE(atomic_add_short)
	SET_SIZE(atomic_add_16)

	ENTRY(atomic_add_32)
	ALTENTRY(atomic_add_int)
	lock
	addl	%esi, (%rdi)
	ret
	SET_SIZE(atomic_add_int)
	SET_SIZE(atomic_add_32)

	ENTRY(atomic_add_64)
	ALTENTRY(atomic_add_ptr)
	ALTENTRY(atomic_add_long)
	lock
	addq	%rsi, (%rdi)
	ret
	SET_SIZE(atomic_add_long)
	SET_SIZE(atomic_add_ptr)
	SET_SIZE(atomic_add_64)

	ENTRY(atomic_or_8)
	ALTENTRY(atomic_or_uchar)
	lock
	orb	%sil, (%rdi)
	ret
	SET_SIZE(atomic_or_uchar)
	SET_SIZE(atomic_or_8)

	ENTRY(atomic_or_16)
	ALTENTRY(atomic_or_ushort)
	lock
	orw	%si, (%rdi)
	ret
	SET_SIZE(atomic_or_ushort)
	SET_SIZE(atomic_or_16)

	ENTRY(atomic_or_32)
	ALTENTRY(atomic_or_uint)
	lock
	orl	%esi, (%rdi)
	ret
	SET_SIZE(atomic_or_uint)
	SET_SIZE(atomic_or_32)

	ENTRY(atomic_or_64)
	ALTENTRY(atomic_or_ulong)
	lock
	orq	%rsi, (%rdi)
	ret
	SET_SIZE(atomic_or_ulong)
	SET_SIZE(atomic_or_64)

	ENTRY(atomic_and_8)
	ALTENTRY(atomic_and_uchar)
	lock
	andb	%sil, (%rdi)
	ret
	SET_SIZE(atomic_and_uchar)
	SET_SIZE(atomic_and_8)

	ENTRY(atomic_and_16)
	ALTENTRY(atomic_and_ushort)
	lock
	andw	%si, (%rdi)
	ret
	SET_SIZE(atomic_and_ushort)
	SET_SIZE(atomic_and_16)

ENTRY(atomic_and_32) + ALTENTRY(atomic_and_uint) + lock + andl %esi, (%rdi) + ret + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32) + + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_ulong) + lock + andq %rsi, (%rdi) + ret + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char_nv) + movzbl %sil, %eax / %al = delta addend, clear upper bits + lock + xaddb %sil, (%rdi) / %sil = old value, (%rdi) = sum + addb %sil, %al / new value = original value + delta + ret + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_8_nv) + + ENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short_nv) + movzwl %si, %eax / %ax = delta addend, clean upper bits + lock + xaddw %si, (%rdi) / %si = old value, (%rdi) = sum + addw %si, %ax / new value = original value + delta + ret + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_16_nv) + + ENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int_nv) + mov %esi, %eax / %eax = delta addend + lock + xaddl %esi, (%rdi) / %esi = old value, (%rdi) = sum + add %esi, %eax / new value = original value + delta + ret + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_32_nv) + + ENTRY(atomic_add_64_nv) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long_nv) + mov %rsi, %rax / %rax = delta addend + lock + xaddq %rsi, (%rdi) / %rsi = old value, (%rdi) = sum + addq %rsi, %rax / new value = original value + delta + ret + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_64_nv) + + ENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar_nv) + movb (%rdi), %al / %al = old value +1: + movb %sil, %cl + andb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%rdi) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort_nv) + movw (%rdi), %ax / %ax = old value +1: + movw %si, %cx + andw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%rdi) / try to 
stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_16_nv) + + ENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + andl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_32_nv) + + ENTRY(atomic_and_64_nv) + ALTENTRY(atomic_and_ulong_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + andq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_64_nv) + + ENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar_nv) + movb (%rdi), %al / %al = old value +1: + movb %sil, %cl + orb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%rdi) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_8_nv) + + ENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort_nv) + movw (%rdi), %ax / %ax = old value +1: + movw %si, %cx + orw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%rdi) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_16_nv) + + ENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + orl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_32_nv) + + ENTRY(atomic_or_64_nv) + ALTENTRY(atomic_or_ulong_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + orq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_64_nv) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + movzbl %sil, %eax + lock + cmpxchgb %dl, (%rdi) + ret + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + movzwl %si, %eax + lock + cmpxchgw %dx, (%rdi) + ret + 
SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + movl %esi, %eax + lock + cmpxchgl %edx, (%rdi) + ret + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + ALTENTRY(atomic_cas_ulong) + ALTENTRY(atomic_cas_ptr) + movq %rsi, %rax + lock + cmpxchgq %rdx, (%rdi) + ret + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + movzbl %sil, %eax + lock + xchgb %al, (%rdi) + ret + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + movzwl %si, %eax + lock + xchgw %ax, (%rdi) + ret + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + movl %esi, %eax + lock + xchgl %eax, (%rdi) + ret + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + ALTENTRY(atomic_swap_ulong) + ALTENTRY(atomic_swap_ptr) + movq %rsi, %rax + lock + xchgq %rax, (%rdi) + ret + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + xorl %eax, %eax + lock + btsq %rsi, (%rdi) + jnc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + xorl %eax, %eax + lock + btrq %rsi, (%rdi) + jc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_clear_long_excl) + +#if !defined(_KERNEL) + + /* + * NOTE: membar_enter, and membar_exit are identical routines. + * We define them separately, instead of using an ALTENTRY + * definitions to alias them together, so that DTrace and + * debuggers will see a unique address for them, allowing + * more accurate tracing. 
+ */ + + ENTRY(membar_enter) + mfence + ret + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + mfence + ret + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + sfence + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lfence + ret + SET_SIZE(membar_consumer) + +#endif /* !_KERNEL */ diff --git a/common/atomic/i386/atomic.s b/common/atomic/i386/atomic.s new file mode 100644 index 000000000000..4fa525ba20af --- /dev/null +++ b/common/atomic/i386/atomic.s @@ -0,0 +1,720 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "atomic.s" + +#include <sys/asm_linkage.h> + +#if defined(_KERNEL) + /* + * Legacy kernel interfaces; they will go away (eventually). 
+ */ + ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function) + ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function) + ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function) + ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function) + ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function) + ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function) + ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function) +#endif + + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_uchar) + movl 4(%esp), %eax + lock + incb (%eax) + ret + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8) + + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_ushort) + movl 4(%esp), %eax + lock + incw (%eax) + ret + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16) + + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_uint) + ALTENTRY(atomic_inc_ulong) + movl 4(%esp), %eax + lock + incl (%eax) + ret + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32) + + ENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / clear upper bits of %eax + incb %al / %al = 1 + lock + xaddb %al, (%edx) / %al = old value, inc (%edx) + incb %al / return new value + ret + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_8_nv) + + ENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / clear upper bits of %eax + incw %ax / %ax = 1 + lock + xaddw %ax, (%edx) / %ax = old value, inc (%edx) + incw %ax / return new value + ret + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_16_nv) + + ENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint_nv) + ALTENTRY(atomic_inc_ulong_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / %eax = 0 + incl %eax / %eax = 1 + lock + xaddl %eax, (%edx) / %eax = old value, inc (%edx) + incl %eax / return new value + ret + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_32_nv) + + /* + * NOTE: If atomic_inc_64 and atomic_inc_64_nv 
are ever + * separated, you need to also edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_64_nv. + */ + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + xorl %ebx, %ebx + xorl %ecx, %ecx + incl %ebx / %ecx:%ebx = 1 + addl %eax, %ebx + adcl %edx, %ecx / add in the carry from inc + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_inc_64_nv) + SET_SIZE(atomic_inc_64) + + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_uchar) + movl 4(%esp), %eax + lock + decb (%eax) + ret + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8) + + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_ushort) + movl 4(%esp), %eax + lock + decw (%eax) + ret + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16) + + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_uint) + ALTENTRY(atomic_dec_ulong) + movl 4(%esp), %eax + lock + decl (%eax) + ret + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32) + + ENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / zero upper bits of %eax + decb %al / %al = -1 + lock + xaddb %al, (%edx) / %al = old value, dec (%edx) + decb %al / return new value + ret + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_8_nv) + + ENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / zero upper bits of %eax + decw %ax / %ax = -1 + lock + xaddw %ax, (%edx) / %ax = old value, dec (%edx) + decw %ax / return new value + ret + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_16_nv) + + ENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint_nv) + ALTENTRY(atomic_dec_ulong_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, 
%eax / %eax = 0 + decl %eax / %eax = -1 + lock + xaddl %eax, (%edx) / %eax = old value, dec (%edx) + decl %eax / return new value + ret + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_32_nv) + + /* + * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_64_nv. + */ + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + xorl %ebx, %ebx + xorl %ecx, %ecx + not %ecx + not %ebx / %ecx:%ebx = -1 + addl %eax, %ebx + adcl %edx, %ecx / add in the carry from inc + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_dec_64_nv) + SET_SIZE(atomic_dec_64) + + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_char) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addb %cl, (%eax) + ret + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8) + + ENTRY(atomic_add_16) + ALTENTRY(atomic_add_short) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addw %cx, (%eax) + ret + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16) + + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_int) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_long) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addl %ecx, (%eax) + ret + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32) + + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_uchar) + movl 4(%esp), %eax + movb 8(%esp), %cl + lock + orb %cl, (%eax) + ret + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8) + + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_ushort) + movl 4(%esp), %eax + movw 8(%esp), %cx + lock + orw %cx, (%eax) + ret + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16) + + ENTRY(atomic_or_32) 
+ ALTENTRY(atomic_or_uint) + ALTENTRY(atomic_or_ulong) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + orl %ecx, (%eax) + ret + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32) + + ENTRY(atomic_and_8) + ALTENTRY(atomic_and_uchar) + movl 4(%esp), %eax + movb 8(%esp), %cl + lock + andb %cl, (%eax) + ret + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8) + + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_ushort) + movl 4(%esp), %eax + movw 8(%esp), %cx + lock + andw %cx, (%eax) + ret + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16) + + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_uint) + ALTENTRY(atomic_and_ulong) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + andl %ecx, (%eax) + ret + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32) + + ENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char_nv) + movl 4(%esp), %edx / %edx = target address + movb 8(%esp), %cl / %cl = delta + movzbl %cl, %eax / %al = delta, zero extended + lock + xaddb %cl, (%edx) / %cl = old value, (%edx) = sum + addb %cl, %al / return old value plus delta + ret + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_8_nv) + + ENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short_nv) + movl 4(%esp), %edx / %edx = target address + movw 8(%esp), %cx / %cx = delta + movzwl %cx, %eax / %ax = delta, zero extended + lock + xaddw %cx, (%edx) / %cx = old value, (%edx) = sum + addw %cx, %ax / return old value plus delta + ret + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_16_nv) + + ENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int_nv) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long_nv) + movl 4(%esp), %edx / %edx = target address + movl 8(%esp), %eax / %eax = delta + movl %eax, %ecx / %ecx = delta + lock + xaddl %eax, (%edx) / %eax = old value, (%edx) = sum + addl %ecx, %eax / return old value plus delta + ret + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_32_nv) + + 
/* + * NOTE: If atomic_add_64 and atomic_add_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_64_nv. + */ + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx / %ecx:%ebx = delta + addl %eax, %ebx + adcl %edx, %ecx / %ecx:%ebx = new value + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_add_64_nv) + SET_SIZE(atomic_add_64) + + ENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar_nv) + movl 4(%esp), %edx / %edx = target address + movb (%edx), %al / %al = old value +1: + movl 8(%esp), %ecx / %ecx = delta + orb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%edx) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_8_nv) + + ENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort_nv) + movl 4(%esp), %edx / %edx = target address + movw (%edx), %ax / %ax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + orw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%edx) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_16_nv) + + ENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint_nv) + ALTENTRY(atomic_or_ulong_nv) + movl 4(%esp), %edx / %edx = target address + movl (%edx), %eax / %eax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + orl %eax, %ecx / %ecx = new value + lock + cmpxchgl %ecx, (%edx) / try to stick it in + jne 1b + movl %ecx, %eax / return new value + ret + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_32_nv) + + /* + * NOTE: If atomic_or_64 and atomic_or_64_nv are ever + * separated, it is 
important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_64_nv. + */ + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx / %ecx:%ebx = delta + orl %eax, %ebx + orl %edx, %ecx / %ecx:%ebx = new value + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_or_64_nv) + SET_SIZE(atomic_or_64) + + ENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar_nv) + movl 4(%esp), %edx / %edx = target address + movb (%edx), %al / %al = old value +1: + movl 8(%esp), %ecx / %ecx = delta + andb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%edx) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort_nv) + movl 4(%esp), %edx / %edx = target address + movw (%edx), %ax / %ax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + andw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%edx) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_16_nv) + + ENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint_nv) + ALTENTRY(atomic_and_ulong_nv) + movl 4(%esp), %edx / %edx = target address + movl (%edx), %eax / %eax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + andl %eax, %ecx / %ecx = new value + lock + cmpxchgl %ecx, (%edx) / try to stick it in + jne 1b + movl %ecx, %eax / return new value + ret + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_32_nv) + + /* + * NOTE: If atomic_and_64 and atomic_and_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and 
remove the NODYNSORT attribute + * from atomic_and_64_nv. + */ + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx / %ecx:%ebx = delta + andl %eax, %ebx + andl %edx, %ecx / %ecx:%ebx = new value + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_and_64_nv) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + movl 4(%esp), %edx + movzbl 8(%esp), %eax + movb 12(%esp), %cl + lock + cmpxchgb %cl, (%edx) + ret + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + movl 4(%esp), %edx + movzwl 8(%esp), %eax + movw 12(%esp), %cx + lock + cmpxchgw %cx, (%edx) + ret + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + ALTENTRY(atomic_cas_ulong) + ALTENTRY(atomic_cas_ptr) + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + lock + cmpxchgl %ecx, (%edx) + ret + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + pushl %ebx + pushl %esi + movl 12(%esp), %esi + movl 16(%esp), %eax + movl 20(%esp), %edx + movl 24(%esp), %ebx + movl 28(%esp), %ecx + lock + cmpxchg8b (%esi) + popl %esi + popl %ebx + ret + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + movl 4(%esp), %edx + movzbl 8(%esp), %eax + lock + xchgb %al, (%edx) + ret + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + movl 4(%esp), %edx + movzwl 8(%esp), %eax + lock + xchgw %ax, (%edx) + ret + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + 
ALTENTRY(atomic_swap_ptr) + ALTENTRY(atomic_swap_ulong) + movl 4(%esp), %edx + movl 8(%esp), %eax + lock + xchgl %eax, (%edx) + ret + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + pushl %esi + pushl %ebx + movl 12(%esp), %esi + movl 16(%esp), %ebx + movl 20(%esp), %ecx + movl (%esi), %eax + movl 4(%esi), %edx / %edx:%eax = old value +1: + lock + cmpxchg8b (%esi) + jne 1b + popl %ebx + popl %esi + ret + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + movl 4(%esp), %edx / %edx = target address + movl 8(%esp), %ecx / %ecx = bit id + xorl %eax, %eax + lock + btsl %ecx, (%edx) + jnc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + movl 4(%esp), %edx / %edx = target address + movl 8(%esp), %ecx / %ecx = bit id + xorl %eax, %eax + lock + btrl %ecx, (%edx) + jc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_clear_long_excl) + +#if !defined(_KERNEL) + + /* + * NOTE: membar_enter, membar_exit, membar_producer, and + * membar_consumer are all identical routines. We define them + * separately, instead of using ALTENTRY definitions to alias them + * together, so that DTrace and debuggers will see a unique address + * for them, allowing more accurate tracing. 
+ */ + + + ENTRY(membar_enter) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_consumer) + +#endif /* !_KERNEL */ diff --git a/common/atomic/sparc/atomic.s b/common/atomic/sparc/atomic.s new file mode 100644 index 000000000000..8aa240efa297 --- /dev/null +++ b/common/atomic/sparc/atomic.s @@ -0,0 +1,801 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "atomic.s" + +#include <sys/asm_linkage.h> + +#if defined(_KERNEL) + /* + * Legacy kernel interfaces; they will go away (eventually). 
+ */ + ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function) + ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function) + ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function) + ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function) + ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function) + ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function) + ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function) + ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function) +#endif + + /* + * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_8_nv. + */ + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar) + ALTENTRY(atomic_inc_uchar_nv) + ba add_8 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8_nv) + SET_SIZE(atomic_inc_8) + + /* + * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_8_nv. + */ + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar) + ALTENTRY(atomic_dec_uchar_nv) + ba add_8 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8_nv) + SET_SIZE(atomic_dec_8) + + /* + * NOTE: If atomic_add_8 and atomic_add_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_8_nv. + */ + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char) + ALTENTRY(atomic_add_char_nv) +add_8: + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! 
%o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + add %o2, %o1, %o5 ! add value to the old value + and %o5, %o3, %o5 ! clear other bits + andn %o2, %o3, %o4 ! clear target bits + or %o4, %o5, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + add %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8_nv) + SET_SIZE(atomic_add_8) + + /* + * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_16_nv. + */ + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort) + ALTENTRY(atomic_inc_ushort_nv) + ba add_16 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16_nv) + SET_SIZE(atomic_inc_16) + + /* + * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_16_nv. + */ + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort) + ALTENTRY(atomic_dec_ushort_nv) + ba add_16 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16_nv) + SET_SIZE(atomic_dec_16) + + /* + * NOTE: If atomic_add_16 and atomic_add_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_16_nv. + */ + ENTRY(atomic_add_16) + ALTENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short) + ALTENTRY(atomic_add_short_nv) +add_16: + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! 
%g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + add %o1, %o2, %o5 ! add value to the old value + and %o5, %o3, %o5 ! clear other bits + andn %o2, %o3, %o4 ! clear target bits + or %o4, %o5, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + add %o1, %o2, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16_nv) + SET_SIZE(atomic_add_16) + + /* + * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_32_nv. + */ + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint) + ALTENTRY(atomic_inc_uint_nv) + ALTENTRY(atomic_inc_ulong) + ALTENTRY(atomic_inc_ulong_nv) + ba add_32 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32_nv) + SET_SIZE(atomic_inc_32) + + /* + * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_32_nv. 
+ */ + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint) + ALTENTRY(atomic_dec_uint_nv) + ALTENTRY(atomic_dec_ulong) + ALTENTRY(atomic_dec_ulong_nv) + ba add_32 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32_nv) + SET_SIZE(atomic_dec_32) + + /* + * NOTE: If atomic_add_32 and atomic_add_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_32_nv. + */ + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int) + ALTENTRY(atomic_add_int_nv) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long) + ALTENTRY(atomic_add_long_nv) +add_32: + ld [%o0], %o2 +1: + add %o2, %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + add %o2, %o1, %o0 ! return new value + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32_nv) + SET_SIZE(atomic_add_32) + + /* + * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_64_nv. + */ + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_64_nv) + ba add_64 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_64_nv) + SET_SIZE(atomic_inc_64) + + /* + * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_64_nv. 
+ */ + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_64_nv) + ba add_64 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_64_nv) + SET_SIZE(atomic_dec_64) + + /* + * NOTE: If atomic_add_64 and atomic_add_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_64_nv. + */ + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_64_nv) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit +add_64: + ldx [%o0], %o2 +1: + add %o2, %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + add %o2, %o1, %o1 ! return lower 32-bits in %o1 + retl + srlx %o1, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_add_64_nv) + SET_SIZE(atomic_add_64) + + /* + * NOTE: If atomic_or_8 and atomic_or_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_8_nv. + */ + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar) + ALTENTRY(atomic_or_uchar_nv) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + or %o2, %o1, %o5 ! or in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + or %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! 
%o0 = new value + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8_nv) + SET_SIZE(atomic_or_8) + + /* + * NOTE: If atomic_or_16 and atomic_or_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_16_nv. + */ + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort) + ALTENTRY(atomic_or_ushort_nv) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + or %o2, %o1, %o5 ! or in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + or %o2, %o1, %o5 ! or in the new value + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16_nv) + SET_SIZE(atomic_or_16) + + /* + * NOTE: If atomic_or_32 and atomic_or_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_32_nv. + */ + ENTRY(atomic_or_32) + ALTENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint) + ALTENTRY(atomic_or_uint_nv) + ALTENTRY(atomic_or_ulong) + ALTENTRY(atomic_or_ulong_nv) + ld [%o0], %o2 +1: + or %o2, %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + or %o2, %o1, %o0 ! 
return new value + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32_nv) + SET_SIZE(atomic_or_32) + + /* + * NOTE: If atomic_or_64 and atomic_or_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_64_nv. + */ + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_64_nv) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit + ldx [%o0], %o2 +1: + or %o2, %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + or %o2, %o1, %o1 ! return lower 32-bits in %o1 + retl + srlx %o1, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_or_64_nv) + SET_SIZE(atomic_or_64) + + /* + * NOTE: If atomic_and_8 and atomic_and_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_8_nv. + */ + ENTRY(atomic_and_8) + ALTENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar) + ALTENTRY(atomic_and_uchar_nv) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + orn %o1, %o3, %o1 ! all ones in other bytes + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + and %o2, %o1, %o5 ! and in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! 
%o0 = new value + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8_nv) + SET_SIZE(atomic_and_8) + + /* + * NOTE: If atomic_and_16 and atomic_and_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_16_nv. + */ + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort) + ALTENTRY(atomic_and_ushort_nv) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + orn %o1, %o3, %o1 ! all ones in the other half + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + and %o2, %o1, %o5 ! and in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16_nv) + SET_SIZE(atomic_and_16) + + /* + * NOTE: If atomic_and_32 and atomic_and_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_32_nv. + */ + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint) + ALTENTRY(atomic_and_uint_nv) + ALTENTRY(atomic_and_ulong) + ALTENTRY(atomic_and_ulong_nv) + ld [%o0], %o2 +1: + and %o2, %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + and %o2, %o1, %o0 ! 
return new value + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32_nv) + SET_SIZE(atomic_and_32) + + /* + * NOTE: If atomic_and_64 and atomic_and_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_64_nv. + */ + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_64_nv) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit + ldx [%o0], %o2 +1: + and %o2, %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + and %o2, %o1, %o1 ! return lower 32-bits in %o1 + retl + srlx %o1, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_and_64_nv) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + sll %o2, %g1, %o2 ! %o2 = shifted to bit offset + and %o2, %o3, %o2 ! %o2 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o4 ! read old value +1: + andn %o4, %o3, %o4 ! clear target bits + or %o4, %o2, %o5 ! insert the new value + or %o4, %o1, %o4 ! insert the comparison value + cas [%o0], %o4, %o5 + cmp %o4, %o5 ! did we succeed? + be,pt %icc, 2f + and %o5, %o3, %o4 ! isolate the old value + cmp %o1, %o4 ! should we have succeeded? + be,a,pt %icc, 1b ! yes, try again + mov %o5, %o4 ! %o4 = old value +2: + retl + srl %o4, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + and %o0, 0x2, %o4 ! 
%o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + sll %o2, %g1, %o2 ! %o2 = shifted to bit offset + and %o2, %o3, %o2 ! %o2 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o4 ! read old value +1: + andn %o4, %o3, %o4 ! clear target bits + or %o4, %o2, %o5 ! insert the new value + or %o4, %o1, %o4 ! insert the comparison value + cas [%o0], %o4, %o5 + cmp %o4, %o5 ! did we succeed? + be,pt %icc, 2f + and %o5, %o3, %o4 ! isolate the old value + cmp %o1, %o4 ! should we have succeeded? + be,a,pt %icc, 1b ! yes, try again + mov %o5, %o4 ! %o4 = old value +2: + retl + srl %o4, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + ALTENTRY(atomic_cas_ptr) + ALTENTRY(atomic_cas_ulong) + cas [%o0], %o1, %o2 + retl + mov %o2, %o0 + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + sllx %o1, 32, %o1 ! cmp's upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 ! convert 2 32-bit args into 1 64-bit + add %o1, %o2, %o1 + sllx %o3, 32, %o2 ! newval upper 32 in %o3, lower in %o4 + srl %o4, 0, %o4 ! setup %o2 to have newval + add %o2, %o4, %o2 + casx [%o0], %o1, %o2 + srl %o2, 0, %o1 ! return lower 32-bits in %o1 + retl + srlx %o2, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! 
%g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + andn %o2, %o3, %o5 ! clear target bits + or %o5, %o1, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + andn %o2, %o3, %o5 ! clear target bits + or %o5, %o1, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + ALTENTRY(atomic_swap_ptr) + ALTENTRY(atomic_swap_ulong) + ld [%o0], %o2 +1: + mov %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + mov %o3, %o0 + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! 
convert 2 32-bit args into 1 64-bit + ldx [%o0], %o2 +1: + mov %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + srl %o3, 0, %o1 ! return lower 32-bits in %o1 + retl + srlx %o3, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + mov 1, %o3 + slln %o3, %o1, %o3 + ldn [%o0], %o2 +1: + andcc %o2, %o3, %g0 ! test if the bit is set + bnz,a,pn %ncc, 2f ! if so, then fail out + mov -1, %o0 + or %o2, %o3, %o4 ! set the bit, and try to commit it + casn [%o0], %o2, %o4 + cmp %o2, %o4 + bne,a,pn %ncc, 1b ! failed to commit, try again + mov %o4, %o2 + mov %g0, %o0 +2: + retl + nop + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + mov 1, %o3 + slln %o3, %o1, %o3 + ldn [%o0], %o2 +1: + andncc %o3, %o2, %g0 ! test if the bit is clear + bnz,a,pn %ncc, 2f ! if so, then fail out + mov -1, %o0 + andn %o2, %o3, %o4 ! clear the bit, and try to commit it + casn [%o0], %o2, %o4 + cmp %o2, %o4 + bne,a,pn %ncc, 1b ! failed to commit, try again + mov %o4, %o2 + mov %g0, %o0 +2: + retl + nop + SET_SIZE(atomic_clear_long_excl) + +#if !defined(_KERNEL) + + /* + * Spitfires and Blackbirds have a problem with membars in the + * delay slot (SF_ERRATA_51). For safety's sake, we assume + * that the whole world needs the workaround. 
	 */

	/*
	 * membar_enter: issue MEMBAR #StoreLoad|#StoreStore, i.e. stores
	 * issued before the call are ordered ahead of loads and stores
	 * issued after it.  As in every routine below, the membar is placed
	 * ahead of retl and a nop fills the delay slot, so the membar itself
	 * never executes in a delay slot.
	 */
	ENTRY(membar_enter)
	membar	#StoreLoad|#StoreStore
	retl
	nop
	SET_SIZE(membar_enter)

	/*
	 * membar_exit: issue MEMBAR #LoadStore|#StoreStore, i.e. loads and
	 * stores issued before the call are ordered ahead of stores issued
	 * after it.
	 */
	ENTRY(membar_exit)
	membar	#LoadStore|#StoreStore
	retl
	nop
	SET_SIZE(membar_exit)

	/*
	 * membar_producer: issue MEMBAR #StoreStore (store-to-store
	 * ordering only).
	 */
	ENTRY(membar_producer)
	membar	#StoreStore
	retl
	nop
	SET_SIZE(membar_producer)

	/*
	 * membar_consumer: issue MEMBAR #LoadLoad (load-to-load ordering
	 * only).
	 */
	ENTRY(membar_consumer)
	membar	#LoadLoad
	retl
	nop
	SET_SIZE(membar_consumer)

#endif	/* !_KERNEL */
diff --git a/common/list/list.c b/common/list/list.c new file mode 100644 index 000000000000..94f7782a87d2 --- /dev/null +++ b/common/list/list.c @@ -0,0 +1,251 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */ + +/* + * Generic doubly-linked list implementation + */ + +#include <sys/list.h> +#include <sys/list_impl.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#ifdef _KERNEL +#include <sys/debug.h> +#else +#include <assert.h> +#define ASSERT(a) assert(a) +#endif + +#ifdef lint +extern list_node_t *list_d2l(list_t *list, void *obj); +#else +#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) +#endif +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head) + +#define list_insert_after_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_prev = (node); \ + lnew->list_next = (node)->list_next; \ + (node)->list_next->list_prev = lnew; \ + (node)->list_next = lnew; \ +} + +#define list_insert_before_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_next = (node); \ + lnew->list_prev = (node)->list_prev; \ + (node)->list_prev->list_next = lnew; \ + (node)->list_prev = lnew; \ +} + +#define list_remove_node(node) \ + (node)->list_prev->list_next = (node)->list_next; \ + (node)->list_next->list_prev = (node)->list_prev; \ + (node)->list_next = (node)->list_prev = NULL + +void +list_create(list_t *list, size_t size, size_t offset) +{ + ASSERT(list); + ASSERT(size > 0); + ASSERT(size >= offset + sizeof (list_node_t)); + + list->list_size = size; + list->list_offset = offset; + list->list_head.list_next = list->list_head.list_prev = + &list->list_head; +} + +void +list_destroy(list_t *list) +{ + list_node_t *node = &list->list_head; + + ASSERT(list); + ASSERT(list->list_head.list_next == node); + ASSERT(list->list_head.list_prev == node); + + node->list_next = node->list_prev = NULL; +} + +void +list_insert_after(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_head(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + 
list_insert_after_node(list, lold, nobject); + } +} + +void +list_insert_before(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_tail(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_before_node(list, lold, nobject); + } +} + +void +list_insert_head(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_after_node(list, lold, object); +} + +void +list_insert_tail(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_before_node(list, lold, object); +} + +void +list_remove(list_t *list, void *object) +{ + list_node_t *lold = list_d2l(list, object); + ASSERT(!list_empty(list)); + ASSERT(lold->list_next != NULL); + list_remove_node(lold); +} + +void * +list_remove_head(list_t *list) +{ + list_node_t *head = list->list_head.list_next; + if (head == &list->list_head) + return (NULL); + list_remove_node(head); + return (list_object(list, head)); +} + +void * +list_remove_tail(list_t *list) +{ + list_node_t *tail = list->list_head.list_prev; + if (tail == &list->list_head) + return (NULL); + list_remove_node(tail); + return (list_object(list, tail)); +} + +void * +list_head(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_next)); +} + +void * +list_tail(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_prev)); +} + +void * +list_next(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_next != &list->list_head) + return (list_object(list, node->list_next)); + + return (NULL); +} + +void * +list_prev(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_prev != &list->list_head) + return (list_object(list, node->list_prev)); + + return (NULL); +} + +/* + * Insert src list after dst list. Empty src list thereafter. 
+ */ +void +list_move_tail(list_t *dst, list_t *src) +{ + list_node_t *dstnode = &dst->list_head; + list_node_t *srcnode = &src->list_head; + + ASSERT(dst->list_size == src->list_size); + ASSERT(dst->list_offset == src->list_offset); + + if (list_empty(src)) + return; + + dstnode->list_prev->list_next = srcnode->list_next; + srcnode->list_next->list_prev = dstnode->list_prev; + dstnode->list_prev = srcnode->list_prev; + srcnode->list_prev->list_next = dstnode; + + /* empty src list */ + srcnode->list_next = srcnode->list_prev = srcnode; +} + +void +list_link_replace(list_node_t *lold, list_node_t *lnew) +{ + ASSERT(list_link_active(lold)); + ASSERT(!list_link_active(lnew)); + + lnew->list_next = lold->list_next; + lnew->list_prev = lold->list_prev; + lold->list_prev->list_next = lnew; + lold->list_next->list_prev = lnew; + lold->list_next = lold->list_prev = NULL; +} + +void +list_link_init(list_node_t *link) +{ + link->list_next = NULL; + link->list_prev = NULL; +} + +int +list_link_active(list_node_t *link) +{ + return (link->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return (list_empty(list)); +} diff --git a/common/nvpair/nvpair.c b/common/nvpair/nvpair.c new file mode 100644 index 000000000000..00d44263ccda --- /dev/null +++ b/common/nvpair/nvpair.c @@ -0,0 +1,3297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/stropts.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
#include <sys/int_limits.h>
#include <sys/nvpair.h>
#include <sys/nvpair_impl.h>
#include <rpc/types.h>
#include <rpc/xdr.h>

#if defined(_KERNEL) && !defined(_BOOT)
#include <sys/varargs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#else
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#endif

/* Fallback for environments whose headers do not supply offsetof(). */
#ifndef	offsetof
#define	offsetof(s, m)		((size_t)(&(((s *)0)->m)))
#endif
/*
 * Advance p past any run of spaces and tabs.
 * NOTE(review): this expands to a bare `while' statement and applies `++'
 * to the unparenthesized argument, so it is only safe when used as a full
 * statement with a simple lvalue.
 */
#define	skip_whitespace(p)	while ((*(p) == ' ') || (*(p) == '\t')) p++

/*
 * nvpair.c - Provides kernel & userland interfaces for manipulating
 *	name-value pairs.
 *
 * Overview Diagram
 *
 *  +--------------+
 *  |  nvlist_t    |
 *  |--------------|
 *  | nvl_version  |
 *  | nvl_nvflag   |
 *  | nvl_priv    -+-+
 *  | nvl_flag     | |
 *  | nvl_pad      | |
 *  +--------------+ |
 *                   V
 *      +--------------+      last i_nvp in list
 *      | nvpriv_t     |  +--------------------->
 *      |--------------|  |
 *   +--+- nvp_list    |  |   +------------+
 *   |  |  nvp_last   -+--+   + nv_alloc_t |
 *   |  |  nvp_curr    |      |------------|
 *   |  |  nvp_nva    -+----> | nva_ops    |
 *   |  |  nvp_stat    |      | nva_arg    |
 *   |  +--------------+      +------------+
 *   |
 *   +-------+
 *           V
 *   +---------------------+      +-------------------+
 *   |  i_nvp_t            |  +-->|  i_nvp_t          |  +-->
 *   |---------------------|  |   |-------------------|  |
 *   | nvi_next           -+--+   | nvi_next         -+--+
 *   | nvi_prev (NULL)     | <----+ nvi_prev          |
 *   | . . . . . . . . . . |      | . . . . . . . . . |
 *   | nvp (nvpair_t)      |      | nvp (nvpair_t)    |
 *   |  - nvp_size         |      |  - nvp_size       |
 *   |  - nvp_name_sz      |      |  - nvp_name_sz    |
 *   |  - nvp_value_elem   |      |  - nvp_value_elem |
 *   |  - nvp_type         |      |  - nvp_type       |
 *   |  - data ...         |      |  - data ...       |
 *   +---------------------+      +-------------------+
 *
 *
 *
 *   +---------------------+              +---------------------+
 *   |  i_nvp_t            |  +-->    +-->|  i_nvp_t (last)     |
 *   |---------------------|  |       |   |---------------------|
 *   |  nvi_next          -+--+ ... --+   | nvi_next (NULL)     |
 * <-+- nvi_prev           |<-- ...  <----+ nvi_prev            |
 *   | . . . . . . . . .   |              | . . . . . . . . .   |
 *   | nvp (nvpair_t)      |              | nvp (nvpair_t)      |
 *   |  - nvp_size         |              |  - nvp_size         |
 *   |  - nvp_name_sz      |              |  - nvp_name_sz      |
 *   |  - nvp_value_elem   |              |  - nvp_value_elem   |
 *   |  - DATA_TYPE_NVLIST |              |  - nvp_type         |
 *   |  - data (embedded)  |              |  - data ...         |
 *   |    nvlist name      |              +---------------------+
 *   |  +--------------+   |
 *   |  |  nvlist_t    |   |
 *   |  |--------------|   |
 *   |  | nvl_version  |   |
 *   |  | nvl_nvflag   |   |
 *   |  | nvl_priv   --+---+---->
 *   |  | nvl_flag     |   |
 *   |  | nvl_pad      |   |
 *   |  +--------------+   |
 *   +---------------------+
 *
 *
 * N.B.
nvpair_t may be aligned on 4 byte boundary, so +4 will + * allow value to be aligned on 8 byte boundary + * + * name_len is the length of the name string including the null terminator + * so it must be >= 1 + */ +#define NVP_SIZE_CALC(name_len, data_len) \ + (NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len)) + +static int i_get_value_size(data_type_t type, const void *data, uint_t nelem); +static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, + uint_t nelem, const void *data); + +#define NV_STAT_EMBEDDED 0x1 +#define EMBEDDED_NVL(nvp) ((nvlist_t *)(void *)NVP_VALUE(nvp)) +#define EMBEDDED_NVL_ARRAY(nvp) ((nvlist_t **)(void *)NVP_VALUE(nvp)) + +#define NVP_VALOFF(nvp) (NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz)) +#define NVPAIR2I_NVP(nvp) \ + ((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp))) + + +int +nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...) +{ + va_list valist; + int err = 0; + + nva->nva_ops = nvo; + nva->nva_arg = NULL; + + va_start(valist, nvo); + if (nva->nva_ops->nv_ao_init != NULL) + err = nva->nva_ops->nv_ao_init(nva, valist); + va_end(valist); + + return (err); +} + +void +nv_alloc_reset(nv_alloc_t *nva) +{ + if (nva->nva_ops->nv_ao_reset != NULL) + nva->nva_ops->nv_ao_reset(nva); +} + +void +nv_alloc_fini(nv_alloc_t *nva) +{ + if (nva->nva_ops->nv_ao_fini != NULL) + nva->nva_ops->nv_ao_fini(nva); +} + +nv_alloc_t * +nvlist_lookup_nv_alloc(nvlist_t *nvl) +{ + nvpriv_t *priv; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (NULL); + + return (priv->nvp_nva); +} + +static void * +nv_mem_zalloc(nvpriv_t *nvp, size_t size) +{ + nv_alloc_t *nva = nvp->nvp_nva; + void *buf; + + if ((buf = nva->nva_ops->nv_ao_alloc(nva, size)) != NULL) + bzero(buf, size); + + return (buf); +} + +static void +nv_mem_free(nvpriv_t *nvp, void *buf, size_t size) +{ + nv_alloc_t *nva = nvp->nvp_nva; + + nva->nva_ops->nv_ao_free(nva, buf, size); +} + +static 
void +nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat) +{ + bzero(priv, sizeof (nvpriv_t)); + + priv->nvp_nva = nva; + priv->nvp_stat = stat; +} + +static nvpriv_t * +nv_priv_alloc(nv_alloc_t *nva) +{ + nvpriv_t *priv; + + /* + * nv_mem_alloc() cannot called here because it needs the priv + * argument. + */ + if ((priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t))) == NULL) + return (NULL); + + nv_priv_init(priv, nva, 0); + + return (priv); +} + +/* + * Embedded lists need their own nvpriv_t's. We create a new + * nvpriv_t using the parameters and allocator from the parent + * list's nvpriv_t. + */ +static nvpriv_t * +nv_priv_alloc_embedded(nvpriv_t *priv) +{ + nvpriv_t *emb_priv; + + if ((emb_priv = nv_mem_zalloc(priv, sizeof (nvpriv_t))) == NULL) + return (NULL); + + nv_priv_init(emb_priv, priv->nvp_nva, NV_STAT_EMBEDDED); + + return (emb_priv); +} + +static void +nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv) +{ + nvl->nvl_version = NV_VERSION; + nvl->nvl_nvflag = nvflag & (NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE); + nvl->nvl_priv = (uint64_t)(uintptr_t)priv; + nvl->nvl_flag = 0; + nvl->nvl_pad = 0; +} + +uint_t +nvlist_nvflag(nvlist_t *nvl) +{ + return (nvl->nvl_nvflag); +} + +/* + * nvlist_alloc - Allocate nvlist. + */ +/*ARGSUSED1*/ +int +nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xalloc(nvlp, nvflag, + (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xalloc(nvlp, nvflag, nv_alloc_nosleep)); +#endif +} + +int +nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva) +{ + nvpriv_t *priv; + + if (nvlp == NULL || nva == NULL) + return (EINVAL); + + if ((priv = nv_priv_alloc(nva)) == NULL) + return (ENOMEM); + + if ((*nvlp = nv_mem_zalloc(priv, + NV_ALIGN(sizeof (nvlist_t)))) == NULL) { + nv_mem_free(priv, priv, sizeof (nvpriv_t)); + return (ENOMEM); + } + + nvlist_init(*nvlp, nvflag, priv); + + return (0); +} + +/* + * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair. + */ +static nvpair_t * +nvp_buf_alloc(nvlist_t *nvl, size_t len) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *buf; + nvpair_t *nvp; + size_t nvsize; + + /* + * Allocate the buffer + */ + nvsize = len + offsetof(i_nvp_t, nvi_nvp); + + if ((buf = nv_mem_zalloc(priv, nvsize)) == NULL) + return (NULL); + + nvp = &buf->nvi_nvp; + nvp->nvp_size = len; + + return (nvp); +} + +/* + * nvp_buf_free - de-Allocate an i_nvp_t. + */ +static void +nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + size_t nvsize = nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp); + + nv_mem_free(priv, NVPAIR2I_NVP(nvp), nvsize); +} + +/* + * nvp_buf_link - link a new nv pair into the nvlist. + */ +static void +nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr = NVPAIR2I_NVP(nvp); + + /* Put element at end of nvlist */ + if (priv->nvp_list == NULL) { + priv->nvp_list = priv->nvp_last = curr; + } else { + curr->nvi_prev = priv->nvp_last; + priv->nvp_last->nvi_next = curr; + priv->nvp_last = curr; + } +} + +/* + * nvp_buf_unlink - unlink an removed nvpair out of the nvlist. 
+ */ +static void +nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr = NVPAIR2I_NVP(nvp); + + /* + * protect nvlist_next_nvpair() against walking on freed memory. + */ + if (priv->nvp_curr == curr) + priv->nvp_curr = curr->nvi_next; + + if (curr == priv->nvp_list) + priv->nvp_list = curr->nvi_next; + else + curr->nvi_prev->nvi_next = curr->nvi_next; + + if (curr == priv->nvp_last) + priv->nvp_last = curr->nvi_prev; + else + curr->nvi_next->nvi_prev = curr->nvi_prev; +} + +/* + * take a nvpair type and number of elements and make sure the are valid + */ +static int +i_validate_type_nelem(data_type_t type, uint_t nelem) +{ + switch (type) { + case DATA_TYPE_BOOLEAN: + if (nelem != 0) + return (EINVAL); + break; + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_STRING: + case DATA_TYPE_HRTIME: + case DATA_TYPE_NVLIST: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + if (nelem != 1) + return (EINVAL); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_NVLIST_ARRAY: + /* we allow arrays with 0 elements */ + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * Verify nvp_name_sz and check the name string length. 
+ */ +static int +i_validate_nvpair_name(nvpair_t *nvp) +{ + if ((nvp->nvp_name_sz <= 0) || + (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0))) + return (EFAULT); + + /* verify the name string, make sure its terminated */ + if (NVP_NAME(nvp)[nvp->nvp_name_sz - 1] != '\0') + return (EFAULT); + + return (strlen(NVP_NAME(nvp)) == nvp->nvp_name_sz - 1 ? 0 : EFAULT); +} + +static int +i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data) +{ + switch (type) { + case DATA_TYPE_BOOLEAN_VALUE: + if (*(boolean_t *)data != B_TRUE && + *(boolean_t *)data != B_FALSE) + return (EINVAL); + break; + case DATA_TYPE_BOOLEAN_ARRAY: { + int i; + + for (i = 0; i < nelem; i++) + if (((boolean_t *)data)[i] != B_TRUE && + ((boolean_t *)data)[i] != B_FALSE) + return (EINVAL); + break; + } + default: + break; + } + + return (0); +} + +/* + * This function takes a pointer to what should be a nvpair and it's size + * and then verifies that all the nvpair fields make sense and can be + * trusted. This function is used when decoding packed nvpairs. + */ +static int +i_validate_nvpair(nvpair_t *nvp) +{ + data_type_t type = NVP_TYPE(nvp); + int size1, size2; + + /* verify nvp_name_sz, check the name string length */ + if (i_validate_nvpair_name(nvp) != 0) + return (EFAULT); + + if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0) + return (EFAULT); + + /* + * verify nvp_type, nvp_value_elem, and also possibly + * verify string values and get the value size. 
+ */ + size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp)); + size1 = nvp->nvp_size - NVP_VALOFF(nvp); + if (size2 < 0 || size1 != NV_ALIGN(size2)) + return (EFAULT); + + return (0); +} + +static int +nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if ((priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv) == NULL) + return (EINVAL); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + nvpair_t *nvp = &curr->nvi_nvp; + int err; + + if ((err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp), + NVP_NELEM(nvp), NVP_VALUE(nvp))) != 0) + return (err); + } + + return (0); +} + +/* + * Frees all memory allocated for an nvpair (like embedded lists) with + * the exception of the nvpair buffer itself. + */ +static void +nvpair_free(nvpair_t *nvp) +{ + switch (NVP_TYPE(nvp)) { + case DATA_TYPE_NVLIST: + nvlist_free(EMBEDDED_NVL(nvp)); + break; + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); + int i; + + for (i = 0; i < NVP_NELEM(nvp); i++) + if (nvlp[i] != NULL) + nvlist_free(nvlp[i]); + break; + } + default: + break; + } +} + +/* + * nvlist_free - free an unpacked nvlist + */ +void +nvlist_free(nvlist_t *nvl) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return; + + /* + * Unpacked nvlist are linked through i_nvp_t + */ + curr = priv->nvp_list; + while (curr != NULL) { + nvpair_t *nvp = &curr->nvi_nvp; + curr = curr->nvi_next; + + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + } + + if (!(priv->nvp_stat & NV_STAT_EMBEDDED)) + nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t))); + else + nvl->nvl_priv = 0; + + nv_mem_free(priv, priv, sizeof (nvpriv_t)); +} + +static int +nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr; + + if (nvp == NULL) + return (0); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) + if 
(&curr->nvi_nvp == nvp) + return (1); + + return (0); +} + +/* + * Make a copy of nvlist + */ +/*ARGSUSED1*/ +int +nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xdup(nvl, nvlp, + (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xdup(nvl, nvlp, nv_alloc_nosleep)); +#endif +} + +int +nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva) +{ + int err; + nvlist_t *ret; + + if (nvl == NULL || nvlp == NULL) + return (EINVAL); + + if ((err = nvlist_xalloc(&ret, nvl->nvl_nvflag, nva)) != 0) + return (err); + + if ((err = nvlist_copy_pairs(nvl, ret)) != 0) + nvlist_free(ret); + else + *nvlp = ret; + + return (err); +} + +/* + * Remove all with matching name + */ +int +nvlist_remove_all(nvlist_t *nvl, const char *name) +{ + nvpriv_t *priv; + i_nvp_t *curr; + int error = ENOENT; + + if (nvl == NULL || name == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + curr = priv->nvp_list; + while (curr != NULL) { + nvpair_t *nvp = &curr->nvi_nvp; + + curr = curr->nvi_next; + if (strcmp(name, NVP_NAME(nvp)) != 0) + continue; + + nvp_buf_unlink(nvl, nvp); + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + + error = 0; + } + + return (error); +} + +/* + * Remove first one with matching name and type + */ +int +nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || name == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + curr = priv->nvp_list; + while (curr != NULL) { + nvpair_t *nvp = &curr->nvi_nvp; + + if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) { + nvp_buf_unlink(nvl, nvp); + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + + return (0); + } + curr = curr->nvi_next; + } + + return (ENOENT); +} + +int +nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + if (nvl == NULL || nvp == NULL) + return (EINVAL); + + 
nvp_buf_unlink(nvl, nvp); + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + return (0); +} + +/* + * This function calculates the size of an nvpair value. + * + * The data argument controls the behavior in case of the data types + * DATA_TYPE_STRING and + * DATA_TYPE_STRING_ARRAY + * Is data == NULL then the size of the string(s) is excluded. + */ +static int +i_get_value_size(data_type_t type, const void *data, uint_t nelem) +{ + uint64_t value_sz; + + if (i_validate_type_nelem(type, nelem) != 0) + return (-1); + + /* Calculate required size for holding value */ + switch (type) { + case DATA_TYPE_BOOLEAN: + value_sz = 0; + break; + case DATA_TYPE_BOOLEAN_VALUE: + value_sz = sizeof (boolean_t); + break; + case DATA_TYPE_BYTE: + value_sz = sizeof (uchar_t); + break; + case DATA_TYPE_INT8: + value_sz = sizeof (int8_t); + break; + case DATA_TYPE_UINT8: + value_sz = sizeof (uint8_t); + break; + case DATA_TYPE_INT16: + value_sz = sizeof (int16_t); + break; + case DATA_TYPE_UINT16: + value_sz = sizeof (uint16_t); + break; + case DATA_TYPE_INT32: + value_sz = sizeof (int32_t); + break; + case DATA_TYPE_UINT32: + value_sz = sizeof (uint32_t); + break; + case DATA_TYPE_INT64: + value_sz = sizeof (int64_t); + break; + case DATA_TYPE_UINT64: + value_sz = sizeof (uint64_t); + break; +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: + value_sz = sizeof (double); + break; +#endif + case DATA_TYPE_STRING: + if (data == NULL) + value_sz = 0; + else + value_sz = strlen(data) + 1; + break; + case DATA_TYPE_BOOLEAN_ARRAY: + value_sz = (uint64_t)nelem * sizeof (boolean_t); + break; + case DATA_TYPE_BYTE_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uchar_t); + break; + case DATA_TYPE_INT8_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int8_t); + break; + case DATA_TYPE_UINT8_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint8_t); + break; + case DATA_TYPE_INT16_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int16_t); + break; + case DATA_TYPE_UINT16_ARRAY: + value_sz = (uint64_t)nelem * 
sizeof (uint16_t); + break; + case DATA_TYPE_INT32_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int32_t); + break; + case DATA_TYPE_UINT32_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint32_t); + break; + case DATA_TYPE_INT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int64_t); + break; + case DATA_TYPE_UINT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + break; + case DATA_TYPE_STRING_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + + if (data != NULL) { + char *const *strs = data; + uint_t i; + + /* no alignment requirement for strings */ + for (i = 0; i < nelem; i++) { + if (strs[i] == NULL) + return (-1); + value_sz += strlen(strs[i]) + 1; + } + } + break; + case DATA_TYPE_HRTIME: + value_sz = sizeof (hrtime_t); + break; + case DATA_TYPE_NVLIST: + value_sz = NV_ALIGN(sizeof (nvlist_t)); + break; + case DATA_TYPE_NVLIST_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t) + + (uint64_t)nelem * NV_ALIGN(sizeof (nvlist_t)); + break; + default: + return (-1); + } + + return (value_sz > INT32_MAX ? -1 : (int)value_sz); +} + +static int +nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl) +{ + nvpriv_t *priv; + int err; + + if ((priv = nv_priv_alloc_embedded((nvpriv_t *)(uintptr_t) + nvl->nvl_priv)) == NULL) + return (ENOMEM); + + nvlist_init(emb_nvl, onvl->nvl_nvflag, priv); + + if ((err = nvlist_copy_pairs(onvl, emb_nvl)) != 0) { + nvlist_free(emb_nvl); + emb_nvl->nvl_priv = 0; + } + + return (err); +} + +/* + * nvlist_add_common - Add new <name,value> pair to nvlist + */ +static int +nvlist_add_common(nvlist_t *nvl, const char *name, + data_type_t type, uint_t nelem, const void *data) +{ + nvpair_t *nvp; + uint_t i; + + int nvp_sz, name_sz, value_sz; + int err = 0; + + if (name == NULL || nvl == NULL || nvl->nvl_priv == 0) + return (EINVAL); + + if (nelem != 0 && data == NULL) + return (EINVAL); + + /* + * Verify type and nelem and get the value size. 
+ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY + * is the size of the string(s) included. + */ + if ((value_sz = i_get_value_size(type, data, nelem)) < 0) + return (EINVAL); + + if (i_validate_nvpair_value(type, nelem, data) != 0) + return (EINVAL); + + /* + * If we're adding an nvlist or nvlist array, ensure that we are not + * adding the input nvlist to itself, which would cause recursion, + * and ensure that no NULL nvlist pointers are present. + */ + switch (type) { + case DATA_TYPE_NVLIST: + if (data == nvl || data == NULL) + return (EINVAL); + break; + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **onvlp = (nvlist_t **)data; + for (i = 0; i < nelem; i++) { + if (onvlp[i] == nvl || onvlp[i] == NULL) + return (EINVAL); + } + break; + } + default: + break; + } + + /* calculate sizes of the nvpair elements and the nvpair itself */ + name_sz = strlen(name) + 1; + + nvp_sz = NVP_SIZE_CALC(name_sz, value_sz); + + if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL) + return (ENOMEM); + + ASSERT(nvp->nvp_size == nvp_sz); + nvp->nvp_name_sz = name_sz; + nvp->nvp_value_elem = nelem; + nvp->nvp_type = type; + bcopy(name, NVP_NAME(nvp), name_sz); + + switch (type) { + case DATA_TYPE_BOOLEAN: + break; + case DATA_TYPE_STRING_ARRAY: { + char *const *strs = data; + char *buf = NVP_VALUE(nvp); + char **cstrs = (void *)buf; + + /* skip pre-allocated space for pointer array */ + buf += nelem * sizeof (uint64_t); + for (i = 0; i < nelem; i++) { + int slen = strlen(strs[i]) + 1; + bcopy(strs[i], buf, slen); + cstrs[i] = buf; + buf += slen; + } + break; + } + case DATA_TYPE_NVLIST: { + nvlist_t *nnvl = EMBEDDED_NVL(nvp); + nvlist_t *onvl = (nvlist_t *)data; + + if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) { + nvp_buf_free(nvl, nvp); + return (err); + } + break; + } + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **onvlp = (nvlist_t **)data; + nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); + nvlist_t *embedded = (nvlist_t *) + ((uintptr_t)nvlp + nelem * sizeof 
(uint64_t)); + + for (i = 0; i < nelem; i++) { + if ((err = nvlist_copy_embedded(nvl, + onvlp[i], embedded)) != 0) { + /* + * Free any successfully created lists + */ + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + return (err); + } + + nvlp[i] = embedded++; + } + break; + } + default: + bcopy(data, NVP_VALUE(nvp), value_sz); + } + + /* if unique name, remove before add */ + if (nvl->nvl_nvflag & NV_UNIQUE_NAME) + (void) nvlist_remove_all(nvl, name); + else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE) + (void) nvlist_remove(nvl, name, type); + + nvp_buf_link(nvl, nvp); + + return (0); +} + +int +nvlist_add_boolean(nvlist_t *nvl, const char *name) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL)); +} + +int +nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val)); +} + +int +nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val)); +} + +int +nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val)); +} + +int +nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val)); +} + +int +nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val)); +} + +int +nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val)); +} + +int +nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val)); +} + +int +nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val)); +} + +int +nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val) +{ + return 
(nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val)); +} + +int +nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val)); +} + +#if !defined(_KERNEL) +int +nvlist_add_double(nvlist_t *nvl, const char *name, double val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_DOUBLE, 1, &val)); +} +#endif + +int +nvlist_add_string(nvlist_t *nvl, const char *name, const char *val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val)); +} + +int +nvlist_add_boolean_array(nvlist_t *nvl, const char *name, + boolean_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); +} + +int +nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); +} + +int +nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); +} + +int +nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); +} + +int +nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); +} + +int +nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); +} + +int +nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); +} + +int +nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); +} + +int +nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, 
a)); +} + +int +nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); +} + +int +nvlist_add_string_array(nvlist_t *nvl, const char *name, + char *const *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); +} + +int +nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val)); +} +/* Add an embedded nvlist: 'val' itself is passed as the data pointer with an element count of 1. */ +int +nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); +} + +int +nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); +} + +/* + * Reading name-value pairs. + * + * nvlist_next_nvpair() returns the pair that follows 'nvp', or the first + * pair when 'nvp' is NULL. NULL is returned when 'nvl' or its private data + * is NULL, when 'nvp' is accepted by neither the nvp_curr hint nor + * nvlist_contains_nvp(), or when there is no following pair. + * priv->nvp_curr is updated to the pair that is returned. + */ +nvpair_t * +nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (NULL); + + curr = NVPAIR2I_NVP(nvp); + + /* + * Ensure that nvp is a valid nvpair on this nvlist. + * NB: nvp_curr is used only as a hint so that we don't always + * have to walk the list to determine if nvp is still on the list. + */ + if (nvp == NULL) + curr = priv->nvp_list; + else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) + curr = curr->nvi_next; + else + curr = NULL; + + priv->nvp_curr = curr; + + return (curr != NULL ? &curr->nvi_nvp : NULL); +} +/* Mirror image of nvlist_next_nvpair(): a NULL 'nvp' starts from priv->nvp_last and the walk follows nvi_prev. */ +nvpair_t * +nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (NULL); + + curr = NVPAIR2I_NVP(nvp); + + if (nvp == NULL) + curr = priv->nvp_last; + else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) + curr = curr->nvi_prev; + else + curr = NULL; + + priv->nvp_curr = curr; + + return (curr != NULL ? 
&curr->nvi_nvp : NULL); +} +/* Return B_TRUE when the list has no pairs; a NULL list or one without private data also counts as empty. */ +boolean_t +nvlist_empty(nvlist_t *nvl) +{ + nvpriv_t *priv; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (B_TRUE); + + return (priv->nvp_list == NULL); +} + +char * +nvpair_name(nvpair_t *nvp) +{ + return (NVP_NAME(nvp)); +} + +data_type_t +nvpair_type(nvpair_t *nvp) +{ + return (NVP_TYPE(nvp)); +} +/* Return 1 if the pair holds any DATA_TYPE_*_ARRAY type and 0 otherwise. DATA_TYPE_INT8_ARRAY is listed with the rest so that int8 arrays are classified like every other array type. */ +int +nvpair_type_is_array(nvpair_t *nvp) +{ + data_type_t type = NVP_TYPE(nvp); + + if ((type == DATA_TYPE_BYTE_ARRAY) || + (type == DATA_TYPE_INT8_ARRAY) || + (type == DATA_TYPE_UINT8_ARRAY) || + (type == DATA_TYPE_INT16_ARRAY) || + (type == DATA_TYPE_UINT16_ARRAY) || + (type == DATA_TYPE_INT32_ARRAY) || + (type == DATA_TYPE_UINT32_ARRAY) || + (type == DATA_TYPE_INT64_ARRAY) || + (type == DATA_TYPE_UINT64_ARRAY) || + (type == DATA_TYPE_BOOLEAN_ARRAY) || + (type == DATA_TYPE_STRING_ARRAY) || + (type == DATA_TYPE_NVLIST_ARRAY)) + return (1); + return (0); + +} + +static int +nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data) +{ + if (nvp == NULL || nvpair_type(nvp) != type) + return (EINVAL); + + /* + * For scalar types, we copy the value into 'data'. + * For string, nvlist and array types, we store a pointer to the + * value held in the pair instead. 
+ */ + switch (type) { + case DATA_TYPE_BOOLEAN: + if (nelem != NULL) + *nelem = 0; + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_HRTIME: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + if (data == NULL) + return (EINVAL); + bcopy(NVP_VALUE(nvp), data, + (size_t)i_get_value_size(type, NULL, 1)); + if (nelem != NULL) + *nelem = 1; + break; + + case DATA_TYPE_NVLIST: + case DATA_TYPE_STRING: + if (data == NULL) + return (EINVAL); + *(void **)data = (void *)NVP_VALUE(nvp); + if (nelem != NULL) + *nelem = 1; + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_NVLIST_ARRAY: + if (nelem == NULL || data == NULL) + return (EINVAL); + if ((*nelem = NVP_NELEM(nvp)) != 0) + *(void **)data = (void *)NVP_VALUE(nvp); + else + *(void **)data = NULL; + break; + + default: + return (ENOTSUP); + } + + return (0); +} +/* Linear search for the first pair matching both 'name' and 'type'. Returns EINVAL on NULL arguments, ENOTSUP unless the list has NV_UNIQUE_NAME or NV_UNIQUE_NAME_TYPE set, and ENOENT when nothing matches. */ +static int +nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type, + uint_t *nelem, void *data) +{ + nvpriv_t *priv; + nvpair_t *nvp; + i_nvp_t *curr; + + if (name == NULL || nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + if (!(nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE))) + return (ENOTSUP); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + nvp = &curr->nvi_nvp; + + if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) + return (nvpair_value_common(nvp, type, nelem, data)); + } + + return (ENOENT); +} + +int 
+nvlist_lookup_boolean(nvlist_t *nvl, const char *name) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL)); +} +/* Typed scalar lookups: each forwards to nvlist_lookup_common() with its DATA_TYPE_* constant, no element-count pointer, and 'val' as the destination. */ +int +nvlist_lookup_boolean_value(nvlist_t *nvl, const char *name, boolean_t *val) +{ + return (nvlist_lookup_common(nvl, name, + DATA_TYPE_BOOLEAN_VALUE, NULL, val)); +} + +int +nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val)); +} + +int +nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val)); +} + +int +nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val)); +} + +int +nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val)); +} + +int +nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val)); +} + +int +nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val)); +} + +int +nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val)); +} + +int +nvlist_lookup_int64(nvlist_t *nvl, const char *name, int64_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val)); +} + +int +nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val)); +} +/* The double variant is compiled only when _KERNEL is not defined. */ +#if !defined(_KERNEL) +int +nvlist_lookup_double(nvlist_t *nvl, const char *name, double *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_DOUBLE, NULL, val)); +} +#endif +/* For strings, '*val' is set to point at the string stored in the list rather than at a copy. */ +int +nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val) +{ + return (nvlist_lookup_common(nvl, name, 
DATA_TYPE_STRING, NULL, val)); +} +/* For an embedded nvlist, '*val' is set to point at the list stored in the pair rather than at a copy. */ +int +nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val)); +} +/* Array lookups: '*a' is set to point at the stored array (NULL when it has no elements) and '*n' to its element count; both pointers must be non-NULL. */ +int +nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name, + boolean_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, + DATA_TYPE_BOOLEAN_ARRAY, n, a)); +} + +int +nvlist_lookup_byte_array(nvlist_t *nvl, const char *name, + uchar_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); +} + +int +nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); +} + +int +nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name, + uint8_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); +} + +int +nvlist_lookup_int16_array(nvlist_t *nvl, const char *name, + int16_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); +} + +int +nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name, + uint16_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); +} + +int +nvlist_lookup_int32_array(nvlist_t *nvl, const char *name, + int32_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); +} + +int +nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name, + uint32_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); +} + +int +nvlist_lookup_int64_array(nvlist_t *nvl, const char *name, + int64_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); +} + +int +nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name, + uint64_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); +} + +int +nvlist_lookup_string_array(nvlist_t *nvl, const char *name, + char 
***a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); +} + +int +nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name, + nvlist_t ***a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); +} + +int +nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val)); +} + +int +nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...) +{ + va_list ap; + char *name; + int noentok = (flag & NV_FLAG_NOENTOK ? 1 : 0); + int ret = 0; + + va_start(ap, flag); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + data_type_t type; + void *val; + uint_t *nelem; + + switch (type = va_arg(ap, data_type_t)) { + case DATA_TYPE_BOOLEAN: + ret = nvlist_lookup_common(nvl, name, type, NULL, NULL); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_HRTIME: + case DATA_TYPE_STRING: + case DATA_TYPE_NVLIST: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + val = va_arg(ap, void *); + ret = nvlist_lookup_common(nvl, name, type, NULL, val); + break; + + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_NVLIST_ARRAY: + val = va_arg(ap, void *); + nelem = va_arg(ap, uint_t *); + ret = nvlist_lookup_common(nvl, name, type, nelem, val); + break; + + default: + ret = EINVAL; + } + + if (ret == ENOENT && noentok) + ret = 0; + } + va_end(ap); + + return (ret); +} + +/* + * Find the 'name'ed nvpair in the nvlist 
'nvl'. If 'name' found, the function + * returns zero and a pointer to the matching nvpair is returned in '*ret' + * (given 'ret' is non-NULL). If 'sep' is specified then 'name' will penitrate + * multiple levels of embedded nvlists, with 'sep' as the separator. As an + * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or + * "a.d[3].e[1]". This matches the C syntax for array embed (for convience, + * code also supports "a.d[3]e[1]" syntax). + * + * If 'ip' is non-NULL and the last name component is an array, return the + * value of the "...[index]" array index in *ip. For an array reference that + * is not indexed, *ip will be returned as -1. If there is a syntax error in + * 'name', and 'ep' is non-NULL then *ep will be set to point to the location + * inside the 'name' string where the syntax error was detected. + */ +static int +nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep, + nvpair_t **ret, int *ip, char **ep) +{ + nvpair_t *nvp; + const char *np; + char *sepp; + char *idxp, *idxep; + nvlist_t **nva; + long idx; + int n; + + if (ip) + *ip = -1; /* not indexed */ + if (ep) + *ep = NULL; + + if ((nvl == NULL) || (name == NULL)) + return (EINVAL); + + /* step through components of name */ + for (np = name; np && *np; np = sepp) { + /* ensure unique names */ + if (!(nvl->nvl_nvflag & NV_UNIQUE_NAME)) + return (ENOTSUP); + + /* skip white space */ + skip_whitespace(np); + if (*np == 0) + break; + + /* set 'sepp' to end of current component 'np' */ + if (sep) + sepp = strchr(np, sep); + else + sepp = NULL; + + /* find start of next "[ index ]..." */ + idxp = strchr(np, '['); + + /* if sepp comes first, set idxp to NULL */ + if (sepp && idxp && (sepp < idxp)) + idxp = NULL; + + /* + * At this point 'idxp' is set if there is an index + * expected for the current component. 
+ */ + if (idxp) { + /* set 'n' to length of current 'np' name component */ + n = idxp++ - np; + + /* keep sepp up to date for *ep use as we advance */ + skip_whitespace(idxp); + sepp = idxp; + + /* determine the index value */ +#if defined(_KERNEL) && !defined(_BOOT) + if (ddi_strtol(idxp, &idxep, 0, &idx)) + goto fail; +#else + idx = strtol(idxp, &idxep, 0); +#endif + if (idxep == idxp) + goto fail; + + /* keep sepp up to date for *ep use as we advance */ + sepp = idxep; + + /* skip white space index value and check for ']' */ + skip_whitespace(sepp); + if (*sepp++ != ']') + goto fail; + + /* for embedded arrays, support C syntax: "a[1].b" */ + skip_whitespace(sepp); + if (sep && (*sepp == sep)) + sepp++; + } else if (sepp) { + n = sepp++ - np; + } else { + n = strlen(np); + } + + /* trim trailing whitespace by reducing length of 'np' */ + if (n == 0) + goto fail; + for (n--; (np[n] == ' ') || (np[n] == '\t'); n--) + ; + n++; + + /* skip whitespace, and set sepp to NULL if complete */ + if (sepp) { + skip_whitespace(sepp); + if (*sepp == 0) + sepp = NULL; + } + + /* + * At this point: + * o 'n' is the length of current 'np' component. + * o 'idxp' is set if there was an index, and value 'idx'. + * o 'sepp' is set to the beginning of the next component, + * and set to NULL if we have no more components. + * + * Search for nvpair with matching component name. + */ + for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + + /* continue if no match on name */ + if (strncmp(np, nvpair_name(nvp), n) || + (strlen(nvpair_name(nvp)) != n)) + continue; + + /* if indexed, verify type is array oriented */ + if (idxp && !nvpair_type_is_array(nvp)) + goto fail; + + /* + * Full match found, return nvp and idx if this + * was the last component. 
+ */ + if (sepp == NULL) { + if (ret) + *ret = nvp; + if (ip && idxp) + *ip = (int)idx; /* return index */ + return (0); /* found */ + } + + /* + * More components: current match must be + * of DATA_TYPE_NVLIST or DATA_TYPE_NVLIST_ARRAY + * to support going deeper. + */ + if (nvpair_type(nvp) == DATA_TYPE_NVLIST) { + nvl = EMBEDDED_NVL(nvp); + break; + } else if (nvpair_type(nvp) == DATA_TYPE_NVLIST_ARRAY) { + (void) nvpair_value_nvlist_array(nvp, + &nva, (uint_t *)&n); + if ((n < 0) || (idx >= n)) + goto fail; + nvl = nva[idx]; + break; + } + + /* type does not support more levels */ + goto fail; + } + if (nvp == NULL) + goto fail; /* 'name' not found */ + + /* search for match of next component in embedded 'nvl' list */ + } + +fail: if (ep && sepp) + *ep = sepp; + return (EINVAL); +} + +/* + * Return pointer to nvpair with specified 'name'. + */ +int +nvlist_lookup_nvpair(nvlist_t *nvl, const char *name, nvpair_t **ret) +{ + return (nvlist_lookup_nvpair_ei_sep(nvl, name, 0, ret, NULL, NULL)); +} + +/* + * Determine if named nvpair exists in nvlist (use embedded separator of '.' + * and return array index). See nvlist_lookup_nvpair_ei_sep for more detailed + * description. 
+ */ +int nvlist_lookup_nvpair_embedded_index(nvlist_t *nvl, + const char *name, nvpair_t **ret, int *ip, char **ep) +{ + return (nvlist_lookup_nvpair_ei_sep(nvl, name, '.', ret, ip, ep)); +} + +boolean_t +nvlist_exists(nvlist_t *nvl, const char *name) +{ + nvpriv_t *priv; + nvpair_t *nvp; + i_nvp_t *curr; + + if (name == NULL || nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (B_FALSE); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + nvp = &curr->nvi_nvp; + + if (strcmp(name, NVP_NAME(nvp)) == 0) + return (B_TRUE); + } + + return (B_FALSE); +} + +int +nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val)); +} + +int +nvpair_value_byte(nvpair_t *nvp, uchar_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val)); +} + +int +nvpair_value_int8(nvpair_t *nvp, int8_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val)); +} + +int +nvpair_value_uint8(nvpair_t *nvp, uint8_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val)); +} + +int +nvpair_value_int16(nvpair_t *nvp, int16_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val)); +} + +int +nvpair_value_uint16(nvpair_t *nvp, uint16_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val)); +} + +int +nvpair_value_int32(nvpair_t *nvp, int32_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val)); +} + +int +nvpair_value_uint32(nvpair_t *nvp, uint32_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val)); +} + +int +nvpair_value_int64(nvpair_t *nvp, int64_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val)); +} + +int +nvpair_value_uint64(nvpair_t *nvp, uint64_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val)); +} + +#if !defined(_KERNEL) +int +nvpair_value_double(nvpair_t *nvp, double *val) +{ + return 
(nvpair_value_common(nvp, DATA_TYPE_DOUBLE, NULL, val)); +} +#endif +/* String and nvlist accessors: '*val' is set to point at the value stored in the pair; EINVAL if the pair is NULL, of another type, or 'val' is NULL. */ +int +nvpair_value_string(nvpair_t *nvp, char **val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val)); +} + +int +nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val)); +} +/* Array accessors: '*val' is set to point at the stored array (NULL when it has no elements) and '*nelem' to the element count; both pointers must be non-NULL. */ +int +nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val)); +} + +int +nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val)); +} + +int +nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val)); +} + +int +nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val)); +} + +int +nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val)); +} + +int +nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val)); +} + +int +nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, val)); +} + +int +nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val)); +} + +int +nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val)); +} + +int +nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val)); +} + +int +nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem) +{ + return 
(nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val)); +} + +int +nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val)); +} + +int +nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val)); +} + +/* + * Add specified pair to the list. The pair's name, type, element count + * and value are handed to nvlist_add_common(). Returns EINVAL if either + * argument is NULL. + */ +int +nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + if (nvl == NULL || nvp == NULL) + return (EINVAL); + + return (nvlist_add_common(nvl, NVP_NAME(nvp), NVP_TYPE(nvp), + NVP_NELEM(nvp), NVP_VALUE(nvp))); +} + +/* + * Merge the supplied nvlists and put the result in dst. + * The merged list will contain all names specified in both lists, + * the values are taken from nvl in the case of duplicates. + * Return 0 on success. + * + * 'flag' is not used. Merging a list into itself does nothing and + * returns 0; EINVAL is returned if either list is NULL. + */ +/*ARGSUSED*/ +int +nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag) +{ + if (nvl == NULL || dst == NULL) + return (EINVAL); + + if (dst != nvl) + return (nvlist_copy_pairs(nvl, dst)); + + return (0); +} + +/* + * Encoding related routines + */ +#define NVS_OP_ENCODE 0 +#define NVS_OP_DECODE 1 +#define NVS_OP_GETSIZE 2 + +typedef struct nvs_ops nvs_ops_t; + +typedef struct { + int nvs_op; + const nvs_ops_t *nvs_ops; + void *nvs_private; + nvpriv_t *nvs_priv; +} nvstream_t; + +/* + * nvs operations are: + * - nvs_nvlist + * encoding / decoding of a nvlist header (nvlist_t) + * calculates the size used for header and end detection + * + * - nvs_nvpair + * responsible for the first part of encoding / decoding of an nvpair + * calculates the decoded size of an nvpair + * + * - nvs_nvp_op + * second part of encoding / decoding of an nvpair + * + * - nvs_nvp_size + * calculates the encoding size of an nvpair + * + * - nvs_nvl_fini + * encodes the end detection mark (zeros). 
+ */ +struct nvs_ops { + int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *); + int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *); + int (*nvs_nvp_op)(nvstream_t *, nvpair_t *); + int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *); + int (*nvs_nvl_fini)(nvstream_t *); +}; + +typedef struct { + char nvh_encoding; /* nvs encoding method */ + char nvh_endian; /* nvs endian */ + char nvh_reserved1; /* reserved for future use */ + char nvh_reserved2; /* reserved for future use */ +} nvs_header_t; + +static int +nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr; + + /* + * Walk nvpair in list and encode each nvpair. Any per-pair + * failure is reported as EFAULT; once all pairs are written + * the result of the nvs_nvl_fini callback is returned. + */ + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) + if (nvs->nvs_ops->nvs_nvpair(nvs, &curr->nvi_nvp, NULL) != 0) + return (EFAULT); + + return (nvs->nvs_ops->nvs_nvl_fini(nvs)); +} + +static int +nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl) +{ + nvpair_t *nvp; + size_t nvsize; + int err; + + /* + * Get decoded size of next pair in stream, alloc + * memory for nvpair_t, then decode the nvpair + * and link it onto 'nvl'. A decoded size of 0 marks the + * end of the list; a pair that fails to decode or validate + * is freed before the error is returned. + */ + while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) { + if (nvsize == 0) /* end of list */ + break; + + /* make sure len makes sense */ + if (nvsize < NVP_SIZE_CALC(1, 0)) + return (EFAULT); + + if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL) + return (ENOMEM); + + if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) { + nvp_buf_free(nvl, nvp); + return (err); + } + + if (i_validate_nvpair(nvp) != 0) { + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + return (EFAULT); + } + + nvp_buf_link(nvl, nvp); + } + return (err); +} + +static int +nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr; + uint64_t nvsize = *buflen; + size_t size; + + /* + * Get encoded size of nvpairs in nvlist, added to the value + * already in *buflen; a total above INT32_MAX yields EINVAL. + */ + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + 
if (nvs->nvs_ops->nvs_nvp_size(nvs, &curr->nvi_nvp, &size) != 0) + return (EINVAL); + + if ((nvsize += size) > INT32_MAX) + return (EINVAL); + } + + *buflen = nvsize; + return (0); +} +/* Run the header callback (nvs_nvlist) on 'nvl' and then the per-pair routine selected by nvs->nvs_op. */ +static int +nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) +{ + int err; + + if (nvl->nvl_priv == 0) + return (EFAULT); + + /* + * Perform the operation, starting with header, then each nvpair. + * An unknown nvs_op yields EINVAL. + */ + if ((err = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen)) != 0) + return (err); + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + err = nvs_encode_pairs(nvs, nvl); + break; + + case NVS_OP_DECODE: + err = nvs_decode_pairs(nvs, nvl); + break; + + case NVS_OP_GETSIZE: + err = nvs_getsize_pairs(nvs, nvl, buflen); + break; + + default: + err = EINVAL; + } + + return (err); +} +/* Encode or decode one embedded nvlist. On decode the version is checked (ENOTSUP on mismatch), a private area is allocated for the embedded list (ENOMEM on failure), and the list is freed again if decoding fails. Any other operation returns EINVAL. */ +static int +nvs_embedded(nvstream_t *nvs, nvlist_t *embedded) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + return (nvs_operation(nvs, embedded, NULL)); + + case NVS_OP_DECODE: { + nvpriv_t *priv; + int err; + + if (embedded->nvl_version != NV_VERSION) + return (ENOTSUP); + + if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL) + return (ENOMEM); + + nvlist_init(embedded, embedded->nvl_nvflag, priv); + + if ((err = nvs_operation(nvs, embedded, NULL)) != 0) + nvlist_free(embedded); + return (err); + } + default: + break; + } + + return (EINVAL); +} +/* Apply the current operation to every list of an nvlist-array pair. On decode the pointer slots are zeroed first and then pointed, one by one, at the nvlist_t structures that follow them in the pair's value area. */ +static int +nvs_embedded_nvl_array(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + size_t nelem = NVP_NELEM(nvp); + nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); + int i; + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + for (i = 0; i < nelem; i++) + if (nvs_embedded(nvs, nvlp[i]) != 0) + return (EFAULT); + break; + + case NVS_OP_DECODE: { + size_t len = nelem * sizeof (uint64_t); + nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len); + + bzero(nvlp, len); /* don't trust packed data */ + for (i = 0; i < nelem; i++) { + if (nvs_embedded(nvs, embedded) != 0) { + nvpair_free(nvp); + return (EFAULT); + } + + nvlp[i] = embedded++; + } + 
break; + } + case NVS_OP_GETSIZE: { + uint64_t nvsize = 0; + + for (i = 0; i < nelem; i++) { + size_t nvp_sz = 0; + + if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0) + return (EINVAL); + + if ((nvsize += nvp_sz) > INT32_MAX) + return (EINVAL); + } + + *size = nvsize; + break; + } + default: + return (EINVAL); + } + + return (0); +} + +static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *); +static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *); + +/* + * Common routine for nvlist operations: + * encode, decode, getsize (encoded size). + * + * Returns EINVAL when 'buflen', 'nvl' or the list's private data is NULL, + * or when encode/decode is given a NULL or too-small buffer. Returns + * ENOTSUP for an unknown operation or encoding, or for a native stream + * whose recorded endianness differs from the host's. On decode the + * 'encoding' argument is ignored and taken from the stream header. + */ +static int +nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, + int nvs_op) +{ + int err = 0; + nvstream_t nvs; + int nvl_endian; +#ifdef _LITTLE_ENDIAN + int host_endian = 1; +#else + int host_endian = 0; +#endif /* _LITTLE_ENDIAN */ + nvs_header_t *nvh = (void *)buf; + + if (buflen == NULL || nvl == NULL || + (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + nvs.nvs_op = nvs_op; + + /* + * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and + * a buffer is allocated. The first 4 bytes in the buffer are + * used for encoding method and host endian. 
+ */ + switch (nvs_op) { + case NVS_OP_ENCODE: + if (buf == NULL || *buflen < sizeof (nvs_header_t)) + return (EINVAL); + + nvh->nvh_encoding = encoding; + nvh->nvh_endian = nvl_endian = host_endian; + nvh->nvh_reserved1 = 0; + nvh->nvh_reserved2 = 0; + break; + + case NVS_OP_DECODE: + if (buf == NULL || *buflen < sizeof (nvs_header_t)) + return (EINVAL); + + /* get method of encoding from first byte */ + encoding = nvh->nvh_encoding; + nvl_endian = nvh->nvh_endian; + break; + + case NVS_OP_GETSIZE: + nvl_endian = host_endian; + + /* + * start the running size with the stream header + */ + *buflen = sizeof (nvs_header_t); + break; + + default: + return (ENOTSUP); + } + + /* + * Create an nvstream with proper encoding method + */ + switch (encoding) { + case NV_ENCODE_NATIVE: + /* + * check endianness, in case we are unpacking + * from a file + */ + if (nvl_endian != host_endian) + return (ENOTSUP); + err = nvs_native(&nvs, nvl, buf, buflen); + break; + case NV_ENCODE_XDR: + err = nvs_xdr(&nvs, nvl, buf, buflen); + break; + default: + err = ENOTSUP; + break; + } + + return (err); +} +/* Store the encoded size of 'nvl' for the given encoding in '*size'; no buffer is involved. */ +int +nvlist_size(nvlist_t *nvl, size_t *size, int encoding) +{ + return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE)); +} + +/* + * Pack nvlist into contiguous memory. In the kernel (non-boot) build + * 'kmflag' selects nv_alloc_sleep or nv_alloc_nosleep; in every other + * build nv_alloc_nosleep is used and 'kmflag' is ignored. + */ +/*ARGSUSED1*/ +int +nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, + int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xpack(nvl, bufp, buflen, encoding, + (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep)); +#endif +} +/* Pack 'nvl' using allocator 'nva'. If *bufp is non-NULL the caller's buffer of *buflen bytes is encoded into directly. Otherwise a buffer of the size reported by nvlist_size() is allocated and, on success, returned through *bufp and *buflen; on failure it is freed. Returns EINVAL if any pointer argument is NULL and ENOMEM if the allocation fails. */ +int +nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, + nv_alloc_t *nva) +{ + nvpriv_t nvpriv; + size_t alloc_size; + char *buf; + int err; + + if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL) + return (EINVAL); + + if (*bufp != NULL) + return (nvlist_common(nvl, *bufp, buflen, encoding, + NVS_OP_ENCODE)); + + /* + * Here is a difficult situation: + * 1. The nvlist has fixed allocator properties. + * All other nvlist routines (like nvlist_add_*, ...) use + * these properties. + * 2. When using nvlist_pack() the user can specify his own + * allocator properties (e.g. by using KM_NOSLEEP). + * + * We use the user specified properties (2). A clearer solution + * would be to remove the kmflag from nvlist_pack(), but we will + * not change the interface. + */ + nv_priv_init(&nvpriv, nva, 0); + + if (err = nvlist_size(nvl, &alloc_size, encoding)) + return (err); + + if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL) + return (ENOMEM); + + if ((err = nvlist_common(nvl, buf, &alloc_size, encoding, + NVS_OP_ENCODE)) != 0) { + nv_mem_free(&nvpriv, buf, alloc_size); + } else { + *buflen = alloc_size; + *bufp = buf; + } + + return (err); +} + +/* + * Unpack buf into an nvlist_t. In the kernel (non-boot) build 'kmflag' + * selects nv_alloc_sleep or nv_alloc_nosleep; in every other build + * nv_alloc_nosleep is used and 'kmflag' is ignored. + */ +/*ARGSUSED1*/ +int +nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xunpack(buf, buflen, nvlp, + (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep)); +#endif +} +/* Allocate a new nvlist with allocator 'nva' and decode 'buf' into it. On success '*nvlp' receives the list; on failure the list is freed and '*nvlp' is left untouched. Returns EINVAL if 'nvlp' is NULL. */ +int +nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva) +{ + nvlist_t *nvl; + int err; + + if (nvlp == NULL) + return (EINVAL); + + if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0) + return (err); + + if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0) + nvlist_free(nvl); + else + *nvlp = nvl; + + return (err); +} + +/* + * Native encoding functions + */ +typedef struct { + /* + * This structure is used when decoding a packed nvpair in + * the native format. n_base points to a buffer containing the + * packed nvpair. n_end is a pointer to the end of the buffer. + * (n_end actually points to the first byte past the end of the + * buffer.) n_curr is a pointer that lies between n_base and n_end. + * It points to the current data that we are decoding. + * The amount of data left in the buffer is equal to n_end - n_curr. + * n_flag is used to recognize a packed embedded list. 
+ */ + caddr_t n_base; + caddr_t n_end; + caddr_t n_curr; + uint_t n_flag; +} nvs_native_t; +/* Attach 'native' to the stream as its private state. For encode and decode the cursor covers [buf, buf + buflen); for getsize all three pointers are NULL. Any other operation returns EINVAL. */ +static int +nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf, + size_t buflen) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + nvs->nvs_private = native; + native->n_curr = native->n_base = buf; + native->n_end = buf + buflen; + native->n_flag = 0; + return (0); + + case NVS_OP_GETSIZE: + nvs->nvs_private = native; + native->n_curr = native->n_base = native->n_end = NULL; + native->n_flag = 0; + return (0); + default: + return (EINVAL); + } +} + +/*ARGSUSED*/ +static void +nvs_native_destroy(nvstream_t *nvs) +{ +} +/* Copy 'size' bytes between 'buf' and the stream at n_curr (into the stream on encode, out of it on decode) and advance n_curr. Returns EFAULT if the copy would run past n_end and EINVAL for any other operation. NOTE(review): 'n_curr + size' is not guarded against pointer wrap-around for very large 'size' -- confirm that callers bound it. */ +static int +native_cp(nvstream_t *nvs, void *buf, size_t size) +{ + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + + if (native->n_curr + size > native->n_end) + return (EFAULT); + + /* + * The bcopy() below eliminates alignment requirement + * on the buffer (stream) and is preferred over direct access. + */ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + bcopy(buf, native->n_curr, size); + break; + case NVS_OP_DECODE: + bcopy(native->n_curr, buf, size); + break; + default: + return (EINVAL); + } + + native->n_curr += size; + return (0); +} + +/* + * Operate on the nvlist_t header. For encode and decode only the first + * call on a stream (n_flag == 0) copies nvl_version and nvl_nvflag; + * later calls are for packed embedded lists and copy nothing. + */ +static int +nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) +{ + nvs_native_t *native = nvs->nvs_private; + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + if (native->n_flag) + return (0); /* packed embedded list */ + + native->n_flag = 1; + + /* copy version and nvflag of the nvlist_t */ + if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 || + native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0) + return (EFAULT); + + return (0); + + case NVS_OP_GETSIZE: + /* + * if calculating for a packed embedded list + * 4 for end of the embedded list + * else + * 2 * sizeof (int32_t) for nvl_version and nvl_nvflag + * and 4 for end of the entire list + */ + if (native->n_flag) { + 
*size += 4; + } else { + native->n_flag = 1; + *size += 2 * sizeof (int32_t) + 4; + } + + return (0); + + default: + return (EINVAL); + } +} + +static int +nvs_native_nvl_fini(nvstream_t *nvs) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + /* + * Add 4 zero bytes at end of nvlist. They are used + * for end detection by the decode routine. + */ + if (native->n_curr + sizeof (int) > native->n_end) + return (EFAULT); + + bzero(native->n_curr, sizeof (int)); + native->n_curr += sizeof (int); + } + + return (0); +} + +static int +nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + nvlist_t *packed = (void *) + (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); + /* + * Null out the pointer that is meaningless in the packed + * structure. The address may not be aligned, so we have + * to use bzero. + */ + bzero(&packed->nvl_priv, sizeof (packed->nvl_priv)); + } + + return (nvs_embedded(nvs, EMBEDDED_NVL(nvp))); +} + +static int +nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp); + size_t len = NVP_NELEM(nvp) * sizeof (uint64_t); + nvlist_t *packed = (nvlist_t *)((uintptr_t)value + len); + int i; + /* + * Null out pointers that are meaningless in the packed + * structure. The addresses may not be aligned, so we have + * to use bzero. + */ + bzero(value, len); + + for (i = 0; i < NVP_NELEM(nvp); i++, packed++) + /* + * Null out the pointer that is meaningless in the + * packed structure. The address may not be aligned, + * so we have to use bzero. 
+ */ + bzero(&packed->nvl_priv, sizeof (packed->nvl_priv)); + } + + return (nvs_embedded_nvl_array(nvs, nvp, NULL)); +} + +static void +nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + uint64_t *strp = (void *) + (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); + /* + * Null out pointers that are meaningless in the packed + * structure. The addresses may not be aligned, so we have + * to use bzero. + */ + bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t)); + break; + } + case NVS_OP_DECODE: { + char **strp = (void *)NVP_VALUE(nvp); + char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t)); + int i; + + for (i = 0; i < NVP_NELEM(nvp); i++) { + strp[i] = buf; + buf += strlen(buf) + 1; + } + break; + } + } +} + +static int +nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp) +{ + data_type_t type; + int value_sz; + int ret = 0; + + /* + * We do the initial bcopy of the data before we look at + * the nvpair type, because when we're decoding, we won't + * have the correct values for the pair until we do the bcopy. + */ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + if (native_cp(nvs, nvp, nvp->nvp_size) != 0) + return (EFAULT); + break; + default: + return (EINVAL); + } + + /* verify nvp_name_sz, check the name string length */ + if (i_validate_nvpair_name(nvp) != 0) + return (EFAULT); + + type = NVP_TYPE(nvp); + + /* + * Verify type and nelem and get the value size. + * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY + * is the size of the string(s) excluded. 
+ */ + if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0) + return (EFAULT); + + if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size) + return (EFAULT); + + switch (type) { + case DATA_TYPE_NVLIST: + ret = nvpair_native_embedded(nvs, nvp); + break; + case DATA_TYPE_NVLIST_ARRAY: + ret = nvpair_native_embedded_array(nvs, nvp); + break; + case DATA_TYPE_STRING_ARRAY: + nvpair_native_string_array(nvs, nvp); + break; + default: + break; + } + + return (ret); +} + +static int +nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + uint64_t nvp_sz = nvp->nvp_size; + + switch (NVP_TYPE(nvp)) { + case DATA_TYPE_NVLIST: { + size_t nvsize = 0; + + if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0) + return (EINVAL); + + nvp_sz += nvsize; + break; + } + case DATA_TYPE_NVLIST_ARRAY: { + size_t nvsize; + + if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0) + return (EINVAL); + + nvp_sz += nvsize; + break; + } + default: + break; + } + + if (nvp_sz > INT32_MAX) + return (EINVAL); + + *size = nvp_sz; + + return (0); +} + +static int +nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + return (nvs_native_nvp_op(nvs, nvp)); + + case NVS_OP_DECODE: { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + int32_t decode_len; + + /* try to read the size value from the stream */ + if (native->n_curr + sizeof (int32_t) > native->n_end) + return (EFAULT); + bcopy(native->n_curr, &decode_len, sizeof (int32_t)); + + /* sanity check the size value */ + if (decode_len < 0 || + decode_len > native->n_end - native->n_curr) + return (EFAULT); + + *size = decode_len; + + /* + * If at the end of the stream then move the cursor + * forward, otherwise nvpair_native_op() will read + * the entire nvpair at the same cursor position. 
+ */ + if (*size == 0) + native->n_curr += sizeof (int32_t); + break; + } + + default: + return (EINVAL); + } + + return (0); +} + +static const nvs_ops_t nvs_native_ops = { + nvs_native_nvlist, + nvs_native_nvpair, + nvs_native_nvp_op, + nvs_native_nvp_size, + nvs_native_nvl_fini +}; + +static int +nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) +{ + nvs_native_t native; + int err; + + nvs->nvs_ops = &nvs_native_ops; + + if ((err = nvs_native_create(nvs, &native, buf + sizeof (nvs_header_t), + *buflen - sizeof (nvs_header_t))) != 0) + return (err); + + err = nvs_operation(nvs, nvl, buflen); + + nvs_native_destroy(nvs); + + return (err); +} + +/* + * XDR encoding functions + * + * An xdr packed nvlist is encoded as: + * + * - encoding method and host endian (4 bytes) + * - nvl_version (4 bytes) + * - nvl_nvflag (4 bytes) + * + * - encoded nvpairs, the format of one xdr encoded nvpair is: + * - encoded size of the nvpair (4 bytes) + * - decoded size of the nvpair (4 bytes) + * - name string, (4 + sizeof(NV_ALIGN4(string))) + * a string is coded as size (4 bytes) and data + * - data type (4 bytes) + * - number of elements in the nvpair (4 bytes) + * - data + * + * - 2 zeros for end of the entire list (8 bytes) + */ +static int +nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen) +{ + /* xdr data must be 4 byte aligned */ + if ((ulong_t)buf % 4 != 0) + return (EFAULT); + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE); + nvs->nvs_private = xdr; + return (0); + case NVS_OP_DECODE: + xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE); + nvs->nvs_private = xdr; + return (0); + case NVS_OP_GETSIZE: + nvs->nvs_private = NULL; + return (0); + default: + return (EINVAL); + } +} + +static void +nvs_xdr_destroy(nvstream_t *nvs) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + xdr_destroy((XDR *)nvs->nvs_private); + break; + default: + break; + } +} + +static
int +nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: { + XDR *xdr = nvs->nvs_private; + + if (!xdr_int(xdr, &nvl->nvl_version) || + !xdr_u_int(xdr, &nvl->nvl_nvflag)) + return (EFAULT); + break; + } + case NVS_OP_GETSIZE: { + /* + * 2 * 4 for nvl_version + nvl_nvflag + * and 8 for end of the entire list + */ + *size += 2 * 4 + 8; + break; + } + default: + return (EINVAL); + } + return (0); +} + +static int +nvs_xdr_nvl_fini(nvstream_t *nvs) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + XDR *xdr = nvs->nvs_private; + int zero = 0; + + if (!xdr_int(xdr, &zero) || !xdr_int(xdr, &zero)) + return (EFAULT); + } + + return (0); +} + +/* + * The format of xdr encoded nvpair is: + * encode_size, decode_size, name string, data type, nelem, data + */ +static int +nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp) +{ + data_type_t type; + char *buf; + char *buf_end = (char *)nvp + nvp->nvp_size; + int value_sz; + uint_t nelem, buflen; + bool_t ret = FALSE; + XDR *xdr = nvs->nvs_private; + + ASSERT(xdr != NULL && nvp != NULL); + + /* name string */ + if ((buf = NVP_NAME(nvp)) >= buf_end) + return (EFAULT); + buflen = buf_end - buf; + + if (!xdr_string(xdr, &buf, buflen - 1)) + return (EFAULT); + nvp->nvp_name_sz = strlen(buf) + 1; + + /* type and nelem */ + if (!xdr_int(xdr, (int *)&nvp->nvp_type) || + !xdr_int(xdr, &nvp->nvp_value_elem)) + return (EFAULT); + + type = NVP_TYPE(nvp); + nelem = nvp->nvp_value_elem; + + /* + * Verify type and nelem and get the value size. + * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY + * is the size of the string(s) excluded. 
+ */ + if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0) + return (EFAULT); + + /* if there is no data to extract then return */ + if (nelem == 0) + return (0); + + /* value */ + if ((buf = NVP_VALUE(nvp)) >= buf_end) + return (EFAULT); + buflen = buf_end - buf; + + if (buflen < value_sz) + return (EFAULT); + + switch (type) { + case DATA_TYPE_NVLIST: + if (nvs_embedded(nvs, (void *)buf) == 0) + return (0); + break; + + case DATA_TYPE_NVLIST_ARRAY: + if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0) + return (0); + break; + + case DATA_TYPE_BOOLEAN: + ret = TRUE; + break; + + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + ret = xdr_char(xdr, buf); + break; + + case DATA_TYPE_INT16: + ret = xdr_short(xdr, (void *)buf); + break; + + case DATA_TYPE_UINT16: + ret = xdr_u_short(xdr, (void *)buf); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_INT32: + ret = xdr_int(xdr, (void *)buf); + break; + + case DATA_TYPE_UINT32: + ret = xdr_u_int(xdr, (void *)buf); + break; + + case DATA_TYPE_INT64: + ret = xdr_longlong_t(xdr, (void *)buf); + break; + + case DATA_TYPE_UINT64: + ret = xdr_u_longlong_t(xdr, (void *)buf); + break; + + case DATA_TYPE_HRTIME: + /* + * NOTE: must expose the definition of hrtime_t here + */ + ret = xdr_longlong_t(xdr, (void *)buf); + break; +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: + ret = xdr_double(xdr, (void *)buf); + break; +#endif + case DATA_TYPE_STRING: + ret = xdr_string(xdr, &buf, buflen - 1); + break; + + case DATA_TYPE_BYTE_ARRAY: + ret = xdr_opaque(xdr, buf, nelem); + break; + + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t), + (xdrproc_t)xdr_char); + break; + + case DATA_TYPE_INT16_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t), + sizeof (int16_t), (xdrproc_t)xdr_short); + break; + + case DATA_TYPE_UINT16_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t), + sizeof (uint16_t), 
(xdrproc_t)xdr_u_short); + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT32_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t), + sizeof (int32_t), (xdrproc_t)xdr_int); + break; + + case DATA_TYPE_UINT32_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t), + sizeof (uint32_t), (xdrproc_t)xdr_u_int); + break; + + case DATA_TYPE_INT64_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t), + sizeof (int64_t), (xdrproc_t)xdr_longlong_t); + break; + + case DATA_TYPE_UINT64_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t), + sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t); + break; + + case DATA_TYPE_STRING_ARRAY: { + size_t len = nelem * sizeof (uint64_t); + char **strp = (void *)buf; + int i; + + if (nvs->nvs_op == NVS_OP_DECODE) + bzero(buf, len); /* don't trust packed data */ + + for (i = 0; i < nelem; i++) { + if (buflen <= len) + return (EFAULT); + + buf += len; + buflen -= len; + + if (xdr_string(xdr, &buf, buflen - 1) != TRUE) + return (EFAULT); + + if (nvs->nvs_op == NVS_OP_DECODE) + strp[i] = buf; + len = strlen(buf) + 1; + } + ret = TRUE; + break; + } + default: + break; + } + + return (ret == TRUE ? 
0 : EFAULT); +} + +static int +nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + data_type_t type = NVP_TYPE(nvp); + /* + * encode_size + decode_size + name string size + data type + nelem + * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + */ + uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4; + + switch (type) { + case DATA_TYPE_BOOLEAN: + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + nvp_sz += 4; /* 4 is the minimum xdr unit */ + break; + + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_HRTIME: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + nvp_sz += 8; + break; + + case DATA_TYPE_STRING: + nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp))); + break; + + case DATA_TYPE_BYTE_ARRAY: + nvp_sz += NV_ALIGN4(NVP_NELEM(nvp)); + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp); + break; + + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp); + break; + + case DATA_TYPE_STRING_ARRAY: { + int i; + char **strs = (void *)NVP_VALUE(nvp); + + for (i = 0; i < NVP_NELEM(nvp); i++) + nvp_sz += 4 + NV_ALIGN4(strlen(strs[i])); + + break; + } + + case DATA_TYPE_NVLIST: + case DATA_TYPE_NVLIST_ARRAY: { + size_t nvsize = 0; + int old_nvs_op = nvs->nvs_op; + int err; + + nvs->nvs_op = NVS_OP_GETSIZE; + if (type == DATA_TYPE_NVLIST) + err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize); + else + err = nvs_embedded_nvl_array(nvs, nvp, &nvsize); + nvs->nvs_op = old_nvs_op; + + if (err != 0) + return (EINVAL); + + nvp_sz += nvsize; + break; + } + + default: + return 
(EINVAL); + } + + if (nvp_sz > INT32_MAX) + return (EINVAL); + + *size = nvp_sz; + + return (0); +} + + +/* + * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates + * the largest nvpair that could be encoded in the buffer. + * + * See comments above nvs_xdr_nvp_op() for the format of xdr encoding. + * The size of an xdr packed nvpair without any data is 5 words. + * + * Using the size of the data directly as an estimate would be ok + * in all cases except one. If the data type is of DATA_TYPE_STRING_ARRAY + * then the actual nvpair has space for an array of pointers to index + * the strings. These pointers are not encoded into the packed xdr buffer. + * + * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are + * of length 0, then each string is encoded in xdr format as a single word. + * Therefore when expanded to an nvpair there will be 2.25 words used for + * each string. (an int64_t allocated for pointer usage, and a single char + * for the null termination.) + * + * This is the calculation performed by the NVS_XDR_MAX_LEN macro. + */ +#define NVS_XDR_HDR_LEN ((size_t)(5 * 4)) +#define NVS_XDR_DATA_LEN(y) (((size_t)(y) <= NVS_XDR_HDR_LEN) ?
\ + 0 : ((size_t)(y) - NVS_XDR_HDR_LEN)) +#define NVS_XDR_MAX_LEN(x) (NVP_SIZE_CALC(1, 0) + \ + (NVS_XDR_DATA_LEN(x) * 2) + \ + NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4))) + +static int +nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + XDR *xdr = nvs->nvs_private; + int32_t encode_len, decode_len; + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: { + size_t nvsize; + + if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0) + return (EFAULT); + + decode_len = nvp->nvp_size; + encode_len = nvsize; + if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) + return (EFAULT); + + return (nvs_xdr_nvp_op(nvs, nvp)); + } + case NVS_OP_DECODE: { + struct xdr_bytesrec bytesrec; + + /* get the encode and decode size */ + if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) + return (EFAULT); + *size = decode_len; + + /* are we at the end of the stream? */ + if (*size == 0) + return (0); + + /* sanity check the size parameter */ + if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec)) + return (EFAULT); + + if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail)) + return (EFAULT); + break; + } + + default: + return (EINVAL); + } + return (0); +} + +static const struct nvs_ops nvs_xdr_ops = { + nvs_xdr_nvlist, + nvs_xdr_nvpair, + nvs_xdr_nvp_op, + nvs_xdr_nvp_size, + nvs_xdr_nvl_fini +}; + +static int +nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) +{ + XDR xdr; + int err; + + nvs->nvs_ops = &nvs_xdr_ops; + + if ((err = nvs_xdr_create(nvs, &xdr, buf + sizeof (nvs_header_t), + *buflen - sizeof (nvs_header_t))) != 0) + return (err); + + err = nvs_operation(nvs, nvl, buflen); + + nvs_xdr_destroy(nvs); + + return (err); +} diff --git a/common/nvpair/nvpair_alloc_fixed.c b/common/nvpair/nvpair_alloc_fixed.c new file mode 100644 index 000000000000..b1128eeb9bc3 --- /dev/null +++ b/common/nvpair/nvpair_alloc_fixed.c @@ -0,0 +1,120 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/stropts.h> +#include <sys/isa_defs.h> +#include <sys/nvpair.h> +#include <sys/sysmacros.h> +#if defined(_KERNEL) && !defined(_BOOT) +#include <sys/varargs.h> +#else +#include <stdarg.h> +#include <strings.h> +#endif + +/* + * This allocator is very simple. + * - it uses a pre-allocated buffer for memory allocations. + * - it does _not_ free memory in the pre-allocated buffer. + * + * The reason for the selected implementation is simplicity. + * This allocator is designed for the usage in interrupt context when + * the caller may not wait for free memory. + */ + +/* pre-allocated buffer for memory allocations */ +typedef struct nvbuf { + uintptr_t nvb_buf; /* address of pre-allocated buffer */ + uintptr_t nvb_lim; /* limit address in the buffer */ + uintptr_t nvb_cur; /* current address in the buffer */ +} nvbuf_t; + +/* + * Initialize the pre-allocated buffer allocator. The caller needs to supply + * + * buf address of pre-allocated buffer + * bufsz size of pre-allocated buffer + * + * nv_fixed_init() calculates the remaining members of nvbuf_t.
+ */ +static int +nv_fixed_init(nv_alloc_t *nva, va_list valist) +{ + uintptr_t base = va_arg(valist, uintptr_t); + uintptr_t lim = base + va_arg(valist, size_t); + nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t)); + + if (base == 0 || (uintptr_t)&nvb[1] > lim) + return (EINVAL); + + nvb->nvb_buf = (uintptr_t)&nvb[0]; + nvb->nvb_cur = (uintptr_t)&nvb[1]; + nvb->nvb_lim = lim; + nva->nva_arg = nvb; + + return (0); +} + +static void * +nv_fixed_alloc(nv_alloc_t *nva, size_t size) +{ + nvbuf_t *nvb = nva->nva_arg; + uintptr_t new = nvb->nvb_cur; + + if (size == 0 || new + size > nvb->nvb_lim) + return (NULL); + + nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t)); + + return ((void *)new); +} + +/*ARGSUSED*/ +static void +nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size) +{ + /* don't free memory in the pre-allocated buffer */ +} + +static void +nv_fixed_reset(nv_alloc_t *nva) +{ + nvbuf_t *nvb = nva->nva_arg; + + nvb->nvb_cur = (uintptr_t)&nvb[1]; +} + +const nv_alloc_ops_t nv_fixed_ops_def = { + nv_fixed_init, /* nv_ao_init() */ + NULL, /* nv_ao_fini() */ + nv_fixed_alloc, /* nv_ao_alloc() */ + nv_fixed_free, /* nv_ao_free() */ + nv_fixed_reset /* nv_ao_reset() */ +}; + +const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def; diff --git a/common/unicode/u8_textprep.c b/common/unicode/u8_textprep.c new file mode 100644 index 000000000000..8faf1a97e47e --- /dev/null +++ b/common/unicode/u8_textprep.c @@ -0,0 +1,2132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + +/* + * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458). + * + * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F), + * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also + * the section 3C man pages. + * Interface stability: Committed. + */ + +#include <sys/types.h> +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/debug.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#else +#include <sys/u8_textprep.h> +#include <strings.h> +#endif /* _KERNEL */ +#include <sys/byteorder.h> +#include <sys/errno.h> +#include <sys/u8_textprep_data.h> + + +/* The maximum possible number of bytes in a UTF-8 character. */ +#define U8_MB_CUR_MAX (4) + +/* + * The maximum number of bytes needed for a UTF-8 character to cover + * U+0000 - U+FFFF, i.e., the coding space of now deprecated UCS-2. + */ +#define U8_MAX_BYTES_UCS2 (3) + +/* The maximum possible number of bytes in a Stream-Safe Text. */ +#define U8_STREAM_SAFE_TEXT_MAX (128) + +/* + * The maximum number of characters in a combining/conjoining sequence and + * the actual upperbound limit of a combining/conjoining sequence. + */ +#define U8_MAX_CHARS_A_SEQ (32) +#define U8_UPPER_LIMIT_IN_A_SEQ (31) + +/* The combining class value for Starter. */ +#define U8_COMBINING_CLASS_STARTER (0) + +/* + * Some Hangul related macros at below. 
+ * + * The first and the last of Hangul syllables, Hangul Jamo Leading consonants, + * Vowels, and optional Trailing consonants in Unicode scalar values. + * + * Please be noted that the U8_HANGUL_JAMO_T_FIRST is 0x11A7 at below not + * the actual U+11A8. This is due to that the trailing consonant is optional + * and thus we are doing a pre-calculation of subtracting one. + * + * Each of 19 modern leading consonants has total 588 possible syllables since + * Hangul has 21 modern vowels and 27 modern trailing consonants plus 1 for + * no trailing consonant case, i.e., 21 x 28 = 588. + * + * We also have bunch of Hangul related macros at below. Please bear in mind + * that the U8_HANGUL_JAMO_1ST_BYTE can be used to check whether it is + * a Hangul Jamo or not but the value does not guarantee that it is a Hangul + * Jamo; it just guarantee that it will be most likely. + */ +#define U8_HANGUL_SYL_FIRST (0xAC00U) +#define U8_HANGUL_SYL_LAST (0xD7A3U) + +#define U8_HANGUL_JAMO_L_FIRST (0x1100U) +#define U8_HANGUL_JAMO_L_LAST (0x1112U) +#define U8_HANGUL_JAMO_V_FIRST (0x1161U) +#define U8_HANGUL_JAMO_V_LAST (0x1175U) +#define U8_HANGUL_JAMO_T_FIRST (0x11A7U) +#define U8_HANGUL_JAMO_T_LAST (0x11C2U) + +#define U8_HANGUL_V_COUNT (21) +#define U8_HANGUL_VT_COUNT (588) +#define U8_HANGUL_T_COUNT (28) + +#define U8_HANGUL_JAMO_1ST_BYTE (0xE1U) + +#define U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \ + (s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \ + (s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \ + (s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU)); + +#define U8_HANGUL_JAMO_L(u) \ + ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST) + +#define U8_HANGUL_JAMO_V(u) \ + ((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST) + +#define U8_HANGUL_JAMO_T(u) \ + ((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST) + +#define U8_HANGUL_JAMO(u) \ + ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST) + 
+#define U8_HANGUL_SYLLABLE(u) \ + ((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST) + +#define U8_HANGUL_COMPOSABLE_L_V(s, u) \ + ((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u))) + +#define U8_HANGUL_COMPOSABLE_LV_T(s, u) \ + ((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u))) + +/* The types of decomposition mappings. */ +#define U8_DECOMP_BOTH (0xF5U) +#define U8_DECOMP_CANONICAL (0xF6U) + +/* The indicator for 16-bit table. */ +#define U8_16BIT_TABLE_INDICATOR (0x8000U) + +/* The following are some convenience macros. */ +#define U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \ + (u) = ((uint32_t)(b1) & 0x0F) << 12 | ((uint32_t)(b2) & 0x3F) << 6 | \ + (uint32_t)(b3) & 0x3F; + +#define U8_SIMPLE_SWAP(a, b, t) \ + (t) = (a); \ + (a) = (b); \ + (b) = (t); + +#define U8_ASCII_TOUPPER(c) \ + (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c)) + +#define U8_ASCII_TOLOWER(c) \ + (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c)) + +#define U8_ISASCII(c) (((uchar_t)(c)) < 0x80U) +/* + * The following macro assumes that the two characters that are to be + * swapped are adjacent to each other and 'a' comes before 'b'. + * + * If the assumptions are not met, then, the macro will fail. + */ +#define U8_SWAP_COMB_MARKS(a, b) \ + for (k = 0; k < disp[(a)]; k++) \ + u8t[k] = u8s[start[(a)] + k]; \ + for (k = 0; k < disp[(b)]; k++) \ + u8s[start[(a)] + k] = u8s[start[(b)] + k]; \ + start[(b)] = start[(a)] + disp[(b)]; \ + for (k = 0; k < disp[(a)]; k++) \ + u8s[start[(b)] + k] = u8t[k]; \ + U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \ + U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc); + +/* The possible states during normalization. 
*/ +typedef enum { + U8_STATE_START = 0, + U8_STATE_HANGUL_L = 1, + U8_STATE_HANGUL_LV = 2, + U8_STATE_HANGUL_LVT = 3, + U8_STATE_HANGUL_V = 4, + U8_STATE_HANGUL_T = 5, + U8_STATE_COMBINING_MARK = 6 +} u8_normalization_states_t; + +/* + * The three vectors at below are used to check bytes of a given UTF-8 + * character are valid and not containing any malformed byte values. + * + * We used to have a quite relaxed UTF-8 binary representation but then there + * was some security related issues and so the Unicode Consortium defined + * and announced the UTF-8 Corrigendum at Unicode 3.1 and then refined it + * one more time at the Unicode 3.2. The following three tables are based on + * that. + */ + +#define U8_ILLEGAL_NEXT_BYTE_COMMON(c) ((c) < 0x80 || (c) > 0xBF) + +#define I_ U8_ILLEGAL_CHAR +#define O_ U8_OUT_OF_RANGE_CHAR + +const int8_t u8_number_of_bytes[0x100] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +/* 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */ + I_, I_, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + +/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + +/* E0 
E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + +/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */ + 4, 4, 4, 4, 4, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, +}; + +#undef I_ +#undef O_ + +const uint8_t u8_valid_min_2nd_byte[0x100] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* C0 C1 C2 C3 C4 C5 C6 C7 */ + 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* C8 C9 CA CB CC CD CE CF */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* D0 D1 D2 D3 D4 D5 D6 D7 */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* D8 D9 DA DB DC DD DE DF */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* E0 E1 E2 E3 E4 E5 E6 E7 */ + 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* E8 E9 EA EB EC ED EE EF */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* F0 F1 F2 F3 F4 F5 F6 F7 */ + 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const uint8_t u8_valid_max_2nd_byte[0x100] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* C0 C1 C2 C3 C4 C5 C6 C7 */ + 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* C8 C9 CA CB CC CD CE CF */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* D0 D1 D2 D3 D4 D5 D6 D7 */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* D8 D9 DA DB DC DD DE DF */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* E0 E1 E2 E3 E4 E5 E6 E7 */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* E8 E9 EA EB EC ED EE EF */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf, +/* F0 F1 F2 F3 F4 F5 F6 F7 */ + 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +/* + * The u8_validate() validates on the given UTF-8 character string and + * calculate the byte length. It is quite similar to mblen(3C) except that + * this will validate against the list of characters if required and + * specific to UTF-8 and Unicode. + */ +int +u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum) +{ + uchar_t *ib; + uchar_t *ibtail; + uchar_t **p; + uchar_t *s1; + uchar_t *s2; + uchar_t f; + int sz; + size_t i; + int ret_val; + boolean_t second; + boolean_t no_need_to_validate_entire; + boolean_t check_additional; + boolean_t validate_ucs2_range_only; + + if (! u8str) + return (0); + + ib = (uchar_t *)u8str; + ibtail = ib + n; + + ret_val = 0; + + no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE); + check_additional = flag & U8_VALIDATE_CHECK_ADDITIONAL; + validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE; + + while (ib < ibtail) { + /* + * The first byte of a UTF-8 character tells how many + * bytes will follow for the character. If the first byte + * is an illegal byte value or out of range value, we just + * return -1 with an appropriate error number. 
+ */ + sz = u8_number_of_bytes[*ib]; + if (sz == U8_ILLEGAL_CHAR) { + *errnum = EILSEQ; + return (-1); + } + + if (sz == U8_OUT_OF_RANGE_CHAR || + (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) { + *errnum = ERANGE; + return (-1); + } + + /* + * If we don't have enough bytes to check on, that's also + * an error. As you can see, we give illegal byte sequence + * checking higher priority then EINVAL cases. + */ + if ((ibtail - ib) < sz) { + *errnum = EINVAL; + return (-1); + } + + if (sz == 1) { + ib++; + ret_val++; + } else { + /* + * Check on the multi-byte UTF-8 character. For more + * details on this, see comment added for the used + * data structures at the beginning of the file. + */ + f = *ib++; + ret_val++; + second = B_TRUE; + for (i = 1; i < sz; i++) { + if (second) { + if (*ib < u8_valid_min_2nd_byte[f] || + *ib > u8_valid_max_2nd_byte[f]) { + *errnum = EILSEQ; + return (-1); + } + second = B_FALSE; + } else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) { + *errnum = EILSEQ; + return (-1); + } + ib++; + ret_val++; + } + } + + if (check_additional) { + for (p = (uchar_t **)list, i = 0; p[i]; i++) { + s1 = ib - sz; + s2 = p[i]; + while (s1 < ib) { + if (*s1 != *s2 || *s2 == '\0') + break; + s1++; + s2++; + } + + if (s1 >= ib && *s2 == '\0') { + *errnum = EBADF; + return (-1); + } + } + } + + if (no_need_to_validate_entire) + break; + } + + return (ret_val); +} + +/* + * The do_case_conv() looks at the mapping tables and returns found + * bytes if any. If not found, the input bytes are returned. The function + * always terminate the return bytes with a null character assuming that + * there are plenty of room to do so. + * + * The case conversions are simple case conversions mapping a character to + * another character as specified in the Unicode data. The byte size of + * the mapped character could be different from that of the input character. + * + * The return value is the byte length of the returned character excluding + * the terminating null byte. 
+ */
+static size_t
+do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper)
+{
+	size_t i;
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+
+	/*
+	 * At this point, the only possible values for sz are 2, 3, and 4.
+	 * The u8s should point to a vector that is well beyond the size of
+	 * 5 bytes.
+	 *
+	 * The input bytes are right-aligned into b1..b4 so that the last
+	 * byte of the character always lands in b4; unused leading slots
+	 * keep their initial 0. The same bytes are copied to u8s up front,
+	 * so every early return below hands back the unconverted input.
+	 */
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+	} else if (sz == 3) {
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+	} else {
+		/* This is not possible but just in case as a fallback. */
+		if (is_it_toupper)
+			*u8s = U8_ASCII_TOUPPER(*s);
+		else
+			*u8s = U8_ASCII_TOLOWER(*s);
+		u8s[1] = '\0';
+
+		return (1);
+	}
+	u8s[sz] = '\0';
+
+	/*
+	 * Let's find out if we have a corresponding character. Each table
+	 * level is indexed by the result of the previous one; a "not
+	 * defined" element at any level means there is no case mapping.
+	 */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_case_common_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	if (is_it_toupper) {
+		b3_tbl = u8_toupper_b3_tbl[uv][b2][b3].tbl_id;
+		if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		start_id = u8_toupper_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_toupper_b4_tbl[uv][b3_tbl][b4 + 1];
+
+		/*
+		 * Either there is no match (an empty range) or an error at
+		 * the table (a range longer than U8_MB_CUR_MAX bytes).
+		 */
+		if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
+			return ((size_t)sz);
+
+		b3_base = u8_toupper_b3_tbl[uv][b2][b3].base;
+
+		for (i = 0; start_id < end_id; start_id++)
+			u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id];
+	} else {
+		b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id;
+		if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		start_id = u8_tolower_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_tolower_b4_tbl[uv][b3_tbl][b4 + 1];
+
+		if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
+			return ((size_t)sz);
+
+		b3_base = u8_tolower_b3_tbl[uv][b2][b3].base;
+
+		for (i = 0; start_id < end_id; start_id++)
+			u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id];
+	}
+
+	/*
+	 * If i is still zero, that means there is no corresponding character.
+	 */
+	if (i == 0)
+		return ((size_t)sz);
+
+	u8s[i] = '\0';
+
+	return (i);
+}
+
+/*
+ * The do_case_compare() function compares the two input strings, s1 and s2,
+ * one character at a time, doing case conversions if applicable, and returns
+ * the comparison result like strcmp().
+ *
+ * Since, empirically, most text data consists of 7-bit ASCII characters,
+ * we treat the 7-bit ASCII characters as a special case to yield
+ * faster processing time.
+ *
+ * An illegal start byte sets *errnum to EILSEQ and a truncated trailing
+ * character sets *errnum to EINVAL; in both cases the comparison still
+ * goes on using the bytes that are available.
+ */
+static int
+do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1,
+	size_t n2, boolean_t is_it_toupper, int *errnum)
+{
+	int f;
+	int sz1;
+	int sz2;
+	size_t j;
+	size_t i1;
+	size_t i2;
+	uchar_t u8s1[U8_MB_CUR_MAX + 1];
+	uchar_t u8s2[U8_MB_CUR_MAX + 1];
+
+	i1 = i2 = 0;
+	while (i1 < n1 && i2 < n2) {
+		/*
+		 * Find out what would be the byte length for this UTF-8
+		 * character at string s1 and also find out if this is
+		 * an illegal start byte or not and if so, issue a proper
+		 * error number and yet treat this byte as a character.
+		 */
+		sz1 = u8_number_of_bytes[*s1];
+		if (sz1 < 0) {
+			*errnum = EILSEQ;
+			sz1 = 1;
+		}
+
+		/*
+		 * For 7-bit ASCII characters mainly, we do a quick case
+		 * conversion right here.
+		 *
+		 * If we don't have enough bytes for this character, issue
+		 * an EINVAL error and use what is available.
+		 *
+		 * If we have enough bytes, find out if there is
+		 * a corresponding case-converted character and if so, copy
+		 * over the bytes for a comparison later. If there is no
+		 * corresponding character, then, use what we have
+		 * for the comparison.
+		 */
+		if (sz1 == 1) {
+			if (is_it_toupper)
+				u8s1[0] = U8_ASCII_TOUPPER(*s1);
+			else
+				u8s1[0] = U8_ASCII_TOLOWER(*s1);
+			s1++;
+			u8s1[1] = '\0';
+		} else if ((i1 + sz1) > n1) {
+			*errnum = EINVAL;
+			for (j = 0; (i1 + j) < n1; )
+				u8s1[j++] = *s1++;
+			u8s1[j] = '\0';
+		} else {
+			(void) do_case_conv(uv, u8s1, s1, sz1, is_it_toupper);
+			s1 += sz1;
+		}
+
+		/* Do the same for the string s2. */
+		sz2 = u8_number_of_bytes[*s2];
+		if (sz2 < 0) {
+			*errnum = EILSEQ;
+			sz2 = 1;
+		}
+
+		if (sz2 == 1) {
+			if (is_it_toupper)
+				u8s2[0] = U8_ASCII_TOUPPER(*s2);
+			else
+				u8s2[0] = U8_ASCII_TOLOWER(*s2);
+			s2++;
+			u8s2[1] = '\0';
+		} else if ((i2 + sz2) > n2) {
+			*errnum = EINVAL;
+			for (j = 0; (i2 + j) < n2; )
+				u8s2[j++] = *s2++;
+			u8s2[j] = '\0';
+		} else {
+			(void) do_case_conv(uv, u8s2, s2, sz2, is_it_toupper);
+			s2 += sz2;
+		}
+
+		/*
+		 * Now compare the two characters. Two single bytes are
+		 * compared directly; anything else goes through strcmp()
+		 * on the null-terminated converted bytes.
+		 */
+		if (sz1 == 1 && sz2 == 1) {
+			if (*u8s1 > *u8s2)
+				return (1);
+			if (*u8s1 < *u8s2)
+				return (-1);
+		} else {
+			f = strcmp((const char *)u8s1, (const char *)u8s2);
+			if (f != 0)
+				return (f);
+		}
+
+		/*
+		 * They were the same. Let's move on to the next
+		 * characters then.
+		 */
+		i1 += sz1;
+		i2 += sz2;
+	}
+
+	/*
+	 * We compared until the end of either or both strings.
+	 *
+	 * If we reached or went over the ends of both, that means
+	 * they are the same.
+	 *
+	 * If we reached only one of the two ends, that means the other string
+	 * has something left, and that fact is used to determine
+	 * the return value.
+	 */
+	if (i1 >= n1) {
+		if (i2 >= n2)
+			return (0);
+		return (-1);
+	}
+	return (1);
+}
+
+/*
+ * The combining_class() function checks the given bytes and finds
+ * the corresponding Unicode combining class value. The return value 0 means
+ * it is a Starter. Any illegal UTF-8 character will also be treated as
+ * a Starter.
+ *
+ * A size of 1 or a size bigger than 4 is answered with 0 right away,
+ * without consulting the tables.
+ */
+static uchar_t
+combining_class(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b4 = 0;
+
+	if (sz == 1 || sz > 4)
+		return (0);
+
+	if (sz == 2) {
+		b3 = s[0];
+		b4 = s[1];
+	} else if (sz == 3) {
+		b2 = s[0];
+		b3 = s[1];
+		b4 = s[2];
+	} else if (sz == 4) {
+		b1 = s[0];
+		b2 = s[1];
+		b3 = s[2];
+		b4 = s[3];
+	}
+
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	b2 = u8_combining_class_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	b3 = u8_combining_class_b3_tbl[uv][b2][b3];
+	if (b3 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	return (u8_combining_class_b4_tbl[uv][b3][b4]);
+}
+
+/*
+ * The do_decomp() function finds out a matching decomposition if any
+ * and return. If there is no match, the input bytes are copied and returned.
+ * The function also checks if there is a Hangul, decomposes it if necessary
+ * and returns.
+ *
+ * To save time, a single byte 7-bit ASCII character should be handled by
+ * the caller.
+ *
+ * The function returns the number of bytes returned sans always terminating
+ * the null byte. It will also return a state that will tell if there was
+ * a Hangul character decomposed which then will be used by the caller.
+ */
+static size_t
+do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
+	boolean_t canonical_decomposition, u8_normalization_states_t *state)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+	size_t i;
+	uint32_t u1;
+
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+		u8s[2] = '\0';
+	} else if (sz == 3) {
+		/* Convert it to a Unicode scalar value. */
+		U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]);
+
+		/*
+		 * If this is a Hangul syllable, we decompose it into
+		 * a leading consonant, a vowel, and an optional trailing
+		 * consonant and then return. Here b1, b2 and b3 hold
+		 * the L, V and T Jamo values, not table indices.
+		 */
+		if (U8_HANGUL_SYLLABLE(u1)) {
+			u1 -= U8_HANGUL_SYL_FIRST;
+
+			b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT;
+			b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT)
+			    / U8_HANGUL_T_COUNT;
+			b3 = u1 % U8_HANGUL_T_COUNT;
+
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1);
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2);
+			if (b3) {
+				b3 += U8_HANGUL_JAMO_T_FIRST;
+				U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3);
+
+				u8s[9] = '\0';
+				*state = U8_STATE_HANGUL_LVT;
+				return (9);
+			}
+
+			u8s[6] = '\0';
+			*state = U8_STATE_HANGUL_LV;
+			return (6);
+		}
+
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+		u8s[3] = '\0';
+
+		/*
+		 * If this is a Hangul Jamo, we know there is nothing
+		 * further that we can decompose. Only the state is
+		 * advanced, based on the state the caller passed in.
+		 */
+		if (U8_HANGUL_JAMO_L(u1)) {
+			*state = U8_STATE_HANGUL_L;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_V(u1)) {
+			if (*state == U8_STATE_HANGUL_L)
+				*state = U8_STATE_HANGUL_LV;
+			else
+				*state = U8_STATE_HANGUL_V;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_T(u1)) {
+			if (*state == U8_STATE_HANGUL_LV)
+				*state = U8_STATE_HANGUL_LVT;
+			else
+				*state = U8_STATE_HANGUL_T;
+			return (3);
+		}
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+		u8s[4] = '\0';
+	} else {
+		/*
+		 * This is a fallback and should not happen if the function
+		 * was called properly.
+		 */
+		u8s[0] = s[0];
+		u8s[1] = '\0';
+		*state = U8_STATE_START;
+		return (1);
+	}
+
+	/*
+	 * At this point, this routine does not know what it would get.
+	 * The caller should sort it out if the state isn't a Hangul one.
+	 */
+	*state = U8_STATE_START;
+
+	/* Try to find matching decomposition mapping byte sequence. */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_decomp_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id;
+	if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	/*
+	 * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR
+	 * which is 0x8000, this means we couldn't fit the mappings into
+	 * the cardinality of an unsigned byte.
+	 */
+	if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
+		b3_tbl -= U8_16BIT_TABLE_INDICATOR;
+		start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
+	} else {
+		start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1];
+	}
+
+	/* This also means there wasn't any matching decomposition. */
+	if (start_id >= end_id)
+		return ((size_t)sz);
+
+	/*
+	 * The final table for decomposition mappings has three types of
+	 * byte sequences depending on whether a mapping is for compatibility
+	 * decomposition, canonical decomposition, or both like the following:
+	 *
+	 * (1) Compatibility decomposition mappings:
+	 *
+	 *	+---+---+-...-+---+
+	 *	| B0| B1| ... | Bm|
+	 *	+---+---+-...-+---+
+	 *
+	 *	The first byte, B0, is always less than 0xF5 (U8_DECOMP_BOTH).
+	 *
+	 * (2) Canonical decomposition mappings:
+	 *
+	 *	+---+---+---+-...-+---+
+	 *	| T | b0| b1| ... | bn|
+	 *	+---+---+---+-...-+---+
+	 *
+	 *	where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL).
+	 *
+	 * (3) Both mappings:
+	 *
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *	| T | D | b0| b1| ... | bn| B0| B1| ... | Bm|
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *
+	 *	where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement
+	 *	byte, b0 to bn are canonical mapping bytes and B0 to Bm are
+	 *	compatibility mapping bytes.
+	 *
+	 * Note that compatibility decomposition means doing recursive
+	 * decompositions using both compatibility decomposition mappings and
+	 * canonical decomposition mappings. On the other hand, canonical
+	 * decomposition means doing recursive decompositions using only
+	 * canonical decomposition mappings. Since the table we have has gone
+	 * through the recursions already, we do not need to do so during
+	 * runtime, i.e., the table has been completely flattened out
+	 * already.
+	 */
+
+	b3_base = u8_decomp_b3_tbl[uv][b2][b3].base;
+
+	/* Get the type, T, of the byte sequence. */
+	b1 = u8_decomp_final_tbl[uv][b3_base + start_id];
+
+	/*
+	 * If necessary, adjust start_id, end_id, or both. Note that if
+	 * this is compatibility decomposition mapping, there is no
+	 * adjustment.
+	 */
+	if (canonical_decomposition) {
+		/* Is the mapping only for compatibility decomposition? */
+		if (b1 < U8_DECOMP_BOTH)
+			return ((size_t)sz);
+
+		start_id++;
+
+		if (b1 == U8_DECOMP_BOTH) {
+			end_id = start_id +
+			    u8_decomp_final_tbl[uv][b3_base + start_id];
+			start_id++;
+		}
+	} else {
+		/*
+		 * Unless this is a compatibility decomposition mapping,
+		 * we adjust the start_id.
+		 */
+		if (b1 == U8_DECOMP_BOTH) {
+			start_id++;
+			start_id += u8_decomp_final_tbl[uv][b3_base + start_id];
+		} else if (b1 == U8_DECOMP_CANONICAL) {
+			start_id++;
+		}
+	}
+
+	for (i = 0; start_id < end_id; start_id++)
+		u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id];
+	u8s[i] = '\0';
+
+	return (i);
+}
+
+/*
+ * The find_composition_start() function uses the given character bytes to
+ * find the matching composition mappings, if any, and returns the address
+ * of the composition mappings as explained in the do_composition().
+ * NULL is returned if there are no such mappings or if sz is not 1 to 4.
+ */
+static uchar_t *
+find_composition_start(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+
+	if (sz == 1) {
+		b4 = s[0];
+	} else if (sz == 2) {
+		b3 = s[0];
+		b4 = s[1];
+	} else if (sz == 3) {
+		b2 = s[0];
+		b3 = s[1];
+		b4 = s[2];
+	} else if (sz == 4) {
+		b1 = s[0];
+		b2 = s[1];
+		b3 = s[2];
+		b4 = s[3];
+	} else {
+		/*
+		 * This is a fallback and should not happen if the function
+		 * was called properly.
+		 */
+		return (NULL);
+	}
+
+	b1 = u8_composition_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	b2 = u8_composition_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	b3_tbl = u8_composition_b3_tbl[uv][b2][b3].tbl_id;
+	if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
+		b3_tbl -= U8_16BIT_TABLE_INDICATOR;
+		start_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4];
+		end_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
+	} else {
+		start_id = u8_composition_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_composition_b4_tbl[uv][b3_tbl][b4 + 1];
+	}
+
+	if (start_id >= end_id)
+		return (NULL);
+
+	b3_base = u8_composition_b3_tbl[uv][b2][b3].base;
+
+	return ((uchar_t *)&(u8_composition_final_tbl[uv][b3_base + start_id]));
+}
+
+/*
+ * The blocked() function checks the combining class values of the previous
+ * characters in this sequence and returns whether the character at index
+ * 'last' is blocked or not.
+ *
+ * Indices 1 to last - 1 are examined: any of them having a combining class
+ * bigger than or equal to that of comb_class[last], or being a Starter,
+ * blocks the character.
+ */
+static boolean_t
+blocked(uchar_t *comb_class, size_t last)
+{
+	uchar_t my_comb_class;
+	size_t i;
+
+	my_comb_class = comb_class[last];
+	for (i = 1; i < last; i++)
+		if (comb_class[i] >= my_comb_class ||
+		    comb_class[i] == U8_COMBINING_CLASS_STARTER)
+			return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * The do_composition() reads the character string pointed by 's' and
+ * do necessary canonical composition and then copy over the result back to
+ * the 's'.
+ *
+ * The input argument 's' cannot contain more than 32 characters.
+ */
+static size_t
+do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
+	uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
+{
+	uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t tc[U8_MB_CUR_MAX];
+	uint8_t saved_marks[U8_MAX_CHARS_A_SEQ];
+	size_t saved_marks_count;
+	uchar_t *p;
+	uchar_t *saved_p;
+	uchar_t *q;
+	size_t i;
+	size_t saved_i;
+	size_t j;
+	size_t k;
+	size_t l;
+	size_t C;
+	size_t saved_l;
+	size_t size;
+	uint32_t u1;
+	uint32_t u2;
+	boolean_t match_not_found = B_TRUE;
+
+	/*
+	 * This should never happen unless the callers are doing some strange
+	 * and unexpected things.
+	 *
+	 * The "last" is the index pointing to the last character not last + 1.
+	 */
+	if (last >= U8_MAX_CHARS_A_SEQ)
+		last = U8_UPPER_LIMIT_IN_A_SEQ;
+
+	for (i = l = 0; i <= last; i++) {
+		/*
+		 * The last or any non-Starters at the beginning, we don't
+		 * have any chance to do composition and so we just copy them
+		 * to the temporary buffer.
+		 */
+		if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) {
+SAVE_THE_CHAR:
+			p = s + start[i];
+			size = disp[i];
+			for (k = 0; k < size; k++)
+				t[l++] = *p++;
+			continue;
+		}
+
+		/*
+		 * If this could be a start of Hangul Jamos, then, we try to
+		 * conjoin them.
+		 */
+		if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) {
+			U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]],
+			    s[start[i] + 1], s[start[i] + 2]);
+			U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3],
+			    s[start[i] + 4], s[start[i] + 5]);
+
+			if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) {
+				u1 -= U8_HANGUL_JAMO_L_FIRST;
+				u2 -= U8_HANGUL_JAMO_V_FIRST;
+				u1 = U8_HANGUL_SYL_FIRST +
+				    (u1 * U8_HANGUL_V_COUNT + u2) *
+				    U8_HANGUL_T_COUNT;
+
+				i += 2;
+				if (i <= last) {
+					U8_PUT_3BYTES_INTO_UTF32(u2,
+					    s[start[i]], s[start[i] + 1],
+					    s[start[i] + 2]);
+
+					if (U8_HANGUL_JAMO_T(u2)) {
+						u1 += u2 -
+						    U8_HANGUL_JAMO_T_FIRST;
+						i++;
+					}
+				}
+
+				U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1);
+				i--;
+				l += 3;
+				continue;
+			}
+		}
+
+		/*
+		 * Let's then find out if this Starter has composition
+		 * mapping.
+		 */
+		p = find_composition_start(uv, s + start[i], disp[i]);
+		if (p == NULL)
+			goto SAVE_THE_CHAR;
+
+		/*
+		 * We have a Starter with composition mapping and the next
+		 * character is a non-Starter. Let's try to find out if
+		 * we can do composition.
+		 */
+
+		saved_p = p;
+		saved_i = i;
+		saved_l = l;
+		saved_marks_count = 0;
+
+TRY_THE_NEXT_MARK:
+		q = s + start[++i];
+		size = disp[i];
+
+		/*
+		 * The next for() loop compares the non-Starter pointed by
+		 * 'q' with the possible (joinable) characters pointed by 'p'.
+		 *
+		 * The composition final table entry pointed by the 'p'
+		 * looks like the following:
+		 *
+		 * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+		 * | C | b0| b1| ... | bn| F | B0| B1| ... | Bm| F |
+		 * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+		 *
+		 * where C is the count byte indicating the number of
+		 * mapping pairs where each pair would look like
+		 * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second
+		 * character of a canonical decomposition and the B0-Bm are
+		 * the bytes of a matching composite character. The F is
+		 * a filler byte after each character as the separator.
+		 */
+
+		match_not_found = B_TRUE;
+
+		for (C = *p++; C > 0; C--) {
+			for (k = 0; k < size; p++, k++)
+				if (*p != q[k])
+					break;
+
+			/* Have we found it? */
+			if (k >= size && *p == U8_TBL_ELEMENT_FILLER) {
+				match_not_found = B_FALSE;
+
+				l = saved_l;
+
+				while (*++p != U8_TBL_ELEMENT_FILLER)
+					t[l++] = *p;
+
+				break;
+			}
+
+			/* We didn't find; skip to the next pair. */
+			if (*p != U8_TBL_ELEMENT_FILLER)
+				while (*++p != U8_TBL_ELEMENT_FILLER)
+					;
+			while (*++p != U8_TBL_ELEMENT_FILLER)
+				;
+			p++;
+		}
+
+		/*
+		 * If there was no match, we will need to save the combining
+		 * mark for later appending. After that, if the next one
+		 * is a non-Starter and not blocked, then, we try once
+		 * again to do composition with the next non-Starter.
+		 *
+		 * If there was no match and this was a Starter, then,
+		 * this is a new start.
+		 *
+		 * If there was a match and a composition done and we have
+		 * more to check on, then, we retrieve a new composition final
+		 * table entry for the composite and then try to do the
+		 * composition again.
+		 */
+
+		if (match_not_found) {
+			if (comb_class[i] == U8_COMBINING_CLASS_STARTER) {
+				i--;
+				goto SAVE_THE_CHAR;
+			}
+
+			saved_marks[saved_marks_count++] = i;
+		}
+
+		if (saved_l == l) {
+			while (i < last) {
+				if (blocked(comb_class, i + 1))
+					saved_marks[saved_marks_count++] = ++i;
+				else
+					break;
+			}
+			if (i < last) {
+				p = saved_p;
+				goto TRY_THE_NEXT_MARK;
+			}
+		} else if (i < last) {
+			p = find_composition_start(uv, t + saved_l,
+			    l - saved_l);
+			if (p != NULL) {
+				saved_p = p;
+				goto TRY_THE_NEXT_MARK;
+			}
+		}
+
+		/*
+		 * There is no more composition possible.
+		 *
+		 * If there was no composition whatsoever then we copy
+		 * over the original Starter and then append any non-Starters
+		 * remaining at the target string sequentially after that.
+		 */
+
+		if (saved_l == l) {
+			p = s + start[saved_i];
+			size = disp[saved_i];
+			for (j = 0; j < size; j++)
+				t[l++] = *p++;
+		}
+
+		for (k = 0; k < saved_marks_count; k++) {
+			p = s + start[saved_marks[k]];
+			size = disp[saved_marks[k]];
+			for (j = 0; j < size; j++)
+				t[l++] = *p++;
+		}
+	}
+
+	/*
+	 * If the last character is a Starter and if we have a character
+	 * (possibly another Starter) that can be turned into a composite,
+	 * we do so and we do so until there is no more of composition
+	 * possible. The candidate characters are read from *os up to oslast
+	 * and *os is moved past whatever got consumed.
+	 */
+	if (comb_class[last] == U8_COMBINING_CLASS_STARTER) {
+		p = *os;
+		saved_l = l - disp[last];
+
+		while (p < oslast) {
+			size = u8_number_of_bytes[*p];
+			if (size <= 1 || (p + size) > oslast)
+				break;
+
+			saved_p = p;
+
+			for (i = 0; i < size; i++)
+				tc[i] = *p++;
+
+			q = find_composition_start(uv, t + saved_l,
+			    l - saved_l);
+			if (q == NULL) {
+				p = saved_p;
+				break;
+			}
+
+			match_not_found = B_TRUE;
+
+			for (C = *q++; C > 0; C--) {
+				for (k = 0; k < size; q++, k++)
+					if (*q != tc[k])
+						break;
+
+				if (k >= size && *q == U8_TBL_ELEMENT_FILLER) {
+					match_not_found = B_FALSE;
+
+					l = saved_l;
+
+					while (*++q != U8_TBL_ELEMENT_FILLER) {
+						/*
+						 * This is practically
+						 * impossible but we don't
+						 * want to take any chances.
+						 */
+						if (l >=
+						    U8_STREAM_SAFE_TEXT_MAX) {
+							p = saved_p;
+							goto SAFE_RETURN;
+						}
+						t[l++] = *q;
+					}
+
+					break;
+				}
+
+				if (*q != U8_TBL_ELEMENT_FILLER)
+					while (*++q != U8_TBL_ELEMENT_FILLER)
+						;
+				while (*++q != U8_TBL_ELEMENT_FILLER)
+					;
+				q++;
+			}
+
+			if (match_not_found) {
+				p = saved_p;
+				break;
+			}
+		}
+SAFE_RETURN:
+		*os = p;
+	}
+
+	/*
+	 * Now we copy over the temporary string to the target string.
+	 * Since composition always reduces the number of characters or
+	 * the number of characters stay, we don't need to worry about
+	 * the buffer overflow here.
+	 */
+	for (i = 0; i < l; i++)
+		s[i] = t[i];
+	s[l] = '\0';
+
+	return (l);
+}
+
+/*
+ * The collect_a_seq() function checks the given string s, collects
+ * a sequence of characters at u8s, and returns the sequence. While it
+ * collects a sequence, it also applies case conversion, canonical or
+ * compatibility decomposition, canonical composition, or some or all of
+ * them and in that order.
+ *
+ * The collected sequence cannot be bigger than 32 characters since if
+ * it is having more than 31 characters, the sequence will be terminated
+ * with a U+034F COMBINING GRAPHEME JOINER (CGJ) character and turned into
+ * a Stream-Safe Text. The collected sequence is always terminated with
+ * a null byte and the return value is the byte length of the sequence
+ * including 0. The return value does not include the terminating
+ * null byte.
+ *
+ * On return, *source points at the first byte that has not been consumed.
+ */
+static size_t
+collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
+	boolean_t is_it_toupper,
+	boolean_t is_it_tolower,
+	boolean_t canonical_decomposition,
+	boolean_t compatibility_decomposition,
+	boolean_t canonical_composition,
+	int *errnum, u8_normalization_states_t *state)
+{
+	uchar_t *s;
+	int sz;
+	int saved_sz;
+	size_t i;
+	size_t j;
+	size_t k;
+	size_t l;
+	uchar_t comb_class[U8_MAX_CHARS_A_SEQ];
+	uchar_t disp[U8_MAX_CHARS_A_SEQ];
+	uchar_t start[U8_MAX_CHARS_A_SEQ];
+	uchar_t u8t[U8_MB_CUR_MAX];
+	uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t tc;
+	size_t last;
+	size_t saved_last;
+	uint32_t u1;
+
+	/*
+	 * Save the source string pointer which we will return a changed
+	 * pointer if we do processing.
+	 */
+	s = *source;
+
+	/*
+	 * The following is a fallback for just in case callers are not
+	 * checking the string boundaries before the calling.
+	 */
+	if (s >= slast) {
+		u8s[0] = '\0';
+
+		return (0);
+	}
+
+	/*
+	 * As the first thing, let's collect a character and do case
+	 * conversion if necessary.
+	 */
+
+	sz = u8_number_of_bytes[*s];
+
+	if (sz < 0) {
+		*errnum = EILSEQ;
+
+		u8s[0] = *s++;
+		u8s[1] = '\0';
+
+		*source = s;
+
+		return (1);
+	}
+
+	if (sz == 1) {
+		if (is_it_toupper)
+			u8s[0] = U8_ASCII_TOUPPER(*s);
+		else if (is_it_tolower)
+			u8s[0] = U8_ASCII_TOLOWER(*s);
+		else
+			u8s[0] = *s;
+		s++;
+		u8s[1] = '\0';
+	} else if ((s + sz) > slast) {
+		*errnum = EINVAL;
+
+		for (i = 0; s < slast; )
+			u8s[i++] = *s++;
+		u8s[i] = '\0';
+
+		*source = s;
+
+		return (i);
+	} else {
+		if (is_it_toupper || is_it_tolower) {
+			i = do_case_conv(uv, u8s, s, sz, is_it_toupper);
+			s += sz;
+			sz = i;
+		} else {
+			for (i = 0; i < sz; )
+				u8s[i++] = *s++;
+			u8s[i] = '\0';
+		}
+	}
+
+	/*
+	 * And then canonical/compatibility decomposition followed by
+	 * an optional canonical composition. Please note that
+	 * canonical composition is done only when a decomposition is
+	 * done.
+	 */
+	if (canonical_decomposition || compatibility_decomposition) {
+		if (sz == 1) {
+			*state = U8_STATE_START;
+
+			saved_sz = 1;
+
+			comb_class[0] = 0;
+			start[0] = 0;
+			disp[0] = 1;
+
+			last = 1;
+		} else {
+			saved_sz = do_decomp(uv, u8s, u8s, sz,
+			    canonical_decomposition, state);
+
+			last = 0;
+
+			for (i = 0; i < saved_sz; ) {
+				sz = u8_number_of_bytes[u8s[i]];
+
+				comb_class[last] = combining_class(uv,
+				    u8s + i, sz);
+				start[last] = i;
+				disp[last] = sz;
+
+				last++;
+				i += sz;
+			}
+
+			/*
+			 * Decomposition yields various Hangul related
+			 * states but not on combining marks. We need to
+			 * find out at here by checking on the last
+			 * character.
+			 */
+			if (*state == U8_STATE_START) {
+				if (comb_class[last - 1])
+					*state = U8_STATE_COMBINING_MARK;
+			}
+		}
+
+		saved_last = last;
+
+		while (s < slast) {
+			sz = u8_number_of_bytes[*s];
+
+			/*
+			 * If this is an illegal character, an incomplete
+			 * character, or a 7-bit ASCII Starter character,
+			 * then we have collected a sequence; break and let
+			 * the next call deal with the two cases.
+			 *
+			 * Note that this is okay only if you are using this
+			 * function with a fixed length string, not on
+			 * a buffer with multiple calls of one chunk at a time.
+			 */
+			if (sz <= 1) {
+				break;
+			} else if ((s + sz) > slast) {
+				break;
+			} else {
+				/*
+				 * If the previous character was a Hangul Jamo
+				 * and this character is a Hangul Jamo that
+				 * can be conjoined, we collect the Jamo.
+				 */
+				if (*s == U8_HANGUL_JAMO_1ST_BYTE) {
+					U8_PUT_3BYTES_INTO_UTF32(u1,
+					    *s, *(s + 1), *(s + 2));
+
+					if (U8_HANGUL_COMPOSABLE_L_V(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LV;
+						goto COLLECT_A_HANGUL;
+					}
+
+					if (U8_HANGUL_COMPOSABLE_LV_T(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LVT;
+						goto COLLECT_A_HANGUL;
+					}
+				}
+
+				/*
+				 * Regardless of whatever it was, if this is
+				 * a Starter, we don't collect the character
+				 * since that's a new start and we will deal
+				 * with it at the next time.
+				 */
+				i = combining_class(uv, s, sz);
+				if (i == U8_COMBINING_CLASS_STARTER)
+					break;
+
+				/*
+				 * We know the current character is a combining
+				 * mark. If the previous character wasn't
+				 * a Starter (not Hangul) or a combining mark,
+				 * then, we don't collect this combining mark.
+				 */
+				if (*state != U8_STATE_START &&
+				    *state != U8_STATE_COMBINING_MARK)
+					break;
+
+				*state = U8_STATE_COMBINING_MARK;
+COLLECT_A_HANGUL:
+				/*
+				 * If we collected a Starter and combining
+				 * marks up to 30, i.e., total 31 characters,
+				 * then, we terminate this degenerately long
+				 * combining sequence with a U+034F COMBINING
+				 * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in
+				 * UTF-8 and turn this into a Stream-Safe
+				 * Text. This will be extremely rare but
+				 * possible.
+				 *
+				 * The following will also guarantee that
+				 * we are not writing more than 32 characters
+				 * plus a NULL at u8s[].
+				 */
+				if (last >= U8_UPPER_LIMIT_IN_A_SEQ) {
+TURN_STREAM_SAFE:
+					*state = U8_STATE_START;
+					comb_class[last] = 0;
+					start[last] = saved_sz;
+					disp[last] = 2;
+					last++;
+
+					u8s[saved_sz++] = 0xCD;
+					u8s[saved_sz++] = 0x8F;
+
+					break;
+				}
+
+				/*
+				 * Some combining marks also do decompose into
+				 * another combining mark or marks.
+				 */
+				if (*state == U8_STATE_COMBINING_MARK) {
+					k = last;
+					l = sz;
+					i = do_decomp(uv, uts, s, sz,
+					    canonical_decomposition, state);
+					for (j = 0; j < i; ) {
+						sz = u8_number_of_bytes[uts[j]];
+
+						comb_class[last] =
+						    combining_class(uv,
+						    uts + j, sz);
+						start[last] = saved_sz + j;
+						disp[last] = sz;
+
+						last++;
+						if (last >=
+						    U8_UPPER_LIMIT_IN_A_SEQ) {
+							last = k;
+							goto TURN_STREAM_SAFE;
+						}
+						j += sz;
+					}
+
+					*state = U8_STATE_COMBINING_MARK;
+					sz = i;
+					s += l;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = uts[i];
+				} else {
+					comb_class[last] = i;
+					start[last] = saved_sz;
+					disp[last] = sz;
+					last++;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = *s++;
+				}
+
+				/*
+				 * If this is U+0345 COMBINING GREEK
+				 * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a.,
+				 * iota subscript, and need to be converted to
+				 * uppercase letter, convert it to U+0399 GREEK
+				 * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8),
+				 * i.e., convert to capital adscript form as
+				 * specified in the Unicode standard.
+				 *
+				 * This is the only special case of (ambiguous)
+				 * case conversion at combining marks and
+				 * probably the standard will never have
+				 * anything similar like this in future.
+				 */
+				if (is_it_toupper && sz >= 2 &&
+				    u8s[saved_sz - 2] == 0xCD &&
+				    u8s[saved_sz - 1] == 0x85) {
+					u8s[saved_sz - 2] = 0xCE;
+					u8s[saved_sz - 1] = 0x99;
+				}
+			}
+		}
+
+		/*
+		 * Let's try to ensure a canonical ordering for the collected
+		 * combining marks. We do this only if we have collected
+		 * at least one more non-Starter. (The decomposition mapping
+		 * data tables have fully (and recursively) expanded and
+		 * canonically ordered decompositions.)
+		 *
+		 * The U8_SWAP_COMB_MARKS() convenience macro has some
+		 * assumptions and we are meeting the assumptions.
+		 */
+		last--;
+		if (last >= saved_last) {
+			for (i = 0; i < last; i++)
+				for (j = last; j > i; j--)
+					if (comb_class[j] &&
+					    comb_class[j - 1] > comb_class[j]) {
+						U8_SWAP_COMB_MARKS(j - 1, j);
+					}
+		}
+
+		*source = s;
+
+		if (! canonical_composition) {
+			u8s[saved_sz] = '\0';
+			return (saved_sz);
+		}
+
+		/*
+		 * Now do the canonical composition. Note that we do this
+		 * only after a canonical or compatibility decomposition to
+		 * finish up NFC or NFKC.
+		 */
+		sz = do_composition(uv, u8s, comb_class, start, disp, last,
+		    &s, slast);
+	}
+
+	*source = s;
+
+	return ((size_t)sz);
+}
+
+/*
+ * The do_norm_compare() function does string comparion based on Unicode
+ * simple case mappings and Unicode Normalization definitions.
+ *
+ * It does so by collecting a sequence of character at a time and comparing
+ * the collected sequences from the strings.
+ *
+ * The meanings on the return values are the same as the usual strcmp().
+ */ +static int +do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2, + int flag, int *errnum) +{ + int result; + size_t sz1; + size_t sz2; + uchar_t u8s1[U8_STREAM_SAFE_TEXT_MAX + 1]; + uchar_t u8s2[U8_STREAM_SAFE_TEXT_MAX + 1]; + uchar_t *s1last; + uchar_t *s2last; + boolean_t is_it_toupper; + boolean_t is_it_tolower; + boolean_t canonical_decomposition; + boolean_t compatibility_decomposition; + boolean_t canonical_composition; + u8_normalization_states_t state; + + s1last = s1 + n1; + s2last = s2 + n2; + + is_it_toupper = flag & U8_TEXTPREP_TOUPPER; + is_it_tolower = flag & U8_TEXTPREP_TOLOWER; + canonical_decomposition = flag & U8_CANON_DECOMP; + compatibility_decomposition = flag & U8_COMPAT_DECOMP; + canonical_composition = flag & U8_CANON_COMP; + + while (s1 < s1last && s2 < s2last) { + /* + * If the current character is a 7-bit ASCII and the last + * character, or, if the current character and the next + * character are both some 7-bit ASCII characters then + * we treat the current character as a sequence. + * + * In any other cases, we need to call collect_a_seq(). 
+ */ + + if (U8_ISASCII(*s1) && ((s1 + 1) >= s1last || + ((s1 + 1) < s1last && U8_ISASCII(*(s1 + 1))))) { + if (is_it_toupper) + u8s1[0] = U8_ASCII_TOUPPER(*s1); + else if (is_it_tolower) + u8s1[0] = U8_ASCII_TOLOWER(*s1); + else + u8s1[0] = *s1; + u8s1[1] = '\0'; + sz1 = 1; + s1++; + } else { + state = U8_STATE_START; + sz1 = collect_a_seq(uv, u8s1, &s1, s1last, + is_it_toupper, is_it_tolower, + canonical_decomposition, + compatibility_decomposition, + canonical_composition, errnum, &state); + } + + if (U8_ISASCII(*s2) && ((s2 + 1) >= s2last || + ((s2 + 1) < s2last && U8_ISASCII(*(s2 + 1))))) { + if (is_it_toupper) + u8s2[0] = U8_ASCII_TOUPPER(*s2); + else if (is_it_tolower) + u8s2[0] = U8_ASCII_TOLOWER(*s2); + else + u8s2[0] = *s2; + u8s2[1] = '\0'; + sz2 = 1; + s2++; + } else { + state = U8_STATE_START; + sz2 = collect_a_seq(uv, u8s2, &s2, s2last, + is_it_toupper, is_it_tolower, + canonical_decomposition, + compatibility_decomposition, + canonical_composition, errnum, &state); + } + + /* + * Now compare the two characters. If they are the same, + * we move on to the next character sequences. + */ + if (sz1 == 1 && sz2 == 1) { + if (*u8s1 > *u8s2) + return (1); + if (*u8s1 < *u8s2) + return (-1); + } else { + result = strcmp((const char *)u8s1, (const char *)u8s2); + if (result != 0) + return (result); + } + } + + /* + * We compared until the end of either or both strings. + * + * If we reached to or went over the ends for the both, that means + * they are the same. + * + * If we reached only one end, that means the other string has + * something which then can be used to determine the return value. + */ + if (s1 >= s1last) { + if (s2 >= s2last) + return (0); + return (-1); + } + return (1); +} + +/* + * The u8_strcmp() function compares two UTF-8 strings quite similar to + * the strcmp(). 
For the comparison, however, Unicode Normalization specific + * equivalency and Unicode simple case conversion mappings based equivalency + * can be requested and checked against. + */ +int +u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, + int *errnum) +{ + int f; + size_t n1; + size_t n2; + + *errnum = 0; + + /* + * Check on the requested Unicode version, case conversion, and + * normalization flag values. + */ + + if (uv > U8_UNICODE_LATEST) { + *errnum = ERANGE; + uv = U8_UNICODE_LATEST; + } + + if (flag == 0) { + flag = U8_STRCMP_CS; + } else { + f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER | + U8_STRCMP_CI_LOWER); + if (f == 0) { + flag |= U8_STRCMP_CS; + } else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && + f != U8_STRCMP_CI_LOWER) { + *errnum = EBADF; + flag = U8_STRCMP_CS; + } + + f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); + if (f && f != U8_STRCMP_NFD && f != U8_STRCMP_NFC && + f != U8_STRCMP_NFKD && f != U8_STRCMP_NFKC) { + *errnum = EBADF; + flag = U8_STRCMP_CS; + } + } + + if (flag == U8_STRCMP_CS) { + return (n == 0 ? strcmp(s1, s2) : strncmp(s1, s2, n)); + } + + n1 = strlen(s1); + n2 = strlen(s2); + if (n != 0) { + if (n < n1) + n1 = n; + if (n < n2) + n2 = n; + } + + /* + * Simple case conversion can be done much faster and so we do + * them separately here. 
+ */ + if (flag == U8_STRCMP_CI_UPPER) { + return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, + n1, n2, B_TRUE, errnum)); + } else if (flag == U8_STRCMP_CI_LOWER) { + return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, + n1, n2, B_FALSE, errnum)); + } + + return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, + flag, errnum)); +} + +size_t +u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, + int flag, size_t unicode_version, int *errnum) +{ + int f; + int sz; + uchar_t *ib; + uchar_t *ibtail; + uchar_t *ob; + uchar_t *obtail; + boolean_t do_not_ignore_null; + boolean_t do_not_ignore_invalid; + boolean_t is_it_toupper; + boolean_t is_it_tolower; + boolean_t canonical_decomposition; + boolean_t compatibility_decomposition; + boolean_t canonical_composition; + size_t ret_val; + size_t i; + size_t j; + uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1]; + u8_normalization_states_t state; + + if (unicode_version > U8_UNICODE_LATEST) { + *errnum = ERANGE; + return ((size_t)-1); + } + + f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER); + if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) { + *errnum = EBADF; + return ((size_t)-1); + } + + f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); + if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC && + f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) { + *errnum = EBADF; + return ((size_t)-1); + } + + if (inarray == NULL || *inlen == 0) + return (0); + + if (outarray == NULL) { + *errnum = E2BIG; + return ((size_t)-1); + } + + ib = (uchar_t *)inarray; + ob = (uchar_t *)outarray; + ibtail = ib + *inlen; + obtail = ob + *outlen; + + do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL); + do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID); + is_it_toupper = flag & U8_TEXTPREP_TOUPPER; + is_it_tolower = flag & U8_TEXTPREP_TOLOWER; + + ret_val = 0; + + /* + * If we don't have a normalization flag set, we do the simple case + * conversion based text 
preparation separately below. Text + * preparation involving Normalization will be done in the false task + * block, again, separately since it will take much more time and + * resource than doing simple case conversions. + */ + if (f == 0) { + while (ib < ibtail) { + if (*ib == '\0' && do_not_ignore_null) + break; + + sz = u8_number_of_bytes[*ib]; + + if (sz < 0) { + if (do_not_ignore_invalid) { + *errnum = EILSEQ; + ret_val = (size_t)-1; + break; + } + + sz = 1; + ret_val++; + } + + if (sz == 1) { + if (ob >= obtail) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + if (is_it_toupper) + *ob = U8_ASCII_TOUPPER(*ib); + else if (is_it_tolower) + *ob = U8_ASCII_TOLOWER(*ib); + else + *ob = *ib; + ib++; + ob++; + } else if ((ib + sz) > ibtail) { + if (do_not_ignore_invalid) { + *errnum = EINVAL; + ret_val = (size_t)-1; + break; + } + + if ((obtail - ob) < (ibtail - ib)) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + /* + * We treat the remaining incomplete character + * bytes as a character. 
+ */ + ret_val++; + + while (ib < ibtail) + *ob++ = *ib++; + } else { + if (is_it_toupper || is_it_tolower) { + i = do_case_conv(unicode_version, u8s, + ib, sz, is_it_toupper); + + if ((obtail - ob) < i) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + ib += sz; + + for (sz = 0; sz < i; sz++) + *ob++ = u8s[sz]; + } else { + if ((obtail - ob) < sz) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + for (i = 0; i < sz; i++) + *ob++ = *ib++; + } + } + } + } else { + canonical_decomposition = flag & U8_CANON_DECOMP; + compatibility_decomposition = flag & U8_COMPAT_DECOMP; + canonical_composition = flag & U8_CANON_COMP; + + while (ib < ibtail) { + if (*ib == '\0' && do_not_ignore_null) + break; + + /* + * If the current character is a 7-bit ASCII + * character and it is the last character, or, + * if the current character is a 7-bit ASCII + * character and the next character is also a 7-bit + * ASCII character, then, we copy over this + * character without going through collect_a_seq(). + * + * In any other cases, we need to look further with + * the collect_a_seq() function. 
+ */ + if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail || + ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) { + if (ob >= obtail) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + if (is_it_toupper) + *ob = U8_ASCII_TOUPPER(*ib); + else if (is_it_tolower) + *ob = U8_ASCII_TOLOWER(*ib); + else + *ob = *ib; + ib++; + ob++; + } else { + *errnum = 0; + state = U8_STATE_START; + + j = collect_a_seq(unicode_version, u8s, + &ib, ibtail, + is_it_toupper, + is_it_tolower, + canonical_decomposition, + compatibility_decomposition, + canonical_composition, + errnum, &state); + + if (*errnum && do_not_ignore_invalid) { + ret_val = (size_t)-1; + break; + } + + if ((obtail - ob) < j) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + for (i = 0; i < j; i++) + *ob++ = u8s[i]; + } + } + } + + *inlen = ibtail - ib; + *outlen = obtail - ob; + + return (ret_val); +} diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c new file mode 100644 index 000000000000..ed9b67ea3bc9 --- /dev/null +++ b/common/zfs/zfs_comutil.c @@ -0,0 +1,202 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +/* + * This file is intended for functions that ought to be common between user + * land (libzfs) and the kernel. When many common routines need to be shared + * then a separate file should be created. + */ + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <string.h> +#endif + +#include <sys/types.h> +#include <sys/fs/zfs.h> +#include <sys/int_limits.h> +#include <sys/nvpair.h> +#include "zfs_comutil.h" + +/* + * Are there allocatable vdevs? + * + * Returns B_TRUE as soon as one entry of nv's ZPOOL_CONFIG_CHILDREN array + * has no non-zero ZPOOL_CONFIG_IS_LOG value; is_log is preset to 0 before + * each lookup, so a child without that entry presumably counts as + * allocatable. Returns B_FALSE when every child is a log device or when + * the children array cannot be looked up. + */ +boolean_t +zfs_allocatable_devs(nvlist_t *nv) +{ + uint64_t is_log; + uint_t c; + nvlist_t **child; + uint_t children; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + return (B_FALSE); + } + for (c = 0; c < children; c++) { + is_log = 0; + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (!is_log) + return (B_TRUE); + } + return (B_FALSE); +} +/* + * Fill in *zrpp with the rewind policy described by nvl. The defaults are + * applied first and each recognized pair in nvl then overrides its field. + * A ZPOOL_REWIND_POLICY pair ends the scan: if it holds an nvlist, that + * nvlist is processed by a recursive call, which starts again from the + * defaults. A request with bits outside ZPOOL_REWIND_POLICIES, or a + * request of 0, is replaced by ZPOOL_NO_REWIND. + */ +void +zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp) +{ + nvlist_t *policy; + nvpair_t *elem; + char *nm; + + /* Defaults, kept for every setting that nvl does not supply */ + zrpp->zrp_request = ZPOOL_NO_REWIND; + zrpp->zrp_maxmeta = 0; + zrpp->zrp_maxdata = UINT64_MAX; + zrpp->zrp_txg = UINT64_MAX; + + if (nvl == NULL) + return; + + elem = NULL; + while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + nm = nvpair_name(elem); + if (strcmp(nm, ZPOOL_REWIND_POLICY) == 0) { + if (nvpair_value_nvlist(elem, &policy) == 0) + zpool_get_rewind_policy(policy, zrpp); + return; + } else if (strcmp(nm, ZPOOL_REWIND_REQUEST) == 0) { + if (nvpair_value_uint32(elem, &zrpp->zrp_request) == 0) + if (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES) + zrpp->zrp_request = ZPOOL_NO_REWIND; + } else if (strcmp(nm, ZPOOL_REWIND_REQUEST_TXG) == 0) { + (void) nvpair_value_uint64(elem, &zrpp->zrp_txg); + } else if (strcmp(nm, ZPOOL_REWIND_META_THRESH) == 0) { + (void) nvpair_value_uint64(elem, &zrpp->zrp_maxmeta); + } else if (strcmp(nm, ZPOOL_REWIND_DATA_THRESH) == 0) { + (void) nvpair_value_uint64(elem, &zrpp->zrp_maxdata); + } + } +
if (zrpp->zrp_request == 0) + zrpp->zrp_request = ZPOOL_NO_REWIND; +} + +typedef struct zfs_version_spa_map { + int version_zpl; + int version_spa; +} zfs_version_spa_map_t; + +/* + * Keep this table in monotonically increasing version number order. + */ +static zfs_version_spa_map_t zfs_version_table[] = { + {ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL}, + {ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL}, + {ZPL_VERSION_FUID, SPA_VERSION_FUID}, + {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE}, + {ZPL_VERSION_SA, SPA_VERSION_SA}, + {0, 0} +}; + +/* + * Return the max zpl version for a corresponding spa version + * -1 is returned if no mapping exists. + */ +int +zfs_zpl_version_map(int spa_version) +{ + int i; + int version = -1; + + for (i = 0; zfs_version_table[i].version_spa; i++) { + if (spa_version >= zfs_version_table[i].version_spa) + version = zfs_version_table[i].version_zpl; + } + + return (version); +} + +/* + * Return the min spa version for a corresponding zpl version + * -1 is returned if no mapping exists.
+ */ +int +zfs_spa_version_map(int zpl_version) +{ + int i; + int version = -1; + + for (i = 0; zfs_version_table[i].version_zpl; i++) { + if (zfs_version_table[i].version_zpl >= zpl_version) + return (zfs_version_table[i].version_spa); + } + + return (version); +} + +const char *zfs_history_event_names[LOG_END] = { + "invalid event", + "pool create", + "vdev add", + "pool remove", + "pool destroy", + "pool export", + "pool import", + "vdev attach", + "vdev replace", + "vdev detach", + "vdev online", + "vdev offline", + "vdev upgrade", + "pool clear", + "pool scrub", + "pool property set", + "create", + "clone", + "destroy", + "destroy_begin_sync", + "inherit", + "property set", + "quota set", + "permission update", + "permission remove", + "permission who remove", + "promote", + "receive", + "rename", + "reservation set", + "replay_inc_sync", + "replay_full_sync", + "rollback", + "snapshot", + "filesystem version upgrade", + "refquota set", + "refreservation set", + "pool scrub done", + "user hold", + "user release", + "pool split", +}; diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h new file mode 100644 index 000000000000..61327f9aa909 --- /dev/null +++ b/common/zfs/zfs_comutil.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ZFS_COMUTIL_H +#define _ZFS_COMUTIL_H + +#include <sys/fs/zfs.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern boolean_t zfs_allocatable_devs(nvlist_t *); +extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); + +extern int zfs_zpl_version_map(int spa_version); +extern int zfs_spa_version_map(int zpl_version); +extern const char *zfs_history_event_names[LOG_END]; + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_COMUTIL_H */ diff --git a/common/zfs/zfs_deleg.c b/common/zfs/zfs_deleg.c new file mode 100644 index 000000000000..83d9edb21389 --- /dev/null +++ b/common/zfs/zfs_deleg.c @@ -0,0 +1,237 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#if defined(_KERNEL) +#include <sys/systm.h> +#include <sys/sunddi.h> +#include <sys/ctype.h> +#else +#include <stdio.h> +#include <unistd.h> +#include <strings.h> +#include <libnvpair.h> +#include <ctype.h> +#endif +/* XXX includes zfs_context.h, so why bother with the above? */ +#include <sys/dsl_deleg.h> +#include "zfs_prop.h" +#include "zfs_deleg.h" +#include "zfs_namecheck.h" + +/* + * permission table + * + * Keep this table in sorted order + * + * This table is used for displaying all permissions for + * zfs allow + */ + +zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { + {ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW}, + {ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, + {ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, + {ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, + {ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, + {ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, + {ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, + {ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, + {ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, + {ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, + {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, + {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE }, + {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, + {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, + {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, + {ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, + {ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, + {ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, + {ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, + {ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, + {NULL, ZFS_DELEG_NOTE_NONE } +}; + +static int +zfs_valid_permission_name(const char *perm) +{ + if (zfs_deleg_canonicalize_perm(perm)) + return (0); + + return (permset_namecheck(perm, NULL, NULL)); +} + +const char * +zfs_deleg_canonicalize_perm(const char *perm) +{ + int i; + zfs_prop_t prop; + + for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) { + if 
(strcmp(perm, zfs_deleg_perm_tab[i].z_perm) == 0) + return (perm); + } + + prop = zfs_name_to_prop(perm); + if (prop != ZPROP_INVAL && zfs_prop_delegatable(prop)) + return (zfs_prop_to_name(prop)); + return (NULL); + +} + +static int +zfs_validate_who(char *who) +{ + char *p; + + if (who[2] != ZFS_DELEG_FIELD_SEP_CHR) + return (-1); + + switch (who[0]) { + case ZFS_DELEG_USER: + case ZFS_DELEG_GROUP: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP_SETS: + if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) + return (-1); + for (p = &who[3]; *p; p++) + if (!isdigit(*p)) + return (-1); + break; + + case ZFS_DELEG_NAMED_SET: + case ZFS_DELEG_NAMED_SET_SETS: + if (who[1] != ZFS_DELEG_NA) + return (-1); + return (permset_namecheck(&who[3], NULL, NULL)); + + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + if (who[1] != ZFS_DELEG_NA) + return (-1); + if (who[3] != '\0') + return (-1); + break; + + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) + return (-1); + if (who[3] != '\0') + return (-1); + break; + + default: + return (-1); + } + + return (0); +} + +int +zfs_deleg_verify_nvlist(nvlist_t *nvp) +{ + nvpair_t *who, *perm_name; + nvlist_t *perms; + int error; + + if (nvp == NULL) + return (-1); + + who = nvlist_next_nvpair(nvp, NULL); + if (who == NULL) + return (-1); + + do { + if (zfs_validate_who(nvpair_name(who))) + return (-1); + + error = nvlist_lookup_nvlist(nvp, nvpair_name(who), &perms); + + if (error && error != ENOENT) + return (-1); + if (error == ENOENT) + continue; + + perm_name = nvlist_next_nvpair(perms, NULL); + if (perm_name == NULL) { + return (-1); + } + do { + error = zfs_valid_permission_name( + nvpair_name(perm_name)); + if (error) + return (-1); + } while (perm_name = nvlist_next_nvpair(perms, perm_name)); + } while (who = nvlist_next_nvpair(nvp, who)); + return (0); +} + +/* + * Construct the base attribute name. 
The base attribute names + * are the "key" to locate the jump objects which contain the actual + * permissions. The base attribute names are encoded based on + * type of entry and whether it is a local or descendent permission. + * + * Arguments: + * attr - attribute name return string, attribute is assumed to be + * ZFS_MAX_DELEG_NAME long. + * type - type of entry to construct + * inheritchr - inheritance type (local,descendent, or NA for create and + * permission set definitions + * data - is either a permission set name or a 64 bit uid/gid. + */ +void +zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, + char inheritchr, void *data) +{ + int len = ZFS_MAX_DELEG_NAME; + uint64_t *id = data; + + switch (type) { + case ZFS_DELEG_USER: + case ZFS_DELEG_GROUP: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP_SETS: + (void) snprintf(attr, len, "%c%c%c%lld", type, inheritchr, + ZFS_DELEG_FIELD_SEP_CHR, (longlong_t)*id); + break; + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + (void) snprintf(attr, len, "%c-%c%s", type, + ZFS_DELEG_FIELD_SEP_CHR, (char *)data); + break; + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + (void) snprintf(attr, len, "%c-%c", type, + ZFS_DELEG_FIELD_SEP_CHR); + break; + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + (void) snprintf(attr, len, "%c%c%c", type, inheritchr, + ZFS_DELEG_FIELD_SEP_CHR); + break; + default: + ASSERT(!"bad zfs_deleg_who_type_t"); + } +} diff --git a/common/zfs/zfs_deleg.h b/common/zfs/zfs_deleg.h new file mode 100644 index 000000000000..b4cb8e2b4e37 --- /dev/null +++ b/common/zfs/zfs_deleg.h @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ZFS_DELEG_H +#define _ZFS_DELEG_H + +#include <sys/fs/zfs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZFS_DELEG_SET_NAME_CHR '@' /* set name lead char */ +#define ZFS_DELEG_FIELD_SEP_CHR '$' /* field separator */ + +/* + * Max name length for a delegation attribute + */ +#define ZFS_MAX_DELEG_NAME 128 + +#define ZFS_DELEG_LOCAL 'l' +#define ZFS_DELEG_DESCENDENT 'd' +#define ZFS_DELEG_NA '-' + +typedef enum { + ZFS_DELEG_NOTE_CREATE, + ZFS_DELEG_NOTE_DESTROY, + ZFS_DELEG_NOTE_SNAPSHOT, + ZFS_DELEG_NOTE_ROLLBACK, + ZFS_DELEG_NOTE_CLONE, + ZFS_DELEG_NOTE_PROMOTE, + ZFS_DELEG_NOTE_RENAME, + ZFS_DELEG_NOTE_RECEIVE, + ZFS_DELEG_NOTE_ALLOW, + ZFS_DELEG_NOTE_USERPROP, + ZFS_DELEG_NOTE_MOUNT, + ZFS_DELEG_NOTE_SHARE, + ZFS_DELEG_NOTE_USERQUOTA, + ZFS_DELEG_NOTE_GROUPQUOTA, + ZFS_DELEG_NOTE_USERUSED, + ZFS_DELEG_NOTE_GROUPUSED, + ZFS_DELEG_NOTE_HOLD, + ZFS_DELEG_NOTE_RELEASE, + ZFS_DELEG_NOTE_DIFF, + ZFS_DELEG_NOTE_NONE +} zfs_deleg_note_t; + +typedef struct zfs_deleg_perm_tab { + char *z_perm; + zfs_deleg_note_t z_note; +} zfs_deleg_perm_tab_t; + +extern zfs_deleg_perm_tab_t zfs_deleg_perm_tab[]; + +int zfs_deleg_verify_nvlist(nvlist_t *nvlist); +void zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, + char checkflag, void *data); +const char *zfs_deleg_canonicalize_perm(const char *perm); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_DELEG_H */ diff --git 
a/common/zfs/zfs_fletcher.c b/common/zfs/zfs_fletcher.c new file mode 100644 index 000000000000..fa43ce6bdb5d --- /dev/null +++ b/common/zfs/zfs_fletcher.c @@ -0,0 +1,246 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Fletcher Checksums + * ------------------ + * + * ZFS's 2nd and 4th order Fletcher checksums are defined by the following + * recurrence relations: + * + * a = a + f + * i i-1 i-1 + * + * b = b + a + * i i-1 i + * + * c = c + b (fletcher-4 only) + * i i-1 i + * + * d = d + c (fletcher-4 only) + * i i-1 i + * + * Where + * a_0 = b_0 = c_0 = d_0 = 0 + * and + * f_0 .. f_(n-1) are the input data. + * + * Using standard techniques, these translate into the following series: + * + * __n_ __n_ + * \ | \ | + * a = > f b = > i * f + * n /___| n - i n /___| n - i + * i = 1 i = 1 + * + * + * __n_ __n_ + * \ | i*(i+1) \ | i*(i+1)*(i+2) + * c = > ------- f d = > ------------- f + * n /___| 2 n - i n /___| 6 n - i + * i = 1 i = 1 + * + * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. 
+ * Since the additions are done mod (2^64), errors in the high bits may not + * be noticed. For this reason, fletcher-2 is deprecated. + * + * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. + * How big the buffer can get before we overflow can be conservatively + * estimated using f_i = 0xffffffff for all i: + * + * % bc + * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 + * 2264 + * quit + * % + * + * So blocks of up to 2k will not overflow. Our largest block size is + * 128k, which has 32k 4-byte words, so we can compute the largest possible + * accumulators, then divide by 2^64 to figure the max amount of overflow: + * + * % bc + * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } + * a/2^64;b/2^64;c/2^64;d/2^64 + * 0 + * 0 + * 1365 + * 11186858 + * quit + * % + * + * So a and b cannot overflow. To make sure each bit of input has some + * effect on the contents of c and d, we can look at what the factors of + * the coefficients in the equations for c_n and d_n are. The number of 2s + * in the factors determines the lowest set bit in the multiplier. Running + * through the cases for n*(n+1)/2 reveals that the highest power of 2 is + * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow + * the 64-bit accumulators, every bit of every f_i affects every accumulator, + * even for 128k blocks. + * + * If we wanted to make a stronger version of fletcher4 (fletcher4c?), + * we could do our calculations mod (2^32 - 1) by adding in the carries + * periodically, and store the number of carries in the top 32-bits. + * + * -------------------- + * Checksum Performance + * -------------------- + * + * There are two interesting components to checksum performance: cached and + * uncached performance. With cached data, fletcher-2 is about four times + * faster than fletcher-4.
With uncached data, the performance difference is + * negligible, since the cost of a cache fill dominates the processing time. + * Even though fletcher-4 is slower than fletcher-2, it is still a pretty + * efficient pass over the data. + * + * In normal operation, the data which is being checksummed is in a buffer + * which has been filled either by: + * + * 1. a compression step, which will be mostly cached, or + * 2. a bcopy() or copyin(), which will be uncached (because the + * copy is cache-bypassing). + * + * For both cached and uncached data, both fletcher checksums are much faster + * than sha-256, and slower than 'off', which doesn't touch the data at all. + */ + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/byteorder.h> +#include <sys/zio.h> +#include <sys/spa.h> +/* + * Fletcher-2 over buf, words taken in native byte order. The even and odd + * 64-bit words feed two independent (a, b) accumulator pairs, and the four + * sums are handed to ZIO_SET_CHECKSUM() as (a0, a1, b0, b1). Every pass + * reads both ip[0] and ip[1], so size is presumably a multiple of 16 -- + * TODO confirm that callers guarantee this. + */ +void +fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += ip[0]; + a1 += ip[1]; + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} +/* + * Same as fletcher_2_native(), except that each 64-bit word goes through + * BSWAP_64() before it is accumulated. + */ +void +fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += BSWAP_64(ip[0]); + a1 += BSWAP_64(ip[1]); + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} +/* + * Fletcher-4 over buf, words taken in native byte order. Each 32-bit word + * is added to a, and a, b, c are then folded into b, c, d in turn; all + * four are 64-bit and are handed to ZIO_SET_CHECKSUM() as (a, b, c, d). + * Only the whole 32-bit words that fit in size are read. + */ +void +fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} +/* + * Same as fletcher_4_native(), except that each 32-bit word goes through + * BSWAP_32() before it is accumulated. + */ +void +fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip
+ (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_native(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + a = zcp->zc_word[0]; + b = zcp->zc_word[1]; + c = zcp->zc_word[2]; + d = zcp->zc_word[3]; + + for (; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_byteswap(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + a = zcp->zc_word[0]; + b = zcp->zc_word[1]; + c = zcp->zc_word[2]; + d = zcp->zc_word[3]; + + for (; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} diff --git a/common/zfs/zfs_fletcher.h b/common/zfs/zfs_fletcher.h new file mode 100644 index 000000000000..b49df0cf4f0f --- /dev/null +++ b/common/zfs/zfs_fletcher.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZFS_FLETCHER_H +#define _ZFS_FLETCHER_H + +#include <sys/types.h> +#include <sys/spa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * fletcher checksum functions + */ + +void fletcher_2_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_incremental_native(const void *, uint64_t, + zio_cksum_t *); +void fletcher_4_incremental_byteswap(const void *, uint64_t, + zio_cksum_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_FLETCHER_H */ diff --git a/common/zfs/zfs_namecheck.c b/common/zfs/zfs_namecheck.c new file mode 100644 index 000000000000..5cfafea471b3 --- /dev/null +++ b/common/zfs/zfs_namecheck.c @@ -0,0 +1,345 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Common name validation routines for ZFS. These routines are shared by the + * userland code as well as the ioctl() layer to ensure that we don't + * inadvertently expose a hole through direct ioctl()s that never gets tested. + * In userland, however, we want significantly more information about _why_ the + * name is invalid. In the kernel, we only care whether it's valid or not. + * Each routine therefore takes a 'namecheck_err_t' which describes exactly why + * the name failed to validate. + * + * Each function returns 0 on success, -1 on error. + */ + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <string.h> +#endif + +#include <sys/param.h> +#include <sys/nvpair.h> +#include "zfs_namecheck.h" +#include "zfs_deleg.h" + +static int +valid_char(char c) +{ + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' 
|| c == ':' || c == ' '); +} + +/* + * Snapshot names must be made up of alphanumeric characters plus the following + * characters: + * + * [-_.: ] + */ +int +snapshot_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + const char *loc; + + if (strlen(path) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + if (path[0] == '\0') { + if (why) + *why = NAME_ERR_EMPTY_COMPONENT; + return (-1); + } + + for (loc = path; *loc; loc++) { + if (!valid_char(*loc)) { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *loc; + } + return (-1); + } + } + return (0); +} + + +/* + * A permission set name must start with the character '@' followed by the + * same character restrictions as snapshot names, except that the whole + * name (including the '@') must be shorter than ZFS_PERMSET_MAXLEN (64). + */ +int +permset_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + if (strlen(path) >= ZFS_PERMSET_MAXLEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + if (path[0] != '@') { + if (why) { + *why = NAME_ERR_NO_AT; + *what = path[0]; + } + return (-1); + } + + return (snapshot_namecheck(&path[1], why, what)); +} + +/* + * Dataset names must be of the following form: + * + * [component][/]*[component][@component] + * + * Where each component is made up of alphanumeric characters plus the following + * characters: + * + * [-_.:%] + * + * We allow '%' here as we use that character internally to create unique + * names for temporary clones (for online recv). + */ +int +dataset_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + const char *loc, *end; + int found_snapshot; + + /* + * Make sure the name is not too long. + * + * ZFS_MAXNAMELEN is the maximum dataset length used in the userland + * which is the same as MAXNAMELEN used in the kernel. + * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all + * places using MAXNAMELEN. 
+ */ + + if (strlen(path) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + /* Explicitly check for a leading slash. */ + if (path[0] == '/') { + if (why) + *why = NAME_ERR_LEADING_SLASH; + return (-1); + } + + if (path[0] == '\0') { + if (why) + *why = NAME_ERR_EMPTY_COMPONENT; + return (-1); + } + + loc = path; + found_snapshot = 0; + for (;;) { + /* Find the end of this component */ + end = loc; + while (*end != '/' && *end != '@' && *end != '\0') + end++; + + if (*end == '\0' && end[-1] == '/') { + /* trailing slashes are not allowed */ + if (why) + *why = NAME_ERR_TRAILING_SLASH; + return (-1); + } + + /* Zero-length components are not allowed */ + if (loc == end) { + if (why) { + /* + * Make sure this is really a zero-length + * component and not a '@@'. + */ + if (*end == '@' && found_snapshot) { + *why = NAME_ERR_MULTIPLE_AT; + } else { + *why = NAME_ERR_EMPTY_COMPONENT; + } + } + + return (-1); + } + + /* Validate the contents of this component */ + while (loc != end) { + if (!valid_char(*loc) && *loc != '%') { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *loc; + } + return (-1); + } + loc++; + } + + /* If we've reached the end of the string, we're OK */ + if (*end == '\0') + return (0); + + if (*end == '@') { + /* + * If we've found an @ symbol, indicate that we're in + * the snapshot component, and report a second '@' + * character as an error. 
+ */ + if (found_snapshot) { + if (why) + *why = NAME_ERR_MULTIPLE_AT; + return (-1); + } + + found_snapshot = 1; + } + + /* + * If there is a '/' in a snapshot name + * then report an error + */ + if (*end == '/' && found_snapshot) { + if (why) + *why = NAME_ERR_TRAILING_SLASH; + return (-1); + } + + /* Update to the next component */ + loc = end + 1; + } +} + + +/* + * mountpoint names must be of the following form: + * + * /[component][/]*[component][/] + */ +int +mountpoint_namecheck(const char *path, namecheck_err_t *why) +{ + const char *start, *end; + + /* + * Make sure none of the mountpoint component names are too long. + * If a component name is too long then the mkdir of the mountpoint + * will fail but then the mountpoint property will be set to a value + * that can never be mounted. Better to fail before setting the prop. + * Extra slashes are OK, they will be tossed by the mountpoint mkdir. + */ + + if (path == NULL || *path != '/') { + if (why) + *why = NAME_ERR_LEADING_SLASH; + return (-1); + } + + /* Skip leading slash */ + start = &path[1]; + do { + end = start; + while (*end != '/' && *end != '\0') + end++; + + if (end - start >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + start = end + 1; + + } while (*end != '\0'); + + return (0); +} + +/* + * For pool names, we have the same set of valid characters as described in + * dataset names, with the additional restriction that the pool name must begin + * with a letter. The pool names 'raidz' and 'mirror' are also reserved names + * that cannot be used. + */ +int +pool_namecheck(const char *pool, namecheck_err_t *why, char *what) +{ + const char *c; + + /* + * Make sure the name is not too long. + * + * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland + * which is the same as MAXNAMELEN used in the kernel. + * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all + * places using MAXNAMELEN. 
+ */ + if (strlen(pool) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + c = pool; + while (*c != '\0') { + if (!valid_char(*c)) { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *c; + } + return (-1); + } + c++; + } + + if (!(*pool >= 'a' && *pool <= 'z') && + !(*pool >= 'A' && *pool <= 'Z')) { + if (why) + *why = NAME_ERR_NOLETTER; + return (-1); + } + + if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) { + if (why) + *why = NAME_ERR_RESERVED; + return (-1); + } + + if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) { + if (why) + *why = NAME_ERR_DISKLIKE; + return (-1); + } + + return (0); +} diff --git a/common/zfs/zfs_namecheck.h b/common/zfs/zfs_namecheck.h new file mode 100644 index 000000000000..7711da099be9 --- /dev/null +++ b/common/zfs/zfs_namecheck.h @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _ZFS_NAMECHECK_H +#define _ZFS_NAMECHECK_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + NAME_ERR_LEADING_SLASH, /* name begins with leading slash */ + NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */ + NAME_ERR_TRAILING_SLASH, /* name ends with a slash */ + NAME_ERR_INVALCHAR, /* invalid character found */ + NAME_ERR_MULTIPLE_AT, /* multiple '@' characters found */ + NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */ + NAME_ERR_RESERVED, /* entire name is reserved */ + NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ + NAME_ERR_TOOLONG, /* name is too long */ + NAME_ERR_NO_AT, /* permission set is missing '@' */ +} namecheck_err_t; + +#define ZFS_PERMSET_MAXLEN 64 + +int pool_namecheck(const char *, namecheck_err_t *, char *); +int dataset_namecheck(const char *, namecheck_err_t *, char *); +int mountpoint_namecheck(const char *, namecheck_err_t *); +int snapshot_namecheck(const char *, namecheck_err_t *, char *); +int permset_namecheck(const char *, namecheck_err_t *, char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_NAMECHECK_H */ diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c new file mode 100644 index 000000000000..f29bcf62718f --- /dev/null +++ b/common/zfs/zfs_prop.c @@ -0,0 +1,595 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/u8_textprep.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_znode.h> + +#include "zfs_prop.h" +#include "zfs_deleg.h" + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#endif + +static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; + +/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ +const char *zfs_userquota_prop_prefixes[] = { + "userused@", + "userquota@", + "groupused@", + "groupquota@" +}; + +zprop_desc_t * +zfs_prop_get_table(void) +{ + return (zfs_prop_table); +} + +void +zfs_prop_init(void) +{ + static zprop_index_t checksum_table[] = { + { "on", ZIO_CHECKSUM_ON }, + { "off", ZIO_CHECKSUM_OFF }, + { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, + { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, + { "sha256", ZIO_CHECKSUM_SHA256 }, + { NULL } + }; + + static zprop_index_t dedup_table[] = { + { "on", ZIO_CHECKSUM_ON }, + { "off", ZIO_CHECKSUM_OFF }, + { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, + { "sha256", ZIO_CHECKSUM_SHA256 }, + { "sha256,verify", + ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, + { NULL } + }; + + static zprop_index_t compress_table[] = { + { "on", ZIO_COMPRESS_ON }, + { "off", ZIO_COMPRESS_OFF }, + { "lzjb", ZIO_COMPRESS_LZJB }, + { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ + { "gzip-1", ZIO_COMPRESS_GZIP_1 }, + { "gzip-2", ZIO_COMPRESS_GZIP_2 }, + { "gzip-3", ZIO_COMPRESS_GZIP_3 }, + { "gzip-4", ZIO_COMPRESS_GZIP_4 }, + { "gzip-5", ZIO_COMPRESS_GZIP_5 }, + { "gzip-6", 
ZIO_COMPRESS_GZIP_6 }, + { "gzip-7", ZIO_COMPRESS_GZIP_7 }, + { "gzip-8", ZIO_COMPRESS_GZIP_8 }, + { "gzip-9", ZIO_COMPRESS_GZIP_9 }, + { "zle", ZIO_COMPRESS_ZLE }, + { NULL } + }; + + static zprop_index_t snapdir_table[] = { + { "hidden", ZFS_SNAPDIR_HIDDEN }, + { "visible", ZFS_SNAPDIR_VISIBLE }, + { NULL } + }; + + static zprop_index_t acl_inherit_table[] = { + { "discard", ZFS_ACL_DISCARD }, + { "noallow", ZFS_ACL_NOALLOW }, + { "restricted", ZFS_ACL_RESTRICTED }, + { "passthrough", ZFS_ACL_PASSTHROUGH }, + { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatibility */ + { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, + { NULL } + }; + + static zprop_index_t case_table[] = { + { "sensitive", ZFS_CASE_SENSITIVE }, + { "insensitive", ZFS_CASE_INSENSITIVE }, + { "mixed", ZFS_CASE_MIXED }, + { NULL } + }; + + static zprop_index_t copies_table[] = { + { "1", 1 }, + { "2", 2 }, + { "3", 3 }, + { NULL } + }; + + /* + * Use the unique flags we have to send to u8_strcmp() and/or + * u8_textprep() to represent the various normalization property + * values. 
+ */ + static zprop_index_t normalize_table[] = { + { "none", 0 }, + { "formD", U8_TEXTPREP_NFD }, + { "formKC", U8_TEXTPREP_NFKC }, + { "formC", U8_TEXTPREP_NFC }, + { "formKD", U8_TEXTPREP_NFKD }, + { NULL } + }; + + static zprop_index_t version_table[] = { + { "1", 1 }, + { "2", 2 }, + { "3", 3 }, + { "4", 4 }, + { "5", 5 }, + { "current", ZPL_VERSION }, + { NULL } + }; + + static zprop_index_t boolean_table[] = { + { "off", 0 }, + { "on", 1 }, + { NULL } + }; + + static zprop_index_t logbias_table[] = { + { "latency", ZFS_LOGBIAS_LATENCY }, + { "throughput", ZFS_LOGBIAS_THROUGHPUT }, + { NULL } + }; + + static zprop_index_t canmount_table[] = { + { "off", ZFS_CANMOUNT_OFF }, + { "on", ZFS_CANMOUNT_ON }, + { "noauto", ZFS_CANMOUNT_NOAUTO }, + { NULL } + }; + + static zprop_index_t cache_table[] = { + { "none", ZFS_CACHE_NONE }, + { "metadata", ZFS_CACHE_METADATA }, + { "all", ZFS_CACHE_ALL }, + { NULL } + }; + + static zprop_index_t sync_table[] = { + { "standard", ZFS_SYNC_STANDARD }, + { "always", ZFS_SYNC_ALWAYS }, + { "disabled", ZFS_SYNC_DISABLED }, + { NULL } + }; + + /* inherit index properties */ + zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "standard | always | disabled", "SYNC", + sync_table); + zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", + ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME, + "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", + checksum_table); + zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "on | off | verify | sha256[,verify]", "DEDUP", + dedup_table); + zprop_register_index(ZFS_PROP_COMPRESSION, "compression", + ZIO_COMPRESS_DEFAULT, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS", + compress_table); + zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, + 
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "hidden | visible", "SNAPDIR", snapdir_table); + zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", + ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "discard | noallow | restricted | passthrough | passthrough-x", + "ACLINHERIT", acl_inherit_table); + zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "1 | 2 | 3", "COPIES", copies_table); + zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", + ZFS_CACHE_ALL, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, + "all | none | metadata", "PRIMARYCACHE", cache_table); + zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", + ZFS_CACHE_ALL, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, + "all | none | metadata", "SECONDARYCACHE", cache_table); + zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "latency | throughput", "LOGBIAS", logbias_table); + + /* inherit index (boolean) properties */ + zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); + zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", + boolean_table); + zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", + boolean_table); + zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", + boolean_table); + zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", + boolean_table); + zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table); + zprop_register_index(ZFS_PROP_XATTR, "xattr", 
1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR", + boolean_table); + zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", + boolean_table); + zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", + boolean_table); + + /* default index properties */ + zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, + "1 | 2 | 3 | 4 | current", "VERSION", version_table); + zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", + "CANMOUNT", canmount_table); + + /* readonly index (boolean) properties */ + zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, + ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); + zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, + PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", + boolean_table); + + /* set once index properties */ + zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, + PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, + "none | formC | formD | formKC | formKD", "NORMALIZATION", + normalize_table); + zprop_register_index(ZFS_PROP_CASE, "casesensitivity", + ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_SNAPSHOT, + "sensitive | insensitive | mixed", "CASE", case_table); + + /* set once index (boolean) properties */ + zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, + "on | off", "UTF8ONLY", boolean_table); + + /* string properties */ + zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN"); + zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none", + 
"MOUNTPOINT"); + zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", + "SHARENFS"); + zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, + ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE"); + zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "on | off | sharemgr(1M) options", "SHARESMB"); + zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", + ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, + "<sensitivity label>", "MLSLABEL"); + + /* readonly number properties */ + zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, + ZFS_TYPE_DATASET, "<size>", "USED"); + zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL"); + zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, + PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER"); + zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, + PROP_READONLY, ZFS_TYPE_DATASET, + "<1.00x or higher if compressed>", "RATIO"); + zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", + ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, + ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); + zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", + "USEDSNAP"); + zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", + "USEDDS"); + zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", + "USEDCHILD"); + zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, + PROP_READONLY, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV"); + zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, + ZFS_TYPE_SNAPSHOT, "<count>", 
"USERREFS"); + + /* default number properties */ + zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, + ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA"); + zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "<size> | none", "RESERV"); + zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, + ZFS_TYPE_VOLUME, "<size>", "VOLSIZE"); + zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, + ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA"); + zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "<size> | none", "REFRESERV"); + + /* inherit number properties */ + zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", + SPA_MAXBLOCKSIZE, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE"); + + /* hidden properties */ + zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG"); + zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES"); + zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, + PROP_READONLY, ZFS_TYPE_DATASET, "NAME"); + zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", + PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); + zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", + PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, + "STMF_SBD_LU"); + zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "GUID"); + zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", + PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, + "USERACCOUNTING"); + zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE"); + zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, + 
PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); + + /* + * Property to be removed once libbe is integrated + */ + zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop", + PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM, + "PRIV_PROP"); + + /* oddball properties */ + zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, + NULL, PROP_READONLY, ZFS_TYPE_DATASET, + "<date>", "CREATION", B_FALSE, B_TRUE, NULL); +} + +boolean_t +zfs_prop_delegatable(zfs_prop_t prop) +{ + zprop_desc_t *pd = &zfs_prop_table[prop]; + + /* The mlslabel property is never delegatable. */ + if (prop == ZFS_PROP_MLSLABEL) + return (B_FALSE); + + return (pd->pd_attr != PROP_READONLY); +} + +/* + * Given a zfs dataset property name, returns the corresponding property ID. + */ +zfs_prop_t +zfs_name_to_prop(const char *propname) +{ + return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); +} + +/* + * For user property names, we allow all lowercase alphanumeric characters, plus + * a few useful punctuation characters. + */ +static int +valid_char(char c) +{ + return ((c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == ':'); +} + +/* + * Returns true if this is a valid user-defined property (one with a ':'). + */ +boolean_t +zfs_prop_user(const char *name) +{ + int i; + char c; + boolean_t foundsep = B_FALSE; + + for (i = 0; i < strlen(name); i++) { + c = name[i]; + if (!valid_char(c)) + return (B_FALSE); + if (c == ':') + foundsep = B_TRUE; + } + + if (!foundsep) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Returns true if this is a valid userspace-type property (one with a '@'). + * Note that after the @, any character is valid (eg, another @, for SID + * user@domain). 
+ */ +boolean_t +zfs_prop_userquota(const char *name) +{ + zfs_userquota_prop_t prop; + + for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { + if (strncmp(name, zfs_userquota_prop_prefixes[prop], + strlen(zfs_userquota_prop_prefixes[prop])) == 0) { + return (B_TRUE); + } + } + + return (B_FALSE); +} + +/* + * Tables of index types, plus functions to convert between the user view + * (strings) and internal representation (uint64_t). + */ +int +zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) +{ + return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); +} + +int +zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) +{ + return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); +} + +uint64_t +zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) +{ + return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); +} + +/* + * Returns TRUE if the property applies to any of the given dataset types. + */ +boolean_t +zfs_prop_valid_for_type(int prop, zfs_type_t types) +{ + return (zprop_valid_for_type(prop, types)); +} + +zprop_type_t +zfs_prop_get_type(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_proptype); +} + +/* + * Returns TRUE if the property is readonly. + */ +boolean_t +zfs_prop_readonly(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_attr == PROP_READONLY || + zfs_prop_table[prop].pd_attr == PROP_ONETIME); +} + +/* + * Returns TRUE if the property is only allowed to be set once. + */ +boolean_t +zfs_prop_setonce(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); +} + +const char * +zfs_prop_default_string(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_strdefault); +} + +uint64_t +zfs_prop_default_numeric(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_numdefault); +} + +/* + * Given a dataset property ID, returns the corresponding name. + * Assuming the zfs dataset property ID is valid. 
+ */ +const char * +zfs_prop_to_name(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_name); +} + +/* + * Returns TRUE if the property is inheritable. + */ +boolean_t +zfs_prop_inheritable(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || + zfs_prop_table[prop].pd_attr == PROP_ONETIME); +} + +#ifndef _KERNEL + +/* + * Returns a string describing the set of acceptable values for the given + * zfs property, or NULL if it cannot be set. + */ +const char * +zfs_prop_values(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_values); +} + +/* + * Returns TRUE if this property is a string type. Note that index types + * (compression, checksum) are treated as strings in userland, even though they + * are stored numerically on disk. + */ +int +zfs_prop_is_string(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || + zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); +} + +/* + * Returns the column header for the given property. Used only in + * 'zfs list -o', but centralized here with the other property information. + */ +const char * +zfs_prop_column_name(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_colname); +} + +/* + * Returns whether the given property should be displayed right-justified for + * 'zfs list'. + */ +boolean_t +zfs_prop_align_right(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_rightalign); +} + +#endif diff --git a/common/zfs/zfs_prop.h b/common/zfs/zfs_prop.h new file mode 100644 index 000000000000..a63262311b3d --- /dev/null +++ b/common/zfs/zfs_prop.h @@ -0,0 +1,129 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZFS_PROP_H +#define _ZFS_PROP_H + +#include <sys/fs/zfs.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * For index types (e.g. compression and checksum), we want the numeric value + * in the kernel, but the string value in userland. + */ +typedef enum { + PROP_TYPE_NUMBER, /* numeric value */ + PROP_TYPE_STRING, /* string value */ + PROP_TYPE_INDEX /* numeric value indexed by string */ +} zprop_type_t; + +typedef enum { + PROP_DEFAULT, + PROP_READONLY, + PROP_INHERIT, + /* + * ONETIME properties are a sort of conglomeration of READONLY + * and INHERIT. They can be set only during object creation, + * after that they are READONLY. If not explicitly set during + * creation, they can be inherited. 
+ */ + PROP_ONETIME +} zprop_attr_t; + +typedef struct zfs_index { + const char *pi_name; + uint64_t pi_value; +} zprop_index_t; + +typedef struct { + const char *pd_name; /* human-readable property name */ + int pd_propnum; /* property number */ + zprop_type_t pd_proptype; /* string, boolean, index, number */ + const char *pd_strdefault; /* default for strings */ + uint64_t pd_numdefault; /* for boolean / index / number */ + zprop_attr_t pd_attr; /* default, readonly, inherit */ + int pd_types; /* bitfield of valid dataset types */ + /* fs | vol | snap; or pool */ + const char *pd_values; /* string telling acceptable values */ + const char *pd_colname; /* column header for "zfs list" */ + boolean_t pd_rightalign; /* column alignment for "zfs list" */ + boolean_t pd_visible; /* do we list this property with the */ + /* "zfs get" help message */ + const zprop_index_t *pd_table; /* for index properties, a table */ + /* defining the possible values */ + size_t pd_table_size; /* number of entries in pd_table[] */ +} zprop_desc_t; + +/* + * zfs dataset property functions + */ +void zfs_prop_init(void); +zprop_type_t zfs_prop_get_type(zfs_prop_t); +boolean_t zfs_prop_delegatable(zfs_prop_t prop); +zprop_desc_t *zfs_prop_get_table(void); + +/* + * zpool property functions + */ +void zpool_prop_init(void); +zprop_type_t zpool_prop_get_type(zpool_prop_t); +zprop_desc_t *zpool_prop_get_table(void); + +/* + * Common routines to initialize property tables + */ +void zprop_register_impl(int, const char *, zprop_type_t, uint64_t, + const char *, zprop_attr_t, int, const char *, const char *, + boolean_t, boolean_t, const zprop_index_t *); +void zprop_register_string(int, const char *, const char *, + zprop_attr_t attr, int, const char *, const char *); +void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int, + const char *, const char *); +void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int, + const char *, const char *, const 
zprop_index_t *); +void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t, + int, const char *); + +/* + * Common routines for zfs and zpool property management + */ +int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t); +int zprop_name_to_prop(const char *, zfs_type_t); +int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t); +int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t); +uint64_t zprop_random_value(int, uint64_t, zfs_type_t); +const char *zprop_values(int, zfs_type_t); +size_t zprop_width(int, boolean_t *, zfs_type_t); +boolean_t zprop_valid_for_type(int, zfs_type_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_PROP_H */ diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c new file mode 100644 index 000000000000..988d05de6e20 --- /dev/null +++ b/common/zfs/zpool_prop.c @@ -0,0 +1,202 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/fs/zfs.h> + +#include "zfs_prop.h" + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#endif + +static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS]; + +zprop_desc_t * +zpool_prop_get_table(void) +{ + return (zpool_prop_table); +} + +void +zpool_prop_init(void) +{ + static zprop_index_t boolean_table[] = { + { "off", 0}, + { "on", 1}, + { NULL } + }; + + static zprop_index_t failuremode_table[] = { + { "wait", ZIO_FAILURE_MODE_WAIT }, + { "continue", ZIO_FAILURE_MODE_CONTINUE }, + { "panic", ZIO_FAILURE_MODE_PANIC }, + { NULL } + }; + + /* string properties */ + zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT, + ZFS_TYPE_POOL, "<path>", "ALTROOT"); + zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT, + ZFS_TYPE_POOL, "<filesystem>", "BOOTFS"); + zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, + PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE"); + + /* readonly number properties */ + zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<size>", "SIZE"); + zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<size>", "FREE"); + zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0, + PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC"); + zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<size>", "CAP"); + zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<guid>", "GUID"); + zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<state>", "HEALTH"); + zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0, + PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>", + "DEDUP"); + + /* default number properties */ + 
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, + PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION"); + zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO"); + + /* default index (boolean) properties */ + zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION", + boolean_table); + zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table); + zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS", + boolean_table); + zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table); + zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table); + + /* default index properties */ + zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", + ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL, + "wait | continue | panic", "FAILMODE", failuremode_table); + + /* hidden properties */ + zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, + PROP_READONLY, ZFS_TYPE_POOL, "NAME"); +} + +/* + * Given a pool property name, returns the corresponding property ID. + */ +zpool_prop_t +zpool_name_to_prop(const char *propname) +{ + return (zprop_name_to_prop(propname, ZFS_TYPE_POOL)); +} + +/* + * Given a pool property ID, returns the corresponding name. + * Assumes the pool property ID is valid. 
+ */ +const char * +zpool_prop_to_name(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_name); +} + +zprop_type_t +zpool_prop_get_type(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_proptype); +} + +boolean_t +zpool_prop_readonly(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_attr == PROP_READONLY); +} + +const char * +zpool_prop_default_string(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_strdefault); +} + +uint64_t +zpool_prop_default_numeric(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_numdefault); +} + +int +zpool_prop_string_to_index(zpool_prop_t prop, const char *string, + uint64_t *index) +{ + return (zprop_string_to_index(prop, string, index, ZFS_TYPE_POOL)); +} + +int +zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index, + const char **string) +{ + return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL)); +} + +uint64_t +zpool_prop_random_value(zpool_prop_t prop, uint64_t seed) +{ + return (zprop_random_value(prop, seed, ZFS_TYPE_POOL)); +} + +#ifndef _KERNEL + +const char * +zpool_prop_values(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_values); +} + +const char * +zpool_prop_column_name(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_colname); +} + +boolean_t +zpool_prop_align_right(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_rightalign); +} +#endif diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c new file mode 100644 index 000000000000..0bbf20d4f02c --- /dev/null +++ b/common/zfs/zprop_common.c @@ -0,0 +1,426 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Common routines used by zfs and zpool property management. + */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_znode.h> +#include <sys/fs/zfs.h> + +#include "zfs_prop.h" +#include "zfs_deleg.h" + +#if defined(_KERNEL) +#include <sys/systm.h> +#include <util/qsort.h> +#else +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#endif + +static zprop_desc_t * +zprop_get_proptable(zfs_type_t type) +{ + if (type == ZFS_TYPE_POOL) + return (zpool_prop_get_table()); + else + return (zfs_prop_get_table()); +} + +static int +zprop_get_numprops(zfs_type_t type) +{ + if (type == ZFS_TYPE_POOL) + return (ZPOOL_NUM_PROPS); + else + return (ZFS_NUM_PROPS); +} + +void +zprop_register_impl(int prop, const char *name, zprop_type_t type, + uint64_t numdefault, const char *strdefault, zprop_attr_t attr, + int objset_types, const char *values, const char *colname, + boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl) +{ + zprop_desc_t *prop_tbl = zprop_get_proptable(objset_types); + zprop_desc_t *pd; + + pd = &prop_tbl[prop]; + + ASSERT(pd->pd_name == NULL || pd->pd_name == name); + ASSERT(name != NULL); + ASSERT(colname != NULL); + + pd->pd_name = name; + pd->pd_propnum = prop; + pd->pd_proptype = type; + pd->pd_numdefault = numdefault; + pd->pd_strdefault = strdefault; + pd->pd_attr = 
attr; + pd->pd_types = objset_types; + pd->pd_values = values; + pd->pd_colname = colname; + pd->pd_rightalign = rightalign; + pd->pd_visible = visible; + pd->pd_table = idx_tbl; + pd->pd_table_size = 0; + while (idx_tbl && (idx_tbl++)->pi_name != NULL) + pd->pd_table_size++; +} + +void +zprop_register_string(int prop, const char *name, const char *def, + zprop_attr_t attr, int objset_types, const char *values, + const char *colname) +{ + zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr, + objset_types, values, colname, B_FALSE, B_TRUE, NULL); + +} + +void +zprop_register_number(int prop, const char *name, uint64_t def, + zprop_attr_t attr, int objset_types, const char *values, + const char *colname) +{ + zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr, + objset_types, values, colname, B_TRUE, B_TRUE, NULL); +} + +void +zprop_register_index(int prop, const char *name, uint64_t def, + zprop_attr_t attr, int objset_types, const char *values, + const char *colname, const zprop_index_t *idx_tbl) +{ + zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr, + objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl); +} + +void +zprop_register_hidden(int prop, const char *name, zprop_type_t type, + zprop_attr_t attr, int objset_types, const char *colname) +{ + zprop_register_impl(prop, name, type, 0, NULL, attr, + objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); +} + + +/* + * A comparison function we can use to order indexes into property tables. + * Both arguments point at elements of an array of zprop_desc_t pointers. + * Read-only properties sort ahead of all others; properties with the same + * read-only status are ordered by name using strcmp(). + */ +static int +zprop_compare(const void *arg1, const void *arg2) +{ + const zprop_desc_t *p1 = *((zprop_desc_t **)arg1); + const zprop_desc_t *p2 = *((zprop_desc_t **)arg2); + boolean_t p1ro, p2ro; + + p1ro = (p1->pd_attr == PROP_READONLY); + p2ro = (p2->pd_attr == PROP_READONLY); + + if (p1ro == p2ro) + return (strcmp(p1->pd_name, p2->pd_name)); + + return (p1ro ? 
-1 : 1); +} + +/* + * Iterate over all properties in the given property table, calling back + * into the specified function for each property. We will continue to + * iterate until we either reach the end or the callback function returns + * something other than ZPROP_CONT. + * + * Properties that are not marked visible are skipped unless show_all is + * set. If ordered is set, the properties are visited in the order given + * by zprop_compare(); otherwise they are visited in table order. + * + * Returns the number of the property at which the callback stopped the + * iteration, or ZPROP_CONT if every callback returned ZPROP_CONT. In + * userland, ZPROP_CONT is also returned, without calling back at all, if + * the scratch array of table pointers cannot be allocated. + */ +int +zprop_iter_common(zprop_func func, void *cb, boolean_t show_all, + boolean_t ordered, zfs_type_t type) +{ + int i, num_props, size, prop; + zprop_desc_t *prop_tbl; + zprop_desc_t **order; + + prop_tbl = zprop_get_proptable(type); + num_props = zprop_get_numprops(type); + size = num_props * sizeof (zprop_desc_t *); + +#if defined(_KERNEL) + order = kmem_alloc(size, KM_SLEEP); +#else + if ((order = malloc(size)) == NULL) + return (ZPROP_CONT); +#endif + + for (int j = 0; j < num_props; j++) + order[j] = &prop_tbl[j]; + + if (ordered) { + qsort((void *)order, num_props, sizeof (zprop_desc_t *), + zprop_compare); + } + + prop = ZPROP_CONT; + for (i = 0; i < num_props; i++) { + if ((order[i]->pd_visible || show_all) && + (func(order[i]->pd_propnum, cb) != ZPROP_CONT)) { + prop = order[i]->pd_propnum; + break; + } + } + +#if defined(_KERNEL) + kmem_free(order, size); +#else + free(order); +#endif + return (prop); +} + +static boolean_t +propname_match(const char *p, size_t len, zprop_desc_t *prop_entry) +{ + const char *propname = prop_entry->pd_name; +#ifndef _KERNEL + const char *colname = prop_entry->pd_colname; + int c; +#endif + + if (len == strlen(propname) && + strncmp(p, propname, len) == 0) + return (B_TRUE); + +#ifndef _KERNEL + if (colname == NULL || len != strlen(colname)) + return (B_FALSE); + + for (c = 0; c < len; c++) + if (p[c] != tolower(colname[c])) + break; + + return (colname[c] == '\0'); +#else + return (B_FALSE); +#endif +} + +typedef struct name_to_prop_cb { + const char *propname; + zprop_desc_t *prop_tbl; +} name_to_prop_cb_t; + +static int +zprop_name_to_prop_cb(int prop, void *cb_data) +{ + name_to_prop_cb_t *data = cb_data; + + if (propname_match(data->propname, 
strlen(data->propname), + &data->prop_tbl[prop])) + return (prop); + + return (ZPROP_CONT); +} + +int +zprop_name_to_prop(const char *propname, zfs_type_t type) +{ + int prop; + name_to_prop_cb_t cb_data; + + cb_data.propname = propname; + cb_data.prop_tbl = zprop_get_proptable(type); + + prop = zprop_iter_common(zprop_name_to_prop_cb, &cb_data, + B_TRUE, B_FALSE, type); + + return (prop == ZPROP_CONT ? ZPROP_INVAL : prop); +} + +int +zprop_string_to_index(int prop, const char *string, uint64_t *index, + zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + const zprop_index_t *idx_tbl; + int i; + + if (prop == ZPROP_INVAL || prop == ZPROP_CONT) + return (-1); + + ASSERT(prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) + return (-1); + + for (i = 0; idx_tbl[i].pi_name != NULL; i++) { + if (strcmp(string, idx_tbl[i].pi_name) == 0) { + *index = idx_tbl[i].pi_value; + return (0); + } + } + + return (-1); +} + +int +zprop_index_to_string(int prop, uint64_t index, const char **string, + zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + const zprop_index_t *idx_tbl; + int i; + + if (prop == ZPROP_INVAL || prop == ZPROP_CONT) + return (-1); + + ASSERT(prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) + return (-1); + + for (i = 0; idx_tbl[i].pi_name != NULL; i++) { + if (idx_tbl[i].pi_value == index) { + *string = idx_tbl[i].pi_name; + return (0); + } + } + + return (-1); +} + +/* + * Return a random valid property value. Used by ztest. 
+ */ +uint64_t +zprop_random_value(int prop, uint64_t seed, zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + const zprop_index_t *idx_tbl; + + ASSERT((uint_t)prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + idx_tbl = prop_tbl[prop].pd_table; + + if (idx_tbl == NULL) + return (seed); + + return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value); +} + +const char * +zprop_values(int prop, zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + + ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); + ASSERT(prop < zprop_get_numprops(type)); + + prop_tbl = zprop_get_proptable(type); + + return (prop_tbl[prop].pd_values); +} + +/* + * Returns TRUE if the property applies to any of the given dataset types. + */ +boolean_t +zprop_valid_for_type(int prop, zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + + if (prop == ZPROP_INVAL || prop == ZPROP_CONT) + return (B_FALSE); + + ASSERT(prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + return ((prop_tbl[prop].pd_types & type) != 0); +} + +#ifndef _KERNEL + +/* + * Determines the minimum width for the column, and indicates whether it's fixed + * or not. Only string columns are non-fixed. + */ +size_t +zprop_width(int prop, boolean_t *fixed, zfs_type_t type) +{ + zprop_desc_t *prop_tbl, *pd; + const zprop_index_t *idx; + size_t ret; + int i; + + ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); + ASSERT(prop < zprop_get_numprops(type)); + + prop_tbl = zprop_get_proptable(type); + pd = &prop_tbl[prop]; + + *fixed = B_TRUE; + + /* + * Start with the width of the column name. + */ + ret = strlen(pd->pd_colname); + + /* + * For fixed-width values, make sure the width is large enough to hold + * any possible value. + */ + switch (pd->pd_proptype) { + case PROP_TYPE_NUMBER: + /* + * The maximum length of a human-readable number is 5 characters + * ("20.4M", for example). 
+ */ + if (ret < 5) + ret = 5; + /* + * 'creation' is handled specially because it's a number + * internally, but displayed as a date string. + */ + if (prop == ZFS_PROP_CREATION) + *fixed = B_FALSE; + break; + case PROP_TYPE_INDEX: + idx = prop_tbl[prop].pd_table; + for (i = 0; idx[i].pi_name != NULL; i++) { + if (strlen(idx[i].pi_name) > ret) + ret = strlen(idx[i].pi_name); + } + break; + + case PROP_TYPE_STRING: + *fixed = B_FALSE; + break; + } + + return (ret); +} + +#endif diff --git a/uts/common/Makefile.files b/uts/common/Makefile.files new file mode 100644 index 000000000000..ec08410b4ff3 --- /dev/null +++ b/uts/common/Makefile.files @@ -0,0 +1,2007 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. +# + +# +# This Makefile defines all file modules for the directory uts/common +# and its children. These are the source files which may be considered +# common to all SunOS systems. 
+ +i386_CORE_OBJS += \ + atomic.o \ + avintr.o \ + pic.o + +sparc_CORE_OBJS += + +COMMON_CORE_OBJS += \ + beep.o \ + bitset.o \ + bp_map.o \ + brand.o \ + cpucaps.o \ + cmt.o \ + cmt_policy.o \ + cpu.o \ + cpu_event.o \ + cpu_intr.o \ + cpu_pm.o \ + cpupart.o \ + cap_util.o \ + disp.o \ + group.o \ + kstat_fr.o \ + iscsiboot_prop.o \ + lgrp.o \ + lgrp_topo.o \ + mmapobj.o \ + mutex.o \ + page_lock.o \ + page_retire.o \ + panic.o \ + param.o \ + pg.o \ + pghw.o \ + putnext.o \ + rctl_proc.o \ + rwlock.o \ + seg_kmem.o \ + softint.o \ + string.o \ + strtol.o \ + strtoul.o \ + strtoll.o \ + strtoull.o \ + thread_intr.o \ + vm_page.o \ + vm_pagelist.o \ + zlib_obj.o \ + clock_tick.o + +CORE_OBJS += $(COMMON_CORE_OBJS) $($(MACH)_CORE_OBJS) + +ZLIB_OBJS = zutil.o zmod.o zmod_subr.o \ + adler32.o crc32.o deflate.o inffast.o \ + inflate.o inftrees.o trees.o + +GENUNIX_OBJS += \ + access.o \ + acl.o \ + acl_common.o \ + adjtime.o \ + alarm.o \ + aio_subr.o \ + auditsys.o \ + audit_core.o \ + audit_zone.o \ + audit_memory.o \ + autoconf.o \ + avl.o \ + bdev_dsort.o \ + bio.o \ + bitmap.o \ + blabel.o \ + brandsys.o \ + bz2blocksort.o \ + bz2compress.o \ + bz2decompress.o \ + bz2randtable.o \ + bz2bzlib.o \ + bz2crctable.o \ + bz2huffman.o \ + callb.o \ + callout.o \ + chdir.o \ + chmod.o \ + chown.o \ + cladm.o \ + class.o \ + clock.o \ + clock_highres.o \ + clock_realtime.o\ + close.o \ + compress.o \ + condvar.o \ + conf.o \ + console.o \ + contract.o \ + copyops.o \ + core.o \ + corectl.o \ + cred.o \ + cs_stubs.o \ + dacf.o \ + dacf_clnt.o \ + damap.o \ + cyclic.o \ + ddi.o \ + ddifm.o \ + ddi_hp_impl.o \ + ddi_hp_ndi.o \ + ddi_intr.o \ + ddi_intr_impl.o \ + ddi_intr_irm.o \ + ddi_nodeid.o \ + ddi_timer.o \ + devcfg.o \ + devcache.o \ + device.o \ + devid.o \ + devid_cache.o \ + devid_scsi.o \ + devid_smp.o \ + devpolicy.o \ + disp_lock.o \ + dnlc.o \ + driver.o \ + dumpsubr.o \ + driver_lyr.o \ + dtrace_subr.o \ + errorq.o \ + etheraddr.o \ + evchannels.o \ + exacct.o \ 
+ exacct_core.o \ + exec.o \ + exit.o \ + fbio.o \ + fcntl.o \ + fdbuffer.o \ + fdsync.o \ + fem.o \ + ffs.o \ + fio.o \ + flock.o \ + fm.o \ + fork.o \ + vpm.o \ + fs_reparse.o \ + fs_subr.o \ + fsflush.o \ + ftrace.o \ + getcwd.o \ + getdents.o \ + getloadavg.o \ + getpagesizes.o \ + getpid.o \ + gfs.o \ + rusagesys.o \ + gid.o \ + groups.o \ + grow.o \ + hat.o \ + hat_refmod.o \ + id32.o \ + id_space.o \ + inet_ntop.o \ + instance.o \ + ioctl.o \ + ip_cksum.o \ + issetugid.o \ + ippconf.o \ + kcpc.o \ + kdi.o \ + kiconv.o \ + klpd.o \ + kmem.o \ + ksyms_snapshot.o \ + l_strplumb.o \ + labelsys.o \ + link.o \ + list.o \ + lockstat_subr.o \ + log_sysevent.o \ + logsubr.o \ + lookup.o \ + lseek.o \ + ltos.o \ + lwp.o \ + lwp_create.o \ + lwp_info.o \ + lwp_self.o \ + lwp_sobj.o \ + lwp_timer.o \ + lwpsys.o \ + main.o \ + mmapobjsys.o \ + memcntl.o \ + memstr.o \ + lgrpsys.o \ + mkdir.o \ + mknod.o \ + mount.o \ + move.o \ + msacct.o \ + multidata.o \ + nbmlock.o \ + ndifm.o \ + nice.o \ + netstack.o \ + ntptime.o \ + nvpair.o \ + nvpair_alloc_system.o \ + nvpair_alloc_fixed.o \ + octet.o \ + open.o \ + p_online.o \ + pathconf.o \ + pathname.o \ + pause.o \ + serializer.o \ + pci_intr_lib.o \ + pci_cap.o \ + pcifm.o \ + pgrp.o \ + pgrpsys.o \ + pid.o \ + pkp_hash.o \ + policy.o \ + poll.o \ + pool.o \ + pool_pset.o \ + port_subr.o \ + ppriv.o \ + printf.o \ + priocntl.o \ + priv.o \ + priv_const.o \ + proc.o \ + procset.o \ + processor_bind.o \ + processor_info.o \ + profil.o \ + project.o \ + qsort.o \ + rctl.o \ + rctlsys.o \ + readlink.o \ + refstr.o \ + rename.o \ + resolvepath.o \ + retire_store.o \ + process.o \ + rlimit.o \ + rmap.o \ + rw.o \ + rwstlock.o \ + sad_conf.o \ + sid.o \ + sidsys.o \ + sched.o \ + schedctl.o \ + sctp_crc32.o \ + seg_dev.o \ + seg_kp.o \ + seg_kpm.o \ + seg_map.o \ + seg_vn.o \ + seg_spt.o \ + semaphore.o \ + sendfile.o \ + session.o \ + share.o \ + shuttle.o \ + sig.o \ + sigaction.o \ + sigaltstack.o \ + signotify.o \ + 
sigpending.o \ + sigprocmask.o \ + sigqueue.o \ + sigsendset.o \ + sigsuspend.o \ + sigtimedwait.o \ + sleepq.o \ + sock_conf.o \ + space.o \ + sscanf.o \ + stat.o \ + statfs.o \ + statvfs.o \ + stol.o \ + str_conf.o \ + strcalls.o \ + stream.o \ + streamio.o \ + strext.o \ + strsubr.o \ + strsun.o \ + subr.o \ + sunddi.o \ + sunmdi.o \ + sunndi.o \ + sunpci.o \ + sunpm.o \ + sundlpi.o \ + suntpi.o \ + swap_subr.o \ + swap_vnops.o \ + symlink.o \ + sync.o \ + sysclass.o \ + sysconfig.o \ + sysent.o \ + sysfs.o \ + systeminfo.o \ + task.o \ + taskq.o \ + tasksys.o \ + time.o \ + timer.o \ + times.o \ + timers.o \ + thread.o \ + tlabel.o \ + tnf_res.o \ + turnstile.o \ + tty_common.o \ + u8_textprep.o \ + uadmin.o \ + uconv.o \ + ucredsys.o \ + uid.o \ + umask.o \ + umount.o \ + uname.o \ + unix_bb.o \ + unlink.o \ + urw.o \ + utime.o \ + utssys.o \ + uucopy.o \ + vfs.o \ + vfs_conf.o \ + vmem.o \ + vm_anon.o \ + vm_as.o \ + vm_meter.o \ + vm_pageout.o \ + vm_pvn.o \ + vm_rm.o \ + vm_seg.o \ + vm_subr.o \ + vm_swap.o \ + vm_usage.o \ + vnode.o \ + vuid_queue.o \ + vuid_store.o \ + waitq.o \ + watchpoint.o \ + yield.o \ + scsi_confdata.o \ + xattr.o \ + xattr_common.o \ + xdr_mblk.o \ + xdr_mem.o \ + xdr.o \ + xdr_array.o \ + xdr_refer.o \ + xhat.o \ + zone.o + +# +# Stubs for the stand-alone linker/loader +# +sparc_GENSTUBS_OBJS = \ + kobj_stubs.o + +i386_GENSTUBS_OBJS = + +COMMON_GENSTUBS_OBJS = + +GENSTUBS_OBJS += $(COMMON_GENSTUBS_OBJS) $($(MACH)_GENSTUBS_OBJS) + +# +# DTrace and DTrace Providers +# +DTRACE_OBJS += dtrace.o dtrace_isa.o dtrace_asm.o + +SDT_OBJS += sdt_subr.o + +PROFILE_OBJS += profile.o + +SYSTRACE_OBJS += systrace.o + +LOCKSTAT_OBJS += lockstat.o + +FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o + +DCPC_OBJS += dcpc.o + +# +# Driver (pseudo-driver) Modules +# +IPP_OBJS += ippctl.o + +AUDIO_OBJS += audio_client.o audio_ddi.o audio_engine.o \ + audio_fltdata.o audio_format.o audio_ctrl.o \ + audio_grc3.o audio_output.o audio_input.o \ + audio_oss.o 
audio_sun.o + +AUDIOEMU10K_OBJS += audioemu10k.o + +AUDIOENS_OBJS += audioens.o + +AUDIOVIA823X_OBJS += audiovia823x.o + +AUDIOVIA97_OBJS += audiovia97.o + +AUDIO1575_OBJS += audio1575.o + +AUDIO810_OBJS += audio810.o + +AUDIOCMI_OBJS += audiocmi.o + +AUDIOHD_OBJS += audiohd.o + +AUDIOIXP_OBJS += audioixp.o + +AUDIOLS_OBJS += audiols.o + +AUDIOP16X_OBJS += audiop16x.o + +AUDIOPCI_OBJS += audiopci.o + +AUDIOSOLO_OBJS += audiosolo.o + +AUDIOTS_OBJS += audiots.o + +AC97_OBJS += ac97.o ac97_ad.o ac97_alc.o ac97_cmi.o + +BLKDEV_OBJS += blkdev.o + +CARDBUS_OBJS += cardbus.o cardbus_hp.o cardbus_cfg.o + +CONSKBD_OBJS += conskbd.o + +CONSMS_OBJS += consms.o + +OLDPTY_OBJS += tty_ptyconf.o + +PTC_OBJS += tty_pty.o + +PTSL_OBJS += tty_pts.o + +PTM_OBJS += ptm.o + +MII_OBJS += mii.o mii_cicada.o mii_natsemi.o mii_intel.o mii_qualsemi.o \ + mii_marvell.o mii_realtek.o mii_other.o + +PTS_OBJS += pts.o + +PTY_OBJS += ptms_conf.o + +SAD_OBJS += sad.o + +MD4_OBJS += md4.o md4_mod.o + +MD5_OBJS += md5.o md5_mod.o + +SHA1_OBJS += sha1.o sha1_mod.o fips_sha1_util.o + +SHA2_OBJS += sha2.o sha2_mod.o fips_sha2_util.o + +IPGPC_OBJS += classifierddi.o classifier.o filters.o trie.o table.o \ + ba_table.o + +DSCPMK_OBJS += dscpmk.o dscpmkddi.o + +DLCOSMK_OBJS += dlcosmk.o dlcosmkddi.o + +FLOWACCT_OBJS += flowacctddi.o flowacct.o + +TOKENMT_OBJS += tokenmt.o tokenmtddi.o + +TSWTCL_OBJS += tswtcl.o tswtclddi.o + +ARP_OBJS += arpddi.o + +ICMP_OBJS += icmpddi.o + +ICMP6_OBJS += icmp6ddi.o + +RTS_OBJS += rtsddi.o + +IP_ICMP_OBJS = icmp.o icmp_opt_data.o +IP_RTS_OBJS = rts.o rts_opt_data.o +IP_TCP_OBJS = tcp.o tcp_fusion.o tcp_opt_data.o tcp_sack.o tcp_stats.o \ + tcp_misc.o tcp_timers.o tcp_time_wait.o tcp_tpi.o tcp_output.o \ + tcp_input.o tcp_socket.o tcp_bind.o tcp_cluster.o tcp_tunables.o +IP_UDP_OBJS = udp.o udp_opt_data.o udp_tunables.o udp_stats.o +IP_SCTP_OBJS = sctp.o sctp_opt_data.o sctp_output.o \ + sctp_init.o sctp_input.o sctp_cookie.o \ + sctp_conn.o sctp_error.o sctp_snmp.o \ + 
sctp_tunables.o sctp_shutdown.o sctp_common.o \ + sctp_timer.o sctp_heartbeat.o sctp_hash.o \ + sctp_bind.o sctp_notify.o sctp_asconf.o \ + sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o \ + sctp_misc.o +IP_ILB_OBJS = ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o + +IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o \ + ip6_rts.o ip_if.o ip_ire.o ip_listutils.o ip_mroute.o \ + ip_multi.o ip2mac.o ip_ndp.o ip_rts.o ip_srcid.o \ + ipddi.o ipdrop.o mi.o nd.o tunables.o optcom.o snmpcom.o \ + ipsec_loader.o spd.o ipclassifier.o inet_common.o ip_squeue.o \ + squeue.o ip_sadb.o ip_ftable.o proto_set.o radix.o ip_dummy.o \ + ip_helper_stream.o ip_tunables.o \ + ip_output.o ip_input.o ip6_input.o ip6_output.o ip_arp.o \ + conn_opt.o ip_attr.o ip_dce.o \ + $(IP_ICMP_OBJS) \ + $(IP_RTS_OBJS) \ + $(IP_TCP_OBJS) \ + $(IP_UDP_OBJS) \ + $(IP_SCTP_OBJS) \ + $(IP_ILB_OBJS) + +IP6_OBJS += ip6ddi.o + +HOOK_OBJS += hook.o + +NETI_OBJS += neti_impl.o neti_mod.o neti_stack.o + +KEYSOCK_OBJS += keysockddi.o keysock.o keysock_opt_data.o + +IPNET_OBJS += ipnet.o ipnet_bpf.o + +SPDSOCK_OBJS += spdsockddi.o spdsock.o spdsock_opt_data.o + +IPSECESP_OBJS += ipsecespddi.o ipsecesp.o + +IPSECAH_OBJS += ipsecahddi.o ipsecah.o sadb.o + +SPPP_OBJS += sppp.o sppp_dlpi.o sppp_mod.o s_common.o + +SPPPTUN_OBJS += sppptun.o sppptun_mod.o + +SPPPASYN_OBJS += spppasyn.o spppasyn_mod.o + +SPPPCOMP_OBJS += spppcomp.o spppcomp_mod.o deflate.o bsd-comp.o vjcompress.o \ + zlib.o + +TCP_OBJS += tcpddi.o + +TCP6_OBJS += tcp6ddi.o + +NCA_OBJS += ncaddi.o + +SDP_SOCK_MOD_OBJS += sockmod_sdp.o socksdp.o socksdpsubr.o + +SCTP_SOCK_MOD_OBJS += sockmod_sctp.o socksctp.o socksctpsubr.o + +PFP_SOCK_MOD_OBJS += sockmod_pfp.o + +RDS_SOCK_MOD_OBJS += sockmod_rds.o + +RDS_OBJS += rdsddi.o rdssubr.o rds_opt.o rds_ioctl.o + +RDSIB_OBJS += rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \ + rdsib_debug.o rdsib_sc.o + +RDSV3_OBJS += af_rds.o rdsv3_ddi.o bind.o loop.o threads.o connection.o \ + 
transport.o cong.o sysctl.o message.o rds_recv.o send.o \ + stats.o info.o page.o rdma_transport.o ib_ring.o ib_rdma.o \ + ib_recv.o ib.o ib_send.o ib_sysctl.o ib_stats.o ib_cm.o \ + rdsv3_sc.o rdsv3_debug.o rdsv3_impl.o rdma.o rdsv3_af_thr.o + +ISER_OBJS += iser.o iser_cm.o iser_cq.o iser_ib.o iser_idm.o \ + iser_resource.o iser_xfer.o + +UDP_OBJS += udpddi.o + +UDP6_OBJS += udp6ddi.o + +SY_OBJS += gentty.o + +TCO_OBJS += ticots.o + +TCOO_OBJS += ticotsord.o + +TCL_OBJS += ticlts.o + +TL_OBJS += tl.o + +DUMP_OBJS += dump.o + +BPF_OBJS += bpf.o bpf_filter.o bpf_mod.o bpf_dlt.o bpf_mac.o + +CLONE_OBJS += clone.o + +CN_OBJS += cons.o + +DLD_OBJS += dld_drv.o dld_proto.o dld_str.o dld_flow.o + +DLS_OBJS += dls.o dls_link.o dls_mod.o dls_stat.o dls_mgmt.o + +GLD_OBJS += gld.o gldutil.o + +MAC_OBJS += mac.o mac_bcast.o mac_client.o mac_datapath_setup.o mac_flow.o \ + mac_hio.o mac_mod.o mac_ndd.o mac_provider.o mac_sched.o \ + mac_protect.o mac_soft_ring.o mac_stat.o mac_util.o + +MAC_6TO4_OBJS += mac_6to4.o + +MAC_ETHER_OBJS += mac_ether.o + +MAC_IPV4_OBJS += mac_ipv4.o + +MAC_IPV6_OBJS += mac_ipv6.o + +MAC_WIFI_OBJS += mac_wifi.o + +MAC_IB_OBJS += mac_ib.o + +IPTUN_OBJS += iptun_dev.o iptun_ctl.o iptun.o + +AGGR_OBJS += aggr_dev.o aggr_ctl.o aggr_grp.o aggr_port.o \ + aggr_send.o aggr_recv.o aggr_lacp.o + +SOFTMAC_OBJS += softmac_main.o softmac_ctl.o softmac_capab.o \ + softmac_dev.o softmac_stat.o softmac_pkt.o softmac_fp.o + +NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \ + net80211_output.o net80211_node.o net80211_crypto.o \ + net80211_crypto_none.o net80211_crypto_wep.o net80211_ioctl.o \ + net80211_crypto_tkip.o net80211_crypto_ccmp.o \ + net80211_ht.o + +VNIC_OBJS += vnic_ctl.o vnic_dev.o + +SIMNET_OBJS += simnet.o + +IB_OBJS += ibnex.o ibnex_ioctl.o ibnex_hca.o + +IBCM_OBJS += ibcm_impl.o ibcm_sm.o ibcm_ti.o ibcm_utils.o ibcm_path.o \ + ibcm_arp.o ibcm_arp_link.o + +IBDM_OBJS += ibdm.o + +IBDMA_OBJS += ibdma.o + +IBMF_OBJS += ibmf.o 
ibmf_impl.o ibmf_dr.o ibmf_wqe.o ibmf_ud_dest.o ibmf_mod.o \ + ibmf_send.o ibmf_recv.o ibmf_handlers.o ibmf_trans.o \ + ibmf_timers.o ibmf_msg.o ibmf_utils.o ibmf_rmpp.o \ + ibmf_saa.o ibmf_saa_impl.o ibmf_saa_utils.o ibmf_saa_events.o + +IBTL_OBJS += ibtl_impl.o ibtl_util.o ibtl_mem.o ibtl_handlers.o ibtl_qp.o \ + ibtl_cq.o ibtl_wr.o ibtl_hca.o ibtl_chan.o ibtl_cm.o \ + ibtl_mcg.o ibtl_ibnex.o ibtl_srq.o ibtl_part.o + +TAVOR_OBJS += tavor.o tavor_agents.o tavor_cfg.o tavor_ci.o tavor_cmd.o \ + tavor_cq.o tavor_event.o tavor_ioctl.o tavor_misc.o \ + tavor_mr.o tavor_qp.o tavor_qpmod.o tavor_rsrc.o \ + tavor_srq.o tavor_stats.o tavor_umap.o tavor_wr.o + +HERMON_OBJS += hermon.o hermon_agents.o hermon_cfg.o hermon_ci.o hermon_cmd.o \ + hermon_cq.o hermon_event.o hermon_ioctl.o hermon_misc.o \ + hermon_mr.o hermon_qp.o hermon_qpmod.o hermon_rsrc.o \ + hermon_srq.o hermon_stats.o hermon_umap.o hermon_wr.o \ + hermon_fcoib.o hermon_fm.o + +DAPLT_OBJS += daplt.o + +SOL_OFS_OBJS += sol_cma.o sol_ib_cma.o sol_uobj.o \ + sol_ofs_debug_util.o sol_ofs_gen_util.o \ + sol_kverbs.o + +SOL_UCMA_OBJS += sol_ucma.o + +SOL_UVERBS_OBJS += sol_uverbs.o sol_uverbs_comp.o sol_uverbs_event.o \ + sol_uverbs_hca.o sol_uverbs_qp.o + +SOL_UMAD_OBJS += sol_umad.o + +KSTAT_OBJS += kstat.o + +KSYMS_OBJS += ksyms.o + +INSTANCE_OBJS += inst_sync.o + +IWSCN_OBJS += iwscons.o + +LOFI_OBJS += lofi.o LzmaDec.o + +FSSNAP_OBJS += fssnap.o + +FSSNAPIF_OBJS += fssnap_if.o + +MM_OBJS += mem.o + +PHYSMEM_OBJS += physmem.o + +OPTIONS_OBJS += options.o + +WINLOCK_OBJS += winlockio.o + +PM_OBJS += pm.o +SRN_OBJS += srn.o + +PSEUDO_OBJS += pseudonex.o + +RAMDISK_OBJS += ramdisk.o + +LLC1_OBJS += llc1.o + +USBKBM_OBJS += usbkbm.o + +USBWCM_OBJS += usbwcm.o + +BOFI_OBJS += bofi.o + +HID_OBJS += hid.o + +HWA_RC_OBJS += hwarc.o + +USBSKEL_OBJS += usbskel.o + +USBVC_OBJS += usbvc.o usbvc_v4l2.o + +HIDPARSER_OBJS += hidparser.o + +USB_AC_OBJS += usb_ac.o + +USB_AS_OBJS += usb_as.o + +USB_AH_OBJS += usb_ah.o + 
+USBMS_OBJS += usbms.o + +USBPRN_OBJS += usbprn.o + +UGEN_OBJS += ugen.o + +USBSER_OBJS += usbser.o usbser_rseq.o + +USBSACM_OBJS += usbsacm.o + +USBSER_KEYSPAN_OBJS += usbser_keyspan.o keyspan_dsd.o keyspan_pipe.o + +USBS49_FW_OBJS += keyspan_49fw.o + +USBSPRL_OBJS += usbser_pl2303.o pl2303_dsd.o + +WUSB_CA_OBJS += wusb_ca.o + +USBFTDI_OBJS += usbser_uftdi.o uftdi_dsd.o + +USBECM_OBJS += usbecm.o + +WC_OBJS += wscons.o vcons.o + +VCONS_CONF_OBJS += vcons_conf.o + +SCSI_OBJS += scsi_capabilities.o scsi_confsubr.o scsi_control.o \ + scsi_data.o scsi_fm.o scsi_hba.o scsi_reset_notify.o \ + scsi_resource.o scsi_subr.o scsi_transport.o scsi_watch.o \ + smp_transport.o + +SCSI_VHCI_OBJS += scsi_vhci.o mpapi_impl.o scsi_vhci_tpgs.o + +SCSI_VHCI_F_SYM_OBJS += sym.o + +SCSI_VHCI_F_TPGS_OBJS += tpgs.o + +SCSI_VHCI_F_ASYM_SUN_OBJS += asym_sun.o + +SCSI_VHCI_F_SYM_HDS_OBJS += sym_hds.o + +SCSI_VHCI_F_TAPE_OBJS += tape.o + +SCSI_VHCI_F_TPGS_TAPE_OBJS += tpgs_tape.o + +SGEN_OBJS += sgen.o + +SMP_OBJS += smp.o + +SATA_OBJS += sata.o + +USBA_OBJS += hcdi.o usba.o usbai.o hubdi.o parser.o genconsole.o \ + usbai_pipe_mgmt.o usbai_req.o usbai_util.o usbai_register.o \ + usba_devdb.o usba10_calls.o usba_ugen.o whcdi.o wa.o +USBA_WITHOUT_WUSB_OBJS += hcdi.o usba.o usbai.o hubdi.o parser.o genconsole.o \ + usbai_pipe_mgmt.o usbai_req.o usbai_util.o usbai_register.o \ + usba_devdb.o usba10_calls.o usba_ugen.o + +USBA10_OBJS += usba10.o + +RSM_OBJS += rsm.o rsmka_pathmanager.o rsmka_util.o + +RSMOPS_OBJS += rsmops.o + +S1394_OBJS += t1394.o t1394_errmsg.o s1394.o s1394_addr.o s1394_asynch.o \ + s1394_bus_reset.o s1394_cmp.o s1394_csr.o s1394_dev_disc.o \ + s1394_fa.o s1394_fcp.o \ + s1394_hotplug.o s1394_isoch.o s1394_misc.o h1394.o nx1394.o + +HCI1394_OBJS += hci1394.o hci1394_async.o hci1394_attach.o hci1394_buf.o \ + hci1394_csr.o hci1394_detach.o hci1394_extern.o \ + hci1394_ioctl.o hci1394_isoch.o hci1394_isr.o \ + hci1394_ixl_comp.o hci1394_ixl_isr.o hci1394_ixl_misc.o \ + 
hci1394_ixl_update.o hci1394_misc.o hci1394_ohci.o \ + hci1394_q.o hci1394_s1394if.o hci1394_tlabel.o \ + hci1394_tlist.o hci1394_vendor.o + +AV1394_OBJS += av1394.o av1394_as.o av1394_async.o av1394_cfgrom.o \ + av1394_cmp.o av1394_fcp.o av1394_isoch.o av1394_isoch_chan.o \ + av1394_isoch_recv.o av1394_isoch_xmit.o av1394_list.o \ + av1394_queue.o + +DCAM1394_OBJS += dcam.o dcam_frame.o dcam_param.o dcam_reg.o \ + dcam_ring_buff.o + +SCSA1394_OBJS += hba.o sbp2_driver.o sbp2_bus.o + +SBP2_OBJS += cfgrom.o sbp2.o + +PMODEM_OBJS += pmodem.o pmodem_cis.o cis.o cis_callout.o cis_handlers.o cis_params.o + +DSW_OBJS += dsw.o dsw_dev.o ii_tree.o + +NCALL_OBJS += ncall.o \ + ncall_stub.o + +RDC_OBJS += rdc.o \ + rdc_dev.o \ + rdc_io.o \ + rdc_clnt.o \ + rdc_prot_xdr.o \ + rdc_svc.o \ + rdc_bitmap.o \ + rdc_health.o \ + rdc_subr.o \ + rdc_diskq.o + +RDCSRV_OBJS += rdcsrv.o + +RDCSTUB_OBJS += rdc_stub.o + +SDBC_OBJS += sd_bcache.o \ + sd_bio.o \ + sd_conf.o \ + sd_ft.o \ + sd_hash.o \ + sd_io.o \ + sd_misc.o \ + sd_pcu.o \ + sd_tdaemon.o \ + sd_trace.o \ + sd_iob_impl0.o \ + sd_iob_impl1.o \ + sd_iob_impl2.o \ + sd_iob_impl3.o \ + sd_iob_impl4.o \ + sd_iob_impl5.o \ + sd_iob_impl6.o \ + sd_iob_impl7.o \ + safestore.o \ + safestore_ram.o + +NSCTL_OBJS += nsctl.o \ + nsc_cache.o \ + nsc_disk.o \ + nsc_dev.o \ + nsc_freeze.o \ + nsc_gen.o \ + nsc_mem.o \ + nsc_ncallio.o \ + nsc_power.o \ + nsc_resv.o \ + nsc_rmspin.o \ + nsc_solaris.o \ + nsc_trap.o \ + nsc_list.o +UNISTAT_OBJS += spuni.o \ + spcs_s_k.o + +NSKERN_OBJS += nsc_ddi.o \ + nsc_proc.o \ + nsc_raw.o \ + nsc_thread.o \ + nskernd.o + +SV_OBJS += sv.o + +PMCS_OBJS += pmcs_attach.o pmcs_ds.o pmcs_intr.o pmcs_nvram.o pmcs_sata.o \ + pmcs_scsa.o pmcs_smhba.o pmcs_subr.o pmcs_fwlog.o + +PMCS8001FW_C_OBJS += pmcs_fw_hdr.o +PMCS8001FW_OBJS += $(PMCS8001FW_C_OBJS) SPCBoot.o ila.o firmware.o + +# +# Build up defines and paths. 
+ +ST_OBJS += st.o st_conf.o + +EMLXS_OBJS += emlxs_clock.o emlxs_dfc.o emlxs_dhchap.o emlxs_diag.o \ + emlxs_download.o emlxs_dump.o emlxs_els.o emlxs_event.o \ + emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \ + emlxs_mbox.o emlxs_mem.o emlxs_msg.o emlxs_node.o \ + emlxs_pkt.o emlxs_sli3.o emlxs_sli4.o emlxs_solaris.o \ + emlxs_thread.o + +EMLXS_FW_OBJS += emlxs_fw.o + +OCE_OBJS += oce_buf.o oce_fm.o oce_gld.o oce_hw.o oce_intr.o oce_main.o \ + oce_mbx.o oce_mq.o oce_queue.o oce_rx.o oce_stat.o oce_tx.o \ + oce_utils.o + +FCT_OBJS += discovery.o fct.o + +QLT_OBJS += 2400.o 2500.o 8100.o qlt.o qlt_dma.o + +SRPT_OBJS += srpt_mod.o srpt_ch.o srpt_cm.o srpt_ioc.o srpt_stp.o + +FCOE_OBJS += fcoe.o fcoe_eth.o fcoe_fc.o + +FCOET_OBJS += fcoet.o fcoet_eth.o fcoet_fc.o + +FCOEI_OBJS += fcoei.o fcoei_eth.o fcoei_lv.o + +ISCSIT_SHARED_OBJS += \ + iscsit_common.o + +ISCSIT_OBJS += $(ISCSIT_SHARED_OBJS) \ + iscsit.o iscsit_tgt.o iscsit_sess.o iscsit_login.o \ + iscsit_text.o iscsit_isns.o iscsit_radiusauth.o \ + iscsit_radiuspacket.o iscsit_auth.o iscsit_authclient.o + +PPPT_OBJS += alua_ic_if.o pppt.o pppt_msg.o pppt_tgt.o + +STMF_OBJS += lun_map.o stmf.o + +STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o + +SYSMSG_OBJS += sysmsg.o + +SES_OBJS += ses.o ses_sen.o ses_safte.o ses_ses.o + +TNF_OBJS += tnf_buf.o tnf_trace.o tnf_writer.o trace_init.o \ + trace_funcs.o tnf_probe.o tnf.o + +LOGINDMUX_OBJS += logindmux.o + +DEVINFO_OBJS += devinfo.o + +DEVPOLL_OBJS += devpoll.o + +DEVPOOL_OBJS += devpool.o + +I8042_OBJS += i8042.o + +KB8042_OBJS += \ + at_keyprocess.o \ + kb8042.o \ + kb8042_keytables.o + +MOUSE8042_OBJS += mouse8042.o + +FDC_OBJS += fdc.o + +ASY_OBJS += asy.o + +ECPP_OBJS += ecpp.o + +VUIDM3P_OBJS += vuidmice.o vuidm3p.o + +VUIDM4P_OBJS += vuidmice.o vuidm4p.o + +VUIDM5P_OBJS += vuidmice.o vuidm5p.o + +VUIDPS2_OBJS += vuidmice.o vuidps2.o + +HPCSVC_OBJS += hpcsvc.o + +PCIE_MISC_OBJS += pcie.o pcie_fault.o pcie_hp.o pciehpc.o pcishpc.o pcie_pwr.o pciev.o + 
+PCIHPNEXUS_OBJS += pcihp.o + +OPENEEPR_OBJS += openprom.o + +RANDOM_OBJS += random.o + +PSHOT_OBJS += pshot.o + +GEN_DRV_OBJS += gen_drv.o + +TCLIENT_OBJS += tclient.o + +TPHCI_OBJS += tphci.o + +TVHCI_OBJS += tvhci.o + +EMUL64_OBJS += emul64.o emul64_bsd.o + +FCP_OBJS += fcp.o + +FCIP_OBJS += fcip.o + +FCSM_OBJS += fcsm.o + +FCTL_OBJS += fctl.o + +FP_OBJS += fp.o + +QLC_OBJS += ql_api.o ql_debug.o ql_hba_fru.o ql_init.o ql_iocb.o ql_ioctl.o \ + ql_isr.o ql_mbx.o ql_nx.o ql_xioctl.o ql_fw_table.o + +QLC_FW_2200_OBJS += ql_fw_2200.o + +QLC_FW_2300_OBJS += ql_fw_2300.o + +QLC_FW_2400_OBJS += ql_fw_2400.o + +QLC_FW_2500_OBJS += ql_fw_2500.o + +QLC_FW_6322_OBJS += ql_fw_6322.o + +QLC_FW_8100_OBJS += ql_fw_8100.o + +QLGE_OBJS += qlge.o qlge_dbg.o qlge_flash.o qlge_fm.o qlge_gld.o qlge_mpi.o + +ZCONS_OBJS += zcons.o + +NV_SATA_OBJS += nv_sata.o + +SI3124_OBJS += si3124.o + +AHCI_OBJS += ahci.o + +PCIIDE_OBJS += pci-ide.o + +PCEPP_OBJS += pcepp.o + +CPC_OBJS += cpc.o + +CPUID_OBJS += cpuid_drv.o + +SYSEVENT_OBJS += sysevent.o + +BL_OBJS += bl.o + +DRM_OBJS += drm_sunmod.o drm_kstat.o drm_agpsupport.o \ + drm_auth.o drm_bufs.o drm_context.o drm_dma.o \ + drm_drawable.o drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \ + drm_lock.o drm_memory.o drm_msg.o drm_pci.o drm_scatter.o \ + drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o + +FM_OBJS += devfm.o devfm_machdep.o + +RTLS_OBJS += rtls.o + +# +# exec modules +# +AOUTEXEC_OBJS +=aout.o + +ELFEXEC_OBJS += elf.o elf_notes.o old_notes.o + +INTPEXEC_OBJS +=intp.o + +SHBINEXEC_OBJS +=shbin.o + +JAVAEXEC_OBJS +=java.o + +# +# file system modules +# +AUTOFS_OBJS += auto_vfsops.o auto_vnops.o auto_subr.o auto_xdr.o auto_sys.o + +CACHEFS_OBJS += cachefs_cnode.o cachefs_cod.o \ + cachefs_dir.o cachefs_dlog.o cachefs_filegrp.o \ + cachefs_fscache.o cachefs_ioctl.o cachefs_log.o \ + cachefs_module.o \ + cachefs_noopc.o cachefs_resource.o \ + cachefs_strict.o \ + cachefs_subr.o cachefs_vfsops.o \ + cachefs_vnops.o + +DCFS_OBJS += dc_vnops.o + 
+DEVFS_OBJS += devfs_subr.o devfs_vfsops.o devfs_vnops.o + +DEV_OBJS += sdev_subr.o sdev_vfsops.o sdev_vnops.o \ + sdev_ptsops.o sdev_zvolops.o sdev_comm.o \ + sdev_profile.o sdev_ncache.o sdev_netops.o \ + sdev_ipnetops.o \ + sdev_vtops.o + +CTFS_OBJS += ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \ + ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o + +OBJFS_OBJS += objfs_vfs.o objfs_root.o objfs_common.o \ + objfs_odir.o objfs_data.o + +FDFS_OBJS += fdops.o + +FIFO_OBJS += fifosubr.o fifovnops.o + +PIPE_OBJS += pipe.o + +HSFS_OBJS += hsfs_node.o hsfs_subr.o hsfs_vfsops.o hsfs_vnops.o \ + hsfs_susp.o hsfs_rrip.o hsfs_susp_subr.o + +LOFS_OBJS += lofs_subr.o lofs_vfsops.o lofs_vnops.o + +NAMEFS_OBJS += namevfs.o namevno.o + +NFS_OBJS += nfs_client.o nfs_common.o nfs_dump.o \ + nfs_subr.o nfs_vfsops.o nfs_vnops.o \ + nfs_xdr.o nfs_sys.o nfs_strerror.o \ + nfs3_vfsops.o nfs3_vnops.o nfs3_xdr.o \ + nfs_acl_vnops.o nfs_acl_xdr.o nfs4_vfsops.o \ + nfs4_vnops.o nfs4_xdr.o nfs4_idmap.o \ + nfs4_shadow.o nfs4_subr.o \ + nfs4_attr.o nfs4_rnode.o nfs4_client.o \ + nfs4_acache.o nfs4_common.o nfs4_client_state.o \ + nfs4_callback.o nfs4_recovery.o nfs4_client_secinfo.o \ + nfs4_client_debug.o nfs_stats.o \ + nfs4_acl.o nfs4_stub_vnops.o nfs_cmd.o + +NFSSRV_OBJS += nfs_server.o nfs_srv.o nfs3_srv.o \ + nfs_acl_srv.o nfs_auth.o nfs_auth_xdr.o \ + nfs_export.o nfs_log.o nfs_log_xdr.o \ + nfs4_srv.o nfs4_state.o nfs4_srv_attr.o \ + nfs4_srv_ns.o nfs4_db.o nfs4_srv_deleg.o \ + nfs4_deleg_ops.o nfs4_srv_readdir.o nfs4_dispatch.o + +SMBSRV_SHARED_OBJS += \ + smb_inet.o \ + smb_match.o \ + smb_msgbuf.o \ + smb_oem.o \ + smb_string.o \ + smb_utf8.o \ + smb_door_legacy.o \ + smb_xdr.o \ + smb_token.o \ + smb_token_xdr.o \ + smb_sid.o \ + smb_native.o \ + smb_netbios_util.o + +SMBSRV_OBJS += $(SMBSRV_SHARED_OBJS) \ + smb_acl.o \ + smb_alloc.o \ + smb_close.o \ + smb_common_open.o \ + smb_common_transact.o \ + smb_create.o \ + smb_delete.o \ + smb_directory.o \ + 
smb_dispatch.o \ + smb_echo.o \ + smb_fem.o \ + smb_find.o \ + smb_flush.o \ + smb_fsinfo.o \ + smb_fsops.o \ + smb_init.o \ + smb_kdoor.o \ + smb_kshare.o \ + smb_kutil.o \ + smb_lock.o \ + smb_lock_byte_range.o \ + smb_locking_andx.o \ + smb_logoff_andx.o \ + smb_mangle_name.o \ + smb_mbuf_marshaling.o \ + smb_mbuf_util.o \ + smb_negotiate.o \ + smb_net.o \ + smb_node.o \ + smb_nt_cancel.o \ + smb_nt_create_andx.o \ + smb_nt_transact_create.o \ + smb_nt_transact_ioctl.o \ + smb_nt_transact_notify_change.o \ + smb_nt_transact_quota.o \ + smb_nt_transact_security.o \ + smb_odir.o \ + smb_ofile.o \ + smb_open_andx.o \ + smb_opipe.o \ + smb_oplock.o \ + smb_pathname.o \ + smb_print.o \ + smb_process_exit.o \ + smb_query_fileinfo.o \ + smb_read.o \ + smb_rename.o \ + smb_sd.o \ + smb_seek.o \ + smb_server.o \ + smb_session.o \ + smb_session_setup_andx.o \ + smb_set_fileinfo.o \ + smb_signing.o \ + smb_tree.o \ + smb_trans2_create_directory.o \ + smb_trans2_dfs.o \ + smb_trans2_find.o \ + smb_tree_connect.o \ + smb_unlock_byte_range.o \ + smb_user.o \ + smb_vfs.o \ + smb_vops.o \ + smb_vss.o \ + smb_write.o \ + smb_write_raw.o + +PCFS_OBJS += pc_alloc.o pc_dir.o pc_node.o pc_subr.o \ + pc_vfsops.o pc_vnops.o + +PROC_OBJS += prcontrol.o prioctl.o prsubr.o prusrio.o \ + prvfsops.o prvnops.o + +MNTFS_OBJS += mntvfsops.o mntvnops.o + +SHAREFS_OBJS += sharetab.o sharefs_vfsops.o sharefs_vnops.o + +SPEC_OBJS += specsubr.o specvfsops.o specvnops.o + +SOCK_OBJS += socksubr.o sockvfsops.o sockparams.o \ + socksyscalls.o socktpi.o sockstr.o \ + sockcommon_vnops.o sockcommon_subr.o \ + sockcommon_sops.o sockcommon.o \ + sock_notsupp.o socknotify.o \ + nl7c.o nl7curi.o nl7chttp.o nl7clogd.o \ + nl7cnca.o sodirect.o sockfilter.o + +TMPFS_OBJS += tmp_dir.o tmp_subr.o tmp_tnode.o tmp_vfsops.o \ + tmp_vnops.o + +UDFS_OBJS += udf_alloc.o udf_bmap.o udf_dir.o \ + udf_inode.o udf_subr.o udf_vfsops.o \ + udf_vnops.o + +UFS_OBJS += ufs_alloc.o ufs_bmap.o ufs_dir.o ufs_xattr.o \ + 
ufs_inode.o ufs_subr.o ufs_tables.o ufs_vfsops.o \ + ufs_vnops.o quota.o quotacalls.o quota_ufs.o \ + ufs_filio.o ufs_lockfs.o ufs_thread.o ufs_trans.o \ + ufs_acl.o ufs_panic.o ufs_directio.o ufs_log.o \ + ufs_extvnops.o ufs_snap.o lufs.o lufs_thread.o \ + lufs_log.o lufs_map.o lufs_top.o lufs_debug.o +VSCAN_OBJS += vscan_drv.o vscan_svc.o vscan_door.o + +NSMB_OBJS += smb_conn.o smb_dev.o smb_iod.o smb_pass.o \ + smb_rq.o smb_sign.o smb_smb.o smb_subrs.o \ + smb_time.o smb_tran.o smb_trantcp.o smb_usr.o \ + subr_mchain.o + +SMBFS_COMMON_OBJS += smbfs_ntacl.o +SMBFS_OBJS += smbfs_vfsops.o smbfs_vnops.o smbfs_node.o \ + smbfs_acl.o smbfs_client.o smbfs_smb.o \ + smbfs_subr.o smbfs_subr2.o \ + smbfs_rwlock.o smbfs_xattr.o \ + $(SMBFS_COMMON_OBJS) + + +# +# LVM modules +# +MD_OBJS += md.o md_error.o md_ioctl.o md_mddb.o md_names.o \ + md_med.o md_rename.o md_subr.o + +MD_COMMON_OBJS = md_convert.o md_crc.o md_revchk.o + +MD_DERIVED_OBJS = metamed_xdr.o meta_basic_xdr.o + +SOFTPART_OBJS += sp.o sp_ioctl.o + +STRIPE_OBJS += stripe.o stripe_ioctl.o + +HOTSPARES_OBJS += hotspares.o + +RAID_OBJS += raid.o raid_ioctl.o raid_replay.o raid_resync.o raid_hotspare.o + +MIRROR_OBJS += mirror.o mirror_ioctl.o mirror_resync.o + +NOTIFY_OBJS += md_notify.o + +TRANS_OBJS += mdtrans.o trans_ioctl.o trans_log.o + +ZFS_COMMON_OBJS += \ + arc.o \ + bplist.o \ + bpobj.o \ + dbuf.o \ + ddt.o \ + ddt_zap.o \ + dmu.o \ + dmu_diff.o \ + dmu_send.o \ + dmu_object.o \ + dmu_objset.o \ + dmu_traverse.o \ + dmu_tx.o \ + dnode.o \ + dnode_sync.o \ + dsl_dir.o \ + dsl_dataset.o \ + dsl_deadlist.o \ + dsl_pool.o \ + dsl_synctask.o \ + dmu_zfetch.o \ + dsl_deleg.o \ + dsl_prop.o \ + dsl_scan.o \ + gzip.o \ + lzjb.o \ + metaslab.o \ + refcount.o \ + sa.o \ + sha256.o \ + spa.o \ + spa_config.o \ + spa_errlog.o \ + spa_history.o \ + spa_misc.o \ + space_map.o \ + txg.o \ + uberblock.o \ + unique.o \ + vdev.o \ + vdev_cache.o \ + vdev_file.o \ + vdev_label.o \ + vdev_mirror.o \ + vdev_missing.o \ + 
vdev_queue.o \ + vdev_raidz.o \ + vdev_root.o \ + zap.o \ + zap_leaf.o \ + zap_micro.o \ + zfs_byteswap.o \ + zfs_debug.o \ + zfs_fm.o \ + zfs_fuid.o \ + zfs_sa.o \ + zfs_znode.o \ + zil.o \ + zio.o \ + zio_checksum.o \ + zio_compress.o \ + zio_inject.o \ + zle.o \ + zrlock.o + +ZFS_SHARED_OBJS += \ + zfs_namecheck.o \ + zfs_deleg.o \ + zfs_prop.o \ + zfs_comutil.o \ + zfs_fletcher.o \ + zpool_prop.o \ + zprop_common.o + +ZFS_OBJS += \ + $(ZFS_COMMON_OBJS) \ + $(ZFS_SHARED_OBJS) \ + vdev_disk.o \ + zfs_acl.o \ + zfs_ctldir.o \ + zfs_dir.o \ + zfs_ioctl.o \ + zfs_log.o \ + zfs_onexit.o \ + zfs_replay.o \ + zfs_rlock.o \ + rrwlock.o \ + zfs_vfsops.o \ + zfs_vnops.o \ + zvol.o + +ZUT_OBJS += \ + zut.o + +# +# streams modules +# +BUFMOD_OBJS += bufmod.o + +CONNLD_OBJS += connld.o + +DEDUMP_OBJS += dedump.o + +DRCOMPAT_OBJS += drcompat.o + +LDLINUX_OBJS += ldlinux.o + +LDTERM_OBJS += ldterm.o uwidth.o + +PCKT_OBJS += pckt.o + +PFMOD_OBJS += pfmod.o + +PTEM_OBJS += ptem.o + +REDIRMOD_OBJS += strredirm.o + +TIMOD_OBJS += timod.o + +TIRDWR_OBJS += tirdwr.o + +TTCOMPAT_OBJS +=ttcompat.o + +LOG_OBJS += log.o + +PIPEMOD_OBJS += pipemod.o + +RPCMOD_OBJS += rpcmod.o clnt_cots.o clnt_clts.o \ + clnt_gen.o clnt_perr.o mt_rpcinit.o rpc_calmsg.o \ + rpc_prot.o rpc_sztypes.o rpc_subr.o rpcb_prot.o \ + svc.o svc_clts.o svc_gen.o svc_cots.o \ + rpcsys.o xdr_sizeof.o clnt_rdma.o svc_rdma.o \ + xdr_rdma.o rdma_subr.o xdrrdma_sizeof.o + +TLIMOD_OBJS += tlimod.o t_kalloc.o t_kbind.o t_kclose.o \ + t_kconnect.o t_kfree.o t_kgtstate.o t_kopen.o \ + t_krcvudat.o t_ksndudat.o t_kspoll.o t_kunbind.o \ + t_kutil.o + +RLMOD_OBJS += rlmod.o + +TELMOD_OBJS += telmod.o + +CRYPTMOD_OBJS += cryptmod.o + +KB_OBJS += kbd.o keytables.o + +# +# ID mapping module +# +IDMAP_OBJS += idmap_mod.o idmap_kapi.o idmap_xdr.o idmap_cache.o + +# +# scheduling class modules +# +SDC_OBJS += sysdc.o + +RT_OBJS += rt.o +RT_DPTBL_OBJS += rt_dptbl.o + +TS_OBJS += ts.o +TS_DPTBL_OBJS += ts_dptbl.o + +IA_OBJS += ia.o + 
+FSS_OBJS += fss.o + +FX_OBJS += fx.o +FX_DPTBL_OBJS += fx_dptbl.o + +# +# Inter-Process Communication (IPC) modules +# +IPC_OBJS += ipc.o + +IPCMSG_OBJS += msg.o + +IPCSEM_OBJS += sem.o + +IPCSHM_OBJS += shm.o + +# +# bignum module +# +COMMON_BIGNUM_OBJS += bignum_mod.o bignumimpl.o + +BIGNUM_OBJS += $(COMMON_BIGNUM_OBJS) $(BIGNUM_PSR_OBJS) + +# +# kernel cryptographic framework +# +KCF_OBJS += kcf.o kcf_callprov.o kcf_cbufcall.o kcf_cipher.o kcf_crypto.o \ + kcf_cryptoadm.o kcf_ctxops.o kcf_digest.o kcf_dual.o \ + kcf_keys.o kcf_mac.o kcf_mech_tabs.o kcf_miscapi.o \ + kcf_object.o kcf_policy.o kcf_prov_lib.o kcf_prov_tabs.o \ + kcf_sched.o kcf_session.o kcf_sign.o kcf_spi.o kcf_verify.o \ + kcf_random.o modes.o ecb.o cbc.o ctr.o ccm.o gcm.o \ + fips_random.o fips_checksum.o fips_test_vectors.o + +CRYPTOADM_OBJS += cryptoadm.o + +CRYPTO_OBJS += crypto.o + +DPROV_OBJS += dprov.o + +DCA_OBJS += dca.o dca_3des.o dca_debug.o dca_dsa.o dca_kstat.o dca_rng.o \ + dca_rsa.o + +AESPROV_OBJS += aes.o aes_impl.o aes_modes.o fips_aes_util.o + +ARCFOURPROV_OBJS += arcfour.o arcfour_crypt.o + +BLOWFISHPROV_OBJS += blowfish.o blowfish_impl.o + +ECCPROV_OBJS += ecc.o ec.o ec2_163.o ec2_mont.o ecdecode.o ecl_mult.o \ + ecp_384.o ecp_jac.o ec2_193.o ecl.o ecp_192.o ecp_521.o \ + ecp_jm.o ec2_233.o ecl_curve.o ecp_224.o ecp_aff.o \ + ecp_mont.o ec2_aff.o ec_naf.o ecl_gf.o ecp_256.o mp_gf2m.o \ + mpi.o mplogic.o mpmontg.o mpprime.o oid.o \ + secitem.o ec2_test.o ecp_test.o fips_ecc_util.o + +RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o fips_rsa_util.o + +SWRANDPROV_OBJS += swrand.o fips_random_util.o + +# +# kernel SSL +# +KSSL_OBJS += kssl.o ksslioctl.o + +KSSL_SOCKFIL_MOD_OBJS += ksslfilter.o ksslapi.o ksslrec.o + +# +# misc. 
modules +# + +C2AUDIT_OBJS += adr.o audit.o audit_event.o audit_io.o \ + audit_path.o audit_start.o audit_syscalls.o audit_token.o \ + audit_mem.o + +PCIC_OBJS += pcic.o + +RPCSEC_OBJS += secmod.o sec_clnt.o sec_svc.o sec_gen.o \ + auth_des.o auth_kern.o auth_none.o auth_loopb.o\ + authdesprt.o authdesubr.o authu_prot.o \ + key_call.o key_prot.o svc_authu.o svcauthdes.o + +RPCSEC_GSS_OBJS += rpcsec_gssmod.o rpcsec_gss.o rpcsec_gss_misc.o \ + rpcsec_gss_utils.o svc_rpcsec_gss.o + +CONSCONFIG_OBJS += consconfig.o + +CONSCONFIG_DACF_OBJS += consconfig_dacf.o consplat.o + +TEM_OBJS += tem.o tem_safe.o 6x10.o 7x14.o 12x22.o + +KBTRANS_OBJS += \ + kbtrans.o \ + kbtrans_keytables.o \ + kbtrans_polled.o \ + kbtrans_streams.o \ + usb_keytables.o + +KGSSD_OBJS += gssd_clnt_stubs.o gssd_handle.o gssd_prot.o \ + gss_display_name.o gss_release_name.o gss_import_name.o \ + gss_release_buffer.o gss_release_oid_set.o gen_oids.o gssdmod.o + +KGSSD_DERIVED_OBJS = gssd_xdr.o + +KGSS_DUMMY_OBJS += dmech.o + +KSOCKET_OBJS += ksocket.o ksocket_mod.o + +CRYPTO= cksumtypes.o decrypt.o encrypt.o encrypt_length.o etypes.o \ + nfold.o verify_checksum.o prng.o block_size.o make_checksum.o\ + checksum_length.o hmac.o default_state.o mandatory_sumtype.o + +# crypto/des +CRYPTO_DES= f_cbc.o f_cksum.o f_parity.o weak_key.o d3_cbc.o ef_crypto.o + +CRYPTO_DK= checksum.o derive.o dk_decrypt.o dk_encrypt.o + +CRYPTO_ARCFOUR= k5_arcfour.o + +# crypto/enc_provider +CRYPTO_ENC= des.o des3.o arcfour_provider.o aes_provider.o + +# crypto/hash_provider +CRYPTO_HASH= hash_kef_generic.o hash_kmd5.o hash_crc32.o hash_ksha1.o + +# crypto/keyhash_provider +CRYPTO_KEYHASH= descbc.o k5_kmd5des.o k_hmac_md5.o + +# crypto/crc32 +CRYPTO_CRC32= crc32.o + +# crypto/old +CRYPTO_OLD= old_decrypt.o old_encrypt.o + +# crypto/raw +CRYPTO_RAW= raw_decrypt.o raw_encrypt.o + +K5_KRB= kfree.o copy_key.o \ + parse.o init_ctx.o \ + ser_adata.o ser_addr.o \ + ser_auth.o ser_cksum.o \ + ser_key.o ser_princ.o \ + serialize.o 
unparse.o \ + ser_actx.o + +K5_OS= timeofday.o toffset.o \ + init_os_ctx.o c_ustime.o + +SEAL= +# EXPORT DELETE START +SEAL= seal.o unseal.o +# EXPORT DELETE END + +MECH= delete_sec_context.o \ + import_sec_context.o \ + gssapi_krb5.o \ + k5seal.o k5unseal.o k5sealv3.o \ + ser_sctx.o \ + sign.o \ + util_crypt.o \ + util_validate.o util_ordering.o \ + util_seqnum.o util_set.o util_seed.o \ + wrap_size_limit.o verify.o + + + +MECH_GEN= util_token.o + + +KGSS_KRB5_OBJS += krb5mech.o \ + $(MECH) $(SEAL) $(MECH_GEN) \ + $(CRYPTO) $(CRYPTO_DES) $(CRYPTO_DK) $(CRYPTO_ARCFOUR) \ + $(CRYPTO_ENC) $(CRYPTO_HASH) \ + $(CRYPTO_KEYHASH) $(CRYPTO_CRC32) \ + $(CRYPTO_OLD) \ + $(CRYPTO_RAW) $(K5_KRB) $(K5_OS) + +DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o fips_des_util.o + +DLBOOT_OBJS += bootparam_xdr.o nfs_dlinet.o scan.o + +KRTLD_OBJS += kobj_bootflags.o getoptstr.o \ + kobj.o kobj_kdi.o kobj_lm.o kobj_subr.o + +MOD_OBJS += modctl.o modsubr.o modsysfile.o modconf.o modhash.o + +STRPLUMB_OBJS += strplumb.o + +CPR_OBJS += cpr_driver.o cpr_dump.o \ + cpr_main.o cpr_misc.o cpr_mod.o cpr_stat.o \ + cpr_uthread.o + +PROF_OBJS += prf.o + +SE_OBJS += se_driver.o + +SYSACCT_OBJS += acct.o + +ACCTCTL_OBJS += acctctl.o + +EXACCTSYS_OBJS += exacctsys.o + +KAIO_OBJS += aio.o + +PCMCIA_OBJS += pcmcia.o cs.o cis.o cis_callout.o cis_handlers.o cis_params.o + +BUSRA_OBJS += busra.o + +PCS_OBJS += pcs.o + +PCAN_OBJS += pcan.o + +PCATA_OBJS += pcide.o pcdisk.o pclabel.o pcata.o + +PCSER_OBJS += pcser.o pcser_cis.o + +PCWL_OBJS += pcwl.o + +PSET_OBJS += pset.o + +OHCI_OBJS += ohci.o ohci_hub.o ohci_polled.o + +UHCI_OBJS += uhci.o uhciutil.o uhcitgt.o uhcihub.o uhcipolled.o + +EHCI_OBJS += ehci.o ehci_hub.o ehci_xfer.o ehci_intr.o ehci_util.o ehci_polled.o ehci_isoch.o ehci_isoch_util.o + +HUBD_OBJS += hubd.o + +USB_MID_OBJS += usb_mid.o + +USB_IA_OBJS += usb_ia.o + +UWBA_OBJS += uwba.o uwbai.o + +SCSA2USB_OBJS += scsa2usb.o usb_ms_bulkonly.o usb_ms_cbi.o + +HWAHC_OBJS += hwahc.o 
hwahc_util.o + +WUSB_DF_OBJS += wusb_df.o +WUSB_FWMOD_OBJS += wusb_fwmod.o + +IPF_OBJS += ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o ip_nat.o \ + ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \ + ip_log.o misc.o ip_compat.o ip_nat6.o drand48.o + +IBD_OBJS += ibd.o ibd_cm.o + +EIBNX_OBJS += enx_main.o enx_hdlrs.o enx_ibt.o enx_log.o enx_fip.o \ + enx_misc.o enx_q.o enx_ctl.o + +EOIB_OBJS += eib_adm.o eib_chan.o eib_cmn.o eib_ctl.o eib_data.o \ + eib_fip.o eib_ibt.o eib_log.o eib_mac.o eib_main.o \ + eib_rsrc.o eib_svc.o eib_vnic.o + +DLPISTUB_OBJS += dlpistub.o + +SDP_OBJS += sdpddi.o + +TRILL_OBJS += trill.o + +CTF_OBJS += ctf_create.o ctf_decl.o ctf_error.o ctf_hash.o ctf_labels.o \ + ctf_lookup.o ctf_open.o ctf_types.o ctf_util.o ctf_subr.o ctf_mod.o + +SMBIOS_OBJS += smb_error.o smb_info.o smb_open.o smb_subr.o smb_dev.o + +RPCIB_OBJS += rpcib.o + +KMDB_OBJS += kdrv.o + +AFE_OBJS += afe.o + +BGE_OBJS += bge_main2.o bge_chip2.o bge_kstats.o bge_log.o bge_ndd.o \ + bge_atomic.o bge_mii.o bge_send.o bge_recv2.o bge_mii_5906.o + +DMFE_OBJS += dmfe_log.o dmfe_main.o dmfe_mii.o + +ELXL_OBJS += elxl.o + +HME_OBJS += hme.o + +IXGB_OBJS += ixgb.o ixgb_atomic.o ixgb_chip.o ixgb_gld.o ixgb_kstats.o \ + ixgb_log.o ixgb_ndd.o ixgb_rx.o ixgb_tx.o ixgb_xmii.o + +NGE_OBJS += nge_main.o nge_atomic.o nge_chip.o nge_ndd.o nge_kstats.o \ + nge_log.o nge_rx.o nge_tx.o nge_xmii.o + +RGE_OBJS += rge_main.o rge_chip.o rge_ndd.o rge_kstats.o rge_log.o rge_rxtx.o + +URTW_OBJS += urtw.o + +ARN_OBJS += arn_hw.o arn_eeprom.o arn_mac.o arn_calib.o arn_ani.o arn_phy.o arn_regd.o arn_beacon.o \ + arn_main.o arn_recv.o arn_xmit.o arn_rc.o + +ATH_OBJS += ath_aux.o ath_main.o ath_osdep.o ath_rate.o + +ATU_OBJS += atu.o + +IPW_OBJS += ipw2100_hw.o ipw2100.o + +IWI_OBJS += ipw2200_hw.o ipw2200.o + +IWH_OBJS += iwh.o + +IWK_OBJS += iwk2.o + +IWP_OBJS += iwp.o + +MWL_OBJS += mwl.o + +MWLFW_OBJS += mwlfw_mode.o + +WPI_OBJS += wpi.o + +RAL_OBJS += rt2560.o ral_rate.o + +RUM_OBJS += 
rum.o + +RWD_OBJS += rt2661.o + +RWN_OBJS += rt2860.o + +UATH_OBJS += uath.o + +UATHFW_OBJS += uathfw_mod.o + +URAL_OBJS += ural.o + +RTW_OBJS += rtw.o smc93cx6.o rtwphy.o rtwphyio.o + +ZYD_OBJS += zyd.o zyd_usb.o zyd_hw.o zyd_fw.o + +MXFE_OBJS += mxfe.o + +MPTSAS_OBJS += mptsas.o mptsas_impl.o mptsas_init.o mptsas_raid.o mptsas_smhba.o + +SFE_OBJS += sfe.o sfe_util.o + +BFE_OBJS += bfe.o + +BRIDGE_OBJS += bridge.o + +IDM_SHARED_OBJS += base64.o + +IDM_OBJS += $(IDM_SHARED_OBJS) \ + idm.o idm_impl.o idm_text.o idm_conn_sm.o idm_so.o + +VR_OBJS += vr.o + +ATGE_OBJS += atge_main.o atge_l1e.o atge_mii.o atge_l1.o + +YGE_OBJS = yge.o + +# +# Build up defines and paths. +# +LINT_DEFS += -Dunix + +# +# This duality can be removed when the native and target compilers +# are the same (or at least recognize the same command line syntax!) +# It is a bug in the current compilation system that the assember +# can't process the -Y I, flag. +# +NATIVE_INC_PATH += $(INC_PATH) $(CCYFLAG)$(UTSBASE)/common +AS_INC_PATH += $(INC_PATH) -I$(UTSBASE)/common +INCLUDE_PATH += $(INC_PATH) $(CCYFLAG)$(UTSBASE)/common + +PCIEB_OBJS += pcieb.o + +# Chelsio N110 10G NIC driver module +# +CH_OBJS = ch.o glue.o pe.o sge.o + +CH_COM_OBJS = ch_mac.o ch_subr.o cspi.o espi.o ixf1010.o mc3.o mc4.o mc5.o \ + mv88e1xxx.o mv88x201x.o my3126.o pm3393.o tp.o ulp.o \ + vsc7321.o vsc7326.o xpak.o + +# +# PCI strings file +# +PCI_STRING_OBJS = pci_strings.o + +NET_DACF_OBJS += net_dacf.o + +# +# Xframe 10G NIC driver module +# +XGE_OBJS = xge.o xgell.o + +XGE_HAL_OBJS = xgehal-channel.o xgehal-fifo.o xgehal-ring.o xgehal-config.o \ + xgehal-driver.o xgehal-mm.o xgehal-stats.o xgehal-device.o \ + xge-queue.o xgehal-mgmt.o xgehal-mgmtaux.o + +# +# e1000g module +# +E1000G_OBJS += e1000_80003es2lan.o e1000_82540.o e1000_82541.o e1000_82542.o \ + e1000_82543.o e1000_82571.o e1000_api.o e1000_ich8lan.o \ + e1000_mac.o e1000_manage.o e1000_nvm.o e1000_osdep.o \ + e1000_phy.o e1000g_debug.o e1000g_main.o 
e1000g_alloc.o \ + e1000g_tx.o e1000g_rx.o e1000g_stat.o + +# +# Intel 82575 1G NIC driver module +# +IGB_OBJS = igb_82575.o igb_api.o igb_mac.o igb_manage.o \ + igb_nvm.o igb_osdep.o igb_phy.o igb_buf.o \ + igb_debug.o igb_gld.o igb_log.o igb_main.o \ + igb_rx.o igb_stat.o igb_tx.o + +# +# Intel 10GbE PCIE NIC driver module +# +IXGBE_OBJS = ixgbe_82598.o ixgbe_82599.o ixgbe_api.o \ + ixgbe_common.o ixgbe_phy.o \ + ixgbe_buf.o ixgbe_debug.o ixgbe_gld.o \ + ixgbe_log.o ixgbe_main.o \ + ixgbe_osdep.o ixgbe_rx.o ixgbe_stat.o \ + ixgbe_tx.o + +# +# NIU 10G/1G driver module +# +NXGE_OBJS = nxge_mac.o nxge_ipp.o nxge_rxdma.o \ + nxge_txdma.o nxge_txc.o nxge_main.o \ + nxge_hw.o nxge_fzc.o nxge_virtual.o \ + nxge_send.o nxge_classify.o nxge_fflp.o \ + nxge_fflp_hash.o nxge_ndd.o nxge_kstats.o \ + nxge_zcp.o nxge_fm.o nxge_espc.o nxge_hv.o \ + nxge_hio.o nxge_hio_guest.o nxge_intr.o + +NXGE_NPI_OBJS = \ + npi.o npi_mac.o npi_ipp.o \ + npi_txdma.o npi_rxdma.o npi_txc.o \ + npi_zcp.o npi_espc.o npi_fflp.o \ + npi_vir.o + +NXGE_HCALL_OBJS = \ + nxge_hcall.o + +# +# kiconv modules +# +KICONV_EMEA_OBJS += kiconv_emea.o + +# +# blk2scsa +# +BLK2SCSA_OBJS = blk2scsa.o + +KICONV_JA_OBJS += kiconv_ja.o + +KICONV_KO_OBJS += kiconv_cck_common.o kiconv_ko.o + +KICONV_SC_OBJS += kiconv_cck_common.o kiconv_sc.o + +KICONV_TC_OBJS += kiconv_cck_common.o kiconv_tc.o + +# +# AAC module +# +AAC_OBJS = aac.o aac_ioctl.o + +# +# sdcard modules +# +SDA_OBJS = sda_cmd.o sda_host.o sda_init.o sda_mem.o sda_mod.o sda_slot.o +SDHOST_OBJS = sdhost.o + +# +# hxge 10G driver module +# +HXGE_OBJS = hxge_main.o hxge_vmac.o hxge_send.o \ + hxge_txdma.o hxge_rxdma.o hxge_virtual.o \ + hxge_fm.o hxge_fzc.o hxge_hw.o hxge_kstats.o \ + hxge_ndd.o hxge_pfc.o \ + hpi.o hpi_vmac.o hpi_rxdma.o hpi_txdma.o \ + hpi_vir.o hpi_pfc.o + +# +# MEGARAID_SAS module +# +MEGA_SAS_OBJS = megaraid_sas.o + +# +# MR_SAS module +# +MR_SAS_OBJS = mr_sas.o + +# +# ISCSI_INITIATOR module +# +ISCSI_INITIATOR_OBJS = chap.o 
iscsi_io.o iscsi_thread.o \ + iscsi_ioctl.o iscsid.o iscsi.o \ + iscsi_login.o isns_client.o iscsiAuthClient.o \ + iscsi_lun.o iscsiAuthClientGlue.o \ + iscsi_net.o nvfile.o iscsi_cmd.o \ + iscsi_queue.o persistent.o iscsi_conn.o \ + iscsi_sess.o radius_auth.o iscsi_crc.o \ + iscsi_stats.o radius_packet.o iscsi_doorclt.o \ + iscsi_targetparam.o utils.o kifconf.o + +# +# ntxn 10Gb/1Gb NIC driver module +# +NTXN_OBJS = unm_nic_init.o unm_gem.o unm_nic_hw.o unm_ndd.o \ + unm_nic_main.o unm_nic_isr.o unm_nic_ctx.o niu.o + +# +# Myricom 10Gb NIC driver module +# +MYRI10GE_OBJS = myri10ge.o myri10ge_lro.o + +# nulldriver module +# +NULLDRIVER_OBJS = nulldriver.o + +TPM_OBJS = tpm.o tpm_hcall.o diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index c721386280f8..2a9df6d403f2 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -20,12 +20,9 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * DTrace - Dynamic Tracing for Solaris * @@ -186,7 +183,9 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ +static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ +static int dtrace_dynvar_failclean; /* dynvars failed to clean */ /* * DTrace Locking @@ -240,10 +239,16 @@ static void dtrace_nullop(void) {} +static int +dtrace_enable_nullop(void) +{ + return (0); +} + static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, struct modctl *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, @@ -427,6 +432,7 @@ dtrace_load##bits(uintptr_t addr) \ #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 +#define DTRACE_MATCH_FAIL -1 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') @@ -1182,12 +1188,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; - int i, work = 0; + dtrace_dynvar_t **rinsep; + int i, j, work = 0; for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; - - ASSERT(dcpu->dtdsc_rinsing == NULL); + rinsep = &dcpu->dtdsc_rinsing; /* * If the dirty list is NULL, there is no dirty work to do. 
@@ -1195,14 +1201,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) if (dcpu->dtdsc_dirty == NULL) continue; - /* - * If the clean list is non-NULL, then we're not going to do - * any work for this CPU -- it means that there has not been - * a dtrace_dynvar() allocation on this CPU (or from this CPU) - * since the last time we cleaned house. - */ - if (dcpu->dtdsc_clean != NULL) + if (dcpu->dtdsc_rinsing != NULL) { + /* + * If the rinsing list is non-NULL, then it is because + * this CPU was selected to accept another CPU's + * dirty list -- and since that time, dirty buffers + * have accumulated. This is a highly unlikely + * condition, but we choose to ignore the dirty + * buffers -- they'll be picked up a future cleanse. + */ continue; + } + + if (dcpu->dtdsc_clean != NULL) { + /* + * If the clean list is non-NULL, then we're in a + * situation where a CPU has done deallocations (we + * have a non-NULL dirty list) but no allocations (we + * also have a non-NULL clean list). We can't simply + * move the dirty list into the clean list on this + * CPU, yet we also don't want to allow this condition + * to persist, lest a short clean list prevent a + * massive dirty list from being cleaned (which in + * turn could lead to otherwise avoidable dynamic + * drops). To deal with this, we look for some CPU + * with a NULL clean list, NULL dirty list, and NULL + * rinsing list -- and then we borrow this CPU to + * rinse our dirty list. + */ + for (j = 0; j < NCPU; j++) { + dtrace_dstate_percpu_t *rinser; + + rinser = &dstate->dtds_percpu[j]; + + if (rinser->dtdsc_rinsing != NULL) + continue; + + if (rinser->dtdsc_dirty != NULL) + continue; + + if (rinser->dtdsc_clean != NULL) + continue; + + rinsep = &rinser->dtdsc_rinsing; + break; + } + + if (j == NCPU) { + /* + * We were unable to find another CPU that + * could accept this dirty list -- we are + * therefore unable to clean it now. 
+ */ + dtrace_dynvar_failclean++; + continue; + } + } work = 1; @@ -1219,7 +1273,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * on a hash chain, either the dirty list or the * rinsing list for some CPU must be non-NULL.) */ - dcpu->dtdsc_rinsing = dirty; + *rinsep = dirty; dtrace_membar_producer(); } while (dtrace_casptr(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); @@ -1650,7 +1704,7 @@ retry: ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); /* - * Now we'll move the clean list to the free list. + * Now we'll move the clean list to our free list. * It's impossible for this to fail: the only way * the free list can be updated is through this * code path, and only one CPU can own the clean list. @@ -1663,6 +1717,7 @@ retry: * owners of the clean lists out before resetting * the clean lists. */ + dcpu = &dstate->dtds_percpu[me]; rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); ASSERT(rval == NULL); goto retry; @@ -3600,7 +3655,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int64_t index = (int64_t)tupregs[1].dttk_value; int64_t remaining = (int64_t)tupregs[2].dttk_value; size_t len = dtrace_strlen((char *)s, size); - int64_t i = 0; + int64_t i; if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = NULL; @@ -6655,7 +6710,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; - int len, best = INT_MAX, nmatched = 0; + int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -6667,7 +6722,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - (void) (*matched)(probe, arg); + if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); @@ -6714,8 +6770,12 @@ dtrace_match(const dtrace_probekey_t 
*pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) + if ((rc = (*matched)(probe, arg)) != + DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); break; + } } return (nmatched); @@ -6734,8 +6794,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) + if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); break; + } } return (nmatched); @@ -6955,7 +7018,7 @@ dtrace_unregister(dtrace_provider_id_t id) dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. @@ -7101,7 +7164,7 @@ dtrace_invalidate(dtrace_provider_id_t id) dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); @@ -7142,7 +7205,7 @@ dtrace_condense(dtrace_provider_id_t id) * Make sure this isn't the dtrace provider itself. 
*/ ASSERT(prov->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); @@ -8103,7 +8166,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; default: - err += efunc(dp->dtdo_len - 1, "bad return size"); + err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } @@ -9096,7 +9159,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) return (ecb); } -static void +static int dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; @@ -9109,7 +9172,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) /* * This is the NULL probe -- there's nothing to do. */ - return; + return (0); } if (probe->dtpr_ecb == NULL) { @@ -9123,8 +9186,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; - prov->dtpv_pops.dtps_enable(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg); + return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg)); } else { /* * This probe is already active. 
Swing the last pointer to @@ -9137,6 +9200,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) probe->dtpr_predcache = 0; dtrace_sync(); + return (0); } } @@ -9920,7 +9984,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); - dtrace_ecb_enable(ecb); + if (dtrace_ecb_enable(ecb) < 0) + return (DTRACE_MATCH_FAIL); + return (DTRACE_MATCH_NEXT); } @@ -10557,6 +10623,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; + dtrace_retained_gen++; } if (enab->dten_prev == NULL) { @@ -10599,6 +10666,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) return (ENOSPC); state->dts_nretained++; + dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; @@ -10713,7 +10781,7 @@ static int dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) { int i = 0; - int matched = 0; + int total_matched = 0, matched = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -10724,7 +10792,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_current = ep; enab->dten_error = 0; - matched += dtrace_probe_enable(&ep->dted_probe, enab); + /* + * If a provider failed to enable a probe then get out and + * let the consumer know we failed. 
+ */ + if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) + return (EBUSY); + + total_matched += matched; if (enab->dten_error != 0) { /* @@ -10752,7 +10827,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) - *nmatched = matched; + *nmatched = total_matched; return (0); } @@ -10766,13 +10841,22 @@ dtrace_enabling_matchall(void) mutex_enter(&dtrace_lock); /* - * Because we can be called after dtrace_detach() has been called, we - * cannot assert that there are retained enablings. We can safely - * load from dtrace_retained, however: the taskq_destroy() at the - * end of dtrace_detach() will block pending our completion. + * Iterate over all retained enablings to see if any probes match + * against them. We only perform this operation on enablings for which + * we have sufficient permissions by virtue of being in the global zone + * or in the same zone as the DTrace client. Because we can be called + * after dtrace_detach() has been called, we cannot assert that there + * are retained enablings. We can safely load from dtrace_retained, + * however: the taskq_destroy() at the end of dtrace_detach() will + * block pending our completion. 
*/ - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) - (void) dtrace_enabling_match(enab, NULL); + for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { + cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; + + if (INGLOBALZONE(curproc) || + cr != NULL && getzoneid() == crgetzoneid(cr)) + (void) dtrace_enabling_match(enab, NULL); + } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); @@ -10830,6 +10914,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; + dtrace_genid_t gen; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&dtrace_provider_lock)); @@ -10840,15 +10925,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } do { - dtrace_enabling_t *enab = dtrace_retained; + dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; - for (; enab != NULL; enab = enab->dten_next) { +retry: + gen = dtrace_retained_gen; + for (enab = dtrace_retained; enab != NULL; + enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; mutex_exit(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); mutex_enter(&dtrace_lock); + /* + * Process the retained enablings again if + * they have changed while we weren't holding + * dtrace_lock. 
+ */ + if (gen != dtrace_retained_gen) + goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); @@ -10970,7 +11065,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { + if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || + dof->dofh_loadsz != hdr.dofh_loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); @@ -11698,6 +11794,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, } } + if (DOF_SEC_ISLOADABLE(sec->dofs_type) && + !(sec->dofs_flags & DOF_SECF_LOAD)) { + dtrace_dof_error(dof, "loadable section with load " + "flag unset"); + return (-1); + } + if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* just ignore non-loadable sections */ @@ -14390,7 +14493,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. */ - ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); + if (getminor(*devp) != DTRACEMNRN_DTRACE) + return (ENXIO); /* * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -14427,7 +14531,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) mutex_exit(&cpu_lock); if (state == NULL) { - if (--dtrace_opens == 0) + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); mutex_exit(&dtrace_lock); return (EAGAIN); @@ -14463,7 +14567,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) dtrace_state_destroy(state); ASSERT(dtrace_opens > 0); - if (--dtrace_opens == 0) + + /* + * Only relinquish control of the kernel debugger interface when there + * are no consumers and no anonymous enablings. 
+ */ + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); mutex_exit(&dtrace_lock); @@ -15458,7 +15567,8 @@ static struct dev_ops dtrace_ops = { nodev, /* reset */ &dtrace_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; static struct modldrv modldrv = { diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c index b7ca92f54a59..42263e4ef274 100644 --- a/uts/common/dtrace/fasttrap.c +++ b/uts/common/dtrace/fasttrap.c @@ -20,11 +20,10 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" #include <sys/atomic.h> #include <sys/errno.h> @@ -876,7 +875,7 @@ fasttrap_disable_callbacks(void) } /*ARGSUSED*/ -static void +static int fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; @@ -904,7 +903,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) * provider can't go away while we're in this code path. */ if (probe->ftp_prov->ftp_retired) - return; + return (0); /* * If we can't find the process, it may be that we're in the context of @@ -913,7 +912,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) */ if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) - return; + return (0); mutex_enter(&pidlock); p = prfind(probe->ftp_pid); @@ -975,7 +974,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) * drop our reference on the trap table entry. 
*/ fasttrap_disable_callbacks(); - return; + return (0); } } @@ -983,6 +982,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) sprunlock(p); probe->ftp_enabled = 1; + return (0); } /*ARGSUSED*/ @@ -1946,7 +1946,8 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) probe = kmem_alloc(size, KM_SLEEP); - if (copyin(uprobe, probe, size) != 0) { + if (copyin(uprobe, probe, size) != 0 || + probe->ftps_noffs != noffs) { kmem_free(probe, size); return (EFAULT); } @@ -2044,13 +2045,6 @@ err: tp->ftt_proc->ftpc_acount != 0) break; - /* - * The count of active providers can only be - * decremented (i.e. to zero) during exec, exit, and - * removal of a meta provider so it should be - * impossible to drop the count during this operation(). - */ - ASSERT(tp->ftt_proc->ftpc_acount != 0); tp = tp->ftt_next; } @@ -2346,7 +2340,8 @@ static struct dev_ops fasttrap_ops = { nodev, /* reset */ &fasttrap_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; /* diff --git a/uts/common/dtrace/lockstat.c b/uts/common/dtrace/lockstat.c index 3eb76a061d32..69c8b7254486 100644 --- a/uts/common/dtrace/lockstat.c +++ b/uts/common/dtrace/lockstat.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" #include <sys/types.h> #include <sys/param.h> @@ -84,7 +83,7 @@ static kmutex_t lockstat_test; /* for testing purposes only */ static dtrace_provider_id_t lockstat_id; /*ARGSUSED*/ -static void +static int lockstat_enable(void *arg, dtrace_id_t id, void *parg) { lockstat_probe_t *probe = parg; @@ -103,6 +102,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg) */ mutex_enter(&lockstat_test); mutex_exit(&lockstat_test); + return (0); } /*ARGSUSED*/ @@ -310,11 +310,13 @@ static struct dev_ops lockstat_ops = { nulldev, /* reset */ &lockstat_cb_ops, /* cb_ops */ NULL, /* bus_ops */ + NULL, /* power */ + ddi_quiesce_not_needed, /* quiesce */ }; static struct modldrv modldrv = { &mod_driverops, /* Type of module. This one is a driver */ - "Lock Statistics %I%", /* name of module */ + "Lock Statistics", /* name of module */ &lockstat_ops, /* driver ops */ }; diff --git a/uts/common/dtrace/profile.c b/uts/common/dtrace/profile.c index 8de919a851a2..c1a2d1f1c12f 100644 --- a/uts/common/dtrace/profile.c +++ b/uts/common/dtrace/profile.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" #include <sys/errno.h> #include <sys/stat.h> @@ -361,7 +360,7 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg) } /*ARGSUSED*/ -static void +static int profile_enable(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; @@ -391,6 +390,7 @@ profile_enable(void *arg, dtrace_id_t id, void *parg) } else { prof->prof_cyclic = cyclic_add_omni(&omni); } + return (0); } /*ARGSUSED*/ @@ -539,7 +539,8 @@ static struct dev_ops profile_ops = { nodev, /* reset */ &profile_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; /* diff --git a/uts/common/dtrace/sdt_subr.c b/uts/common/dtrace/sdt_subr.c index 66ff8a92a01b..242185071bb2 100644 --- a/uts/common/dtrace/sdt_subr.c +++ b/uts/common/dtrace/sdt_subr.c @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/sdt_impl.h> static dtrace_pattr_t vtrace_attr = { @@ -43,6 +40,14 @@ static dtrace_pattr_t info_attr = { { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, }; +static dtrace_pattr_t fc_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +}; + static dtrace_pattr_t fpu_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, @@ -83,6 +88,14 @@ static dtrace_pattr_t xpv_attr = { { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, }; +static dtrace_pattr_t iscsi_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +}; + sdt_provider_t sdt_providers[] = { { "vtrace", "__vtrace_", &vtrace_attr, 0 }, { "sysinfo", "__cpu_sysinfo_", &info_attr, 0 }, @@ -91,11 +104,17 @@ sdt_provider_t sdt_providers[] = { { "sched", "__sched_", &stab_attr, 0 }, { "proc", "__proc_", &stab_attr, 0 }, { "io", "__io_", &stab_attr, 0 }, + { "ip", "__ip_", &stab_attr, 0 }, + { "tcp", "__tcp_", &stab_attr, 0 }, + { "udp", "__udp_", &stab_attr, 0 }, { "mib", "__mib_", &stab_attr, 0 }, { "fsinfo", "__fsinfo_", &fsinfo_attr, 0 }, + { "iscsi", "__iscsi_", &iscsi_attr, 0 }, { "nfsv3", "__nfsv3_", &stab_attr, 0 }, { 
"nfsv4", "__nfsv4_", &stab_attr, 0 }, { "xpv", "__xpv_", &xpv_attr, 0 }, + { "fc", "__fc_", &fc_attr, 0 }, + { "srp", "__srp_", &fc_attr, 0 }, { "sysevent", "__sysevent_", &stab_attr, 0 }, { "sdt", NULL, &sdt_attr, 0 }, { NULL } @@ -169,6 +188,73 @@ sdt_argdesc_t sdt_args[] = { { "fsinfo", NULL, 0, 0, "vnode_t *", "fileinfo_t *" }, { "fsinfo", NULL, 1, 1, "int", "int" }, + { "iscsi", "async-send", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "async-send", 1, 1, "iscsi_async_evt_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "login-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "login-command", 1, 1, "iscsi_login_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "login-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "login-response", 1, 1, "iscsi_login_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "logout-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "logout-command", 1, 1, "iscsi_logout_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "logout-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "logout-response", 1, 1, "iscsi_logout_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "data-request", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "data-request", 1, 1, "iscsi_rtt_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "data-send", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "data-send", 1, 1, "iscsi_data_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "data-receive", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "data-receive", 1, 1, "iscsi_data_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "nop-send", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "nop-send", 1, 1, "iscsi_nop_in_hdr_t *", "iscsiinfo_t *" }, + { "iscsi", "nop-receive", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "nop-receive", 1, 1, "iscsi_nop_out_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "scsi-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "scsi-command", 1, 1, "iscsi_scsi_cmd_hdr_t *", + 
"iscsiinfo_t *" }, + { "iscsi", "scsi-command", 2, 2, "scsi_task_t *", "scsicmd_t *" }, + { "iscsi", "scsi-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "scsi-response", 1, 1, "iscsi_scsi_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "task-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "task-command", 1, 1, "iscsi_scsi_task_mgt_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "task-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "task-response", 1, 1, "iscsi_scsi_task_mgt_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "text-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "text-command", 1, 1, "iscsi_text_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "text-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "text-response", 1, 1, "iscsi_text_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "xfer-start", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "xfer-start", 1, 0, "idm_conn_t *", "iscsiinfo_t *" }, + { "iscsi", "xfer-start", 2, 1, "uintptr_t", "xferinfo_t *" }, + { "iscsi", "xfer-start", 3, 2, "uint32_t"}, + { "iscsi", "xfer-start", 4, 3, "uintptr_t"}, + { "iscsi", "xfer-start", 5, 4, "uint32_t"}, + { "iscsi", "xfer-start", 6, 5, "uint32_t"}, + { "iscsi", "xfer-start", 7, 6, "uint32_t"}, + { "iscsi", "xfer-start", 8, 7, "int"}, + { "iscsi", "xfer-done", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "xfer-done", 1, 0, "idm_conn_t *", "iscsiinfo_t *" }, + { "iscsi", "xfer-done", 2, 1, "uintptr_t", "xferinfo_t *" }, + { "iscsi", "xfer-done", 3, 2, "uint32_t"}, + { "iscsi", "xfer-done", 4, 3, "uintptr_t"}, + { "iscsi", "xfer-done", 5, 4, "uint32_t"}, + { "iscsi", "xfer-done", 6, 5, "uint32_t"}, + { "iscsi", "xfer-done", 7, 6, "uint32_t"}, + { "iscsi", "xfer-done", 8, 7, "int"}, + { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", "conninfo_t *" }, { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *", @@ -788,6 +874,75 @@ sdt_argdesc_t sdt_args[] = { "nfsv4cbinfo_t *" }, { 
"nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *" }, + { "ip", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "ip", "send", 1, 1, "conn_t *", "csinfo_t *" }, + { "ip", "send", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" }, + { "ip", "send", 4, 4, "ipha_t *", "ipv4info_t *" }, + { "ip", "send", 5, 5, "ip6_t *", "ipv6info_t *" }, + { "ip", "send", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */ + { "ip", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "ip", "receive", 1, 1, "conn_t *", "csinfo_t *" }, + { "ip", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "ip", "receive", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" }, + { "ip", "receive", 4, 4, "ipha_t *", "ipv4info_t *" }, + { "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" }, + { "ip", "receive", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */ + + { "tcp", "connect-established", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-established", 1, 1, "ip_xmit_attr_t *", + "csinfo_t *" }, + { "tcp", "connect-established", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-established", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-established", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "connect-refused", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "connect-refused", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-refused", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "connect-request", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-request", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "connect-request", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-request", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-request", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "accept-established", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "accept-established", 
1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "accept-established", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "accept-established", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "accept-established", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "accept-refused", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "accept-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "accept-refused", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "accept-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "accept-refused", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "state-change", 0, 0, "void", "void" }, + { "tcp", "state-change", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "state-change", 2, 2, "void", "void" }, + { "tcp", "state-change", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "state-change", 4, 4, "void", "void" }, + { "tcp", "state-change", 5, 5, "int32_t", "tcplsinfo_t *" }, + { "tcp", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "send", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" }, + { "tcp", "send", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "send", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" }, + { "tcp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "receive", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" }, + { "tcp", "receive", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "receive", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" }, + + { "udp", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "udp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "udp", "send", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "udp", "send", 3, 3, "udp_t *", "udpsinfo_t *" }, + { "udp", "send", 4, 4, "udpha_t *", "udpinfo_t *" }, + { "udp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "udp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "udp", "receive", 2, 2, "void_ip_t *", 
"ipinfo_t *" }, + { "udp", "receive", 3, 3, "udp_t *", "udpsinfo_t *" }, + { "udp", "receive", 4, 4, "udpha_t *", "udpinfo_t *" }, + { "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" }, { "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" }, @@ -848,6 +1003,154 @@ sdt_argdesc_t sdt_args[] = { { "xpv", "setvcpucontext-end", 0, 0, "int" }, { "xpv", "setvcpucontext-start", 0, 0, "domid_t" }, { "xpv", "setvcpucontext-start", 1, 1, "vcpu_guest_context_t *" }, + + { "srp", "service-up", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "service-up", 1, 0, "srpt_session_t *", "srp_portinfo_t *" }, + { "srp", "service-down", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "service-down", 1, 0, "srpt_session_t *", + "srp_portinfo_t *" }, + { "srp", "login-command", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "login-command", 1, 0, "srpt_session_t *", + "srp_portinfo_t *" }, + { "srp", "login-command", 2, 1, "srp_login_req_t *", + "srp_logininfo_t *" }, + { "srp", "login-response", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "login-response", 1, 0, "srpt_session_t *", + "srp_portinfo_t *" }, + { "srp", "login-response", 2, 1, "srp_login_rsp_t *", + "srp_logininfo_t *" }, + { "srp", "login-response", 3, 2, "srp_login_rej_t *" }, + { "srp", "logout-command", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "logout-command", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "task-command", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "task-command", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "task-command", 2, 1, "srp_cmd_req_t *", "srp_taskinfo_t *" }, + { "srp", "task-response", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "task-response", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "task-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" }, + { "srp", "task-response", 3, 2, "scsi_task_t *" }, + { "srp", "task-response", 4, 3, "int8_t" }, 
+ { "srp", "scsi-command", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "scsi-command", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "scsi-command", 2, 1, "scsi_task_t *", "scsicmd_t *" }, + { "srp", "scsi-command", 3, 2, "srp_cmd_req_t *", "srp_taskinfo_t *" }, + { "srp", "scsi-response", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "scsi-response", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "scsi-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" }, + { "srp", "scsi-response", 3, 2, "scsi_task_t *" }, + { "srp", "scsi-response", 4, 3, "int8_t" }, + { "srp", "xfer-start", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "xfer-start", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "xfer-start", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" }, + { "srp", "xfer-start", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" }, + { "srp", "xfer-start", 4, 3, "ibt_send_wr_t *"}, + { "srp", "xfer-start", 5, 4, "uint32_t" }, + { "srp", "xfer-start", 6, 5, "uint32_t" }, + { "srp", "xfer-start", 7, 6, "uint32_t" }, + { "srp", "xfer-start", 8, 7, "uint32_t" }, + { "srp", "xfer-done", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "xfer-done", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "xfer-done", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" }, + { "srp", "xfer-done", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" }, + { "srp", "xfer-done", 4, 3, "ibt_send_wr_t *"}, + { "srp", "xfer-done", 5, 4, "uint32_t" }, + { "srp", "xfer-done", 6, 5, "uint32_t" }, + { "srp", "xfer-done", 7, 6, "uint32_t" }, + { "srp", "xfer-done", 8, 7, "uint32_t" }, + + { "fc", "link-up", 0, 0, "fct_i_local_port_t *", "conninfo_t *" }, + { "fc", "link-down", 0, 0, "fct_i_local_port_t *", "conninfo_t *" }, + { "fc", "fabric-login-start", 0, 0, "fct_i_local_port_t *", + "conninfo_t *" }, + { "fc", "fabric-login-start", 1, 0, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "fabric-login-end", 0, 0, "fct_i_local_port_t *", + 
"conninfo_t *" }, + { "fc", "fabric-login-end", 1, 0, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-start", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-login-start", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-start", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-start", 3, 3, "int", "int" }, + { "fc", "rport-login-end", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-login-end", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-end", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-end", 3, 3, "int", "int" }, + { "fc", "rport-login-end", 4, 4, "int", "int" }, + { "fc", "rport-logout-start", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-logout-start", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-start", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-start", 3, 3, "int", "int" }, + { "fc", "rport-logout-end", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-logout-end", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-end", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-end", 3, 3, "int", "int" }, + { "fc", "scsi-command", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "scsi-command", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "scsi-command", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "scsi-command", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "scsi-response", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "scsi-response", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "scsi-response", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "scsi-response", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-start", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", 
"xfer-start", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-start", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "xfer-start", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-start", 4, 4, "stmf_data_buf_t *", + "fc_xferinfo_t *" }, + { "fc", "xfer-done", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "xfer-done", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-done", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "xfer-done", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-done", 4, 4, "stmf_data_buf_t *", + "fc_xferinfo_t *" }, + { "fc", "rscn-receive", 0, 0, "fct_i_local_port_t *", + "conninfo_t *" }, + { "fc", "rscn-receive", 1, 1, "int", "int"}, + { "fc", "abts-receive", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "abts-receive", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "abts-receive", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + + { NULL } }; diff --git a/uts/common/dtrace/systrace.c b/uts/common/dtrace/systrace.c index be14660b04c0..b864041c450d 100644 --- a/uts/common/dtrace/systrace.c +++ b/uts/common/dtrace/systrace.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" #include <sys/dtrace.h> #include <sys/systrace.h> @@ -141,7 +140,7 @@ systrace_destroy(void *arg, dtrace_id_t id, void *parg) } /*ARGSUSED*/ -static void +static int systrace_enable(void *arg, dtrace_id_t id, void *parg) { int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); @@ -162,7 +161,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg) if (enabled) { ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); - return; + return (0); } (void) casptr(&sysent[sysnum].sy_callc, @@ -173,6 +172,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg) (void *)systrace_sysent32[sysnum].stsy_underlying, (void *)dtrace_systrace_syscall32); #endif + return (0); } /*ARGSUSED*/ @@ -336,7 +336,8 @@ static struct dev_ops systrace_ops = { nodev, /* reset */ &systrace_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; /* diff --git a/uts/common/fs/gfs.c b/uts/common/fs/gfs.c new file mode 100644 index 000000000000..4d24df60f75b --- /dev/null +++ b/uts/common/fs/gfs.c @@ -0,0 +1,1178 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Portions Copyright 2007 Shivakumar GN */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/dirent.h> +#include <sys/kmem.h> +#include <sys/mman.h> +#include <sys/mutex.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/sunddi.h> +#include <sys/uio.h> +#include <sys/vmsystm.h> +#include <sys/vfs.h> +#include <sys/vnode.h> + +#include <vm/as.h> +#include <vm/seg_vn.h> + +#include <sys/gfs.h> + +/* + * Generic pseudo-filesystem routines. + * + * There are significant similarities between the implementation of certain file + * system entry points across different filesystems. While one could attempt to + * "choke up on the bat" and incorporate common functionality into a VOP + * preamble or postamble, such an approach is limited in the benefit it can + * provide. In this file we instead define a toolkit of routines which can be + * called from a filesystem (with in-kernel pseudo-filesystems being the focus + * of the exercise) in a more component-like fashion. + * + * There are three basic classes of routines: + * + * 1) Lowlevel support routines + * + * These routines are designed to play a support role for existing + * pseudo-filesystems (such as procfs). They simplify common tasks, + * without forcing the filesystem to hand over management to GFS. 
The + * routines covered are: + * + * gfs_readdir_init() + * gfs_readdir_emit() + * gfs_readdir_emitn() + * gfs_readdir_pred() + * gfs_readdir_fini() + * gfs_lookup_dot() + * + * 2) Complete GFS management + * + * These routines take a more active role in management of the + * pseudo-filesystem. They handle the relationship between vnode private + * data and VFS data, as well as the relationship between vnodes in the + * directory hierarchy. + * + * In order to use these interfaces, the first member of every private + * v_data must be a gfs_file_t or a gfs_dir_t. This hands over all control + * to GFS. + * + * gfs_file_create() + * gfs_dir_create() + * gfs_root_create() + * + * gfs_file_inactive() + * gfs_dir_inactive() + * gfs_dir_lookup() + * gfs_dir_readdir() + * + * gfs_vop_inactive() + * gfs_vop_lookup() + * gfs_vop_readdir() + * gfs_vop_map() + * + * 3) Single File pseudo-filesystems + * + * This routine creates a rooted file to be overlaid on top of another + * file in the physical filespace. + * + * Note that the parent is NULL (actually the vfs), but there is nothing + * technically keeping such a file from utilizing the "Complete GFS + * management" set of routines. + * + * gfs_root_create_file() + */ + +/* + * gfs_make_opsvec: take an array of vnode type definitions and create + * their vnodeops_t structures + * + * This routine takes an array of gfs_opsvec_t's. It could + * alternatively take an array of gfs_opsvec_t*'s, which would allow + * vnode types to be completely defined in files external to the caller + * of gfs_make_opsvec(). As it stands, much more sharing takes place -- + * both the caller and the vnode type provider need to access gfsv_ops + * and gfsv_template, and the caller also needs to know gfsv_name.
+ */
+int
+gfs_make_opsvec(gfs_opsvec_t *vec)
+{
+	gfs_opsvec_t *cur = vec;
+	int error = 0;
+
+	/*
+	 * Build the ops vector for each entry in turn; the array is
+	 * terminated by an entry whose gfsv_name is NULL.
+	 */
+	while (cur->gfsv_name != NULL) {
+		error = vn_make_ops(cur->gfsv_name, cur->gfsv_template,
+		    cur->gfsv_ops);
+		if (error != 0)
+			break;
+		cur++;
+	}
+
+	if (error == 0)
+		return (0);
+
+	cmn_err(CE_WARN, "gfs_make_opsvec: bad vnode ops template for '%s'",
+	    cur->gfsv_name);
+
+	/*
+	 * Unwind: release every ops vector built before the failing entry,
+	 * newest first, and clear the caller's pointers to them.
+	 */
+	while (cur != vec) {
+		cur--;
+		vn_freevnodeops(*cur->gfsv_ops);
+		*cur->gfsv_ops = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Low level directory routines
+ *
+ * These routines provide some simple abstractions for reading directories.
+ * They are designed to be used by existing pseudo filesystems (namely procfs)
+ * that already have a complicated management infrastructure.
+ */
+
+/*
+ * gfs_get_parent_ino: fetch the inode number of a GFS directory and of its
+ * parent, typically so that both can be handed to gfs_readdir_init().
+ *
+ *   dvp	- the GFS directory vnode
+ *   cr, ct	- passed through to VOP_GETATTR() when it has to be called
+ *   pino	- set to the parent's inode number
+ *   ino	- set to dvp's own inode number
+ *
+ * Returns 0, or the error reported by VOP_GETATTR().
+ */
+int
+gfs_get_parent_ino(vnode_t *dvp, cred_t *cr, caller_context_t *ct,
+    ino64_t *pino, ino64_t *ino)
+{
+	gfs_dir_t *dir = dvp->v_data;
+	vnode_t *pvp = dir->gfsd_file.gfs_parent;
+	vattr_t va;
+	int err;
+
+	*ino = dir->gfsd_file.gfs_ino;
+
+	/* The root of the filesystem is reported as its own parent. */
+	if (pvp == NULL) {
+		*pino = *ino;
+		return (0);
+	}
+
+	/* An ordinary GFS parent keeps its inode in its private data. */
+	if (!(dvp->v_flag & V_XATTRDIR)) {
+		*pino = ((gfs_file_t *)(pvp->v_data))->gfs_ino;
+		return (0);
+	}
+
+	/*
+	 * For an extended attribute directory the parent's v_data is not
+	 * interpreted here; ask the parent for its node id instead.
+	 */
+	va.va_mask = AT_NODEID;
+	err = VOP_GETATTR(pvp, &va, 0, cr, ct);
+	if (err)
+		return (err);
+
+	*pino = va.va_nodeid;
+	return (0);
+}
+
+/*
+ * gfs_readdir_init: initiate a generic readdir
+ *   st		- a pointer to an uninitialized gfs_readdir_state_t structure
+ *   name_max	- the directory's maximum file name length
+ *   ureclen	- the exported file-space record length (1 for non-legacy FSs)
+ *   uiop	- the uiop passed to readdir
+ *   parent	- the parent directory's inode
+ *   self	- this directory's inode
+ *   flags	- flags from VOP_READDIR
+ *
+ * Returns 0 or a non-zero errno.
+ * + * Typical VOP_READDIR usage of gfs_readdir_*: + * + * if ((error = gfs_readdir_init(...)) != 0) + * return (error); + * eof = 0; + * while ((error = gfs_readdir_pred(..., &voffset)) != 0) { + * if (!consumer_entry_at(voffset)) + * voffset = consumer_next_entry(voffset); + * if (consumer_eof(voffset)) { + * eof = 1 + * break; + * } + * if ((error = gfs_readdir_emit(..., voffset, + * consumer_ino(voffset), consumer_name(voffset))) != 0) + * break; + * } + * return (gfs_readdir_fini(..., error, eofp, eof)); + * + * As you can see, a zero result from gfs_readdir_pred() or + * gfs_readdir_emit() indicates that processing should continue, + * whereas a non-zero result indicates that the loop should terminate. + * Most consumers need do nothing more than let gfs_readdir_fini() + * determine what the cause of failure was and return the appropriate + * value. + */ +int +gfs_readdir_init(gfs_readdir_state_t *st, int name_max, int ureclen, + uio_t *uiop, ino64_t parent, ino64_t self, int flags) +{ + size_t dirent_size; + + if (uiop->uio_loffset < 0 || uiop->uio_resid <= 0 || + (uiop->uio_loffset % ureclen) != 0) + return (EINVAL); + + st->grd_ureclen = ureclen; + st->grd_oresid = uiop->uio_resid; + st->grd_namlen = name_max; + if (flags & V_RDDIR_ENTFLAGS) + dirent_size = EDIRENT_RECLEN(st->grd_namlen); + else + dirent_size = DIRENT64_RECLEN(st->grd_namlen); + st->grd_dirent = kmem_zalloc(dirent_size, KM_SLEEP); + st->grd_parent = parent; + st->grd_self = self; + st->grd_flags = flags; + + return (0); +} + +/* + * gfs_readdir_emit_int: internal routine to emit directory entry + * + * st - the current readdir state, which must have d_ino/ed_ino + * and d_name/ed_name set + * uiop - caller-supplied uio pointer + * next - the offset of the next entry + */ +static int +gfs_readdir_emit_int(gfs_readdir_state_t *st, uio_t *uiop, offset_t next) +{ + int reclen; + dirent64_t *dp; + edirent_t *edp; + + if (st->grd_flags & V_RDDIR_ENTFLAGS) { + edp = st->grd_dirent; + reclen = 
EDIRENT_RECLEN(strlen(edp->ed_name)); + } else { + dp = st->grd_dirent; + reclen = DIRENT64_RECLEN(strlen(dp->d_name)); + } + + if (reclen > uiop->uio_resid) { + /* + * Error if no entries were returned yet + */ + if (uiop->uio_resid == st->grd_oresid) + return (EINVAL); + return (-1); + } + + if (st->grd_flags & V_RDDIR_ENTFLAGS) { + edp->ed_off = next; + edp->ed_reclen = (ushort_t)reclen; + } else { + dp->d_off = next; + dp->d_reclen = (ushort_t)reclen; + } + + if (uiomove((caddr_t)st->grd_dirent, reclen, UIO_READ, uiop)) + return (EFAULT); + + uiop->uio_loffset = next; + + return (0); +} + +/* + * gfs_readdir_emit: emit a directory entry + * voff - the virtual offset (obtained from gfs_readdir_pred) + * ino - the entry's inode + * name - the entry's name + * eflags - value for ed_eflags (if processing edirent_t) + * + * Returns a 0 on success, a non-zero errno on failure, or -1 if the + * readdir loop should terminate. A non-zero result (either errno or + * -1) from this function is typically passed directly to + * gfs_readdir_fini(). + */ +int +gfs_readdir_emit(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff, + ino64_t ino, const char *name, int eflags) +{ + offset_t off = (voff + 2) * st->grd_ureclen; + + if (st->grd_flags & V_RDDIR_ENTFLAGS) { + edirent_t *edp = st->grd_dirent; + + edp->ed_ino = ino; + (void) strncpy(edp->ed_name, name, st->grd_namlen); + edp->ed_eflags = eflags; + } else { + dirent64_t *dp = st->grd_dirent; + + dp->d_ino = ino; + (void) strncpy(dp->d_name, name, st->grd_namlen); + } + + /* + * Inter-entry offsets are invalid, so we assume a record size of + * grd_ureclen and explicitly set the offset appropriately. + */ + return (gfs_readdir_emit_int(st, uiop, off + st->grd_ureclen)); +} + +/* + * gfs_readdir_emitn: like gfs_readdir_emit(), but takes an integer + * instead of a string for the entry's name. 
+ */ +int +gfs_readdir_emitn(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff, + ino64_t ino, unsigned long num) +{ + char buf[40]; + + numtos(num, buf); + return (gfs_readdir_emit(st, uiop, voff, ino, buf, 0)); +} + +/* + * gfs_readdir_pred: readdir loop predicate + * voffp - a pointer in which the next virtual offset should be stored + * + * Returns a 0 on success, a non-zero errno on failure, or -1 if the + * readdir loop should terminate. A non-zero result (either errno or + * -1) from this function is typically passed directly to + * gfs_readdir_fini(). + */ +int +gfs_readdir_pred(gfs_readdir_state_t *st, uio_t *uiop, offset_t *voffp) +{ + offset_t off, voff; + int error; + +top: + if (uiop->uio_resid <= 0) + return (-1); + + off = uiop->uio_loffset / st->grd_ureclen; + voff = off - 2; + if (off == 0) { + if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_self, + ".", 0)) == 0) + goto top; + } else if (off == 1) { + if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_parent, + "..", 0)) == 0) + goto top; + } else { + *voffp = voff; + return (0); + } + + return (error); +} + +/* + * gfs_readdir_fini: generic readdir cleanup + * error - if positive, an error to return + * eofp - the eofp passed to readdir + * eof - the eof value + * + * Returns a 0 on success, a non-zero errno on failure. This result + * should be returned from readdir. + */ +int +gfs_readdir_fini(gfs_readdir_state_t *st, int error, int *eofp, int eof) +{ + size_t dirent_size; + + if (st->grd_flags & V_RDDIR_ENTFLAGS) + dirent_size = EDIRENT_RECLEN(st->grd_namlen); + else + dirent_size = DIRENT64_RECLEN(st->grd_namlen); + kmem_free(st->grd_dirent, dirent_size); + if (error > 0) + return (error); + if (eofp) + *eofp = eof; + return (0); +} + +/* + * gfs_lookup_dot + * + * Performs a basic check for "." and ".." directory entries. 
+ */
+int
+gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm)
+{
+	vnode_t *target;
+
+	if (*nm == '\0' || strcmp(nm, ".") == 0) {
+		/* An empty name and "." both resolve to the directory. */
+		target = dvp;
+	} else if (strcmp(nm, "..") != 0) {
+		/* Not a dot entry; the caller has to look elsewhere. */
+		return (-1);
+	} else if (pvp != NULL) {
+		target = pvp;
+	} else {
+		/* Without a parent, ".." resolves to the directory itself. */
+		ASSERT(dvp->v_flag & VROOT);
+		target = dvp;
+	}
+
+	/* The vnode handed back carries a new hold. */
+	VN_HOLD(target);
+	*vpp = target;
+	return (0);
+}
+
+/*
+ * gfs_file_create(): create a new GFS file
+ *
+ *   size	- size of private data structure (v_data)
+ *   pvp	- parent vnode (GFS directory); NULL is accepted
+ *   ops	- vnode operations vector
+ *
+ * In order to use this interface, the parent vnode must have been created by
+ * gfs_dir_create(), and the private data stored in v_data must have a
+ * 'gfs_file_t' as its first field.
+ *
+ * Given these constraints, this routine will automatically:
+ *
+ *	- Allocate (zeroed) v_data for the vnode
+ *	- Initialize necessary fields in the vnode
+ *	- Hold the parent, if there is one
+ */
+vnode_t *
+gfs_file_create(size_t size, vnode_t *pvp, vnodeops_t *ops)
+{
+	gfs_file_t *priv = kmem_zalloc(size, KM_SLEEP);
+	vnode_t *newvp = vn_alloc(KM_SLEEP);
+
+	/* Describe the private data and tie it to its vnode. */
+	priv->gfs_type = GFS_FILE;
+	priv->gfs_size = size;
+	priv->gfs_parent = pvp;
+	priv->gfs_vnode = newvp;
+	newvp->v_data = priv;
+
+	vn_setops(newvp, ops);
+
+	/*
+	 * With a parent, inherit its vfs and take a hold on it.  Nothing
+	 * more is set up here when there is no parent.
+	 */
+	if (pvp != NULL) {
+		VN_SET_VFS_TYPE_DEV(newvp, pvp->v_vfsp, VREG, 0);
+		VN_HOLD(pvp);
+	}
+
+	return (newvp);
+}
+
+/*
+ * gfs_dir_create: creates a new directory in the parent
+ *
+ *   struct_size - size of private data structure (v_data)
+ *   pvp	- parent vnode (GFS directory)
+ *   ops	- vnode operations vector
+ *   entries	- list of static entries (if any), terminated by an entry
+ *		  whose name is NULL
+ *   inode_cb	- inode callback (see gfs_dir_readdir)
+ *   maxlen	- maximum length of a directory entry
+ *   readdir_cb	- readdir callback (see gfs_dir_readdir)
+ *   lookup_cb	- lookup callback (see gfs_dir_lookup)
+ *
+ * In order to use this function, the first member of the private vnode
+ * structure (v_data) must be a gfs_dir_t.  For each directory, there are
+ * static entries, defined when the structure is initialized, and dynamic
+ * entries, retrieved through callbacks.
+ *
+ * If a directory has static entries, then it must supply an inode callback,
+ * which will compute the inode number based on the parent and the index.
+ * For a directory with dynamic entries, the caller must supply a readdir
+ * callback and a lookup callback.  If a static lookup fails, we fall back to
+ * the supplied lookup callback, if any.
+ *
+ * This function also performs the same initialization as gfs_file_create().
+ */ +vnode_t * +gfs_dir_create(size_t struct_size, vnode_t *pvp, vnodeops_t *ops, + gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen, + gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb) +{ + vnode_t *vp; + gfs_dir_t *dp; + gfs_dirent_t *de; + + vp = gfs_file_create(struct_size, pvp, ops); + vp->v_type = VDIR; + + dp = vp->v_data; + dp->gfsd_file.gfs_type = GFS_DIR; + dp->gfsd_maxlen = maxlen; + + if (entries != NULL) { + for (de = entries; de->gfse_name != NULL; de++) + dp->gfsd_nstatic++; + + dp->gfsd_static = kmem_alloc( + dp->gfsd_nstatic * sizeof (gfs_dirent_t), KM_SLEEP); + bcopy(entries, dp->gfsd_static, + dp->gfsd_nstatic * sizeof (gfs_dirent_t)); + } + + dp->gfsd_readdir = readdir_cb; + dp->gfsd_lookup = lookup_cb; + dp->gfsd_inode = inode_cb; + + mutex_init(&dp->gfsd_lock, NULL, MUTEX_DEFAULT, NULL); + + return (vp); +} + +/* + * gfs_root_create(): create a root vnode for a GFS filesystem + * + * Similar to gfs_dir_create(), this creates a root vnode for a filesystem. The + * only difference is that it takes a vfs_t instead of a vnode_t as its parent. + */ +vnode_t * +gfs_root_create(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino, + gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen, + gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb) +{ + vnode_t *vp = gfs_dir_create(size, NULL, ops, entries, inode_cb, + maxlen, readdir_cb, lookup_cb); + + /* Manually set the inode */ + ((gfs_file_t *)vp->v_data)->gfs_ino = ino; + + VFS_HOLD(vfsp); + VN_SET_VFS_TYPE_DEV(vp, vfsp, VDIR, 0); + vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT; + + return (vp); +} + +/* + * gfs_root_create_file(): create a root vnode for a GFS file as a filesystem + * + * Similar to gfs_root_create(), this creates a root vnode for a file to + * be the pseudo-filesystem. 
+ */
+vnode_t *
+gfs_root_create_file(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino)
+{
+	vnode_t	*vp = gfs_file_create(size, NULL, ops);
+
+	((gfs_file_t *)vp->v_data)->gfs_ino = ino;
+
+	VFS_HOLD(vfsp);
+	VN_SET_VFS_TYPE_DEV(vp, vfsp, VREG, 0);
+	vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+	return (vp);
+}
+
+/*
+ * gfs_file_inactive()
+ *
+ * Called from the VOP_INACTIVE() routine.  If necessary, this routine will
+ * remove the given vnode from the parent directory and clean up any references
+ * in the VFS layer.
+ *
+ * When v_count is 1 the vnode is torn down: a static entry in the parent
+ * that caches this vnode is cleared, an extended attribute directory is
+ * detached from its parent's v_xattrdir, the hold on the parent (or, when
+ * there is no parent, on the vfs) is released, and the vnode is freed.
+ *
+ * If the vnode was not removed (due to a race with vget), then v_count is
+ * just decremented and NULL is returned.  Otherwise, a pointer to the
+ * private data is returned; the private data itself is not freed here.
+ */
+void *
+gfs_file_inactive(vnode_t *vp)
+{
+	int i;
+	gfs_dirent_t *ge = NULL;
+	gfs_file_t *fp = vp->v_data;
+	gfs_dir_t *dp = NULL;
+	void *data;
+
+	if (fp->gfs_parent == NULL || (vp->v_flag & V_XATTRDIR))
+		goto found;
+
+	dp = fp->gfs_parent->v_data;
+
+	/*
+	 * First, see if this vnode is cached in the parent.  The parent
+	 * directory stays locked until after v_count has been examined.
+	 */
+	gfs_dir_lock(dp);
+
+	/*
+	 * Find it in the set of static entries.
+	 */
+	for (i = 0; i < dp->gfsd_nstatic; i++)  {
+		ge = &dp->gfsd_static[i];
+
+		if (ge->gfse_vnode == vp)
+			goto found;
+	}
+
+	/*
+	 * No static entry caches this vnode, so it is a dynamic entry.
+	 * Reset 'ge', which still points at the last static entry examined;
+	 * a NULL 'ge' is what marks the dynamic case below.
+	 */
+	ge = NULL;
+
+found:
+	if (vp->v_flag & V_XATTRDIR) {
+		mutex_enter(&fp->gfs_parent->v_lock);
+	}
+	mutex_enter(&vp->v_lock);
+	if (vp->v_count == 1) {
+		/*
+		 * Really remove this vnode
+		 */
+		data = vp->v_data;
+		if (ge != NULL) {
+			/*
+			 * If this was a statically cached entry, simply set the
+			 * cached vnode to NULL.
+			 */
+			ge->gfse_vnode = NULL;
+		}
+		if (vp->v_flag & V_XATTRDIR) {
+			fp->gfs_parent->v_xattrdir = NULL;
+			mutex_exit(&fp->gfs_parent->v_lock);
+		}
+		mutex_exit(&vp->v_lock);
+
+		/*
+		 * Free vnode and release parent.  The directory lock is
+		 * dropped before the hold on the parent is released.
+		 */
+		if (fp->gfs_parent) {
+			if (dp) {
+				gfs_dir_unlock(dp);
+			}
+			VN_RELE(fp->gfs_parent);
+		} else {
+			ASSERT(vp->v_vfsp != NULL);
+			VFS_RELE(vp->v_vfsp);
+		}
+		vn_free(vp);
+	} else {
+		vp->v_count--;
+		data = NULL;
+		mutex_exit(&vp->v_lock);
+		if (vp->v_flag & V_XATTRDIR) {
+			mutex_exit(&fp->gfs_parent->v_lock);
+		}
+		if (dp)
+			gfs_dir_unlock(dp);
+	}
+
+	return (data);
+}
+
+/*
+ * gfs_dir_inactive()
+ *
+ * Same as above, but for directories.  When gfs_file_inactive() reports
+ * that the vnode was removed, the directory lock is destroyed and the
+ * private copy of the static entries is freed as well.  Returns what
+ * gfs_file_inactive() returned: the private data, or NULL if the vnode
+ * was not removed.
+ */
+void *
+gfs_dir_inactive(vnode_t *vp)
+{
+	gfs_dir_t *dp;
+
+	ASSERT(vp->v_type == VDIR);
+
+	if ((dp = gfs_file_inactive(vp)) != NULL) {
+		mutex_destroy(&dp->gfsd_lock);
+		if (dp->gfsd_nstatic)
+			kmem_free(dp->gfsd_static,
+			    dp->gfsd_nstatic * sizeof (gfs_dirent_t));
+	}
+
+	return (dp);
+}
+
+/*
+ * gfs_dir_lookup_dynamic()
+ *
+ * This routine looks up the provided name amongst the dynamic entries
+ * in the gfs directory and returns the corresponding vnode, if found.
+ *
+ * The gfs directory is expected to be locked by the caller prior to
+ * calling this function.  The directory will be unlocked during the
+ * execution of this function, but will be locked upon return from the
+ * function.  This function returns 0 on success, non-zero on error.
+ *
+ * The dynamic lookups are performed by invoking the lookup
+ * callback, which is passed to this function as the first argument.
+ * The arguments to the callback are: + * + * int gfs_lookup_cb(vnode_t *pvp, const char *nm, vnode_t **vpp, cred_t *cr, + * int flags, int *deflgs, pathname_t *rpnp); + * + * pvp - parent vnode + * nm - name of entry + * vpp - pointer to resulting vnode + * cr - pointer to cred + * flags - flags value from lookup request + * ignored here; currently only used to request + * insensitive lookups + * direntflgs - output parameter, directory entry flags + * ignored here; currently only used to indicate a lookup + * has more than one possible match when case is not considered + * realpnp - output parameter, real pathname + * ignored here; when lookup was performed case-insensitively, + * this field contains the "real" name of the file. + * + * Returns 0 on success, non-zero on error. + */ +static int +gfs_dir_lookup_dynamic(gfs_lookup_cb callback, gfs_dir_t *dp, + const char *nm, vnode_t *dvp, vnode_t **vpp, cred_t *cr, int flags, + int *direntflags, pathname_t *realpnp) +{ + gfs_file_t *fp; + ino64_t ino; + int ret; + + ASSERT(GFS_DIR_LOCKED(dp)); + + /* + * Drop the directory lock, as the lookup routine + * will need to allocate memory, or otherwise deadlock on this + * directory. + */ + gfs_dir_unlock(dp); + ret = callback(dvp, nm, vpp, &ino, cr, flags, direntflags, realpnp); + gfs_dir_lock(dp); + + /* + * The callback for extended attributes returns a vnode + * with v_data from an underlying fs. + */ + if (ret == 0 && !IS_XATTRDIR(dvp)) { + fp = (gfs_file_t *)((*vpp)->v_data); + fp->gfs_index = -1; + fp->gfs_ino = ino; + } + + return (ret); +} + +/* + * gfs_dir_lookup_static() + * + * This routine looks up the provided name amongst the static entries + * in the gfs directory and returns the corresponding vnode, if found. + * The first argument to the function is a pointer to the comparison + * function this function should use to decide if names are a match. 
+ *
+ * If a match is found, and GFS_CACHE_VNODE is set and the vnode
+ * exists, we simply return the existing vnode.  Otherwise, we call
+ * the static entry's callback routine, caching the result if
+ * necessary.  If the idx pointer argument is non-NULL, we use it to
+ * return the index of the matching static entry.  If rpnp is non-NULL,
+ * the matching entry's own name is copied into its buffer.  The vnode
+ * returned through vpp is held.
+ *
+ * The gfs directory is expected to be locked by the caller prior to calling
+ * this function.  The directory may be unlocked during the execution of
+ * this function, but will be locked upon return from the function.
+ *
+ * This function returns 0 if a match is found, ENOENT if not.
+ */
+static int
+gfs_dir_lookup_static(int (*compare)(const char *, const char *),
+    gfs_dir_t *dp, const char *nm, vnode_t *dvp, int *idx,
+    vnode_t **vpp, pathname_t *rpnp)
+{
+	gfs_dirent_t *ge;
+	vnode_t *vp = NULL;
+	int i;
+
+	ASSERT(GFS_DIR_LOCKED(dp));
+
+	/*
+	 * Search static entries.  Only the first matching entry is used.
+	 */
+	for (i = 0; i < dp->gfsd_nstatic; i++) {
+		ge = &dp->gfsd_static[i];
+
+		if (compare(ge->gfse_name, nm) == 0) {
+			if (rpnp)
+				(void) strlcpy(rpnp->pn_buf, ge->gfse_name,
+				    rpnp->pn_bufsize);
+
+			if (ge->gfse_vnode) {
+				ASSERT(ge->gfse_flags & GFS_CACHE_VNODE);
+				vp = ge->gfse_vnode;
+				VN_HOLD(vp);
+				break;
+			}
+
+			/*
+			 * We drop the directory lock, as the constructor will
+			 * need to do KM_SLEEP allocations.  If we return from
+			 * the constructor only to find that a parallel
+			 * operation has completed, and GFS_CACHE_VNODE is set
+			 * for this entry, we discard the result in favor of
+			 * the cached vnode.
+			 */
+			gfs_dir_unlock(dp);
+			vp = ge->gfse_ctor(dvp);
+			gfs_dir_lock(dp);
+
+			((gfs_file_t *)vp->v_data)->gfs_index = i;
+
+			/* Set the inode according to the callback. */
+			((gfs_file_t *)vp->v_data)->gfs_ino =
+			    dp->gfsd_inode(dvp, i);
+
+			if (ge->gfse_flags & GFS_CACHE_VNODE) {
+				if (ge->gfse_vnode == NULL) {
+					ge->gfse_vnode = vp;
+				} else {
+					/*
+					 * A parallel constructor beat us to it;
+					 * return existing vnode.  We have to be
+					 * careful because we can't release the
+					 * current vnode while holding the
+					 * directory lock; its inactive routine
+					 * will try to lock this directory.
+					 */
+					vnode_t *oldvp = vp;
+					vp = ge->gfse_vnode;
+					VN_HOLD(vp);
+
+					gfs_dir_unlock(dp);
+					VN_RELE(oldvp);
+					gfs_dir_lock(dp);
+				}
+			}
+			break;
+		}
+	}
+
+	if (vp == NULL)
+		return (ENOENT);
+	else if (idx)
+		*idx = i;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * gfs_dir_lookup()
+ *
+ * Looks up the given name in the directory and returns the corresponding
+ * vnode, if found.
+ *
+ *   dvp	- directory vnode
+ *   nm		- name to look up
+ *   vpp	- set to the resulting vnode, or to NULL if none is returned
+ *   cr		- credentials, passed on to the dynamic lookup callback
+ *   flags	- lookup flags; FIGNORECASE requests a case-insensitive match
+ *   direntflags - if non-NULL and FIGNORECASE is set, ED_CASE_CONFLICT is
+ *		  or'ed in when more than one entry matches the name
+ *   realpnp	- if non-NULL, receives the name of the matching entry
+ *
+ * "." and ".." (and the empty name) are handled by gfs_lookup_dot().
+ *
+ * Otherwise, we first search statically defined entries, if any, with a call
+ * to gfs_dir_lookup_static().  If no static entry is found, and we have
+ * a callback function we try a dynamic lookup via gfs_dir_lookup_dynamic().
+ * When case conflicts are being checked, the dynamic lookup is also tried
+ * after a static match, unless a second static entry already conflicts.
+ *
+ * Names are compared case-insensitively if FIGNORECASE is set or the vfs
+ * has the VFSFT_NOCASESENSITIVE feature.
+ *
+ * This function returns 0 on success, non-zero on error.
+ */
+int
+gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr,
+    int flags, int *direntflags, pathname_t *realpnp)
+{
+	gfs_dir_t *dp = dvp->v_data;
+	boolean_t casecheck;
+	vnode_t *dynvp = NULL;
+	vnode_t *vp = NULL;
+	int (*compare)(const char *, const char *);
+	int error, idx;
+
+	ASSERT(dvp->v_type == VDIR);
+
+	if (gfs_lookup_dot(vpp, dvp, dp->gfsd_file.gfs_parent, nm) == 0)
+		return (0);
+
+	casecheck = (flags & FIGNORECASE) != 0 && direntflags != NULL;
+	if (vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) ||
+	    (flags & FIGNORECASE))
+		compare = strcasecmp;
+	else
+		compare = strcmp;
+
+	gfs_dir_lock(dp);
+
+	error = gfs_dir_lookup_static(compare, dp, nm, dvp, &idx, &vp, realpnp);
+
+	if (vp && casecheck) {
+		gfs_dirent_t *ge;
+		int i;
+
+		for (i = idx + 1; i < dp->gfsd_nstatic; i++) {
+			ge = &dp->gfsd_static[i];
+
+			if (strcasecmp(ge->gfse_name, nm) == 0) {
+				*direntflags |= ED_CASE_CONFLICT;
+				goto out;
+			}
+		}
+	}
+
+	if ((error || casecheck) && dp->gfsd_lookup)
+		error = gfs_dir_lookup_dynamic(dp->gfsd_lookup, dp, nm, dvp,
+		    &dynvp, cr, flags, direntflags, vp ? NULL : realpnp);
+
+	if (vp && dynvp) {
+		/*
+		 * A static and a dynamic entry both match the name: this is
+		 * a case-insensitive conflict.  The static vnode is returned.
+		 */
+		ASSERT(casecheck);
+		*direntflags |= ED_CASE_CONFLICT;
+		VN_RELE(dynvp);
+	} else if (vp == NULL) {
+		vp = dynvp;
+	} else if (error == ENOENT) {
+		error = 0;
+	} else if (error) {
+		VN_RELE(vp);
+		vp = NULL;
+	}
+
+out:
+	gfs_dir_unlock(dp);
+
+	*vpp = vp;
+	return (error);
+}
+
+/*
+ * gfs_dir_readdir: does a readdir() on the given directory
+ *
+ *    dvp	- directory vnode
+ *    uiop	- uio structure
+ *    eofp	- eof pointer
+ *    data	- arbitrary data passed to readdir callback
+ *    cr, ct	- passed to gfs_get_parent_ino()
+ *    flags	- VOP_READDIR flags, also passed to the readdir callback
+ *
+ * This routine does all the readdir() dirty work.  Even so, the caller must
+ * supply two callbacks in order to get full compatibility.
+ *
+ * Entries are produced in this order: "." and "..", then the static
+ * entries in array order, then whatever the readdir callback supplies.
+ *
+ * If the directory contains static entries, an inode callback must be
+ * specified.  This avoids having to create every vnode and call VOP_GETATTR()
+ * when reading the directory.  This function has the following arguments:
+ *
+ *	ino_t gfs_inode_cb(vnode_t *vp, int index);
+ *
+ *	vp	- vnode for the directory
+ *	index	- index in original gfs_dirent_t array
+ *
+ *	Returns the inode number for the given entry.
+ *
+ * For directories with dynamic entries, a readdir callback must be provided.
+ * This is significantly more complex, thanks to the particulars of
+ * VOP_READDIR().
+ *
+ *	int gfs_readdir_cb(vnode_t *vp, void *dp, int *eofp,
+ *	    offset_t *off, offset_t *nextoff, void *data, int flags)
+ *
+ *	vp	- directory vnode
+ *	dp	- directory entry, sized according to maxlen given to
+ *		  gfs_dir_create().  callback must fill in d_name and
+ *		  d_ino (if a dirent64_t), or ed_name, ed_ino, and ed_eflags
+ *		  (if an edirent_t). edirent_t is used if V_RDDIR_ENTFLAGS
+ *		  is set in 'flags'.
+ *	eofp	- callback must set to 1 when EOF has been reached
+ *	off	- on entry, the last offset read from the directory.  Callback
+ *		  must set to the offset of the current entry, typically left
+ *		  untouched.
+ *	nextoff	- callback must set to offset of next entry.  Typically
+ *		  (off + 1)
+ *	data	- caller-supplied data
+ *	flags	- VOP_READDIR flags
+ *
+ *	The offsets seen by the callback are relative to the first dynamic
+ *	entry: the static entries (and "." and "..") are not counted.
+ *
+ * Return 0 on success, or error on failure.
+ */
+int
+gfs_dir_readdir(vnode_t *dvp, uio_t *uiop, int *eofp, void *data, cred_t *cr,
+    caller_context_t *ct, int flags)
+{
+	gfs_readdir_state_t gstate;
+	int error, eof = 0;
+	ino64_t ino, pino;
+	offset_t off, next;
+	gfs_dir_t *dp = dvp->v_data;
+
+	error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
+	if (error)
+		return (error);
+
+	if ((error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1, uiop,
+	    pino, ino, flags)) != 0)
+		return (error);
+
+	while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
+	    !eof) {
+
+		if (off >= 0 && off < dp->gfsd_nstatic) {
+			ino = dp->gfsd_inode(dvp, off);
+
+			if ((error = gfs_readdir_emit(&gstate, uiop,
+			    off, ino, dp->gfsd_static[off].gfse_name, 0))
+			    != 0)
+				break;
+
+		} else if (dp->gfsd_readdir) {
+			off -= dp->gfsd_nstatic;
+
+			if ((error = dp->gfsd_readdir(dvp,
+			    gstate.grd_dirent, &eof, &off, &next,
+			    data, flags)) != 0 || eof)
+				break;
+
+			off += dp->gfsd_nstatic + 2;
+			next += dp->gfsd_nstatic + 2;
+
+			if ((error = gfs_readdir_emit_int(&gstate, uiop,
+			    next)) != 0)
+				break;
+		} else {
+			/*
+			 * Offset is beyond the end of the static entries, and
+			 * we have no dynamic entries.  Set EOF.
+			 */
+			eof = 1;
+		}
+	}
+
+	return (gfs_readdir_fini(&gstate, error, eofp, eof));
+}
+
+
+/*
+ * gfs_vop_lookup: VOP_LOOKUP() entry point
+ *
+ * For use directly in vnode ops table.  Given a GFS directory, calls
+ * gfs_dir_lookup() as necessary.  The pnp, rdir and ct arguments are
+ * not used.
+ */
+/* ARGSUSED */
+int
+gfs_vop_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	return (gfs_dir_lookup(dvp, nm, vpp, cr, flags, direntflags, realpnp));
+}
+
+/*
+ * gfs_vop_readdir: VOP_READDIR() entry point
+ *
+ * For use directly in vnode ops table.  Given a GFS directory, calls
+ * gfs_dir_readdir() as necessary.
+ */
+/* ARGSUSED */
+int
+gfs_vop_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
+    caller_context_t *ct, int flags)
+{
+	return (gfs_dir_readdir(vp, uiop, eofp, NULL, cr, ct, flags));
+}
+
+
+/*
+ * gfs_vop_map: VOP_MAP() entry point
+ *
+ * Convenient routine for handling pseudo-files that wish to allow mmap() calls.
+ * This function only works for readonly files, and uses the read function for
+ * the vnode to fill in the data.  The mapped data is immediately faulted in and
+ * filled with the necessary data during this call; there are no getpage() or
+ * putpage() routines.
+ *
+ * Returns 0 on success.  Otherwise returns ENOTSUP if the vnode has VNOMAP
+ * set, EFBIG or EINVAL for an unusable offset or length (and, when built
+ * for _ILP32, ENOMEM for a length above MAXOFF_T), ENODEV if the vnode is
+ * not a regular file, EACCES if PROT_EXEC or PROT_WRITE is requested, ENXIO
+ * if the read does not fill the whole mapping, or the error reported by
+ * choose_addr(), as_map() or vn_rdwr().
+ */
+/* ARGSUSED */
+int
+gfs_vop_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cred,
+    caller_context_t *ct)
+{
+	int rv;
+	ssize_t resid = len;
+
+	/*
+	 * Check for bad parameters.  The order of the checks decides which
+	 * errno is reported when more than one of them applies.
+	 */
+#ifdef _ILP32
+	if (len > MAXOFF_T)
+		return (ENOMEM);
+#endif
+	if (vp->v_flag & VNOMAP)
+		return (ENOTSUP);
+	if (off > MAXOFF_T)
+		return (EFBIG);
+	if ((long)off < 0 || (long)(off + len) < 0)
+		return (EINVAL);
+	if (vp->v_type != VREG)
+		return (ENODEV);
+	if ((prot & (PROT_EXEC | PROT_WRITE)) != 0)
+		return (EACCES);
+
+	/*
+	 * Find appropriate address if needed, otherwise clear address range.
+	 */
+	as_rangelock(as);
+	rv = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+	if (rv != 0) {
+		as_rangeunlock(as);
+		return (rv);
+	}
+
+	/*
+	 * Create mapping
+	 */
+	rv = as_map(as, *addrp, len, segvn_create, zfod_argsp);
+	as_rangeunlock(as);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * Fill with data from read().  A short read is reported as ENXIO,
+	 * and on any failure the mapping created above is removed again.
+	 */
+	rv = vn_rdwr(UIO_READ, vp, *addrp, len, off, UIO_USERSPACE,
+	    0, (rlim64_t)0, cred, &resid);
+
+	if (rv == 0 && resid != 0)
+		rv = ENXIO;
+
+	if (rv != 0) {
+		as_rangelock(as);
+		(void) as_unmap(as, *addrp, len);
+		as_rangeunlock(as);
+	}
+
+	return (rv);
+}
+
+/*
+ * gfs_vop_inactive: VOP_INACTIVE() entry point
+ *
+ * Given a vnode that is a GFS file or directory, call gfs_file_inactive() or
+ * gfs_dir_inactive() as necessary, and kmem_free()s associated private data.
+ *
+ * The private data is only freed when the inactive routine returns it, that
+ * is, when the vnode really was removed.  Its size is taken from the gfs_size
+ * field of the private data itself, which is still valid at that point
+ * because the inactive routines free the vnode but not the private data.
+ */
+/* ARGSUSED */
+void
+gfs_vop_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+	gfs_file_t *fp = vp->v_data;
+	void *data;
+
+	if (fp->gfs_type == GFS_DIR)
+		data = gfs_dir_inactive(vp);
+	else
+		data = gfs_file_inactive(vp);
+
+	if (data != NULL)
+		kmem_free(data, fp->gfs_size);
+}
diff --git a/uts/common/fs/vnode.c b/uts/common/fs/vnode.c
new file mode 100644
index 000000000000..382369c7fc72
--- /dev/null
+++ b/uts/common/fs/vnode.c
@@ -0,0 +1,4536 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/errno.h> +#include <sys/cred.h> +#include <sys/user.h> +#include <sys/uio.h> +#include <sys/file.h> +#include <sys/pathname.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> +#include <sys/vnode.h> +#include <sys/rwstlock.h> +#include <sys/fem.h> +#include <sys/stat.h> +#include <sys/mode.h> +#include <sys/conf.h> +#include <sys/sysmacros.h> +#include <sys/cmn_err.h> +#include <sys/systm.h> +#include <sys/kmem.h> +#include <sys/debug.h> +#include <c2/audit.h> +#include <sys/acl.h> +#include <sys/nbmlock.h> +#include <sys/fcntl.h> +#include <fs/fs_subr.h> +#include <sys/taskq.h> +#include <fs/fs_reparse.h> + +/* Determine if this vnode is a file that is read-only */ +#define ISROFILE(vp) \ + ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \ + (vp)->v_type != VFIFO && vn_is_readonly(vp)) + +/* Tunable via /etc/system; used only by admin/install */ +int nfs_global_client_only; + +/* + * Array of vopstats_t for per-FS-type vopstats. This array has the same + * number of entries as and parallel to the vfssw table. (Arguably, it could + * be part of the vfssw table.) 
Once it's initialized, it's accessed using + * the same fstype index that is used to index into the vfssw table. + */ +vopstats_t **vopstats_fstype; + +/* vopstats initialization template used for fast initialization via bcopy() */ +static vopstats_t *vs_templatep; + +/* Kmem cache handle for vsk_anchor_t allocations */ +kmem_cache_t *vsk_anchor_cache; + +/* file events cleanup routine */ +extern void free_fopdata(vnode_t *); + +/* + * Root of AVL tree for the kstats associated with vopstats. Lock protects + * updates to vskstat_tree. + */ +avl_tree_t vskstat_tree; +kmutex_t vskstat_tree_lock; + +/* Global variable which enables/disables the vopstats collection */ +int vopstats_enabled = 1; + +/* + * forward declarations for internal vnode specific data (vsd) + */ +static void *vsd_realloc(void *, size_t, size_t); + +/* + * forward declarations for reparse point functions + */ +static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr); + +/* + * VSD -- VNODE SPECIFIC DATA + * The v_data pointer is typically used by a file system to store a + * pointer to the file system's private node (e.g. ufs inode, nfs rnode). + * However, there are times when additional project private data needs + * to be stored separately from the data (node) pointed to by v_data. + * This additional data could be stored by the file system itself or + * by a completely different kernel entity. VSD provides a way for + * callers to obtain a key and store a pointer to private data associated + * with a vnode. + * + * Callers are responsible for protecting the vsd by holding v_vsd_lock + * for calls to vsd_set() and vsd_get(). 
+ */
+
+/*
+ * vsd_lock protects:
+ *	vsd_nkeys - creation and deletion of vsd keys
+ *	vsd_list - insertion and deletion of vsd_node in the vsd_list
+ *	vsd_destructor - adding and removing destructors to the list
+ */
+static kmutex_t vsd_lock;
+static uint_t vsd_nkeys;		/* size of destructor array */
+/* list of vsd_node's */
+static list_t *vsd_list = NULL;
+/* per-key destructor funcs */
+static void (**vsd_destructor)(void *);
+
+/*
+ * The following is the common set of actions needed to update the
+ * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
+ * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
+ * recording of the bytes transferred.  Since the code is similar
+ * but small, it is nearly a duplicate.  Consequently any changes
+ * to one may need to be reflected in the other.
+ * Rundown of the variables:
+ * vp - Pointer to the vnode
+ * counter - Partial name structure member to update in vopstats for counts
+ * bytecounter - Partial name structure member to update in vopstats for bytes
+ * bytesval - Value to update in vopstats for bytes
+ * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
+ * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
+ *
+ * Both macros expand to a bare { } block that declares locals named vfsp,
+ * vsp and stataddr.  Nothing is counted unless the vfs has vfs_implp set,
+ * carries VFS_STATS, and the vnode type is not VBAD.  The per-vfs counter
+ * is bumped through stataddr after the dtrace probe has been handed that
+ * address; the per-fstype counter is bumped only when vfs_fstypevsp is set.
+ */
+
+#define	VOPSTATS_UPDATE(vp, counter) {					\
+	vfs_t *vfsp = (vp)->v_vfsp;					\
+	if (vfsp != NULL && vfsp->vfs_implp != NULL &&			\
+	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
+		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
+		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
+		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
+		    size_t, uint64_t *);				\
+		__dtrace_probe___fsinfo_##counter(vp, 0, stataddr);	\
+		(*stataddr)++;						\
+		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
+			vsp->n##counter.value.ui64++;			\
+		}							\
+	}								\
+}
+
+#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
+	vfs_t *vfsp = (vp)->v_vfsp;					\
+	if (vfsp != NULL && vfsp->vfs_implp != NULL &&			\
+	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
+		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
+		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
+		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
+		    size_t, uint64_t *);				\
+		__dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
+		(*stataddr)++;						\
+		vsp->bytecounter.value.ui64 += bytesval;		\
+		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
+			vsp->n##counter.value.ui64++;			\
+			vsp->bytecounter.value.ui64 += bytesval;	\
+		}							\
+	}								\
+}
+
+/*
+ * If the filesystem does not support XIDs (VFS_XID is clear), replace the
+ * credential with the one returned by crgetmapped().  Note that cr is
+ * assigned to, so it must be a modifiable lvalue in the caller.
+ * If the vfsp is NULL, perhaps we should also map?
+ */
+#define	VOPXID_MAP_CR(vp, cr)	{					\
+	vfs_t *vfsp = (vp)->v_vfsp;					\
+	if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)		\
+		cr = crgetmapped(cr);					\
+	}
+
+/*
+ * Convert stat(2) formats to vnode types and vice versa.  (Knows about
+ * numerical order of S_IFMT and vnode types.)
+ */
+enum vtype iftovt_tab[] = {
+	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
+};
+
+ushort_t vttoif_tab[] = {
+	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
+	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
+};
+
+/*
+ * The system vnode cache.
+ */
+
+kmem_cache_t *vn_cache;
+
+
+/*
+ * Vnode operations vector.
+ */
+
+/*
+ * Every entry is four initializers: the operation name, the offset of its
+ * slot in struct vnodeops, and two handler functions.  The list ends with
+ * an all-NULL/zero entry.
+ * NOTE(review): the two handlers are presumably the default routine and the
+ * routine used once a vnode is in an error state -- confirm against the
+ * definition of fs_operation_trans_def_t.
+ */
+static const fs_operation_trans_def_t vn_ops_table[] = {
+	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
+	    fs_setfl, fs_nosys,
+
+	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
+	    fs_rwlock, fs_rwlock,
+
+	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
+	    (fs_generic_func_p) fs_rwunlock,
+	    (fs_generic_func_p) fs_rwunlock,	/* no errors allowed */
+
+	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
+	    fs_cmp, fs_cmp,		/* no errors allowed */
+
+	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
+	    fs_frlock, fs_nosys,
+
+	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
+	    (fs_generic_func_p) fs_nosys_map,
+	    (fs_generic_func_p) fs_nosys_map,
+
+	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
+	    (fs_generic_func_p) fs_nosys_addmap,
+	    (fs_generic_func_p) fs_nosys_addmap,
+
+	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
+	    (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
+
+	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
+	    fs_pathconf, fs_nosys,
+
+	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
+	    (fs_generic_func_p) fs_dispose,
+	    (fs_generic_func_p) fs_nodispose,
+
+	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
+	    fs_fab_acl, fs_nosys,
+
+	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
+	    fs_shrlock, fs_nosys,
+
+	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
+	    (fs_generic_func_p) fs_vnevent_nosupport,
+	    (fs_generic_func_p) fs_vnevent_nosupport,
+
+	VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
+	    fs_nosys, fs_nosys,
+
+	VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
+	    fs_nosys, fs_nosys,
+
+	NULL, 0, NULL, NULL
+};
+
+/* Extensible attribute (xva) routines. */
+
+/*
+ * Zero out the structure, set the size of the requested/returned bitmaps,
+ * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
+ * to the returned attributes array.
+ */
+void
+xva_init(xvattr_t *xvap)
+{
+	bzero(xvap, sizeof (*xvap));
+
+	xvap->xva_vattr.va_mask = AT_XVATTR;
+	xvap->xva_magic = XVA_MAGIC;
+	xvap->xva_mapsize = XVA_MAPSIZE;
+	/* Point at the first element of the embedded return bitmap. */
+	xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
+}
+
+/*
+ * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
+ * structure.  Otherwise, returns NULL.
+ */
+xoptattr_t *
+xva_getxoptattr(xvattr_t *xvap)
+{
+	if ((xvap->xva_vattr.va_mask & AT_XVATTR) == 0)
+		return (NULL);
+
+	return (&xvap->xva_xoptattrs);
+}
+
+/*
+ * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
+ * We use the f_fsid reported by VFS_STATVFS() since we use that for the
+ * kstat name.  Returns -1, 0 or 1 according to how the two vsk_fsid values
+ * order.
+ */
+static int
+vska_compar(const void *n1, const void *n2)
+{
+	ulong_t lhs = ((vsk_anchor_t *)n1)->vsk_fsid;
+	ulong_t rhs = ((vsk_anchor_t *)n2)->vsk_fsid;
+
+	if (lhs < rhs)
+		return (-1);
+	if (lhs > rhs)
+		return (1);
+	return (0);
+}
+
+/*
+ * Used to create a single template which will be bcopy()ed to a newly
+ * allocated vsanchor_combo_t structure in new_vsanchor(), below.
+ */
+static vopstats_t *
+create_vopstats_template(void)
+{
+	vopstats_t *vsp;
+
+	/* kmem_zalloc() hands back zeroed memory, so we start fresh. */
+	vsp = kmem_zalloc(sizeof (*vsp), KM_SLEEP);
+
+	/* VOP_OPEN */
+	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
+	/* VOP_CLOSE */
+	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
+	/* VOP_READ I/O */
+	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
+	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
+	/* VOP_WRITE I/O */
+	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
+	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
+	/* VOP_IOCTL */
+	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
+	/* VOP_SETFL */
+	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
+	/* VOP_GETATTR */
+	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
+	/* VOP_SETATTR */
+	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
+	/* VOP_ACCESS */
+	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
+	/* VOP_LOOKUP */
+	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
+	/* VOP_CREATE */
+	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
+	/* VOP_REMOVE */
+	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
+	/* VOP_LINK */
+	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
+	/* VOP_RENAME */
+	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
+	/* VOP_MKDIR */
+	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
+	/* VOP_RMDIR */
+	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
+	/* VOP_READDIR I/O */
+	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
+	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
+	    KSTAT_DATA_UINT64);
+	/* VOP_SYMLINK */
+	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
+	/* VOP_READLINK */
+	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
+	/* VOP_FSYNC */
+	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
+	/* VOP_INACTIVE */
+	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
+	/* VOP_FID */
+	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
+	/* VOP_RWLOCK */
+	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
+	/* VOP_RWUNLOCK */
+	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
+	/* VOP_SEEK */
+	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
+	/* VOP_CMP */
+	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
+	/* VOP_FRLOCK */
+	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
+	/* VOP_SPACE */
+	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
+	/* VOP_REALVP */
+	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
+	/* VOP_GETPAGE */
+	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
+	/* VOP_PUTPAGE */
+	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
+	/* VOP_MAP */
+	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
+	/* VOP_ADDMAP */
+	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
+	/* VOP_DELMAP */
+	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
+	/* VOP_POLL */
+	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
+	/* VOP_DUMP */
+	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
+	/* VOP_PATHCONF */
+	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
+	/* VOP_PAGEIO */
+	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
+	/* VOP_DUMPCTL */
+	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
+	/* VOP_DISPOSE */
+	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
+	/* VOP_SETSECATTR */
+	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
+	/* VOP_GETSECATTR */
+	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
+	/* VOP_SHRLOCK */
+	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
+	/* VOP_VNEVENT */
+	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
+	/* VOP_REQZCBUF */
+	kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
+	/* VOP_RETZCBUF */
+	kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
+
+	return (vsp);
+}
+
+/*
+ * Creates a kstat structure associated with a vopstats structure.
+ *
+ * The kstat is created with KSTAT_FLAG_VIRTUAL and its ks_data is pointed
+ * at the caller's vsp, so vsp must stay valid for as long as the kstat
+ * exists.  Returns NULL when vopstats are disabled or kstat_create() fails.
+ */
+kstat_t *
+new_vskstat(char *ksname, vopstats_t *vsp)
+{
+	kstat_t *ksp;
+
+	if (!vopstats_enabled) {
+		return (NULL);
+	}
+
+	ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
+	    sizeof (vopstats_t)/sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
+	if (ksp != NULL) {
+		ksp->ks_data = vsp;
+		kstat_install(ksp);
+	}
+
+	return (ksp);
+}
+
+/*
+ * Called from vfsinit() to initialize the support mechanisms for vopstats.
+ * Does nothing when vopstats_enabled is clear.
+ */
+void
+vopstats_startup(void)
+{
+	if (!vopstats_enabled)
+		return;
+
+	/*
+	 * Creates the AVL tree which holds per-vfs vopstat anchors.  This
+	 * is necessary since we need to check if a kstat exists before we
+	 * attempt to create it.  Also, initialize its lock.
+	 */
+	avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
+	    offsetof(vsk_anchor_t, vsk_node));
+	mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
+	    sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
+	    NULL, NULL, 0);
+
+	/*
+	 * Set up the array of pointers for the vopstats-by-FS-type.
+	 * The entries will be allocated/initialized as each file system
+	 * goes through modload/mod_installfs.
+	 */
+	vopstats_fstype = kmem_zalloc(sizeof (vopstats_t *) * nfstype,
+	    KM_SLEEP);
+
+	/* Set up the global vopstats initialization template */
+	vs_templatep = create_vopstats_template();
+}
+
+/*
+ * We need to have all of the counters zeroed.
+ * The initialization of the vopstats_t includes on the order of
+ * 50 calls to kstat_named_init().
Rather than do that on every call,
+ * we do it once in a template (vs_templatep) then bcopy it over.
+ * A NULL vsp is ignored.
+ */
+void
+initialize_vopstats(vopstats_t *vsp)
+{
+	if (vsp != NULL)
+		bcopy(vs_templatep, vsp, sizeof (vopstats_t));
+}
+
+/*
+ * If possible, determine which vopstats by fstype to use and
+ * return a pointer to the caller.  Returns NULL when vfsp is NULL, when
+ * the vfs does not carry VFS_STATS, when vopstats are disabled, or when
+ * the resulting fstype index is out of range.
+ */
+vopstats_t *
+get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
+{
+	int fstype = 0;	/* Index into vfssw[] */
+
+	if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
+	    !vopstats_enabled)
+		return (NULL);
+
+	/*
+	 * Set up the fstype.  We go to so much trouble because all versions
+	 * of NFS use the same fstype in their vfs even though they have
+	 * distinct entries in the vfssw[] table.
+	 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
+	 */
+	if (vswp != NULL)
+		fstype = vswp - vfssw;	/* Gets us the index */
+	else
+		fstype = vfsp->vfs_fstype;
+
+	/*
+	 * Point to the per-fstype vopstats.  The only valid values are
+	 * non-zero positive values less than the number of vfssw[] table
+	 * entries.
+	 */
+	if (fstype <= 0 || fstype >= nfstype)
+		return (NULL);
+
+	return (vopstats_fstype[fstype]);
+}
+
+/*
+ * Generate a kstat name, create the kstat structure, and allocate a
+ * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
+ * to the caller.  This must only be called from a mount.
+ */
+vsk_anchor_t *
+get_vskstat_anchor(vfs_t *vfsp)
+{
+	char		kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
+	statvfs64_t	statvfsbuf;		/* Needed to find f_fsid */
+	vsk_anchor_t	*anchor;		/* vfs <--> kstat anchor */
+	kstat_t		*ksp;			/* Ptr to new kstat */
+	avl_index_t	where;			/* Location in the AVL tree */
+
+	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
+	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
+		return (NULL);
+
+	/* Need to get the fsid to build a kstat name */
+	if (VFS_STATVFS(vfsp, &statvfsbuf) != 0)
+		return (NULL);
+
+	/* Create a name for our kstats based on fsid */
+	(void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
+	    VOPSTATS_STR, statvfsbuf.f_fsid);
+
+	/* Allocate and initialize the vsk_anchor_t */
+	anchor = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
+	bzero(anchor, sizeof (*anchor));
+	anchor->vsk_fsid = statvfsbuf.f_fsid;
+
+	mutex_enter(&vskstat_tree_lock);
+	if (avl_find(&vskstat_tree, anchor, &where) != NULL) {
+		/* Oops, found one!  Release memory and lock. */
+		mutex_exit(&vskstat_tree_lock);
+		kmem_cache_free(vsk_anchor_cache, anchor);
+		return (NULL);
+	}
+	avl_insert(&vskstat_tree, anchor, where);
+	mutex_exit(&vskstat_tree_lock);
+
+	/*
+	 * Now that we've got the anchor in the AVL tree, we can create the
+	 * kstat.  If that fails the anchor is still returned, with vsk_ksp
+	 * left NULL from the bzero() above.
+	 */
+	ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
+	if (ksp != NULL)
+		anchor->vsk_ksp = ksp;
+
+	return (anchor);
+}
+
+/*
+ * We're in the process of tearing down the vfs and need to cleanup
+ * the data structures associated with the vopstats.  Must only be called
+ * from dounmount().
+ */
+void
+teardown_vopstats(vfs_t *vfsp)
+{
+	vsk_anchor_t *anchor;
+	avl_index_t where;
+
+	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
+	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
+		return;
+
+	/* This is a safe check since VFS_STATS must be set (see above) */
+	anchor = vfsp->vfs_vskap;
+	if (anchor == NULL)
+		return;
+
+	/* Whack the pointer right away */
+	vfsp->vfs_vskap = NULL;
+
+	/* Lock the tree, remove the node, and delete the kstat */
+	mutex_enter(&vskstat_tree_lock);
+	if (avl_find(&vskstat_tree, anchor, &where) != NULL)
+		avl_remove(&vskstat_tree, anchor);
+	if (anchor->vsk_ksp != NULL)
+		kstat_delete(anchor->vsk_ksp);
+	mutex_exit(&vskstat_tree_lock);
+
+	kmem_cache_free(vsk_anchor_cache, anchor);
+}
+
+/*
+ * Read or write a vnode.  Called from kernel code.
+ *
+ * A single-iovec uio is built from base/len/offset/seg and handed to
+ * VOP_READ() or VOP_WRITE() between VOP_RWLOCK() and VOP_RWUNLOCK().
+ * Returns EROFS for a write to a read-only file, EIO for a negative len,
+ * and EACCES when nbl_conflict() reports a conflict.  When residp is
+ * non-NULL the residual count is stored there; otherwise a non-zero
+ * residual turns the result into EIO.
+ */
+int
+vn_rdwr(
+	enum uio_rw rw,
+	struct vnode *vp,
+	caddr_t base,
+	ssize_t len,
+	offset_t offset,
+	enum uio_seg seg,
+	int ioflag,
+	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
+	cred_t *cr,
+	ssize_t *residp)
+{
+	struct uio uio;
+	struct iovec iov;
+	int writing = (rw == UIO_WRITE);
+	int in_crit = 0;
+	int error;
+
+	if (writing && ISROFILE(vp))
+		return (EROFS);
+
+	if (len < 0)
+		return (EIO);
+
+	/* May replace cr with its mapped credential. */
+	VOPXID_MAP_CR(vp, cr);
+
+	iov.iov_base = base;
+	iov.iov_len = len;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_loffset = offset;
+	uio.uio_segflg = (short)seg;
+	uio.uio_resid = len;
+	uio.uio_llimit = ulimit;
+
+	/*
+	 * We have to enter the critical region before calling VOP_RWLOCK
+	 * to avoid a deadlock with ufs.
+	 */
+	if (nbl_need_check(vp)) {
+		int svmand;
+
+		nbl_start_crit(vp, RW_READER);
+		in_crit = 1;
+		error = nbl_svmand(vp, cr, &svmand);
+		if (error != 0)
+			goto done;
+		if (nbl_conflict(vp, writing ? NBL_WRITE : NBL_READ,
+		    uio.uio_offset, uio.uio_resid, svmand, NULL)) {
+			error = EACCES;
+			goto done;
+		}
+	}
+
+	(void) VOP_RWLOCK(vp,
+	    writing ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
+	if (writing) {
+		uio.uio_fmode = FWRITE;
+		uio.uio_extflg = UIO_COPY_DEFAULT;
+		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
+	} else {
+		uio.uio_fmode = FREAD;
+		uio.uio_extflg = UIO_COPY_CACHED;
+		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
+	}
+	VOP_RWUNLOCK(vp,
+	    writing ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
+
+	if (residp != NULL)
+		*residp = uio.uio_resid;
+	else if (uio.uio_resid != 0)
+		error = EIO;
+
+done:
+	if (in_crit)
+		nbl_end_crit(vp);
+	return (error);
+}
+
+/*
+ * Release a vnode.  Call VOP_INACTIVE on last reference or
+ * decrement reference count.
+ *
+ * To avoid race conditions, the v_count is left at 1 for
+ * the call to VOP_INACTIVE. This prevents another thread
+ * from reclaiming and releasing the vnode *before* the
+ * VOP_INACTIVE routine has a chance to destroy the vnode.
+ * We can't have more than 1 thread calling VOP_INACTIVE
+ * on a vnode.
+ */
+void
+vn_rele(vnode_t *vp)
+{
+	VERIFY(vp->v_count > 0);
+	mutex_enter(&vp->v_lock);
+	if (vp->v_count != 1) {
+		/* Not the last reference: just drop it. */
+		vp->v_count--;
+		mutex_exit(&vp->v_lock);
+		return;
+	}
+	mutex_exit(&vp->v_lock);
+	VOP_INACTIVE(vp, CRED(), NULL);
+}
+
+/*
+ * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
+ * as a single reference, so v_count is not decremented until the last DNLC hold
+ * is released. This makes it possible to distinguish vnodes that are referenced
+ * only by the DNLC.
+ */
+void
+vn_rele_dnlc(vnode_t *vp)
+{
+	VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
+	mutex_enter(&vp->v_lock);
+	vp->v_count_dnlc--;
+	if (vp->v_count_dnlc == 0) {
+		if (vp->v_count == 1) {
+			/* Last hold of any kind: v_count stays at 1. */
+			mutex_exit(&vp->v_lock);
+			VOP_INACTIVE(vp, CRED(), NULL);
+			return;
+		}
+		vp->v_count--;
+	}
+	mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Like vn_rele() except that it clears v_stream under v_lock.
+ * This is used by sockfs when it dismantles the association between
+ * the sockfs node and the vnode in the underlying file system.
+ * v_lock has to be held to prevent a thread coming through the lookupname
+ * path from accessing a stream head that is going away.
+ */
+void
+vn_rele_stream(vnode_t *vp)
+{
+	VERIFY(vp->v_count > 0);
+	mutex_enter(&vp->v_lock);
+	vp->v_stream = NULL;
+	if (vp->v_count != 1) {
+		/* Other references remain: just drop ours. */
+		vp->v_count--;
+		mutex_exit(&vp->v_lock);
+		return;
+	}
+	mutex_exit(&vp->v_lock);
+	VOP_INACTIVE(vp, CRED(), NULL);
+}
+
+/*
+ * Task function dispatched by vn_rele_async(): calls VOP_INACTIVE() on the
+ * vnode with the credentials returned by CRED().
+ */
+static void
+vn_rele_inactive(vnode_t *vp)
+{
+	VOP_INACTIVE(vp, CRED(), NULL);
+}
+
+/*
+ * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
+ * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
+ * the file system as a result of releasing the vnode. Note, file systems
+ * already have to handle the race where the vnode is incremented before the
+ * inactive routine is called and does its locking.
+ *
+ * Warning: Excessive use of this routine can lead to performance problems.
+ * This is because taskqs throttle back allocation if too many are created.
+ */
+void
+vn_rele_async(vnode_t *vp, taskq_t *taskq)
+{
+	VERIFY(vp->v_count > 0);
+	mutex_enter(&vp->v_lock);
+	if (vp->v_count != 1) {
+		vp->v_count--;
+		mutex_exit(&vp->v_lock);
+		return;
+	}
+	mutex_exit(&vp->v_lock);
+
+	/* Last reference: the dispatch itself is VERIFY()ed to succeed. */
+	VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
+	    vp, TQ_SLEEP) != NULL);
+}
+
+/*
+ * Convenience wrapper: vn_openat() with a NULL startvp and an fd of -1.
+ */
+int
+vn_open(
+	char *pnamep,
+	enum uio_seg seg,
+	int filemode,
+	int createmode,
+	struct vnode **vpp,
+	enum create crwhy,
+	mode_t umask)
+{
+	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
+	    umask, NULL, -1));
+}
+
+
+/*
+ * Open/create a vnode.
+ * This may be callable by the kernel, the only known use
+ * of user context being that the current user credentials
+ * are used for permissions.  crwhy is defined iff filemode & FCREAT.
+ *
+ * On success the opened vnode is stored in *vpp and 0 is returned.  On
+ * failure any completed open and share reservation are undone, the vnode is
+ * released, and the error is returned; an ESTALE failure restarts the whole
+ * operation for as long as fs_need_estale_retry() allows it.
+ */
+int
+vn_openat(
+	char *pnamep,
+	enum uio_seg seg,
+	int filemode,
+	int createmode,
+	struct vnode **vpp,
+	enum create crwhy,
+	mode_t umask,
+	struct vnode *startvp,
+	int fd)
+{
+	struct vnode *vp;
+	int mode;
+	int accessflags;
+	int error;
+	int in_crit = 0;
+	int open_done = 0;
+	int shrlock_done = 0;
+	struct vattr vattr;
+	enum symfollow follow;
+	int estale_retry = 0;
+	struct shrlock shr;
+	struct shr_locowner shr_own;
+
+	/* Translate the open flags into the access mode to be checked. */
+	mode = 0;
+	accessflags = 0;
+	if (filemode & FREAD)
+		mode |= VREAD;
+	if (filemode & (FWRITE|FTRUNC))
+		mode |= VWRITE;
+	if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
+		mode |= VEXEC;
+
+	/* symlink interpretation */
+	if (filemode & FNOFOLLOW)
+		follow = NO_FOLLOW;
+	else
+		follow = FOLLOW;
+
+	if (filemode & FAPPEND)
+		accessflags |= V_APPEND;
+
+top:
+	if (filemode & FCREAT) {
+		enum vcexcl excl;
+
+		/*
+		 * Wish to create a file.
+		 */
+		vattr.va_type = VREG;
+		vattr.va_mode = createmode;
+		vattr.va_mask = AT_TYPE|AT_MODE;
+		if (filemode & FTRUNC) {
+			vattr.va_size = 0;
+			vattr.va_mask |= AT_SIZE;
+		}
+		if (filemode & FEXCL)
+			excl = EXCL;
+		else
+			excl = NONEXCL;
+
+		error = vn_createat(pnamep, seg, &vattr, excl, mode, &vp,
+		    crwhy, (filemode & ~(FTRUNC|FEXCL)), umask, startvp);
+		if (error != 0)
+			return (error);
+	} else {
+		/*
+		 * Wish to open a file.  Just look it up.
+		 */
+		error = lookupnameat(pnamep, seg, follow,
+		    NULLVPP, &vp, startvp);
+		if (error != 0) {
+			if ((error == ESTALE) &&
+			    fs_need_estale_retry(estale_retry++))
+				goto top;
+			return (error);
+		}
+
+		/*
+		 * Get the attributes to check whether file is large.
+		 * We do this only if the FOFFMAX flag is not set and
+		 * only for regular files.
+		 */
+
+		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
+			vattr.va_mask = AT_SIZE;
+			error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL);
+			if (error != 0) {
+				goto out;
+			}
+			if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
+				/*
+				 * Large File API - regular open fails
+				 * if FOFFMAX flag is not set in file mode
+				 */
+				error = EOVERFLOW;
+				goto out;
+			}
+		}
+		/*
+		 * Can't write directories, active texts, or
+		 * read-only filesystems.  Can't truncate files
+		 * on which mandatory locking is in effect.
+		 */
+		if (filemode & (FWRITE|FTRUNC)) {
+			/*
+			 * Allow writable directory if VDIROPEN flag is set.
+			 */
+			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
+				error = EISDIR;
+				goto out;
+			}
+			if (ISROFILE(vp)) {
+				error = EROFS;
+				goto out;
+			}
+			/*
+			 * Can't truncate files on which
+			 * sysv mandatory locking is in effect.
+			 */
+			if (filemode & FTRUNC) {
+				vnode_t *rvp;
+
+				if (VOP_REALVP(vp, &rvp, NULL) != 0)
+					rvp = vp;
+				if (rvp->v_filocks != NULL) {
+					vattr.va_mask = AT_MODE;
+					if ((error = VOP_GETATTR(vp,
+					    &vattr, 0, CRED(), NULL)) == 0 &&
+					    MANDLOCK(vp, vattr.va_mode))
+						error = EAGAIN;
+				}
+			}
+			/*
+			 * error is still 0 from the lookup above unless
+			 * the FTRUNC check just set it.
+			 */
+			if (error != 0)
+				goto out;
+		}
+		/*
+		 * Check permissions.
+		 */
+		error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL);
+		if (error != 0)
+			goto out;
+		/*
+		 * Require FSEARCH to return a directory.
+		 * Require FEXEC to return a regular file.
+		 */
+		if ((filemode & FSEARCH) && vp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto out;
+		}
+		if ((filemode & FEXEC) && vp->v_type != VREG) {
+			error = ENOEXEC;	/* XXX: error code? */
+			goto out;
+		}
+	}
+
+	/*
+	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
+	 */
+	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
+		error = ELOOP;
+		goto out;
+	}
+	if (filemode & FNOLINKS) {
+		vattr.va_mask = AT_NLINK;
+		error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL);
+		if (error != 0) {
+			goto out;
+		}
+		if (vattr.va_nlink != 1) {
+			error = EMLINK;
+			goto out;
+		}
+	}
+
+	/*
+	 * Opening a socket corresponding to the AF_UNIX pathname
+	 * in the filesystem name space is not supported.
+	 * However, VSOCK nodes in namefs are supported in order
+	 * to make fattach work for sockets.
+	 *
+	 * XXX This uses VOP_REALVP to distinguish between
+	 * an unopened namefs node (where VOP_REALVP returns a
+	 * different VSOCK vnode) and a VSOCK created by vn_create
+	 * in some file system (where VOP_REALVP would never return
+	 * a different vnode).
+	 */
+	if (vp->v_type == VSOCK) {
+		struct vnode *nvp;
+
+		error = VOP_REALVP(vp, &nvp, NULL);
+		if (error != 0 || nvp == NULL || nvp == vp ||
+		    nvp->v_type != VSOCK) {
+			error = EOPNOTSUPP;
+			goto out;
+		}
+	}
+
+	if ((vp->v_type == VREG) && nbl_need_check(vp)) {
+		/* get share reservation */
+		shr.s_access = 0;
+		if (filemode & FWRITE)
+			shr.s_access |= F_WRACC;
+		if (filemode & FREAD)
+			shr.s_access |= F_RDACC;
+		shr.s_deny = 0;
+		shr.s_sysid = 0;
+		shr.s_pid = ttoproc(curthread)->p_pid;
+		shr_own.sl_pid = shr.s_pid;
+		shr_own.sl_id = fd;
+		shr.s_own_len = sizeof (shr_own);
+		shr.s_owner = (caddr_t)&shr_own;
+		error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
+		    NULL);
+		if (error != 0)
+			goto out;
+		shrlock_done = 1;
+
+		/* nbmand conflict check if truncating file */
+		if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
+			nbl_start_crit(vp, RW_READER);
+			in_crit = 1;
+
+			vattr.va_mask = AT_SIZE;
+			error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL);
+			if (error != 0)
+				goto out;
+			if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
+			    NULL)) {
+				error = EACCES;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Do opening protocol.
+	 */
+	error = VOP_OPEN(&vp, filemode, CRED(), NULL);
+	if (error != 0)
+		goto out;
+	open_done = 1;
+
+	/*
+	 * Truncate if required.
+	 */
+	if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
+		vattr.va_size = 0;
+		vattr.va_mask = AT_SIZE;
+		if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
+			goto out;
+	}
+out:
+	ASSERT(vp->v_count > 0);
+
+	if (in_crit) {
+		nbl_end_crit(vp);
+		in_crit = 0;
+	}
+	if (error != 0) {
+		if (open_done) {
+			/*
+			 * Clearing shrlock_done here skips the explicit
+			 * F_UNSHARE below once VOP_CLOSE() has been called.
+			 */
+			(void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
+			    NULL);
+			open_done = 0;
+			shrlock_done = 0;
+		}
+		if (shrlock_done) {
+			(void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
+			    NULL);
+			shrlock_done = 0;
+		}
+
+		/*
+		 * The following clause was added to handle a problem
+		 * with NFS consistency.  It is possible that a lookup
+		 * of the file to be opened succeeded, but the file
+		 * itself doesn't actually exist on the server.  This
+		 * is chiefly due to the DNLC containing an entry for
+		 * the file which has been removed on the server.  In
+		 * this case, we just start over.  If there was some
+		 * other cause for the ESTALE error, then the lookup
+		 * of the file will fail and the error will be returned
+		 * above instead of looping around from here.
+		 */
+		VN_RELE(vp);
+		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+			goto top;
+	} else
+		*vpp = vp;
+	return (error);
+}
+
+/*
+ * The following two accessor functions are for the NFSv4 server.  Since there
+ * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
+ * vnode open counts correct when a client "upgrades" an open or does an
+ * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
+ * open mode (add or subtract read or write), but also change the share/deny
+ * modes.  However, share reservations are not integrated with OPEN, yet, so
+ * we need to handle each separately.  These functions are cleaner than having
+ * the NFS server manipulate the counts directly, however, nobody else should
+ * use these functions.
 */
void
vn_open_upgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	/* Bump the read and/or write open count named by filemode. */
	if (filemode & FREAD)
		atomic_add_32(&(vp->v_rdcnt), 1);
	if (filemode & FWRITE)
		atomic_add_32(&(vp->v_wrcnt), 1);

}

/*
 * Counterpart of vn_open_upgrade(): drop the read and/or write open count
 * named by filemode.  The counts are asserted to be positive beforehand.
 */
void
vn_open_downgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD) {
		ASSERT(vp->v_rdcnt > 0);
		atomic_add_32(&(vp->v_rdcnt), -1);
	}
	if (filemode & FWRITE) {
		ASSERT(vp->v_wrcnt > 0);
		atomic_add_32(&(vp->v_wrcnt), -1);
	}

}

/*
 * Convenience wrapper: vn_createat() with a NULL starting vnode.
 */
int
vn_create(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask)
{
	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
	    umask, NULL));
}

/*
 * Create a vnode (makenode).
 *
 * pnamep/seg name the object to create and startvp is handed to the lookup
 * as the starting vnode.  vap must have at least AT_TYPE and AT_MODE set in
 * va_mask (asserted below).  "why" selects between VOP_MKDIR() (CRMKDIR)
 * and VOP_CREATE(); the sticky bit is stripped from va_mode unless why is
 * CRMKNOD.  A non-zero umask is applied to va_mode only when the parent
 * directory reports no default ACL entries.  On return *vpp is whatever the
 * lookup or the create operation stored there; the function result is 0 or
 * an errno value.
 */
int
vn_createat(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask,
	struct vnode *startvp)
{
	struct vnode *dvp;	/* ptr to parent dir vnode */
	struct vnode *vp = NULL;
	struct pathname pn;
	int error;
	int in_crit = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/* symlink interpretation */
	if ((flag & FNOFOLLOW) || excl == EXCL)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;
	flag &= ~(FNOFOLLOW|FNOLINKS);

top:
	/*
	 * Lookup directory.
	 * If new object is a file, call lower level to create it.
	 * Note that it is up to the lower level to enforce exclusive
	 * creation, if the file is already there.
	 * This allows the lower level to do whatever
	 * locking or protocol that is needed to prevent races.
	 * If the new object is directory call lower level to make
	 * the new directory, with "." and "..".
	 */
	if (error = pn_get(pnamep, seg, &pn))
		return (error);
	if (auditing)
		audit_vncreate_start();
	dvp = NULL;
	*vpp = NULL;
	/*
	 * lookup will find the parent directory for the vnode.
	 * When it is done the pn holds the name of the entry
	 * in the directory.
	 * If this is a non-exclusive create we also find the node itself.
	 */
	error = lookuppnat(&pn, NULL, follow, &dvp,
	    (excl == EXCL) ? NULLVPP : vpp, startvp);
	if (error) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		if (why == CRMKDIR && error == EINVAL)
			error = EEXIST;		/* SVID */
		return (error);
	}

	if (why != CRMKNOD)
		vap->va_mode &= ~VSVTX;

	/*
	 * If default ACLs are defined for the directory don't apply the
	 * umask if umask is passed.
	 */

	if (umask) {

		vsecattr_t vsec;

		vsec.vsa_aclcnt = 0;
		vsec.vsa_aclentp = NULL;
		vsec.vsa_dfaclcnt = 0;
		vsec.vsa_dfaclentp = NULL;
		vsec.vsa_mask = VSA_DFACLCNT;
		error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
		/*
		 * If error is ENOSYS then treat it as no error
		 * Don't want to force all file systems to support
		 * aclent_t style of ACL's.
		 */
		if (error == ENOSYS)
			error = 0;
		if (error) {
			if (*vpp != NULL)
				VN_RELE(*vpp);
			goto out;
		} else {
			/*
			 * Apply the umask if no default ACLs.
			 */
			if (vsec.vsa_dfaclcnt == 0)
				vap->va_mode &= ~umask;

			/*
			 * VOP_GETSECATTR() may have allocated memory for
			 * ACLs we didn't request, so double-check and
			 * free it if necessary.
			 */
			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_aclentp,
				    vsec.vsa_aclcnt * sizeof (aclent_t));
			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_dfaclentp,
				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
		}
	}

	/*
	 * In general we want to generate EROFS if the file system is
	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
	 * documents the open system call, and it says that O_CREAT has no
	 * effect if the file already exists.  Bug 1119649 states
	 * that open(path, O_CREAT, ...) fails when attempting to open an
	 * existing file on a read only file system.  Thus, the first part
	 * of the following if statement has 3 checks:
	 *	if the file exists &&
	 *		it is being open with write access &&
	 *		the file system is read only
	 *	then generate EROFS
	 */
	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
		if (*vpp)
			VN_RELE(*vpp);
		error = EROFS;
	} else if (excl == NONEXCL && *vpp != NULL) {
		vnode_t *rvp;

		/*
		 * File already exists.  If a mandatory lock has been
		 * applied, return error.
		 */
		vp = *vpp;
		if (VOP_REALVP(vp, &rvp, NULL) != 0)
			rvp = vp;
		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
			vattr.va_mask = AT_MODE|AT_SIZE;
			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
				goto out;
			}
			if (MANDLOCK(vp, vattr.va_mode)) {
				error = EAGAIN;
				goto out;
			}
			/*
			 * File cannot be truncated if non-blocking mandatory
			 * locks are currently on the file.
			 */
			if ((vap->va_mask & AT_SIZE) && in_crit) {
				u_offset_t offset;
				ssize_t length;

				/*
				 * The range checked is the span between the
				 * current size and the requested size:
				 * it starts at the smaller of the two and
				 * is as long as their difference.
				 */
				offset = vap->va_size > vattr.va_size ?
				    vattr.va_size : vap->va_size;
				length = vap->va_size > vattr.va_size ?
				    vap->va_size - vattr.va_size :
				    vattr.va_size - vap->va_size;
				if (nbl_conflict(vp, NBL_WRITE, offset,
				    length, 0, NULL)) {
					error = EACCES;
					goto out;
				}
			}
		}

		/*
		 * If the file is the root of a VFS, we've crossed a
		 * mount point and the "containing" directory that we
		 * acquired above (dvp) is irrelevant because it's in
		 * a different file system.  We apply VOP_CREATE to the
		 * target itself instead of to the containing directory
		 * and supply a null path name to indicate (conventionally)
		 * the node itself as the "component" of interest.
		 *
		 * The intercession of the file system is necessary to
		 * ensure that the appropriate permission checks are
		 * done.
		 */
		if (vp->v_flag & VROOT) {
			ASSERT(why != CRMKDIR);
			error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
			    CRED(), flag, NULL, NULL);
			/*
			 * If the create succeeded, it will have created
			 * a new reference to the vnode.  Give up the
			 * original reference.  The assertion should not
			 * get triggered because NBMAND locks only apply to
			 * VREG files.  And if in_crit is non-zero for some
			 * reason, detect that here, rather than when we
			 * dereference a null vp.
			 */
			ASSERT(in_crit == 0);
			VN_RELE(vp);
			vp = NULL;
			goto out;
		}

		/*
		 * Large File API - non-large open (FOFFMAX flag not set)
		 * of regular file fails if the file size exceeds MAXOFF32_T.
		 */
		if (why != CRMKDIR &&
		    !(flag & FOFFMAX) &&
		    (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
				error = EOVERFLOW;
				goto out;
			}
		}
	}

	if (error == 0) {
		/*
		 * Call mkdir() if specified, otherwise create().
		 */
		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */

		if (why == CRMKDIR)
			/*
			 * N.B., if vn_createat() ever requests
			 * case-insensitive behavior then it will need
			 * to be passed to VOP_MKDIR().  VOP_CREATE()
			 * will already get it via "flag"
			 */
			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
			    NULL, 0, NULL);
		else if (!must_be_dir)
			error = VOP_CREATE(dvp, pn.pn_path, vap,
			    excl, mode, vpp, CRED(), flag, NULL, NULL);
		else
			error = ENOTDIR;
	}

out:

	/*
	 * Common exit: finish auditing, leave the nbmand critical region,
	 * drop the hold on the pre-existing vnode (if still held in vp),
	 * free the pathname and release the parent directory.
	 */
	if (auditing)
		audit_vncreate_finish(*vpp, error);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL) {
		VN_RELE(vp);
		vp = NULL;
	}
	pn_free(&pn);
	VN_RELE(dvp);
	/*
	 * The following clause was added to handle a problem
	 * with NFS consistency.  It is possible that a lookup
	 * of the file to be created succeeded, but the file
	 * itself doesn't actually exist on the server.  This
	 * is chiefly due to the DNLC containing an entry for
	 * the file which has been removed on the server.  In
	 * this case, we just start over.  If there was some
	 * other cause for the ESTALE error, then the lookup
	 * of the file will fail and the error will be returned
	 * above instead of looping around from here.
	 */
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

/*
 * Convenience wrapper: vn_linkat() with NULL starting vnodes and NO_FOLLOW
 * for the source lookup.
 */
int
vn_link(char *from, char *to, enum uio_seg seg)
{
	return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
}

/*
 * Link the object named by "from" (looked up starting at fstartvp, with the
 * given symlink policy) under the name "to" (looked up starting at
 * tstartvp).  Fails with EXDEV when source and target directory report
 * different fsids and with EROFS when the target directory's vfs is
 * read-only.  The whole operation is retried on ESTALE while
 * fs_need_estale_retry() allows it.  Returns 0 or an errno value.
 */
int
vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
    vnode_t *tstartvp, char *to, enum uio_seg seg)
{
	struct vnode *fvp;		/* from vnode ptr */
	struct vnode *tdvp;		/* to directory vnode ptr */
	struct pathname pn;
	int error;
	struct vattr vattr;
	dev_t fsid;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = tdvp = NULL;
	if (error = pn_get(to, seg, &pn))
		return (error);
	if (auditing && fstartvp != NULL)
		audit_setfsat_path(1);
	if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
		goto out;
	if (auditing && tstartvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
		goto out;
	/*
	 * Make sure both source vnode and target directory vnode are
	 * in the same vfs and that it is writeable.
	 */
	vattr.va_mask = AT_FSID;
	if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
		goto out;
	fsid = vattr.va_fsid;
	vattr.va_mask = AT_FSID;
	if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
		goto out;
	if (fsid != vattr.va_fsid) {
		error = EXDEV;
		goto out;
	}
	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}
	/*
	 * Do the link.
	 */
	(void) pn_fixslash(&pn);
	error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
out:
	pn_free(&pn);
	if (fvp)
		VN_RELE(fvp);
	if (tdvp)
		VN_RELE(tdvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

/*
 * Convenience wrapper: vn_renameat() with NULL starting directories.
 */
int
vn_rename(char *from, char *to, enum uio_seg seg)
{
	return (vn_renameat(NULL, from, NULL, to, seg));
}

/*
 * Rename fname (looked up starting at fdvp) to tname (looked up starting at
 * tdvp).  Fails with ENOENT when the source does not exist, EXDEV when the
 * two parent directories report different fsids, EROFS when the target
 * directory's vfs is read-only, and EACCES on an nbmand conflict on the
 * source or on an existing target.  Retried on ESTALE while
 * fs_need_estale_retry() allows it.  Returns 0 or an errno value.
 */
int
vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
    char *tname, enum uio_seg seg)
{
	int error;
	struct vattr vattr;
	struct pathname fpn;		/* from pathname */
	struct pathname tpn;		/* to pathname */
	dev_t fsid;
	int in_crit_src, in_crit_targ;
	vnode_t *fromvp, *fvp;
	vnode_t *tovp, *targvp;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = fromvp = tovp = targvp = NULL;
	in_crit_src = in_crit_targ = 0;
	/*
	 * Get to and from pathnames.
	 */
	if (error = pn_get(fname, seg, &fpn))
		return (error);
	if (error = pn_get(tname, seg, &tpn)) {
		pn_free(&fpn);
		return (error);
	}

	/*
	 * First we need to resolve the correct directories
	 * The passed in directories may only be a starting point,
	 * but we need the real directories the file(s) live in.
	 * For example the fname may be something like usr/lib/sparc
	 * and we were passed in the / directory, but we need to
	 * use the lib directory for the rename.
	 */

	if (auditing && fdvp != NULL)
		audit_setfsat_path(1);
	/*
	 * Lookup to and from directories.
	 */
	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
		goto out;
	}

	/*
	 * Make sure there is an entry.
	 */
	if (fvp == NULL) {
		error = ENOENT;
		goto out;
	}

	if (auditing && tdvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
		goto out;
	}

	/*
	 * Make sure both the from vnode directory and the to directory
	 * are in the same vfs and the to directory is writable.
	 * We check fsid's, not vfs pointers, so loopback fs works.
	 */
	if (fromvp != tovp) {
		vattr.va_mask = AT_FSID;
		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
			goto out;
		fsid = vattr.va_fsid;
		vattr.va_mask = AT_FSID;
		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
			goto out;
		if (fsid != vattr.va_fsid) {
			error = EXDEV;
			goto out;
		}
	}

	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/*
	 * An existing target that is a different vnode from the source
	 * would be removed by the rename, so check it for NBL_REMOVE
	 * conflicts.
	 */
	if (targvp && (fvp != targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit_targ = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	if (nbl_need_check(fvp)) {
		nbl_start_crit(fvp, RW_READER);
		in_crit_src = 1;
		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	/*
	 * Do the rename.
	 */
	(void) pn_fixslash(&tpn);
	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
	    NULL, 0);

out:
	pn_free(&fpn);
	pn_free(&tpn);
	if (in_crit_src)
		nbl_end_crit(fvp);
	if (in_crit_targ)
		nbl_end_crit(targvp);
	if (fromvp)
		VN_RELE(fromvp);
	if (tovp)
		VN_RELE(tovp);
	if (targvp)
		VN_RELE(targvp);
	if (fvp)
		VN_RELE(fvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

/*
 * Remove a file or directory.
 */
int
vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	return (vn_removeat(NULL, fnamep, seg, dirflag));
}

/*
 * Remove the entry named by fnamep, looked up starting at startvp.
 * dirflag == RMDIRECTORY selects VOP_RMDIR() (and requires the entry to be
 * a directory); any other value selects VOP_REMOVE().  Fails with ENOENT
 * when the entry does not exist, EBUSY for the root of a mounted file
 * system that is not VFS_UNLINKABLE, EROFS when the parent directory's vfs
 * is read-only and EACCES on an nbmand conflict.  Retried on ESTALE while
 * fs_need_estale_retry() allows it.  Returns 0 or an errno value.
 */
int
vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	struct vnode *vp;		/* entry vnode */
	struct vnode *dvp;		/* ptr to parent dir vnode */
	struct vnode *coveredvp;
	struct pathname pn;		/* name of entry */
	enum vtype vtype;
	int error;
	struct vfs *vfsp;
	struct vfs *dvfsp;	/* ptr to parent dir vfs */
	int in_crit = 0;
	int estale_retry = 0;

top:
	if (error = pn_get(fnamep, seg, &pn))
		return (error);
	dvp = vp = NULL;
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		return (error);
	}

	/*
	 * Make sure there is an entry.
	 */
	if (vp == NULL) {
		error = ENOENT;
		goto out;
	}

	vfsp = vp->v_vfsp;
	dvfsp = dvp->v_vfsp;

	/*
	 * If the named file is the root of a mounted filesystem, fail,
	 * unless it's marked unlinkable.  In that case, unmount the
	 * filesystem and proceed to unlink the covered vnode.  (If the
	 * covered vnode is a directory, use rmdir instead of unlink,
	 * to avoid file system corruption.)
	 */
	if (vp->v_flag & VROOT) {
		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
			error = EBUSY;
			goto out;
		}

		/*
		 * Namefs specific code starts here.
		 */

		if (dirflag == RMDIRECTORY) {
			/*
			 * User called rmdir(2) on a file that has
			 * been namefs mounted on top of.  Since
			 * namefs doesn't allow directories to
			 * be mounted on other files we know
			 * vp is not of type VDIR so fail the operation.
			 */
			error = ENOTDIR;
			goto out;
		}

		/*
		 * If VROOT is still set after grabbing vp->v_lock,
		 * no one has finished nm_unmount so far and coveredvp
		 * is valid.
		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
		 * vp->v_lock, any race window is eliminated.
		 */

		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & VROOT) == 0) {
			/* Someone beat us to the unmount */
			mutex_exit(&vp->v_lock);
			error = EBUSY;
			goto out;
		}
		vfsp = vp->v_vfsp;
		coveredvp = vfsp->vfs_vnodecovered;
		ASSERT(coveredvp);
		/*
		 * Note: Implementation of vn_vfswlock shows that ordering of
		 * v_lock / vn_vfswlock is not an issue here.
		 */
		error = vn_vfswlock(coveredvp);
		mutex_exit(&vp->v_lock);

		if (error)
			goto out;

		/*
		 * Take a hold on the covered vnode before giving up the
		 * hold on the namefs root; vp is switched to coveredvp
		 * below whether or not the unmount succeeds, so the exit
		 * path releases the right vnode.
		 */
		VN_HOLD(coveredvp);
		VN_RELE(vp);
		error = dounmount(vfsp, 0, CRED());

		/*
		 * Unmounted the namefs file system; now get
		 * the object it was mounted over.
		 */
		vp = coveredvp;
		/*
		 * If namefs was mounted over a directory, then
		 * we want to use rmdir() instead of unlink().
		 */
		if (vp->v_type == VDIR)
			dirflag = RMDIRECTORY;

		if (error)
			goto out;
	}

	/*
	 * Make sure filesystem is writeable.
	 * We check the parent directory's vfs in case this is an lofs vnode.
	 */
	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/* Remember the type now: vp may be released and cleared below. */
	vtype = vp->v_type;

	/*
	 * If there is the possibility of an nbmand share reservation, make
	 * sure it's okay to remove the file.  Keep a reference to the
	 * vnode, so that we can exit the nbl critical region after
	 * calling VOP_REMOVE.
	 * If there is no possibility of an nbmand share reservation,
	 * release the vnode reference now.  Filesystems like NFS may
	 * behave differently if there is an extra reference, so get rid of
	 * this one.  Fortunately, we can't have nbmand mounts on NFS
	 * filesystems.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	} else {
		VN_RELE(vp);
		vp = NULL;
	}

	if (dirflag == RMDIRECTORY) {
		/*
		 * Caller is using rmdir(2), which can only be applied to
		 * directories.
		 */
		if (vtype != VDIR) {
			error = ENOTDIR;
		} else {
			vnode_t *cwd;
			proc_t *pp = curproc;

			/*
			 * Hold the process's current directory across the
			 * VOP_RMDIR() call; it is passed down as an argument.
			 */
			mutex_enter(&pp->p_lock);
			cwd = PTOU(pp)->u_cdir;
			VN_HOLD(cwd);
			mutex_exit(&pp->p_lock);
			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
			    NULL, 0);
			VN_RELE(cwd);
		}
	} else {
		/*
		 * Unlink(2) can be applied to anything.
		 */
		error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
	}

out:
	pn_free(&pn);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

/*
 * Utility function to compare equality of vnodes.
 * Compare the underlying real vnodes, if there are underlying vnodes.
 * This is a more thorough comparison than the VN_CMP() macro provides.
 *
 * A NULL argument is passed through to VN_CMP() unchanged; VOP_REALVP() is
 * only consulted for non-NULL vnodes.
 */
int
vn_compare(vnode_t *vp1, vnode_t *vp2)
{
	vnode_t *realvp;

	if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
		vp1 = realvp;
	if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
		vp2 = realvp;
	return (VN_CMP(vp1, vp2));
}

/*
 * The number of locks to hash into.  This value must be a power
 * of 2 minus 1 and should probably also be prime.
 *
 * The "power of 2 minus 1" requirement comes from VN_VFSLOCKS_HASH(),
 * which uses NUM_BUCKETS as a bit mask.
 * NOTE(review): 1023 = 3 * 11 * 31, so the current value is not prime.
 */
#define	NUM_BUCKETS	1023

/*
 * One hash bucket: a mutex protecting a singly linked list of lock entries.
 * The pad member sizes the structure to 64 bytes in total.
 */
struct vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1 .
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];

#define	VN_VFSLOCKS_SHIFT	9

/* Bucket index: the pointer value shifted right, masked by NUM_BUCKETS. */
#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)

/*
 * vn_vfslocks_getlock() uses a hash scheme to generate
 * rwstlock using vfs/vnode pointer passed to it.
+ * + * vn_vfslocks_rele() releases a reference in the + * HASH table which allows the entry allocated by + * vn_vfslocks_getlock() to be freed at a later + * stage when the refcount drops to zero. + */ + +vn_vfslocks_entry_t * +vn_vfslocks_getlock(void *vfsvpptr) +{ + struct vn_vfslocks_bucket *bp; + vn_vfslocks_entry_t *vep; + vn_vfslocks_entry_t *tvep; + + ASSERT(vfsvpptr != NULL); + bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)]; + + mutex_enter(&bp->vb_lock); + for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) { + if (vep->ve_vpvfs == vfsvpptr) { + vep->ve_refcnt++; + mutex_exit(&bp->vb_lock); + return (vep); + } + } + mutex_exit(&bp->vb_lock); + vep = kmem_alloc(sizeof (*vep), KM_SLEEP); + rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL); + vep->ve_vpvfs = (char *)vfsvpptr; + vep->ve_refcnt = 1; + mutex_enter(&bp->vb_lock); + for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) { + if (tvep->ve_vpvfs == vfsvpptr) { + tvep->ve_refcnt++; + mutex_exit(&bp->vb_lock); + + /* + * There is already an entry in the hash + * destroy what we just allocated. 
+ */ + rwst_destroy(&vep->ve_lock); + kmem_free(vep, sizeof (*vep)); + return (tvep); + } + } + vep->ve_next = bp->vb_list; + bp->vb_list = vep; + mutex_exit(&bp->vb_lock); + return (vep); +} + +void +vn_vfslocks_rele(vn_vfslocks_entry_t *vepent) +{ + struct vn_vfslocks_bucket *bp; + vn_vfslocks_entry_t *vep; + vn_vfslocks_entry_t *pvep; + + ASSERT(vepent != NULL); + ASSERT(vepent->ve_vpvfs != NULL); + + bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)]; + + mutex_enter(&bp->vb_lock); + vepent->ve_refcnt--; + + if ((int32_t)vepent->ve_refcnt < 0) + cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative"); + + if (vepent->ve_refcnt == 0) { + for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) { + if (vep->ve_vpvfs == vepent->ve_vpvfs) { + if (bp->vb_list == vep) + bp->vb_list = vep->ve_next; + else { + /* LINTED */ + pvep->ve_next = vep->ve_next; + } + mutex_exit(&bp->vb_lock); + rwst_destroy(&vep->ve_lock); + kmem_free(vep, sizeof (*vep)); + return; + } + pvep = vep; + } + cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found"); + } + mutex_exit(&bp->vb_lock); +} + +/* + * vn_vfswlock_wait is used to implement a lock which is logically a writers + * lock protecting the v_vfsmountedhere field. + * vn_vfswlock_wait has been modified to be similar to vn_vfswlock, + * except that it blocks to acquire the lock VVFSLOCK. + * + * traverse() and routines re-implementing part of traverse (e.g. autofs) + * need to hold this lock. mount(), vn_rename(), vn_remove() and so on + * need the non-blocking version of the writers lock i.e. 
vn_vfswlock + */ +int +vn_vfswlock_wait(vnode_t *vp) +{ + int retval; + vn_vfslocks_entry_t *vpvfsentry; + ASSERT(vp != NULL); + + vpvfsentry = vn_vfslocks_getlock(vp); + retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER); + + if (retval == EINTR) { + vn_vfslocks_rele(vpvfsentry); + return (EINTR); + } + return (retval); +} + +int +vn_vfsrlock_wait(vnode_t *vp) +{ + int retval; + vn_vfslocks_entry_t *vpvfsentry; + ASSERT(vp != NULL); + + vpvfsentry = vn_vfslocks_getlock(vp); + retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER); + + if (retval == EINTR) { + vn_vfslocks_rele(vpvfsentry); + return (EINTR); + } + + return (retval); +} + + +/* + * vn_vfswlock is used to implement a lock which is logically a writers lock + * protecting the v_vfsmountedhere field. + */ +int +vn_vfswlock(vnode_t *vp) +{ + vn_vfslocks_entry_t *vpvfsentry; + + /* + * If vp is NULL then somebody is trying to lock the covered vnode + * of /. (vfs_vnodecovered is NULL for /). This situation will + * only happen when unmounting /. Since that operation will fail + * anyway, return EBUSY here instead of in VFS_UNMOUNT. + */ + if (vp == NULL) + return (EBUSY); + + vpvfsentry = vn_vfslocks_getlock(vp); + + if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER)) + return (0); + + vn_vfslocks_rele(vpvfsentry); + return (EBUSY); +} + +int +vn_vfsrlock(vnode_t *vp) +{ + vn_vfslocks_entry_t *vpvfsentry; + + /* + * If vp is NULL then somebody is trying to lock the covered vnode + * of /. (vfs_vnodecovered is NULL for /). This situation will + * only happen when unmounting /. Since that operation will fail + * anyway, return EBUSY here instead of in VFS_UNMOUNT. + */ + if (vp == NULL) + return (EBUSY); + + vpvfsentry = vn_vfslocks_getlock(vp); + + if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER)) + return (0); + + vn_vfslocks_rele(vpvfsentry); + return (EBUSY); +} + +void +vn_vfsunlock(vnode_t *vp) +{ + vn_vfslocks_entry_t *vpvfsentry; + + /* + * ve_refcnt needs to be decremented twice. + * 1. 
To release refernce after a call to vn_vfslocks_getlock() + * 2. To release the reference from the locking routines like + * vn_vfsrlock/vn_vfswlock etc,. + */ + vpvfsentry = vn_vfslocks_getlock(vp); + vn_vfslocks_rele(vpvfsentry); + + rwst_exit(&vpvfsentry->ve_lock); + vn_vfslocks_rele(vpvfsentry); +} + +int +vn_vfswlock_held(vnode_t *vp) +{ + int held; + vn_vfslocks_entry_t *vpvfsentry; + + ASSERT(vp != NULL); + + vpvfsentry = vn_vfslocks_getlock(vp); + held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER); + + vn_vfslocks_rele(vpvfsentry); + return (held); +} + + +int +vn_make_ops( + const char *name, /* Name of file system */ + const fs_operation_def_t *templ, /* Operation specification */ + vnodeops_t **actual) /* Return the vnodeops */ +{ + int unused_ops; + int error; + + *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP); + + (*actual)->vnop_name = name; + + error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ); + if (error) { + kmem_free(*actual, sizeof (vnodeops_t)); + } + +#if DEBUG + if (unused_ops != 0) + cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied " + "but not used", name, unused_ops); +#endif + + return (error); +} + +/* + * Free the vnodeops created as a result of vn_make_ops() + */ +void +vn_freevnodeops(vnodeops_t *vnops) +{ + kmem_free(vnops, sizeof (vnodeops_t)); +} + +/* + * Vnode cache. 
+ */ + +/* ARGSUSED */ +static int +vn_cache_constructor(void *buf, void *cdrarg, int kmflags) +{ + struct vnode *vp; + + vp = buf; + + mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL); + rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL); + vp->v_femhead = NULL; /* Must be done before vn_reinit() */ + vp->v_path = NULL; + vp->v_mpssdata = NULL; + vp->v_vsd = NULL; + vp->v_fopdata = NULL; + + return (0); +} + +/* ARGSUSED */ +static void +vn_cache_destructor(void *buf, void *cdrarg) +{ + struct vnode *vp; + + vp = buf; + + rw_destroy(&vp->v_nbllock); + cv_destroy(&vp->v_cv); + mutex_destroy(&vp->v_vsd_lock); + mutex_destroy(&vp->v_lock); +} + +void +vn_create_cache(void) +{ + /* LINTED */ + ASSERT((1 << VNODE_ALIGN_LOG2) == + P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN)); + vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), + VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL, + NULL, 0); +} + +void +vn_destroy_cache(void) +{ + kmem_cache_destroy(vn_cache); +} + +/* + * Used by file systems when fs-specific nodes (e.g., ufs inodes) are + * cached by the file system and vnodes remain associated. + */ +void +vn_recycle(vnode_t *vp) +{ + ASSERT(vp->v_pages == NULL); + + /* + * XXX - This really belongs in vn_reinit(), but we have some issues + * with the counts. Best to have it here for clean initialization. + */ + vp->v_rdcnt = 0; + vp->v_wrcnt = 0; + vp->v_mmap_read = 0; + vp->v_mmap_write = 0; + + /* + * If FEM was in use, make sure everything gets cleaned up + * NOTE: vp->v_femhead is initialized to NULL in the vnode + * constructor. 
+ */ + if (vp->v_femhead) { + /* XXX - There should be a free_femhead() that does all this */ + ASSERT(vp->v_femhead->femh_list == NULL); + mutex_destroy(&vp->v_femhead->femh_lock); + kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead))); + vp->v_femhead = NULL; + } + if (vp->v_path) { + kmem_free(vp->v_path, strlen(vp->v_path) + 1); + vp->v_path = NULL; + } + + if (vp->v_fopdata != NULL) { + free_fopdata(vp); + } + vp->v_mpssdata = NULL; + vsd_free(vp); +} + +/* + * Used to reset the vnode fields including those that are directly accessible + * as well as those which require an accessor function. + * + * Does not initialize: + * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv + * v_data (since FS-nodes and vnodes point to each other and should + * be updated simultaneously) + * v_op (in case someone needs to make a VOP call on this object) + */ +void +vn_reinit(vnode_t *vp) +{ + vp->v_count = 1; + vp->v_count_dnlc = 0; + vp->v_vfsp = NULL; + vp->v_stream = NULL; + vp->v_vfsmountedhere = NULL; + vp->v_flag = 0; + vp->v_type = VNON; + vp->v_rdev = NODEV; + + vp->v_filocks = NULL; + vp->v_shrlocks = NULL; + vp->v_pages = NULL; + + vp->v_locality = NULL; + vp->v_xattrdir = NULL; + + /* Handles v_femhead, v_path, and the r/w/map counts */ + vn_recycle(vp); +} + +vnode_t * +vn_alloc(int kmflag) +{ + vnode_t *vp; + + vp = kmem_cache_alloc(vn_cache, kmflag); + + if (vp != NULL) { + vp->v_femhead = NULL; /* Must be done before vn_reinit() */ + vp->v_fopdata = NULL; + vn_reinit(vp); + } + + return (vp); +} + +void +vn_free(vnode_t *vp) +{ + ASSERT(vp->v_shrlocks == NULL); + ASSERT(vp->v_filocks == NULL); + + /* + * Some file systems call vn_free() with v_count of zero, + * some with v_count of 1. In any case, the value should + * never be anything else. 
+ */ + ASSERT((vp->v_count == 0) || (vp->v_count == 1)); + ASSERT(vp->v_count_dnlc == 0); + if (vp->v_path != NULL) { + kmem_free(vp->v_path, strlen(vp->v_path) + 1); + vp->v_path = NULL; + } + + /* If FEM was in use, make sure everything gets cleaned up */ + if (vp->v_femhead) { + /* XXX - There should be a free_femhead() that does all this */ + ASSERT(vp->v_femhead->femh_list == NULL); + mutex_destroy(&vp->v_femhead->femh_lock); + kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead))); + vp->v_femhead = NULL; + } + + if (vp->v_fopdata != NULL) { + free_fopdata(vp); + } + vp->v_mpssdata = NULL; + vsd_free(vp); + kmem_cache_free(vn_cache, vp); +} + +/* + * vnode status changes, should define better states than 1, 0. + */ +void +vn_reclaim(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + + if (vfsp == NULL || + vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { + return; + } + (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED); +} + +void +vn_idle(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + + if (vfsp == NULL || + vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { + return; + } + (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED); +} +void +vn_exists(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + + if (vfsp == NULL || + vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { + return; + } + (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS); +} + +void +vn_invalid(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + + if (vfsp == NULL || + vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { + return; + } + (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED); +} + +/* Vnode event notification */ + +int +vnevent_support(vnode_t *vp, caller_context_t *ct) +{ + if (vp == NULL) + return (EINVAL); + + return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct)); +} + +void +vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct); +} + +void 
+vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name, + caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct); +} + +void +vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct); +} + +void +vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct); +} + +void +vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct); +} + +void +vnevent_create(vnode_t *vp, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct); +} + +void +vnevent_link(vnode_t *vp, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct); +} + +void +vnevent_mountedover(vnode_t *vp, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct); +} + +/* + * Vnode accessors. + */ + +int +vn_is_readonly(vnode_t *vp) +{ + return (vp->v_vfsp->vfs_flag & VFS_RDONLY); +} + +int +vn_has_flocks(vnode_t *vp) +{ + return (vp->v_filocks != NULL); +} + +int +vn_has_mandatory_locks(vnode_t *vp, int mode) +{ + return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode))); +} + +int +vn_has_cached_data(vnode_t *vp) +{ + return (vp->v_pages != NULL); +} + +/* + * Return 0 if the vnode in question shouldn't be permitted into a zone via + * zone_enter(2). 
+ */ +int +vn_can_change_zones(vnode_t *vp) +{ + struct vfssw *vswp; + int allow = 1; + vnode_t *rvp; + + if (nfs_global_client_only != 0) + return (1); + + /* + * We always want to look at the underlying vnode if there is one. + */ + if (VOP_REALVP(vp, &rvp, NULL) != 0) + rvp = vp; + /* + * Some pseudo filesystems (including doorfs) don't actually register + * their vfsops_t, so the following may return NULL; we happily let + * such vnodes switch zones. + */ + vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp)); + if (vswp != NULL) { + if (vswp->vsw_flag & VSW_NOTZONESAFE) + allow = 0; + vfs_unrefvfssw(vswp); + } + return (allow); +} + +/* + * Return nonzero if the vnode is a mount point, zero if not. + */ +int +vn_ismntpt(vnode_t *vp) +{ + return (vp->v_vfsmountedhere != NULL); +} + +/* Retrieve the vfs (if any) mounted on this vnode */ +vfs_t * +vn_mountedvfs(vnode_t *vp) +{ + return (vp->v_vfsmountedhere); +} + +/* + * Return nonzero if the vnode is referenced by the dnlc, zero if not. + */ +int +vn_in_dnlc(vnode_t *vp) +{ + return (vp->v_count_dnlc > 0); +} + +/* + * vn_has_other_opens() checks whether a particular file is opened by more than + * just the caller and whether the open is for read and/or write. + * This routine is for calling after the caller has already called VOP_OPEN() + * and the caller wishes to know if they are the only one with it open for + * the mode(s) specified. + * + * Vnode counts are only kept on regular files (v_type=VREG). 
+ */ +int +vn_has_other_opens( + vnode_t *vp, + v_mode_t mode) +{ + + ASSERT(vp != NULL); + + switch (mode) { + case V_WRITE: + if (vp->v_wrcnt > 1) + return (V_TRUE); + break; + case V_RDORWR: + if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1)) + return (V_TRUE); + break; + case V_RDANDWR: + if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1)) + return (V_TRUE); + break; + case V_READ: + if (vp->v_rdcnt > 1) + return (V_TRUE); + break; + } + + return (V_FALSE); +} + +/* + * vn_is_opened() checks whether a particular file is opened and + * whether the open is for read and/or write. + * + * Vnode counts are only kept on regular files (v_type=VREG). + */ +int +vn_is_opened( + vnode_t *vp, + v_mode_t mode) +{ + + ASSERT(vp != NULL); + + switch (mode) { + case V_WRITE: + if (vp->v_wrcnt) + return (V_TRUE); + break; + case V_RDANDWR: + if (vp->v_rdcnt && vp->v_wrcnt) + return (V_TRUE); + break; + case V_RDORWR: + if (vp->v_rdcnt || vp->v_wrcnt) + return (V_TRUE); + break; + case V_READ: + if (vp->v_rdcnt) + return (V_TRUE); + break; + } + + return (V_FALSE); +} + +/* + * vn_is_mapped() checks whether a particular file is mapped and whether + * the file is mapped read and/or write. + */ +int +vn_is_mapped( + vnode_t *vp, + v_mode_t mode) +{ + + ASSERT(vp != NULL); + +#if !defined(_LP64) + switch (mode) { + /* + * The atomic_add_64_nv functions force atomicity in the + * case of 32 bit architectures. Otherwise the 64 bit values + * require two fetches. 
The value of the fields may be + * (potentially) changed between the first fetch and the + * second + */ + case V_WRITE: + if (atomic_add_64_nv((&(vp->v_mmap_write)), 0)) + return (V_TRUE); + break; + case V_RDANDWR: + if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) && + (atomic_add_64_nv((&(vp->v_mmap_write)), 0))) + return (V_TRUE); + break; + case V_RDORWR: + if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) || + (atomic_add_64_nv((&(vp->v_mmap_write)), 0))) + return (V_TRUE); + break; + case V_READ: + if (atomic_add_64_nv((&(vp->v_mmap_read)), 0)) + return (V_TRUE); + break; + } +#else + switch (mode) { + case V_WRITE: + if (vp->v_mmap_write) + return (V_TRUE); + break; + case V_RDANDWR: + if (vp->v_mmap_read && vp->v_mmap_write) + return (V_TRUE); + break; + case V_RDORWR: + if (vp->v_mmap_read || vp->v_mmap_write) + return (V_TRUE); + break; + case V_READ: + if (vp->v_mmap_read) + return (V_TRUE); + break; + } +#endif + + return (V_FALSE); +} + +/* + * Set the operations vector for a vnode. + * + * FEM ensures that the v_femhead pointer is filled in before the + * v_op pointer is changed. This means that if the v_femhead pointer + * is NULL, and the v_op field hasn't changed since before which checked + * the v_femhead pointer; then our update is ok - we are not racing with + * FEM. + */ +void +vn_setops(vnode_t *vp, vnodeops_t *vnodeops) +{ + vnodeops_t *op; + + ASSERT(vp != NULL); + ASSERT(vnodeops != NULL); + + op = vp->v_op; + membar_consumer(); + /* + * If vp->v_femhead == NULL, then we'll call casptr() to do the + * compare-and-swap on vp->v_op. If either fails, then FEM is + * in effect on the vnode and we need to have FEM deal with it. + */ + if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) { + fem_setvnops(vp, vnodeops); + } +} + +/* + * Retrieve the operations vector for a vnode + * As with vn_setops(above); make sure we aren't racing with FEM. 
+ * FEM sets the v_op to a special, internal, vnodeops that wouldn't + * make sense to the callers of this routine. + */ +vnodeops_t * +vn_getops(vnode_t *vp) +{ + vnodeops_t *op; + + ASSERT(vp != NULL); + + op = vp->v_op; + membar_consumer(); + if (vp->v_femhead == NULL && op == vp->v_op) { + return (op); + } else { + return (fem_getvnops(vp)); + } +} + +/* + * Returns non-zero (1) if the vnodeops matches that of the vnode. + * Returns zero (0) if not. + */ +int +vn_matchops(vnode_t *vp, vnodeops_t *vnodeops) +{ + return (vn_getops(vp) == vnodeops); +} + +/* + * Returns non-zero (1) if the specified operation matches the + * corresponding operation for that the vnode. + * Returns zero (0) if not. + */ + +#define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0)) + +int +vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp) +{ + const fs_operation_trans_def_t *otdp; + fs_generic_func_p *loc = NULL; + vnodeops_t *vop = vn_getops(vp); + + ASSERT(vopname != NULL); + + for (otdp = vn_ops_table; otdp->name != NULL; otdp++) { + if (MATCHNAME(otdp->name, vopname)) { + loc = (fs_generic_func_p *) + ((char *)(vop) + otdp->offset); + break; + } + } + + return ((loc != NULL) && (*loc == funcp)); +} + +/* + * fs_new_caller_id() needs to return a unique ID on a given local system. + * The IDs do not need to survive across reboots. These are primarily + * used so that (FEM) monitors can detect particular callers (such as + * the NFS server) to a given vnode/vfs operation. + */ +u_longlong_t +fs_new_caller_id() +{ + static uint64_t next_caller_id = 0LL; /* First call returns 1 */ + + return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1)); +} + +/* + * Given a starting vnode and a path, updates the path in the target vnode in + * a safe manner. If the vnode already has path information embedded, then the + * cached path is left untouched. 
+ */ + +size_t max_vnode_path = 4 * MAXPATHLEN; + +void +vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp, + const char *path, size_t plen) +{ + char *rpath; + vnode_t *base; + size_t rpathlen, rpathalloc; + int doslash = 1; + + if (*path == '/') { + base = rootvp; + path++; + plen--; + } else { + base = startvp; + } + + /* + * We cannot grab base->v_lock while we hold vp->v_lock because of + * the potential for deadlock. + */ + mutex_enter(&base->v_lock); + if (base->v_path == NULL) { + mutex_exit(&base->v_lock); + return; + } + + rpathlen = strlen(base->v_path); + rpathalloc = rpathlen + plen + 1; + /* Avoid adding a slash if there's already one there */ + if (base->v_path[rpathlen-1] == '/') + doslash = 0; + else + rpathalloc++; + + /* + * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held, + * so we must do this dance. If, by chance, something changes the path, + * just give up since there is no real harm. + */ + mutex_exit(&base->v_lock); + + /* Paths should stay within reason */ + if (rpathalloc > max_vnode_path) + return; + + rpath = kmem_alloc(rpathalloc, KM_SLEEP); + + mutex_enter(&base->v_lock); + if (base->v_path == NULL || strlen(base->v_path) != rpathlen) { + mutex_exit(&base->v_lock); + kmem_free(rpath, rpathalloc); + return; + } + bcopy(base->v_path, rpath, rpathlen); + mutex_exit(&base->v_lock); + + if (doslash) + rpath[rpathlen++] = '/'; + bcopy(path, rpath + rpathlen, plen); + rpath[rpathlen + plen] = '\0'; + + mutex_enter(&vp->v_lock); + if (vp->v_path != NULL) { + mutex_exit(&vp->v_lock); + kmem_free(rpath, rpathalloc); + } else { + vp->v_path = rpath; + mutex_exit(&vp->v_lock); + } +} + +/* + * Sets the path to the vnode to be the given string, regardless of current + * context. The string must be a complete path from rootdir. This is only used + * by fsop_root() for setting the path based on the mountpoint. 
+ */ +void +vn_setpath_str(struct vnode *vp, const char *str, size_t len) +{ + char *buf = kmem_alloc(len + 1, KM_SLEEP); + + mutex_enter(&vp->v_lock); + if (vp->v_path != NULL) { + mutex_exit(&vp->v_lock); + kmem_free(buf, len + 1); + return; + } + + vp->v_path = buf; + bcopy(str, vp->v_path, len); + vp->v_path[len] = '\0'; + + mutex_exit(&vp->v_lock); +} + +/* + * Called from within filesystem's vop_rename() to handle renames once the + * target vnode is available. + */ +void +vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len) +{ + char *tmp; + + mutex_enter(&vp->v_lock); + tmp = vp->v_path; + vp->v_path = NULL; + mutex_exit(&vp->v_lock); + vn_setpath(rootdir, dvp, vp, nm, len); + if (tmp != NULL) + kmem_free(tmp, strlen(tmp) + 1); +} + +/* + * Similar to vn_setpath_str(), this function sets the path of the destination + * vnode to the be the same as the source vnode. + */ +void +vn_copypath(struct vnode *src, struct vnode *dst) +{ + char *buf; + int alloc; + + mutex_enter(&src->v_lock); + if (src->v_path == NULL) { + mutex_exit(&src->v_lock); + return; + } + alloc = strlen(src->v_path) + 1; + + /* avoid kmem_alloc() with lock held */ + mutex_exit(&src->v_lock); + buf = kmem_alloc(alloc, KM_SLEEP); + mutex_enter(&src->v_lock); + if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) { + mutex_exit(&src->v_lock); + kmem_free(buf, alloc); + return; + } + bcopy(src->v_path, buf, alloc); + mutex_exit(&src->v_lock); + + mutex_enter(&dst->v_lock); + if (dst->v_path != NULL) { + mutex_exit(&dst->v_lock); + kmem_free(buf, alloc); + return; + } + dst->v_path = buf; + mutex_exit(&dst->v_lock); +} + +/* + * XXX Private interface for segvn routines that handle vnode + * large page segments. + * + * return 1 if vp's file system VOP_PAGEIO() implementation + * can be safely used instead of VOP_GETPAGE() for handling + * pagefaults against regular non swap files. 
VOP_PAGEIO() + * interface is considered safe here if its implementation + * is very close to VOP_GETPAGE() implementation. + * e.g. It zero's out the part of the page beyond EOF. Doesn't + * panic if there're file holes but instead returns an error. + * Doesn't assume file won't be changed by user writes, etc. + * + * return 0 otherwise. + * + * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs. + */ +int +vn_vmpss_usepageio(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + char *fsname = vfssw[vfsp->vfs_fstype].vsw_name; + char *pageio_ok_fss[] = {"ufs", "nfs", NULL}; + char **fsok = pageio_ok_fss; + + if (fsname == NULL) { + return (0); + } + + for (; *fsok; fsok++) { + if (strcmp(*fsok, fsname) == 0) { + return (1); + } + } + return (0); +} + +/* VOP_XXX() macros call the corresponding fop_xxx() function */ + +int +fop_open( + vnode_t **vpp, + int mode, + cred_t *cr, + caller_context_t *ct) +{ + int ret; + vnode_t *vp = *vpp; + + VN_HOLD(vp); + /* + * Adding to the vnode counts before calling open + * avoids the need for a mutex. It circumvents a race + * condition where a query made on the vnode counts results in a + * false negative. The inquirer goes away believing the file is + * not open when there is an open on the file already under way. + * + * The counts are meant to prevent NFS from granting a delegation + * when it would be dangerous to do so. + * + * The vnode counts are only kept on regular files + */ + if ((*vpp)->v_type == VREG) { + if (mode & FREAD) + atomic_add_32(&((*vpp)->v_rdcnt), 1); + if (mode & FWRITE) + atomic_add_32(&((*vpp)->v_wrcnt), 1); + } + + VOPXID_MAP_CR(vp, cr); + + ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct); + + if (ret) { + /* + * Use the saved vp just in case the vnode ptr got trashed + * by the error. 
+ */ + VOPSTATS_UPDATE(vp, open); + if ((vp->v_type == VREG) && (mode & FREAD)) + atomic_add_32(&(vp->v_rdcnt), -1); + if ((vp->v_type == VREG) && (mode & FWRITE)) + atomic_add_32(&(vp->v_wrcnt), -1); + } else { + /* + * Some filesystems will return a different vnode, + * but the same path was still used to open it. + * So if we do change the vnode and need to + * copy over the path, do so here, rather than special + * casing each filesystem. Adjust the vnode counts to + * reflect the vnode switch. + */ + VOPSTATS_UPDATE(*vpp, open); + if (*vpp != vp && *vpp != NULL) { + vn_copypath(vp, *vpp); + if (((*vpp)->v_type == VREG) && (mode & FREAD)) + atomic_add_32(&((*vpp)->v_rdcnt), 1); + if ((vp->v_type == VREG) && (mode & FREAD)) + atomic_add_32(&(vp->v_rdcnt), -1); + if (((*vpp)->v_type == VREG) && (mode & FWRITE)) + atomic_add_32(&((*vpp)->v_wrcnt), 1); + if ((vp->v_type == VREG) && (mode & FWRITE)) + atomic_add_32(&(vp->v_wrcnt), -1); + } + } + VN_RELE(vp); + return (ret); +} + +int +fop_close( + vnode_t *vp, + int flag, + int count, + offset_t offset, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct); + VOPSTATS_UPDATE(vp, close); + /* + * Check passed in count to handle possible dups. 
Vnode counts are only + * kept on regular files + */ + if ((vp->v_type == VREG) && (count == 1)) { + if (flag & FREAD) { + ASSERT(vp->v_rdcnt > 0); + atomic_add_32(&(vp->v_rdcnt), -1); + } + if (flag & FWRITE) { + ASSERT(vp->v_wrcnt > 0); + atomic_add_32(&(vp->v_wrcnt), -1); + } + } + return (err); +} + +int +fop_read( + vnode_t *vp, + uio_t *uiop, + int ioflag, + cred_t *cr, + caller_context_t *ct) +{ + int err; + ssize_t resid_start = uiop->uio_resid; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct); + VOPSTATS_UPDATE_IO(vp, read, + read_bytes, (resid_start - uiop->uio_resid)); + return (err); +} + +int +fop_write( + vnode_t *vp, + uio_t *uiop, + int ioflag, + cred_t *cr, + caller_context_t *ct) +{ + int err; + ssize_t resid_start = uiop->uio_resid; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct); + VOPSTATS_UPDATE_IO(vp, write, + write_bytes, (resid_start - uiop->uio_resid)); + return (err); +} + +int +fop_ioctl( + vnode_t *vp, + int cmd, + intptr_t arg, + int flag, + cred_t *cr, + int *rvalp, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct); + VOPSTATS_UPDATE(vp, ioctl); + return (err); +} + +int +fop_setfl( + vnode_t *vp, + int oflags, + int nflags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct); + VOPSTATS_UPDATE(vp, setfl); + return (err); +} + +int +fop_getattr( + vnode_t *vp, + vattr_t *vap, + int flags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + /* + * If this file system doesn't understand the xvattr extensions + * then turn off the xvattr bit. 
+ */ + if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) { + vap->va_mask &= ~AT_XVATTR; + } + + /* + * We're only allowed to skip the ACL check iff we used a 32 bit + * ACE mask with VOP_ACCESS() to determine permissions. + */ + if ((flags & ATTR_NOACLCHECK) && + vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { + return (EINVAL); + } + err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct); + VOPSTATS_UPDATE(vp, getattr); + return (err); +} + +int +fop_setattr( + vnode_t *vp, + vattr_t *vap, + int flags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + /* + * If this file system doesn't understand the xvattr extensions + * then turn off the xvattr bit. + */ + if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) { + vap->va_mask &= ~AT_XVATTR; + } + + /* + * We're only allowed to skip the ACL check iff we used a 32 bit + * ACE mask with VOP_ACCESS() to determine permissions. + */ + if ((flags & ATTR_NOACLCHECK) && + vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { + return (EINVAL); + } + err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct); + VOPSTATS_UPDATE(vp, setattr); + return (err); +} + +int +fop_access( + vnode_t *vp, + int mode, + int flags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + if ((flags & V_ACE_MASK) && + vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { + return (EINVAL); + } + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct); + VOPSTATS_UPDATE(vp, access); + return (err); +} + +int +fop_lookup( + vnode_t *dvp, + char *nm, + vnode_t **vpp, + pathname_t *pnp, + int flags, + vnode_t *rdir, + cred_t *cr, + caller_context_t *ct, + int *deflags, /* Returned per-dirent flags */ + pathname_t *ppnp) /* Returned case-preserved name in directory */ +{ + int ret; + + /* + * If this file system doesn't support case-insensitive access + * and said access is requested, fail quickly. 
It is required + * that if the vfs supports case-insensitive lookup, it also + * supports extended dirent flags. + */ + if (flags & FIGNORECASE && + (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(dvp, cr); + + if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) { + ret = xattr_dir_lookup(dvp, vpp, flags, cr); + } else { + ret = (*(dvp)->v_op->vop_lookup) + (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp); + } + if (ret == 0 && *vpp) { + VOPSTATS_UPDATE(*vpp, lookup); + if ((*vpp)->v_path == NULL) { + vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm)); + } + } + + return (ret); +} + +int +fop_create( + vnode_t *dvp, + char *name, + vattr_t *vap, + vcexcl_t excl, + int mode, + vnode_t **vpp, + cred_t *cr, + int flags, + caller_context_t *ct, + vsecattr_t *vsecp) /* ACL to set during create */ +{ + int ret; + + if (vsecp != NULL && + vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) { + return (EINVAL); + } + /* + * If this file system doesn't support case-insensitive access + * and said access is requested, fail quickly. + */ + if (flags & FIGNORECASE && + (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(dvp, cr); + + ret = (*(dvp)->v_op->vop_create) + (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp); + if (ret == 0 && *vpp) { + VOPSTATS_UPDATE(*vpp, create); + if ((*vpp)->v_path == NULL) { + vn_setpath(rootdir, dvp, *vpp, name, strlen(name)); + } + } + + return (ret); +} + +int +fop_remove( + vnode_t *dvp, + char *nm, + cred_t *cr, + caller_context_t *ct, + int flags) +{ + int err; + + /* + * If this file system doesn't support case-insensitive access + * and said access is requested, fail quickly. 
+ */ + if (flags & FIGNORECASE && + (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(dvp, cr); + + err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags); + VOPSTATS_UPDATE(dvp, remove); + return (err); +} + +int +fop_link( + vnode_t *tdvp, + vnode_t *svp, + char *tnm, + cred_t *cr, + caller_context_t *ct, + int flags) +{ + int err; + + /* + * If the target file system doesn't support case-insensitive access + * and said access is requested, fail quickly. + */ + if (flags & FIGNORECASE && + (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(tdvp, cr); + + err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags); + VOPSTATS_UPDATE(tdvp, link); + return (err); +} + +int +fop_rename( + vnode_t *sdvp, + char *snm, + vnode_t *tdvp, + char *tnm, + cred_t *cr, + caller_context_t *ct, + int flags) +{ + int err; + + /* + * If the file system involved does not support + * case-insensitive access and said access is requested, fail + * quickly. + */ + if (flags & FIGNORECASE && + ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))) + return (EINVAL); + + VOPXID_MAP_CR(tdvp, cr); + + err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags); + VOPSTATS_UPDATE(sdvp, rename); + return (err); +} + +int +fop_mkdir( + vnode_t *dvp, + char *dirname, + vattr_t *vap, + vnode_t **vpp, + cred_t *cr, + caller_context_t *ct, + int flags, + vsecattr_t *vsecp) /* ACL to set during create */ +{ + int ret; + + if (vsecp != NULL && + vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) { + return (EINVAL); + } + /* + * If this file system doesn't support case-insensitive access + * and said access is requested, fail quickly. 
+ */ + if (flags & FIGNORECASE && + (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(dvp, cr); + + ret = (*(dvp)->v_op->vop_mkdir) + (dvp, dirname, vap, vpp, cr, ct, flags, vsecp); + if (ret == 0 && *vpp) { + VOPSTATS_UPDATE(*vpp, mkdir); + if ((*vpp)->v_path == NULL) { + vn_setpath(rootdir, dvp, *vpp, dirname, + strlen(dirname)); + } + } + + return (ret); +} + +int +fop_rmdir( + vnode_t *dvp, + char *nm, + vnode_t *cdir, + cred_t *cr, + caller_context_t *ct, + int flags) +{ + int err; + + /* + * If this file system doesn't support case-insensitive access + * and said access is requested, fail quickly. + */ + if (flags & FIGNORECASE && + (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(dvp, cr); + + err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags); + VOPSTATS_UPDATE(dvp, rmdir); + return (err); +} + +int +fop_readdir( + vnode_t *vp, + uio_t *uiop, + cred_t *cr, + int *eofp, + caller_context_t *ct, + int flags) +{ + int err; + ssize_t resid_start = uiop->uio_resid; + + /* + * If this file system doesn't support retrieving directory + * entry flags and said access is requested, fail quickly. + */ + if (flags & V_RDDIR_ENTFLAGS && + vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0) + return (EINVAL); + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags); + VOPSTATS_UPDATE_IO(vp, readdir, + readdir_bytes, (resid_start - uiop->uio_resid)); + return (err); +} + +int +fop_symlink( + vnode_t *dvp, + char *linkname, + vattr_t *vap, + char *target, + cred_t *cr, + caller_context_t *ct, + int flags) +{ + int err; + xvattr_t xvattr; + + /* + * If this file system doesn't support case-insensitive access + * and said access is requested, fail quickly. 
+ */ + if (flags & FIGNORECASE && + (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && + vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) + return (EINVAL); + + VOPXID_MAP_CR(dvp, cr); + + /* check for reparse point */ + if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) && + (strncmp(target, FS_REPARSE_TAG_STR, + strlen(FS_REPARSE_TAG_STR)) == 0)) { + if (!fs_reparse_mark(target, vap, &xvattr)) + vap = (vattr_t *)&xvattr; + } + + err = (*(dvp)->v_op->vop_symlink) + (dvp, linkname, vap, target, cr, ct, flags); + VOPSTATS_UPDATE(dvp, symlink); + return (err); +} + +int +fop_readlink( + vnode_t *vp, + uio_t *uiop, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct); + VOPSTATS_UPDATE(vp, readlink); + return (err); +} + +int +fop_fsync( + vnode_t *vp, + int syncflag, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct); + VOPSTATS_UPDATE(vp, fsync); + return (err); +} + +void +fop_inactive( + vnode_t *vp, + cred_t *cr, + caller_context_t *ct) +{ + /* Need to update stats before vop call since we may lose the vnode */ + VOPSTATS_UPDATE(vp, inactive); + + VOPXID_MAP_CR(vp, cr); + + (*(vp)->v_op->vop_inactive)(vp, cr, ct); +} + +int +fop_fid( + vnode_t *vp, + fid_t *fidp, + caller_context_t *ct) +{ + int err; + + err = (*(vp)->v_op->vop_fid)(vp, fidp, ct); + VOPSTATS_UPDATE(vp, fid); + return (err); +} + +int +fop_rwlock( + vnode_t *vp, + int write_lock, + caller_context_t *ct) +{ + int ret; + + ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct)); + VOPSTATS_UPDATE(vp, rwlock); + return (ret); +} + +void +fop_rwunlock( + vnode_t *vp, + int write_lock, + caller_context_t *ct) +{ + (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct); + VOPSTATS_UPDATE(vp, rwunlock); +} + +int +fop_seek( + vnode_t *vp, + offset_t ooff, + offset_t *noffp, + caller_context_t *ct) +{ + int err; + + err = 
(*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct); + VOPSTATS_UPDATE(vp, seek); + return (err); +} + +int +fop_cmp( + vnode_t *vp1, + vnode_t *vp2, + caller_context_t *ct) +{ + int err; + + err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct); + VOPSTATS_UPDATE(vp1, cmp); + return (err); +} + +int +fop_frlock( + vnode_t *vp, + int cmd, + flock64_t *bfp, + int flag, + offset_t offset, + struct flk_callback *flk_cbp, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_frlock) + (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct); + VOPSTATS_UPDATE(vp, frlock); + return (err); +} + +int +fop_space( + vnode_t *vp, + int cmd, + flock64_t *bfp, + int flag, + offset_t offset, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct); + VOPSTATS_UPDATE(vp, space); + return (err); +} + +int +fop_realvp( + vnode_t *vp, + vnode_t **vpp, + caller_context_t *ct) +{ + int err; + + err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct); + VOPSTATS_UPDATE(vp, realvp); + return (err); +} + +int +fop_getpage( + vnode_t *vp, + offset_t off, + size_t len, + uint_t *protp, + page_t **plarr, + size_t plsz, + struct seg *seg, + caddr_t addr, + enum seg_rw rw, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_getpage) + (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct); + VOPSTATS_UPDATE(vp, getpage); + return (err); +} + +int +fop_putpage( + vnode_t *vp, + offset_t off, + size_t len, + int flags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct); + VOPSTATS_UPDATE(vp, putpage); + return (err); +} + +int +fop_map( + vnode_t *vp, + offset_t off, + struct as *as, + caddr_t *addrp, + size_t len, + uchar_t prot, + uchar_t maxprot, + uint_t flags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, 
cr); + + err = (*(vp)->v_op->vop_map) + (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct); + VOPSTATS_UPDATE(vp, map); + return (err); +} + +int +fop_addmap( + vnode_t *vp, + offset_t off, + struct as *as, + caddr_t addr, + size_t len, + uchar_t prot, + uchar_t maxprot, + uint_t flags, + cred_t *cr, + caller_context_t *ct) +{ + int error; + u_longlong_t delta; + + VOPXID_MAP_CR(vp, cr); + + error = (*(vp)->v_op->vop_addmap) + (vp, off, as, addr, len, prot, maxprot, flags, cr, ct); + + if ((!error) && (vp->v_type == VREG)) { + delta = (u_longlong_t)btopr(len); + /* + * If file is declared MAP_PRIVATE, it can't be written back + * even if open for write. Handle as read. + */ + if (flags & MAP_PRIVATE) { + atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), + (int64_t)delta); + } else { + /* + * atomic_add_64 forces the fetch of a 64 bit value to + * be atomic on 32 bit machines + */ + if (maxprot & PROT_WRITE) + atomic_add_64((uint64_t *)(&(vp->v_mmap_write)), + (int64_t)delta); + if (maxprot & PROT_READ) + atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), + (int64_t)delta); + if (maxprot & PROT_EXEC) + atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), + (int64_t)delta); + } + } + VOPSTATS_UPDATE(vp, addmap); + return (error); +} + +int +fop_delmap( + vnode_t *vp, + offset_t off, + struct as *as, + caddr_t addr, + size_t len, + uint_t prot, + uint_t maxprot, + uint_t flags, + cred_t *cr, + caller_context_t *ct) +{ + int error; + u_longlong_t delta; + + VOPXID_MAP_CR(vp, cr); + + error = (*(vp)->v_op->vop_delmap) + (vp, off, as, addr, len, prot, maxprot, flags, cr, ct); + + /* + * NFS calls into delmap twice, the first time + * it simply establishes a callback mechanism and returns EAGAIN + * while the real work is being done upon the second invocation. + * We have to detect this here and only decrement the counts upon + * the second delmap request. 
+ */ + if ((error != EAGAIN) && (vp->v_type == VREG)) { + + delta = (u_longlong_t)btopr(len); + + if (flags & MAP_PRIVATE) { + atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), + (int64_t)(-delta)); + } else { + /* + * atomic_add_64 forces the fetch of a 64 bit value + * to be atomic on 32 bit machines + */ + if (maxprot & PROT_WRITE) + atomic_add_64((uint64_t *)(&(vp->v_mmap_write)), + (int64_t)(-delta)); + if (maxprot & PROT_READ) + atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), + (int64_t)(-delta)); + if (maxprot & PROT_EXEC) + atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), + (int64_t)(-delta)); + } + } + VOPSTATS_UPDATE(vp, delmap); + return (error); +} + + +int +fop_poll( + vnode_t *vp, + short events, + int anyyet, + short *reventsp, + struct pollhead **phpp, + caller_context_t *ct) +{ + int err; + + err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct); + VOPSTATS_UPDATE(vp, poll); + return (err); +} + +int +fop_dump( + vnode_t *vp, + caddr_t addr, + offset_t lbdn, + offset_t dblks, + caller_context_t *ct) +{ + int err; + + /* ensure lbdn and dblks can be passed safely to bdev_dump */ + if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks)) + return (EIO); + + err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct); + VOPSTATS_UPDATE(vp, dump); + return (err); +} + +int +fop_pathconf( + vnode_t *vp, + int cmd, + ulong_t *valp, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct); + VOPSTATS_UPDATE(vp, pathconf); + return (err); +} + +int +fop_pageio( + vnode_t *vp, + struct page *pp, + u_offset_t io_off, + size_t io_len, + int flags, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct); + VOPSTATS_UPDATE(vp, pageio); + return (err); +} + +int +fop_dumpctl( + vnode_t *vp, + int action, + offset_t *blkp, + caller_context_t *ct) +{ + int err; + err = 
(*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct); + VOPSTATS_UPDATE(vp, dumpctl); + return (err); +} + +void +fop_dispose( + vnode_t *vp, + page_t *pp, + int flag, + int dn, + cred_t *cr, + caller_context_t *ct) +{ + /* Must do stats first since it's possible to lose the vnode */ + VOPSTATS_UPDATE(vp, dispose); + + VOPXID_MAP_CR(vp, cr); + + (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct); +} + +int +fop_setsecattr( + vnode_t *vp, + vsecattr_t *vsap, + int flag, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + /* + * We're only allowed to skip the ACL check iff we used a 32 bit + * ACE mask with VOP_ACCESS() to determine permissions. + */ + if ((flag & ATTR_NOACLCHECK) && + vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { + return (EINVAL); + } + err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct); + VOPSTATS_UPDATE(vp, setsecattr); + return (err); +} + +int +fop_getsecattr( + vnode_t *vp, + vsecattr_t *vsap, + int flag, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + /* + * We're only allowed to skip the ACL check iff we used a 32 bit + * ACE mask with VOP_ACCESS() to determine permissions. 
+ */ + if ((flag & ATTR_NOACLCHECK) && + vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { + return (EINVAL); + } + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct); + VOPSTATS_UPDATE(vp, getsecattr); + return (err); +} + +int +fop_shrlock( + vnode_t *vp, + int cmd, + struct shrlock *shr, + int flag, + cred_t *cr, + caller_context_t *ct) +{ + int err; + + VOPXID_MAP_CR(vp, cr); + + err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct); + VOPSTATS_UPDATE(vp, shrlock); + return (err); +} + +int +fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm, + caller_context_t *ct) +{ + int err; + + err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct); + VOPSTATS_UPDATE(vp, vnevent); + return (err); +} + +int +fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr, + caller_context_t *ct) +{ + int err; + + if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0) + return (ENOTSUP); + err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct); + VOPSTATS_UPDATE(vp, reqzcbuf); + return (err); +} + +int +fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct) +{ + int err; + + if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0) + return (ENOTSUP); + err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct); + VOPSTATS_UPDATE(vp, retzcbuf); + return (err); +} + +/* + * Default destructor + * Needed because NULL destructor means that the key is unused + */ +/* ARGSUSED */ +void +vsd_defaultdestructor(void *value) +{} + +/* + * Create a key (index into per vnode array) + * Locks out vsd_create, vsd_destroy, and vsd_free + * May allocate memory with lock held + */ +void +vsd_create(uint_t *keyp, void (*destructor)(void *)) +{ + int i; + uint_t nkeys; + + /* + * if key is allocated, do nothing + */ + mutex_enter(&vsd_lock); + if (*keyp) { + mutex_exit(&vsd_lock); + return; + } + /* + * find an unused key + */ + if (destructor == NULL) + destructor = 
vsd_defaultdestructor; + + for (i = 0; i < vsd_nkeys; ++i) + if (vsd_destructor[i] == NULL) + break; + + /* + * if no unused keys, increase the size of the destructor array + */ + if (i == vsd_nkeys) { + if ((nkeys = (vsd_nkeys << 1)) == 0) + nkeys = 1; + vsd_destructor = + (void (**)(void *))vsd_realloc((void *)vsd_destructor, + (size_t)(vsd_nkeys * sizeof (void (*)(void *))), + (size_t)(nkeys * sizeof (void (*)(void *)))); + vsd_nkeys = nkeys; + } + + /* + * allocate the next available unused key + */ + vsd_destructor[i] = destructor; + *keyp = i + 1; + + /* create vsd_list, if it doesn't exist */ + if (vsd_list == NULL) { + vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP); + list_create(vsd_list, sizeof (struct vsd_node), + offsetof(struct vsd_node, vs_nodes)); + } + + mutex_exit(&vsd_lock); +} + +/* + * Destroy a key + * + * Assumes that the caller is preventing vsd_set and vsd_get + * Locks out vsd_create, vsd_destroy, and vsd_free + * May free memory with lock held + */ +void +vsd_destroy(uint_t *keyp) +{ + uint_t key; + struct vsd_node *vsd; + + /* + * protect the key namespace and our destructor lists + */ + mutex_enter(&vsd_lock); + key = *keyp; + *keyp = 0; + + ASSERT(key <= vsd_nkeys); + + /* + * if the key is valid + */ + if (key != 0) { + uint_t k = key - 1; + /* + * for every vnode with VSD, call key's destructor + */ + for (vsd = list_head(vsd_list); vsd != NULL; + vsd = list_next(vsd_list, vsd)) { + /* + * no VSD for key in this vnode + */ + if (key > vsd->vs_nkeys) + continue; + /* + * call destructor for key + */ + if (vsd->vs_value[k] && vsd_destructor[k]) + (*vsd_destructor[k])(vsd->vs_value[k]); + /* + * reset value for key + */ + vsd->vs_value[k] = NULL; + } + /* + * actually free the key (NULL destructor == unused) + */ + vsd_destructor[k] = NULL; + } + + mutex_exit(&vsd_lock); +} + +/* + * Quickly return the per vnode value that was stored with the specified key + * Assumes the caller is protecting key from vsd_create and vsd_destroy + * 
Assumes the caller is holding v_vsd_lock to protect the vsd. + * Returns NULL when key is 0, the vnode has no VSD, or key is greater + * than the vnode's vs_nkeys. + */ +void * +vsd_get(vnode_t *vp, uint_t key) +{ + struct vsd_node *vsd; + + ASSERT(vp != NULL); + ASSERT(mutex_owned(&vp->v_vsd_lock)); + + vsd = vp->v_vsd; + + if (key && vsd != NULL && key <= vsd->vs_nkeys) + return (vsd->vs_value[key - 1]); + return (NULL); +} + +/* + * Set a per vnode value indexed with the specified key + * Assumes the caller is holding v_vsd_lock to protect the vsd. + * Returns EINVAL if key is 0, otherwise 0. + */ +int +vsd_set(vnode_t *vp, uint_t key, void *value) +{ + struct vsd_node *vsd; + + ASSERT(vp != NULL); + ASSERT(mutex_owned(&vp->v_vsd_lock)); + + if (key == 0) + return (EINVAL); + + vsd = vp->v_vsd; + if (vsd == NULL) + vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP); + + /* + * If the vsd was just allocated, vs_nkeys will be 0, so the following + * code won't happen and we will continue down and allocate space for + * the vs_value array. + * If the caller is replacing one value with another, then it is up + * to the caller to free/rele/destroy the previous value (if needed). + */ + if (key <= vsd->vs_nkeys) { + vsd->vs_value[key - 1] = value; + return (0); + } + + ASSERT(key <= vsd_nkeys); + + if (vsd->vs_nkeys == 0) { + mutex_enter(&vsd_lock); /* lock out vsd_destroy() */ + /* + * Link onto list of all VSD nodes.
+ */ + list_insert_head(vsd_list, vsd); + mutex_exit(&vsd_lock); + } + + /* + * Allocate vnode local storage and set the value for key + */ + vsd->vs_value = vsd_realloc(vsd->vs_value, + vsd->vs_nkeys * sizeof (void *), + key * sizeof (void *)); + vsd->vs_nkeys = key; + vsd->vs_value[key - 1] = value; + + return (0); +} + +/* + * Called from vn_free() to run the destructor function for each vsd + * Locks out vsd_create and vsd_destroy + * Assumes that the destructor *DOES NOT* use vsd + */ +void +vsd_free(vnode_t *vp) +{ + int i; + struct vsd_node *vsd = vp->v_vsd; + + if (vsd == NULL) + return; + + if (vsd->vs_nkeys == 0) { + kmem_free(vsd, sizeof (*vsd)); + vp->v_vsd = NULL; + return; + } + + /* + * lock out vsd_create and vsd_destroy, call + * the destructor, and mark the value as destroyed. + */ + mutex_enter(&vsd_lock); + + for (i = 0; i < vsd->vs_nkeys; i++) { + if (vsd->vs_value[i] && vsd_destructor[i]) + (*vsd_destructor[i])(vsd->vs_value[i]); + vsd->vs_value[i] = NULL; + } + + /* + * remove from linked list of VSD nodes + */ + list_remove(vsd_list, vsd); + + mutex_exit(&vsd_lock); + + /* + * free up the VSD + */ + kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *)); + kmem_free(vsd, sizeof (struct vsd_node)); + vp->v_vsd = NULL; +} + +/* + * Allocate a zero-filled buffer of nsize bytes. If old is non-NULL, copy + * osize bytes from it into the new buffer and free old (as osize bytes). + * Returns the new buffer. + */ +static void * +vsd_realloc(void *old, size_t osize, size_t nsize) +{ + void *new; + + new = kmem_zalloc(nsize, KM_SLEEP); + if (old) { + bcopy(old, new, osize); + kmem_free(old, osize); + } + return (new); +} + +/* + * Setup the extensible system attribute for creating a reparse point. + * The symlink data 'target' is validated for proper format of a reparse + * string and a check is also made to make sure the symlink data does not + * point to an existing file. + * + * return 0 if ok else -1.
+ * -1 is returned when any argument is NULL or when reparse_validate() + * rejects target. + */ +static int +fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr) +{ + xoptattr_t *xoap; + + if ((!target) || (!vap) || (!xvattr)) + return (-1); + + /* validate reparse string */ + if (reparse_validate((const char *)target)) + return (-1); + + xva_init(xvattr); + xvattr->xva_vattr = *vap; + xvattr->xva_vattr.va_mask |= AT_XVATTR; + xoap = xva_getxoptattr(xvattr); + ASSERT(xoap); + XVA_SET_REQ(xvattr, XAT_REPARSE); + xoap->xoa_reparse = 1; + + return (0); +} + +/* + * Function to check whether a symlink is a reparse point. + * Return B_TRUE if it is a reparse point, else return B_FALSE + * B_FALSE is also returned when vp is not a VLNK, when the file system + * lacks the VFSFT_XVATTR feature, or when VOP_GETATTR() fails. + */ +boolean_t +vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct) +{ + xvattr_t xvattr; + xoptattr_t *xoap; + + if ((vp->v_type != VLNK) || + !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR))) + return (B_FALSE); + + xva_init(&xvattr); + xoap = xva_getxoptattr(&xvattr); + ASSERT(xoap); + XVA_SET_REQ(&xvattr, XAT_REPARSE); + + if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct)) + return (B_FALSE); + + if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) || + (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE)))) + return (B_FALSE); + + return (xoap->xoa_reparse ? B_TRUE : B_FALSE); +} diff --git a/uts/common/fs/zfs/arc.c b/uts/common/fs/zfs/arc.c new file mode 100644 index 000000000000..a82718e8bc6e --- /dev/null +++ b/uts/common/fs/zfs/arc.c @@ -0,0 +1,4658 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * DVA-based Adjustable Replacement Cache + * + * While much of the theory of operation used here is + * based on the self-tuning, low overhead replacement cache + * presented by Megiddo and Modha at FAST 2003, there are some + * significant differences: + * + * 1. The Megiddo and Modha model assumes any page is evictable. + * Pages in its cache cannot be "locked" into memory. This makes + * the eviction algorithm simple: evict the last page in the list. + * This also makes the performance characteristics easy to reason + * about. Our cache is not so simple. At any given moment, some + * subset of the blocks in the cache are un-evictable because we + * have handed out a reference to them. Blocks are only evictable + * when there are no external references active. This makes + * eviction far more problematic: we choose to evict the evictable + * blocks that are the "lowest" in the list. + * + * There are times when it is not possible to evict the requested + * space. In these circumstances we are unable to adjust the cache + * size. To prevent the cache growing unbounded at these times we + * implement a "cache throttle" that slows the flow of new data + * into the cache until we can make space available. + * + * 2. The Megiddo and Modha model assumes a fixed cache size. + * Pages are evicted when the cache is full and there is a cache + * miss. Our model has a variable sized cache. It grows with + * high use, but also tries to react to memory pressure from the + * operating system: decreasing its size when system memory is + * tight. + * + * 3. The Megiddo and Modha model assumes a fixed page size.
All + * elements of the cache are therefore exactly the same size. So + * when adjusting the cache size following a cache miss, it's simply + * a matter of choosing a single page to evict. In our model, we + * have variable sized cache blocks (ranging from 512 bytes to + * 128K bytes). We therefore choose a set of blocks to evict to make + * space for a cache miss that approximates as closely as possible + * the space used by the new block. + * + * See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache" + * by N. Megiddo & D. Modha, FAST 2003 + */ + +/* + * The locking model: + * + * A new reference to a cache buffer can be obtained in two + * ways: 1) via a hash table lookup using the DVA as a key, + * or 2) via one of the ARC lists. The arc_read() interface + * uses method 1, while the internal arc algorithms for + * adjusting the cache use method 2. We therefore provide two + * types of locks: 1) the hash table lock array, and 2) the + * arc list locks. + * + * Buffers do not have their own mutexes, rather they rely on the + * hash table mutexes for the bulk of their protection (i.e. most + * fields in the arc_buf_hdr_t are protected by these mutexes). + * + * buf_hash_find() returns the appropriate mutex (held) when it + * locates the requested buffer in the hash table. It returns + * NULL for the mutex if the buffer was not in the table. + * + * buf_hash_remove() expects the appropriate hash mutex to be + * already held before it is invoked. + * + * Each arc state also has a mutex which is used to protect the + * buffer list associated with the state. When attempting to + * obtain a hash table lock while holding an arc list lock you + * must use: mutex_tryenter() to avoid deadlock. Also note that + * the active state mutex must be held before the ghost state mutex. + * + * Arc buffers may have an associated eviction callback function. + * This function will be invoked prior to removing the buffer (e.g. + * in arc_do_user_evicts()).
Note however that the data associated + * with the buffer may be evicted prior to the callback. The callback + * must be made with *no locks held* (to prevent deadlock). Additionally, + * the users of callbacks must ensure that their private data is + * protected from simultaneous callbacks from arc_buf_evict() + * and arc_do_user_evicts(). + * + * Note that the majority of the performance stats are manipulated + * with atomic operations. + * + * The L2ARC uses the l2arc_buflist_mtx global mutex for the following: + * + * - L2ARC buflist creation + * - L2ARC buflist eviction + * - L2ARC write completion, which walks L2ARC buflists + * - ARC header destruction, as it removes from L2ARC buflists + * - ARC header release, as it removes from L2ARC buflists + */ + +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/zfs_context.h> +#include <sys/arc.h> +#include <sys/refcount.h> +#include <sys/vdev.h> +#include <sys/vdev_impl.h> +#ifdef _KERNEL +#include <sys/vmsystm.h> +#include <vm/anon.h> +#include <sys/fs/swapnode.h> +#include <sys/dnlc.h> +#endif +#include <sys/callb.h> +#include <sys/kstat.h> +#include <zfs_fletcher.h> + +static kmutex_t arc_reclaim_thr_lock; +static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thread */ +static uint8_t arc_thread_exit; + +extern int zfs_write_limit_shift; +extern uint64_t zfs_write_limit_max; +extern kmutex_t zfs_write_limit_lock; + +#define ARC_REDUCE_DNLC_PERCENT 3 +uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT; + +typedef enum arc_reclaim_strategy { + ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */ + ARC_RECLAIM_CONS /* Conservative reclaim strategy */ +} arc_reclaim_strategy_t; + +/* number of seconds before growing cache again */ +static int arc_grow_retry = 60; + +/* shift of arc_c for calculating both min and max arc_p */ +static int arc_p_min_shift = 4; + +/* log2(fraction of arc to reclaim) */ +static int arc_shrink_shift = 5; + +/* + * minimum lifespan of a prefetch block in clock ticks + *
(initialized in arc_init()) + */ +static int arc_min_prefetch_lifespan; + +static int arc_dead; + +/* + * The arc has filled available memory and has now warmed up. + */ +static boolean_t arc_warm; + +/* + * These tunables are for performance analysis. + */ +uint64_t zfs_arc_max; +uint64_t zfs_arc_min; +uint64_t zfs_arc_meta_limit = 0; +int zfs_arc_grow_retry = 0; +int zfs_arc_shrink_shift = 0; +int zfs_arc_p_min_shift = 0; + +/* + * Note that buffers can be in one of 6 states: + * ARC_anon - anonymous (discussed below) + * ARC_mru - recently used, currently cached + * ARC_mru_ghost - recently used, no longer in cache + * ARC_mfu - frequently used, currently cached + * ARC_mfu_ghost - frequently used, no longer in cache + * ARC_l2c_only - exists in L2ARC but not other states + * When there are no active references to the buffer, they + * are linked onto a list in one of these arc states. These are + * the only buffers that can be evicted or deleted. Within each + * state there are multiple lists, one for meta-data and one for + * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, + * etc.) is tracked separately so that it can be managed more + * explicitly: favored over data, limited explicitly. + * + * Anonymous buffers are buffers that are not associated with + * a DVA. These are buffers that hold dirty block copies + * before they are written to stable storage. By definition, + * they are "ref'd" and are considered part of arc_mru + * that cannot be freed. Generally, they will acquire a DVA + * as they are written and migrate onto the arc_mru list. + * + * The ARC_l2c_only state is for buffers that are in the second + * level ARC but no longer in any of the ARC_m* lists. The second + * level ARC itself may also contain buffers that are in any of + * the ARC_m* states - meaning that a buffer can exist in two + * places.
The reason for the ARC_l2c_only state is to keep the + * buffer header in the hash table, so that reads that hit the + * second level ARC benefit from these fast lookups. + */ + +typedef struct arc_state { + list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */ + uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */ + uint64_t arcs_size; /* total amount of data in this state */ + kmutex_t arcs_mtx; /* protects arcs_list */ +} arc_state_t; + +/* The 6 states: */ +static arc_state_t ARC_anon; +static arc_state_t ARC_mru; +static arc_state_t ARC_mru_ghost; +static arc_state_t ARC_mfu; +static arc_state_t ARC_mfu_ghost; +static arc_state_t ARC_l2c_only; + +typedef struct arc_stats { + kstat_named_t arcstat_hits; + kstat_named_t arcstat_misses; + kstat_named_t arcstat_demand_data_hits; + kstat_named_t arcstat_demand_data_misses; + kstat_named_t arcstat_demand_metadata_hits; + kstat_named_t arcstat_demand_metadata_misses; + kstat_named_t arcstat_prefetch_data_hits; + kstat_named_t arcstat_prefetch_data_misses; + kstat_named_t arcstat_prefetch_metadata_hits; + kstat_named_t arcstat_prefetch_metadata_misses; + kstat_named_t arcstat_mru_hits; + kstat_named_t arcstat_mru_ghost_hits; + kstat_named_t arcstat_mfu_hits; + kstat_named_t arcstat_mfu_ghost_hits; + kstat_named_t arcstat_deleted; + kstat_named_t arcstat_recycle_miss; + kstat_named_t arcstat_mutex_miss; + kstat_named_t arcstat_evict_skip; + kstat_named_t arcstat_evict_l2_cached; + kstat_named_t arcstat_evict_l2_eligible; + kstat_named_t arcstat_evict_l2_ineligible; + kstat_named_t arcstat_hash_elements; + kstat_named_t arcstat_hash_elements_max; + kstat_named_t arcstat_hash_collisions; + kstat_named_t arcstat_hash_chains; + kstat_named_t arcstat_hash_chain_max; + kstat_named_t arcstat_p; + kstat_named_t arcstat_c; + kstat_named_t arcstat_c_min; + kstat_named_t arcstat_c_max; + kstat_named_t arcstat_size; + kstat_named_t arcstat_hdr_size; + kstat_named_t arcstat_data_size; + kstat_named_t arcstat_other_size; +
kstat_named_t arcstat_l2_hits; + kstat_named_t arcstat_l2_misses; + kstat_named_t arcstat_l2_feeds; + kstat_named_t arcstat_l2_rw_clash; + kstat_named_t arcstat_l2_read_bytes; + kstat_named_t arcstat_l2_write_bytes; + kstat_named_t arcstat_l2_writes_sent; + kstat_named_t arcstat_l2_writes_done; + kstat_named_t arcstat_l2_writes_error; + kstat_named_t arcstat_l2_writes_hdr_miss; + kstat_named_t arcstat_l2_evict_lock_retry; + kstat_named_t arcstat_l2_evict_reading; + kstat_named_t arcstat_l2_free_on_write; + kstat_named_t arcstat_l2_abort_lowmem; + kstat_named_t arcstat_l2_cksum_bad; + kstat_named_t arcstat_l2_io_error; + kstat_named_t arcstat_l2_size; + kstat_named_t arcstat_l2_hdr_size; + kstat_named_t arcstat_memory_throttle_count; +} arc_stats_t; + +static arc_stats_t arc_stats = { + { "hits", KSTAT_DATA_UINT64 }, + { "misses", KSTAT_DATA_UINT64 }, + { "demand_data_hits", KSTAT_DATA_UINT64 }, + { "demand_data_misses", KSTAT_DATA_UINT64 }, + { "demand_metadata_hits", KSTAT_DATA_UINT64 }, + { "demand_metadata_misses", KSTAT_DATA_UINT64 }, + { "prefetch_data_hits", KSTAT_DATA_UINT64 }, + { "prefetch_data_misses", KSTAT_DATA_UINT64 }, + { "prefetch_metadata_hits", KSTAT_DATA_UINT64 }, + { "prefetch_metadata_misses", KSTAT_DATA_UINT64 }, + { "mru_hits", KSTAT_DATA_UINT64 }, + { "mru_ghost_hits", KSTAT_DATA_UINT64 }, + { "mfu_hits", KSTAT_DATA_UINT64 }, + { "mfu_ghost_hits", KSTAT_DATA_UINT64 }, + { "deleted", KSTAT_DATA_UINT64 }, + { "recycle_miss", KSTAT_DATA_UINT64 }, + { "mutex_miss", KSTAT_DATA_UINT64 }, + { "evict_skip", KSTAT_DATA_UINT64 }, + { "evict_l2_cached", KSTAT_DATA_UINT64 }, + { "evict_l2_eligible", KSTAT_DATA_UINT64 }, + { "evict_l2_ineligible", KSTAT_DATA_UINT64 }, + { "hash_elements", KSTAT_DATA_UINT64 }, + { "hash_elements_max", KSTAT_DATA_UINT64 }, + { "hash_collisions", KSTAT_DATA_UINT64 }, + { "hash_chains", KSTAT_DATA_UINT64 }, + { "hash_chain_max", KSTAT_DATA_UINT64 }, + { "p", KSTAT_DATA_UINT64 }, + { "c", KSTAT_DATA_UINT64 }, + { "c_min",
KSTAT_DATA_UINT64 }, + { "c_max", KSTAT_DATA_UINT64 }, + { "size", KSTAT_DATA_UINT64 }, + { "hdr_size", KSTAT_DATA_UINT64 }, + { "data_size", KSTAT_DATA_UINT64 }, + { "other_size", KSTAT_DATA_UINT64 }, + { "l2_hits", KSTAT_DATA_UINT64 }, + { "l2_misses", KSTAT_DATA_UINT64 }, + { "l2_feeds", KSTAT_DATA_UINT64 }, + { "l2_rw_clash", KSTAT_DATA_UINT64 }, + { "l2_read_bytes", KSTAT_DATA_UINT64 }, + { "l2_write_bytes", KSTAT_DATA_UINT64 }, + { "l2_writes_sent", KSTAT_DATA_UINT64 }, + { "l2_writes_done", KSTAT_DATA_UINT64 }, + { "l2_writes_error", KSTAT_DATA_UINT64 }, + { "l2_writes_hdr_miss", KSTAT_DATA_UINT64 }, + { "l2_evict_lock_retry", KSTAT_DATA_UINT64 }, + { "l2_evict_reading", KSTAT_DATA_UINT64 }, + { "l2_free_on_write", KSTAT_DATA_UINT64 }, + { "l2_abort_lowmem", KSTAT_DATA_UINT64 }, + { "l2_cksum_bad", KSTAT_DATA_UINT64 }, + { "l2_io_error", KSTAT_DATA_UINT64 }, + { "l2_size", KSTAT_DATA_UINT64 }, + { "l2_hdr_size", KSTAT_DATA_UINT64 }, + { "memory_throttle_count", KSTAT_DATA_UINT64 } +}; + +#define ARCSTAT(stat) (arc_stats.stat.value.ui64) + +/* + * Atomically add val to the named statistic. The expansion carries no + * trailing semicolon so that the macro behaves as a single expression + * statement (e.g. inside an unbraced if/else); callers supply their own. + */ +#define ARCSTAT_INCR(stat, val) \ + atomic_add_64(&arc_stats.stat.value.ui64, (val)) + +#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) +#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) + +#define ARCSTAT_MAX(stat, val) { \ + uint64_t m; \ + while ((val) > (m = arc_stats.stat.value.ui64) && \ + (m != atomic_cas_64(&arc_stats.stat.value.ui64, m, (val)))) \ + continue; \ +} + +#define ARCSTAT_MAXSTAT(stat) \ + ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64) + +/* + * We define a macro to allow ARC hits/misses to be easily broken down by + * two separate conditions, giving a total of four different subtypes for + * each of hits and misses (so eight statistics total).
+ */ +#define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \ + if (cond1) { \ + if (cond2) { \ + ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \ + } else { \ + ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \ + } \ + } else { \ + if (cond2) { \ + ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \ + } else { \ + ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat);\ + } \ + } + +kstat_t *arc_ksp; +static arc_state_t *arc_anon; +static arc_state_t *arc_mru; +static arc_state_t *arc_mru_ghost; +static arc_state_t *arc_mfu; +static arc_state_t *arc_mfu_ghost; +static arc_state_t *arc_l2c_only; + +/* + * There are several ARC variables that are critical to export as kstats -- + * but we don't want to have to grovel around in the kstat whenever we wish to + * manipulate them. For these variables, we therefore define them to be in + * terms of the statistic variable. This assures that we are not introducing + * the possibility of inconsistency by having shadow copies of the variables, + * while still allowing the code to be readable. 
+ */ +#define arc_size ARCSTAT(arcstat_size) /* actual total arc size */ +#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ +#define arc_c ARCSTAT(arcstat_c) /* target size of cache */ +#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ +#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ + +static int arc_no_grow; /* Don't try to grow cache size */ +static uint64_t arc_tempreserve; +static uint64_t arc_loaned_bytes; +static uint64_t arc_meta_used; +static uint64_t arc_meta_limit; +static uint64_t arc_meta_max = 0; + +typedef struct l2arc_buf_hdr l2arc_buf_hdr_t; + +typedef struct arc_callback arc_callback_t; + +struct arc_callback { + void *acb_private; + arc_done_func_t *acb_done; + arc_buf_t *acb_buf; + zio_t *acb_zio_dummy; + arc_callback_t *acb_next; +}; + +typedef struct arc_write_callback arc_write_callback_t; + +struct arc_write_callback { + void *awcb_private; + arc_done_func_t *awcb_ready; + arc_done_func_t *awcb_done; + arc_buf_t *awcb_buf; +}; + +struct arc_buf_hdr { + /* protected by hash lock */ + dva_t b_dva; + uint64_t b_birth; + uint64_t b_cksum0; + + kmutex_t b_freeze_lock; + zio_cksum_t *b_freeze_cksum; + void *b_thawed; + + arc_buf_hdr_t *b_hash_next; + arc_buf_t *b_buf; + uint32_t b_flags; + uint32_t b_datacnt; + + arc_callback_t *b_acb; + kcondvar_t b_cv; + + /* immutable */ + arc_buf_contents_t b_type; + uint64_t b_size; + uint64_t b_spa; + + /* protected by arc state mutex */ + arc_state_t *b_state; + list_node_t b_arc_node; + + /* updated atomically */ + clock_t b_arc_access; + + /* self protecting */ + refcount_t b_refcnt; + + l2arc_buf_hdr_t *b_l2hdr; + list_node_t b_l2node; +}; + +static arc_buf_t *arc_eviction_list; +static kmutex_t arc_eviction_mtx; +static arc_buf_hdr_t arc_eviction_hdr; +static void arc_get_data_buf(arc_buf_t *buf); +static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock); +static int arc_evict_needed(arc_buf_contents_t type); +static void arc_evict_ghost(arc_state_t
*state, uint64_t spa, int64_t bytes); + +static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab); + +#define GHOST_STATE(state) \ + ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ + (state) == arc_l2c_only) + +/* + * Private ARC flags. These flags are private ARC only flags that will show up + * in b_flags in the arc_buf_hdr_t. Some flags are publicly declared, and can + * be passed in as arc_flags in things like arc_read. However, these flags + * should never be passed and should only be set by ARC code. When adding new + * public flags, make sure not to smash the private ones. + */ + +#define ARC_IN_HASH_TABLE (1 << 9) /* this buffer is hashed */ +#define ARC_IO_IN_PROGRESS (1 << 10) /* I/O in progress for buf */ +#define ARC_IO_ERROR (1 << 11) /* I/O failed for buf */ +#define ARC_FREED_IN_READ (1 << 12) /* buf freed while in read */ +#define ARC_BUF_AVAILABLE (1 << 13) /* block not in active use */ +#define ARC_INDIRECT (1 << 14) /* this is an indirect block */ +#define ARC_FREE_IN_PROGRESS (1 << 15) /* hdr about to be freed */ +#define ARC_L2_WRITING (1 << 16) /* L2ARC write in progress */ +#define ARC_L2_EVICTED (1 << 17) /* evicted during I/O */ +#define ARC_L2_WRITE_HEAD (1 << 18) /* head of write list */ + +#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE) +#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS) +#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_IO_ERROR) +#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_PREFETCH) +#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FREED_IN_READ) +#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE) +#define HDR_FREE_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FREE_IN_PROGRESS) +#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_L2CACHE) +#define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS && \ + (hdr)->b_l2hdr != NULL) +#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING) +#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags
& ARC_L2_EVICTED) +#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD) + +/* + * Other sizes + */ + +#define HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t)) + +/* + * Hash table routines + */ + +#define HT_LOCK_PAD 64 + +struct ht_lock { + kmutex_t ht_lock; +#ifdef _KERNEL + unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))]; +#endif +}; + +#define BUF_LOCKS 256 +typedef struct buf_hash_table { + uint64_t ht_mask; + arc_buf_hdr_t **ht_table; + struct ht_lock ht_locks[BUF_LOCKS]; +} buf_hash_table_t; + +static buf_hash_table_t buf_hash_table; + +/* + * BUF_HASH_INDEX maps a (spa, dva, birth) identity to a hash table slot, + * BUF_HASH_LOCK maps a slot index to one of the BUF_LOCKS hash mutexes, and + * HDR_LOCK yields the hash mutex covering the given header. Macro arguments + * are parenthesized so that arbitrary expressions may be passed. + */ +#define BUF_HASH_INDEX(spa, dva, birth) \ + (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask) +#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[(idx) & (BUF_LOCKS-1)]) +#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock)) +#define HDR_LOCK(hdr) \ + (BUF_HASH_LOCK(BUF_HASH_INDEX((hdr)->b_spa, &(hdr)->b_dva, (hdr)->b_birth))) + +uint64_t zfs_crc64_table[256]; + +/* + * Level 2 ARC + */ + +#define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */ +#define L2ARC_HEADROOM 2 /* num of writes */ +#define L2ARC_FEED_SECS 1 /* caching interval secs */ +#define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */ + +#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent) +#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done) + +/* + * L2ARC Performance Tunables + */ +uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* default max write size */ +uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */ +uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */ +uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ +uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */ +boolean_t l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ +boolean_t l2arc_feed_again = B_TRUE; /* turbo warmup */ +boolean_t l2arc_norw = B_TRUE; /* no reads during writes */ + +/* + * L2ARC
Internals + */ +typedef struct l2arc_dev { + vdev_t *l2ad_vdev; /* vdev */ + spa_t *l2ad_spa; /* spa */ + uint64_t l2ad_hand; /* next write location */ + uint64_t l2ad_write; /* desired write size, bytes */ + uint64_t l2ad_boost; /* warmup write boost, bytes */ + uint64_t l2ad_start; /* first addr on device */ + uint64_t l2ad_end; /* last addr on device */ + uint64_t l2ad_evict; /* last addr eviction reached */ + boolean_t l2ad_first; /* first sweep through */ + boolean_t l2ad_writing; /* currently writing */ + list_t *l2ad_buflist; /* buffer list */ + list_node_t l2ad_node; /* device list node */ +} l2arc_dev_t; + +static list_t L2ARC_dev_list; /* device list */ +static list_t *l2arc_dev_list; /* device list pointer */ +static kmutex_t l2arc_dev_mtx; /* device list mutex */ +static l2arc_dev_t *l2arc_dev_last; /* last device used */ +static kmutex_t l2arc_buflist_mtx; /* mutex for all buflists */ +static list_t L2ARC_free_on_write; /* free after write buf list */ +static list_t *l2arc_free_on_write; /* free after write list ptr */ +static kmutex_t l2arc_free_on_write_mtx; /* mutex for list */ +static uint64_t l2arc_ndev; /* number of devices */ + +typedef struct l2arc_read_callback { + arc_buf_t *l2rcb_buf; /* read buffer */ + spa_t *l2rcb_spa; /* spa */ + blkptr_t l2rcb_bp; /* original blkptr */ + zbookmark_t l2rcb_zb; /* original bookmark */ + int l2rcb_flags; /* original flags */ +} l2arc_read_callback_t; + +typedef struct l2arc_write_callback { + l2arc_dev_t *l2wcb_dev; /* device info */ + arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ +} l2arc_write_callback_t; + +struct l2arc_buf_hdr { + /* protected by arc_buf_hdr mutex */ + l2arc_dev_t *b_dev; /* L2ARC device */ + uint64_t b_daddr; /* disk address, offset byte */ +}; + +typedef struct l2arc_data_free { + /* protected by l2arc_free_on_write_mtx */ + void *l2df_data; + size_t l2df_size; + void (*l2df_func)(void *, size_t); + list_node_t l2df_list_node; +} l2arc_data_free_t; + +static kmutex_t 
l2arc_feed_thr_lock; +static kcondvar_t l2arc_feed_thr_cv; +static uint8_t l2arc_thread_exit; + +static void l2arc_read_done(zio_t *zio); +static void l2arc_hdr_stat_add(void); +static void l2arc_hdr_stat_remove(void); + +static uint64_t +buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth) +{ + uint8_t *vdva = (uint8_t *)dva; + uint64_t crc = -1ULL; + int i; + + ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); + + for (i = 0; i < sizeof (dva_t); i++) + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF]; + + crc ^= (spa>>8) ^ birth; + + return (crc); +} + +#define BUF_EMPTY(buf) \ + ((buf)->b_dva.dva_word[0] == 0 && \ + (buf)->b_dva.dva_word[1] == 0 && \ + (buf)->b_birth == 0) + +#define BUF_EQUAL(spa, dva, birth, buf) \ + ((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \ + ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \ + ((buf)->b_birth == birth) && ((buf)->b_spa == spa) + +static void +buf_discard_identity(arc_buf_hdr_t *hdr) +{ + hdr->b_dva.dva_word[0] = 0; + hdr->b_dva.dva_word[1] = 0; + hdr->b_birth = 0; + hdr->b_cksum0 = 0; +} + +static arc_buf_hdr_t * +buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp) +{ + uint64_t idx = BUF_HASH_INDEX(spa, dva, birth); + kmutex_t *hash_lock = BUF_HASH_LOCK(idx); + arc_buf_hdr_t *buf; + + mutex_enter(hash_lock); + for (buf = buf_hash_table.ht_table[idx]; buf != NULL; + buf = buf->b_hash_next) { + if (BUF_EQUAL(spa, dva, birth, buf)) { + *lockp = hash_lock; + return (buf); + } + } + mutex_exit(hash_lock); + *lockp = NULL; + return (NULL); +} + +/* + * Insert an entry into the hash table. If there is already an element + * equal to elem in the hash table, then the already existing element + * will be returned and the new element will not be inserted. + * Otherwise returns NULL. 
+ */ +static arc_buf_hdr_t * +buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp) +{ + uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth); + kmutex_t *hash_lock = BUF_HASH_LOCK(idx); + arc_buf_hdr_t *fbuf; + uint32_t i; + + ASSERT(!HDR_IN_HASH_TABLE(buf)); + *lockp = hash_lock; + mutex_enter(hash_lock); + for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL; + fbuf = fbuf->b_hash_next, i++) { + if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf)) + return (fbuf); + } + + buf->b_hash_next = buf_hash_table.ht_table[idx]; + buf_hash_table.ht_table[idx] = buf; + buf->b_flags |= ARC_IN_HASH_TABLE; + + /* collect some hash table performance data */ + if (i > 0) { + ARCSTAT_BUMP(arcstat_hash_collisions); + if (i == 1) + ARCSTAT_BUMP(arcstat_hash_chains); + + ARCSTAT_MAX(arcstat_hash_chain_max, i); + } + + ARCSTAT_BUMP(arcstat_hash_elements); + ARCSTAT_MAXSTAT(arcstat_hash_elements); + + return (NULL); +} + +static void +buf_hash_remove(arc_buf_hdr_t *buf) +{ + arc_buf_hdr_t *fbuf, **bufp; + uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth); + + ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx))); + ASSERT(HDR_IN_HASH_TABLE(buf)); + + bufp = &buf_hash_table.ht_table[idx]; + while ((fbuf = *bufp) != buf) { + ASSERT(fbuf != NULL); + bufp = &fbuf->b_hash_next; + } + *bufp = buf->b_hash_next; + buf->b_hash_next = NULL; + buf->b_flags &= ~ARC_IN_HASH_TABLE; + + /* collect some hash table performance data */ + ARCSTAT_BUMPDOWN(arcstat_hash_elements); + + if (buf_hash_table.ht_table[idx] && + buf_hash_table.ht_table[idx]->b_hash_next == NULL) + ARCSTAT_BUMPDOWN(arcstat_hash_chains); +} + +/* + * Global data structures and functions for the buf kmem cache. 
+ */ +static kmem_cache_t *hdr_cache; +static kmem_cache_t *buf_cache; + +static void +buf_fini(void) +{ + int i; + + kmem_free(buf_hash_table.ht_table, + (buf_hash_table.ht_mask + 1) * sizeof (void *)); + for (i = 0; i < BUF_LOCKS; i++) + mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); + kmem_cache_destroy(hdr_cache); + kmem_cache_destroy(buf_cache); +} + +/* + * Constructor callback - called when the cache is empty + * and a new buf is requested. + */ +/* ARGSUSED */ +static int +hdr_cons(void *vbuf, void *unused, int kmflag) +{ + arc_buf_hdr_t *buf = vbuf; + + bzero(buf, sizeof (arc_buf_hdr_t)); + refcount_create(&buf->b_refcnt); + cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); + arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); + + return (0); +} + +/* ARGSUSED */ +static int +buf_cons(void *vbuf, void *unused, int kmflag) +{ + arc_buf_t *buf = vbuf; + + bzero(buf, sizeof (arc_buf_t)); + mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL); + arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS); + + return (0); +} + +/* + * Destructor callback - called when a cached buf is + * no longer required. + */ +/* ARGSUSED */ +static void +hdr_dest(void *vbuf, void *unused) +{ + arc_buf_hdr_t *buf = vbuf; + + ASSERT(BUF_EMPTY(buf)); + refcount_destroy(&buf->b_refcnt); + cv_destroy(&buf->b_cv); + mutex_destroy(&buf->b_freeze_lock); + arc_space_return(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); +} + +/* ARGSUSED */ +static void +buf_dest(void *vbuf, void *unused) +{ + arc_buf_t *buf = vbuf; + + mutex_destroy(&buf->b_evict_lock); + rw_destroy(&buf->b_data_lock); + arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS); +} + +/* + * Reclaim callback -- invoked when memory is low. 
 */
/* ARGSUSED */
static void
hdr_recl(void *unused)
{
	dprintf("hdr_recl called\n");
	/*
	 * umem calls the reclaim func when we destroy the buf cache,
	 * which is after we do arc_fini().
	 */
	if (!arc_dead)
		cv_signal(&arc_reclaim_thr_cv);
}

/*
 * Set up the buffer hash table: size and allocate the bucket array, create
 * the hdr and buf kmem caches, populate zfs_crc64_table and initialize the
 * hash locks.
 */
static void
buf_init(void)
{
	uint64_t *ct;
	uint64_t hsize = 1ULL << 12;
	int i, j;

	/*
	 * The hash table is big enough to fill all of physical memory
	 * with an average 64K block size.  The table will take up
	 * totalmem*sizeof(void*)/64K (eg. 128KB/GB with 8-byte pointers).
	 */
	while (hsize * 65536 < physmem * PAGESIZE)
		hsize <<= 1;
retry:
	/* ht_mask is recomputed on every retry so it tracks the final size */
	buf_hash_table.ht_mask = hsize - 1;
	buf_hash_table.ht_table =
	    kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP);
	if (buf_hash_table.ht_table == NULL) {
		/*
		 * Non-sleeping allocation failed: halve the table and retry.
		 * NOTE(review): the lower bound is enforced only by ASSERT;
		 * confirm behavior when ASSERT is compiled out.
		 */
		ASSERT(hsize > (1ULL << 8));
		hsize >>= 1;
		goto retry;
	}

	hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
	    0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
	buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
	    0, buf_cons, buf_dest, NULL, NULL, NULL, 0);

	/* build the 256-entry CRC table, 8 shift/xor rounds per entry */
	for (i = 0; i < 256; i++)
		for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
			*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);

	for (i = 0; i < BUF_LOCKS; i++) {
		mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
		    NULL, MUTEX_DEFAULT, NULL);
	}
}

#define	ARC_MINTIME	(hz>>4) /* 62 ms */

/*
 * Debug check (only when ZFS_DEBUG_MODIFY is set): recompute the fletcher-2
 * checksum of the buffer data and panic if it no longer matches the stored
 * freeze checksum.  Does nothing if no freeze checksum is stored or the
 * header has ARC_IO_ERROR set.
 */
static void
arc_cksum_verify(arc_buf_t *buf)
{
	zio_cksum_t zc;

	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
		return;

	mutex_enter(&buf->b_hdr->b_freeze_lock);
	if (buf->b_hdr->b_freeze_cksum == NULL ||
	    (buf->b_hdr->b_flags & ARC_IO_ERROR)) {
		mutex_exit(&buf->b_hdr->b_freeze_lock);
		return;
	}
	fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
	if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
		panic("buffer modified while frozen!");
	mutex_exit(&buf->b_hdr->b_freeze_lock);
}

/*
 * Return non-zero if the current fletcher-2 checksum of the buffer data
 * equals the stored freeze checksum.
 * NOTE(review): b_freeze_cksum is dereferenced without a NULL check here;
 * presumably callers guarantee it has been computed -- confirm.
 */
static int
arc_cksum_equal(arc_buf_t *buf)
{
	zio_cksum_t zc;
	int equal;

	mutex_enter(&buf->b_hdr->b_freeze_lock);
	fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
	equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc);
	mutex_exit(&buf->b_hdr->b_freeze_lock);

	return (equal);
}

/*
 * Compute and store the freeze checksum for the buffer, unless one is
 * already stored.  Skipped entirely when ZFS_DEBUG_MODIFY is clear and
 * `force' is false.
 */
static void
arc_cksum_compute(arc_buf_t *buf, boolean_t force)
{
	if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY))
		return;

	mutex_enter(&buf->b_hdr->b_freeze_lock);
	if (buf->b_hdr->b_freeze_cksum != NULL) {
		mutex_exit(&buf->b_hdr->b_freeze_lock);
		return;
	}
	buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
	fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
	    buf->b_hdr->b_freeze_cksum);
	mutex_exit(&buf->b_hdr->b_freeze_lock);
}

/*
 * Discard the stored freeze checksum so the buffer may be modified.  With
 * ZFS_DEBUG_MODIFY set this first panics on a non-anonymous buffer or one
 * with I/O in progress, verifies the old checksum, and afterwards
 * (re)allocates the one-byte b_thawed marker.
 */
void
arc_buf_thaw(arc_buf_t *buf)
{
	if (zfs_flags & ZFS_DEBUG_MODIFY) {
		if (buf->b_hdr->b_state != arc_anon)
			panic("modifying non-anon buffer!");
		if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS)
			panic("modifying buffer while i/o in progress!");
		arc_cksum_verify(buf);
	}

	mutex_enter(&buf->b_hdr->b_freeze_lock);
	if (buf->b_hdr->b_freeze_cksum != NULL) {
		kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
		buf->b_hdr->b_freeze_cksum = NULL;
	}

	if (zfs_flags & ZFS_DEBUG_MODIFY) {
		if (buf->b_hdr->b_thawed)
			kmem_free(buf->b_hdr->b_thawed, 1);
		buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP);
	}

	mutex_exit(&buf->b_hdr->b_freeze_lock);
}

/*
 * Record a freeze checksum for the buffer under its hash lock.  A no-op
 * unless ZFS_DEBUG_MODIFY is set.
 */
void
arc_buf_freeze(arc_buf_t *buf)
{
	kmutex_t *hash_lock;

	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
		return;

	hash_lock = HDR_LOCK(buf->b_hdr);
	mutex_enter(hash_lock);

	ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
	    buf->b_hdr->b_state == arc_anon);
	arc_cksum_compute(buf, B_FALSE);
	mutex_exit(hash_lock);
}

/*
 * Add a reference (tagged with `tag') to the header; hash_lock must be held.
 * When this is the first reference on a non-anonymous header, the header is
 * taken off its state's list, the list size is reduced accordingly, and the
 * ARC_PREFETCH flag is cleared.
 */
static void
add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
{
	ASSERT(MUTEX_HELD(hash_lock));

	if ((refcount_add(&ab->b_refcnt, tag) == 1) &&
	    (ab->b_state != arc_anon)) {
		uint64_t delta = ab->b_size * ab->b_datacnt;
		list_t *list = &ab->b_state->arcs_list[ab->b_type];
		uint64_t *size = &ab->b_state->arcs_lsize[ab->b_type];

		ASSERT(!MUTEX_HELD(&ab->b_state->arcs_mtx));
		mutex_enter(&ab->b_state->arcs_mtx);
		ASSERT(list_link_active(&ab->b_arc_node));
		list_remove(list, ab);
		if (GHOST_STATE(ab->b_state)) {
			/* ghost headers carry no data; account b_size once */
			ASSERT3U(ab->b_datacnt, ==, 0);
			ASSERT3P(ab->b_buf, ==, NULL);
			delta = ab->b_size;
		}
		ASSERT(delta > 0);
		ASSERT3U(*size, >=, delta);
		atomic_add_64(size, -delta);
		mutex_exit(&ab->b_state->arcs_mtx);
		/* remove the prefetch flag if we get a reference */
		if (ab->b_flags & ARC_PREFETCH)
			ab->b_flags &= ~ARC_PREFETCH;
	}
}

/*
 * Drop the reference tagged with `tag' and return the remaining count.
 * When the count reaches zero on a non-anonymous header, the header is put
 * at the head of its state's list and the list size is increased.
 * hash_lock must be held unless the header is anonymous.
 */
static int
remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
{
	int cnt;
	arc_state_t *state = ab->b_state;

	ASSERT(state == arc_anon || MUTEX_HELD(hash_lock));
	ASSERT(!GHOST_STATE(state));

	if (((cnt = refcount_remove(&ab->b_refcnt, tag)) == 0) &&
	    (state != arc_anon)) {
		uint64_t *size = &state->arcs_lsize[ab->b_type];

		ASSERT(!MUTEX_HELD(&state->arcs_mtx));
		mutex_enter(&state->arcs_mtx);
		ASSERT(!list_link_active(&ab->b_arc_node));
		list_insert_head(&state->arcs_list[ab->b_type], ab);
		ASSERT(ab->b_datacnt > 0);
		atomic_add_64(size, ab->b_size * ab->b_datacnt);
		mutex_exit(&state->arcs_mtx);
	}
	return (cnt);
}

/*
 * Move the supplied buffer to the indicated state.  The mutex
 * for the buffer must be held by the caller.
 */
static void
arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock)
{
	arc_state_t *old_state = ab->b_state;
	int64_t refcnt = refcount_count(&ab->b_refcnt);
	uint64_t from_delta, to_delta;

	ASSERT(MUTEX_HELD(hash_lock));
	ASSERT(new_state != old_state);
	ASSERT(refcnt == 0 || ab->b_datacnt > 0);
	ASSERT(ab->b_datacnt == 0 || !GHOST_STATE(new_state));
	ASSERT(ab->b_datacnt <= 1 || old_state != arc_anon);

	/* default accounting: total bytes of data attached to the header */
	from_delta = to_delta = ab->b_datacnt * ab->b_size;

	/*
	 * If this buffer is evictable, transfer it from the
	 * old state list to the new state list.
	 */
	if (refcnt == 0) {
		if (old_state != arc_anon) {
			/* only take arcs_mtx if this thread doesn't hold it */
			int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx);
			uint64_t *size = &old_state->arcs_lsize[ab->b_type];

			if (use_mutex)
				mutex_enter(&old_state->arcs_mtx);

			ASSERT(list_link_active(&ab->b_arc_node));
			list_remove(&old_state->arcs_list[ab->b_type], ab);

			/*
			 * If prefetching out of the ghost cache,
			 * we will have a non-zero datacnt.
			 */
			if (GHOST_STATE(old_state) && ab->b_datacnt == 0) {
				/* ghost elements have a ghost size */
				ASSERT(ab->b_buf == NULL);
				from_delta = ab->b_size;
			}
			ASSERT3U(*size, >=, from_delta);
			atomic_add_64(size, -from_delta);

			if (use_mutex)
				mutex_exit(&old_state->arcs_mtx);
		}
		if (new_state != arc_anon) {
			/* same conditional locking as for the old state */
			int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx);
			uint64_t *size = &new_state->arcs_lsize[ab->b_type];

			if (use_mutex)
				mutex_enter(&new_state->arcs_mtx);

			list_insert_head(&new_state->arcs_list[ab->b_type], ab);

			/* ghost elements have a ghost size */
			if (GHOST_STATE(new_state)) {
				ASSERT(ab->b_datacnt == 0);
				ASSERT(ab->b_buf == NULL);
				to_delta = ab->b_size;
			}
			atomic_add_64(size, to_delta);

			if (use_mutex)
				mutex_exit(&new_state->arcs_mtx);
		}
	}

	ASSERT(!BUF_EMPTY(ab));
	/* anonymous headers are not kept in the hash table */
	if (new_state == arc_anon && HDR_IN_HASH_TABLE(ab))
		buf_hash_remove(ab);

	/* adjust state sizes */
	if (to_delta)
		atomic_add_64(&new_state->arcs_size, to_delta);
	if (from_delta) {
		ASSERT3U(old_state->arcs_size, >=, from_delta);
		atomic_add_64(&old_state->arcs_size, -from_delta);
	}
	ab->b_state = new_state;

	/* adjust l2arc hdr stats */
	if (new_state == arc_l2c_only)
		l2arc_hdr_stat_add();
	else if (old_state == arc_l2c_only)
		l2arc_hdr_stat_remove();
}

/*
 * Charge `space' bytes to the per-type arcstat counter selected by `type',
 * and to both arc_meta_used and arc_size.
 */
void
arc_space_consume(uint64_t space, arc_space_type_t type)
{
	ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES);

	switch (type) {
	case ARC_SPACE_DATA:
		ARCSTAT_INCR(arcstat_data_size, space);
		break;
	case ARC_SPACE_OTHER:
		ARCSTAT_INCR(arcstat_other_size, space);
		break;
	case ARC_SPACE_HDRS:
		ARCSTAT_INCR(arcstat_hdr_size, space);
		break;
	case ARC_SPACE_L2HDRS:
		ARCSTAT_INCR(arcstat_l2_hdr_size, space);
		break;
	}

	atomic_add_64(&arc_meta_used, space);
	atomic_add_64(&arc_size, space);
}

/*
 * Inverse of arc_space_consume(): subtract `space' bytes from the per-type
 * counter, arc_meta_used and arc_size.  arc_meta_max is raised to the
 * current arc_meta_used before the subtraction.
 */
void
arc_space_return(uint64_t space, arc_space_type_t type)
{
	ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES);

	switch (type) {
	case ARC_SPACE_DATA:
		ARCSTAT_INCR(arcstat_data_size, -space);
		break;
	case ARC_SPACE_OTHER:
		ARCSTAT_INCR(arcstat_other_size, -space);
		break;
	case ARC_SPACE_HDRS:
		ARCSTAT_INCR(arcstat_hdr_size, -space);
		break;
	case ARC_SPACE_L2HDRS:
		ARCSTAT_INCR(arcstat_l2_hdr_size, -space);
		break;
	}

	ASSERT(arc_meta_used >= space);
	if (arc_meta_max < arc_meta_used)
		arc_meta_max = arc_meta_used;
	atomic_add_64(&arc_meta_used, -space);
	ASSERT(arc_size >= space);
	atomic_add_64(&arc_size, -space);
}

/*
 * Allocate a data buffer of `size' bytes via zio_data_buf_alloc() and add
 * it to arc_size.  Signals the reclaim thread first if
 * arc_evict_needed(ARC_BUFC_DATA) reports that eviction is needed.
 */
void *
arc_data_buf_alloc(uint64_t size)
{
	if (arc_evict_needed(ARC_BUFC_DATA))
		cv_signal(&arc_reclaim_thr_cv);
	atomic_add_64(&arc_size, size);
	return (zio_data_buf_alloc(size));
}

/* Free a buffer obtained from arc_data_buf_alloc() and update arc_size. */
void
arc_data_buf_free(void *buf, uint64_t size)
{
	zio_data_buf_free(buf, size);
	ASSERT(arc_size >= size);
	atomic_add_64(&arc_size, -size);
}

/*
 * Allocate a new anonymous header plus one buf of `size' bytes and the given
 * contents type for `spa'.  The returned buf holds a single reference tagged
 * with `tag'.
 */
arc_buf_t *
arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
{
	arc_buf_hdr_t *hdr;
	arc_buf_t *buf;

	ASSERT3U(size, >, 0);
	hdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
	ASSERT(BUF_EMPTY(hdr));
	hdr->b_size = size;
	hdr->b_type = type;
	hdr->b_spa = spa_guid(spa);
	hdr->b_state = arc_anon;
	hdr->b_arc_access = 0;
	buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
	buf->b_hdr = hdr;
	buf->b_data = NULL;
	buf->b_efunc = NULL;
	buf->b_private = NULL;
	buf->b_next = NULL;
	hdr->b_buf = buf;
	/* b_data is NULL at this point; arc_get_data_buf() is called for buf */
	arc_get_data_buf(buf);
	hdr->b_datacnt = 1;
	hdr->b_flags = 0;
	ASSERT(refcount_is_zero(&hdr->b_refcnt));
	(void) refcount_add(&hdr->b_refcnt, tag);

	return (buf);
}

/* Reference tag used while a buffer is out on loan. */
static char *arc_onloan_tag = "onloan";

/*
 * Loan out an anonymous arc buffer. Loaned buffers are not counted as in
 * flight data by arc_tempreserve_space() until they are "returned". Loaned
 * buffers must be returned to the arc before they can be used by the DMU or
 * freed.
 */
arc_buf_t *
arc_loan_buf(spa_t *spa, int size)
{
	arc_buf_t *buf;

	buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA);

	atomic_add_64(&arc_loaned_bytes, size);
	return (buf);
}

/*
 * Return a loaned arc buffer to the arc.
 */
void
arc_return_buf(arc_buf_t *buf, void *tag)
{
	arc_buf_hdr_t *hdr = buf->b_hdr;

	ASSERT(buf->b_data != NULL);
	/* take the new reference before dropping the loan reference */
	(void) refcount_add(&hdr->b_refcnt, tag);
	(void) refcount_remove(&hdr->b_refcnt, arc_onloan_tag);

	atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
}

/* Detach an arc_buf from a dbuf (tag) */
void
arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
{
	arc_buf_hdr_t *hdr;

	ASSERT(buf->b_data != NULL);
	hdr = buf->b_hdr;
	/* swap the caller's reference for the loan reference */
	(void) refcount_add(&hdr->b_refcnt, arc_onloan_tag);
	(void) refcount_remove(&hdr->b_refcnt, tag);
	buf->b_efunc = NULL;
	buf->b_private = NULL;

	atomic_add_64(&arc_loaned_bytes, hdr->b_size);
}

/*
 * Create another buf on the same (non-anonymous) header as `from', with its
 * own copy of the data.  The new buf is linked at the front of the header's
 * buf list and b_datacnt is incremented.
 */
static arc_buf_t *
arc_buf_clone(arc_buf_t *from)
{
	arc_buf_t *buf;
	arc_buf_hdr_t *hdr = from->b_hdr;
	uint64_t size = hdr->b_size;

	ASSERT(hdr->b_state != arc_anon);

	buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
	buf->b_hdr = hdr;
	buf->b_data = NULL;
	buf->b_efunc = NULL;
	buf->b_private = NULL;
	buf->b_next = hdr->b_buf;
	hdr->b_buf = buf;
	arc_get_data_buf(buf);
	bcopy(from->b_data, buf->b_data, size);
	hdr->b_datacnt += 1;
	return (buf);
}

/*
 * Add a reference (tagged with `tag') to a buf that may already have been
 * evicted.  If b_data is NULL nothing is done; otherwise the reference is
 * added, the access is recorded via arc_access() and hit statistics are
 * updated.
 */
void
arc_buf_add_ref(arc_buf_t *buf, void* tag)
{
	arc_buf_hdr_t *hdr;
	kmutex_t *hash_lock;

	/*
	 * Check to see if this buffer is evicted.  Callers
	 * must verify b_data != NULL to know if the add_ref
	 * was successful.
	 */
	mutex_enter(&buf->b_evict_lock);
	if (buf->b_data == NULL) {
		mutex_exit(&buf->b_evict_lock);
		return;
	}
	hash_lock = HDR_LOCK(buf->b_hdr);
	mutex_enter(hash_lock);
	/* re-read b_hdr now that the hash lock is held */
	hdr = buf->b_hdr;
	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
	mutex_exit(&buf->b_evict_lock);

	ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
	add_reference(hdr, hash_lock, tag);
	DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
	arc_access(hdr, hash_lock);
	mutex_exit(hash_lock);
	ARCSTAT_BUMP(arcstat_hits);
	ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
	    demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
	    data, metadata, hits);
}

/*
 * Free the arc data buffer.  If it is an l2arc write in progress,
 * the buffer is placed on l2arc_free_on_write to be freed later.
 */
static void
arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
    void *data, size_t size)
{
	if (HDR_L2_WRITING(hdr)) {
		l2arc_data_free_t *df;
		/* record what to free and how, for whoever drains the list */
		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
		df->l2df_data = data;
		df->l2df_size = size;
		df->l2df_func = free_func;
		mutex_enter(&l2arc_free_on_write_mtx);
		list_insert_head(l2arc_free_on_write, df);
		mutex_exit(&l2arc_free_on_write_mtx);
		ARCSTAT_BUMP(arcstat_l2_free_on_write);
	} else {
		free_func(data, size);
	}
}

/*
 * Release the data attached to `buf' and, if `all' is set, also unlink the
 * buf from its header and free it.
 *
 *	recycle	- when set, the data block itself is not freed here; only
 *		  the size accounting is undone.
 *	all	- when clear, the function returns after handling the data,
 *		  leaving the buf on the header's list.
 */
static void
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
{
	arc_buf_t **bufp;

	/* free up data associated with the buf */
	if (buf->b_data) {
		arc_state_t *state = buf->b_hdr->b_state;
		uint64_t size = buf->b_hdr->b_size;
		arc_buf_contents_t type = buf->b_hdr->b_type;

		arc_cksum_verify(buf);

		if (!recycle) {
			if (type == ARC_BUFC_METADATA) {
				arc_buf_data_free(buf->b_hdr, zio_buf_free,
				    buf->b_data, size);
				arc_space_return(size, ARC_SPACE_DATA);
			} else {
				ASSERT(type == ARC_BUFC_DATA);
				arc_buf_data_free(buf->b_hdr,
				    zio_data_buf_free, buf->b_data, size);
				ARCSTAT_INCR(arcstat_data_size, -size);
				atomic_add_64(&arc_size, -size);
			}
		}
		/* header is on a state list: adjust that list's size too */
		if (list_link_active(&buf->b_hdr->b_arc_node)) {
			uint64_t *cnt = &state->arcs_lsize[type];

			ASSERT(refcount_is_zero(&buf->b_hdr->b_refcnt));
			ASSERT(state != arc_anon);

			ASSERT3U(*cnt, >=, size);
			atomic_add_64(cnt, -size);
		}
		ASSERT3U(state->arcs_size, >=, size);
		atomic_add_64(&state->arcs_size, -size);
		buf->b_data = NULL;
		ASSERT(buf->b_hdr->b_datacnt > 0);
		buf->b_hdr->b_datacnt -= 1;
	}

	/* only remove the buf if requested */
	if (!all)
		return;

	/* remove the buf from the hdr list */
	for (bufp = &buf->b_hdr->b_buf; *bufp != buf; bufp = &(*bufp)->b_next)
		continue;
	*bufp = buf->b_next;
	buf->b_next = NULL;

	ASSERT(buf->b_efunc == NULL);

	/* clean up the buf */
	buf->b_hdr = NULL;
	kmem_cache_free(buf_cache, buf);
}

/*
 * Destroy an unreferenced, anonymous header with no I/O in progress:
 * detach any L2ARC header, discard its identity, dispose of every attached
 * buf (bufs with an eviction callback are moved to arc_eviction_list
 * instead of being freed), free the freeze checksum and thaw marker, and
 * return the header to hdr_cache.
 */
static void
arc_hdr_destroy(arc_buf_hdr_t *hdr)
{
	ASSERT(refcount_is_zero(&hdr->b_refcnt));
	ASSERT3P(hdr->b_state, ==, arc_anon);
	ASSERT(!HDR_IO_IN_PROGRESS(hdr));
	l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;

	if (l2hdr != NULL) {
		boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx);
		/*
		 * To prevent arc_free() and l2arc_evict() from
		 * attempting to free the same buffer at the same time,
		 * a FREE_IN_PROGRESS flag is given to arc_free() to
		 * give it priority.  l2arc_evict() can't destroy this
		 * header while we are waiting on l2arc_buflist_mtx.
		 *
		 * The hdr may be removed from l2ad_buflist before we
		 * grab l2arc_buflist_mtx, so b_l2hdr is rechecked.
		 */
		if (!buflist_held) {
			mutex_enter(&l2arc_buflist_mtx);
			l2hdr = hdr->b_l2hdr;
		}

		if (l2hdr != NULL) {
			list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
			ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
			kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
			if (hdr->b_state == arc_l2c_only)
				l2arc_hdr_stat_remove();
			hdr->b_l2hdr = NULL;
		}

		if (!buflist_held)
			mutex_exit(&l2arc_buflist_mtx);
	}

	if (!BUF_EMPTY(hdr)) {
		ASSERT(!HDR_IN_HASH_TABLE(hdr));
		buf_discard_identity(hdr);
	}
	while (hdr->b_buf) {
		arc_buf_t *buf = hdr->b_buf;

		if (buf->b_efunc) {
			/*
			 * Free only the data here; the buf itself is queued
			 * on arc_eviction_list (pointing at arc_eviction_hdr)
			 * so its callback can be run later.
			 */
			mutex_enter(&arc_eviction_mtx);
			mutex_enter(&buf->b_evict_lock);
			ASSERT(buf->b_hdr != NULL);
			arc_buf_destroy(hdr->b_buf, FALSE, FALSE);
			hdr->b_buf = buf->b_next;
			buf->b_hdr = &arc_eviction_hdr;
			buf->b_next = arc_eviction_list;
			arc_eviction_list = buf;
			mutex_exit(&buf->b_evict_lock);
			mutex_exit(&arc_eviction_mtx);
		} else {
			arc_buf_destroy(hdr->b_buf, FALSE, TRUE);
		}
	}
	if (hdr->b_freeze_cksum != NULL) {
		kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
		hdr->b_freeze_cksum = NULL;
	}
	if (hdr->b_thawed) {
		kmem_free(hdr->b_thawed, 1);
		hdr->b_thawed = NULL;
	}

	ASSERT(!list_link_active(&hdr->b_arc_node));
	ASSERT3P(hdr->b_hash_next, ==, NULL);
	ASSERT3P(hdr->b_acb, ==, NULL);
	kmem_cache_free(hdr_cache, hdr);
}

/*
 * Drop the reference tagged with `tag' and free the buf.  The buf must have
 * data and no eviction callback.  Three cases are handled: a hashed
 * (non-anonymous) header, an anonymous header with I/O in progress, and a
 * plain anonymous header.
 */
void
arc_buf_free(arc_buf_t *buf, void *tag)
{
	arc_buf_hdr_t *hdr = buf->b_hdr;
	int hashed = hdr->b_state != arc_anon;

	ASSERT(buf->b_efunc == NULL);
	ASSERT(buf->b_data != NULL);

	if (hashed) {
		kmutex_t *hash_lock = HDR_LOCK(hdr);

		mutex_enter(hash_lock);
		hdr = buf->b_hdr;
		ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));

		(void) remove_reference(hdr, hash_lock, tag);
		if (hdr->b_datacnt > 1) {
			arc_buf_destroy(buf, FALSE, TRUE);
		} else {
			/* last buf stays attached; mark it available */
			ASSERT(buf == hdr->b_buf);
			ASSERT(buf->b_efunc == NULL);
			hdr->b_flags |= ARC_BUF_AVAILABLE;
		}
		mutex_exit(hash_lock);
	} else if (HDR_IO_IN_PROGRESS(hdr)) {
		int destroy_hdr;
		/*
		 * We are in the middle of an async write.  Don't destroy
		 * this buffer unless the write completes before we finish
		 * decrementing the reference count.
		 */
		mutex_enter(&arc_eviction_mtx);
		(void) remove_reference(hdr, NULL, tag);
		ASSERT(refcount_is_zero(&hdr->b_refcnt));
		destroy_hdr = !HDR_IO_IN_PROGRESS(hdr);
		mutex_exit(&arc_eviction_mtx);
		if (destroy_hdr)
			arc_hdr_destroy(hdr);
	} else {
		if (remove_reference(hdr, NULL, tag) > 0)
			arc_buf_destroy(buf, FALSE, TRUE);
		else
			arc_hdr_destroy(hdr);
	}
}

/*
 * Drop the reference tagged with `tag'.  Returns non-zero when the buf had
 * no eviction callback (b_efunc == NULL) on entry.  Anonymous buffers are
 * handed to arc_buf_free(); otherwise the buf is destroyed or marked
 * available only when it has no callback.
 */
int
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
{
	arc_buf_hdr_t *hdr = buf->b_hdr;
	kmutex_t *hash_lock = HDR_LOCK(hdr);
	int no_callback = (buf->b_efunc == NULL);

	if (hdr->b_state == arc_anon) {
		ASSERT(hdr->b_datacnt == 1);
		arc_buf_free(buf, tag);
		return (no_callback);
	}

	mutex_enter(hash_lock);
	hdr = buf->b_hdr;
	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
	ASSERT(hdr->b_state != arc_anon);
	ASSERT(buf->b_data != NULL);

	(void) remove_reference(hdr, hash_lock, tag);
	if (hdr->b_datacnt > 1) {
		if (no_callback)
			arc_buf_destroy(buf, FALSE, TRUE);
	} else if (no_callback) {
		ASSERT(hdr->b_buf == buf && buf->b_next == NULL);
		ASSERT(buf->b_efunc == NULL);
		hdr->b_flags |= ARC_BUF_AVAILABLE;
	}
	ASSERT(no_callback || hdr->b_datacnt > 1 ||
	    refcount_is_zero(&hdr->b_refcnt));
	mutex_exit(hash_lock);
	return (no_callback);
}

/* Return the size recorded in the buf's header (b_size). */
int
arc_buf_size(arc_buf_t *buf)
{
	return (buf->b_hdr->b_size);
}

/*
 * Evict buffers from list until we've removed the specified number of
 * bytes.  Move the removed buffers to the appropriate evict state.
 * If the recycle flag is set, then attempt to "recycle" a buffer:
 * - look for a buffer to evict that is `bytes' long.
 * - return the data block from this buffer rather than freeing it.
 * This flag is used by callers that are trying to make space for a
 * new buffer in a full arc cache.
 *
 * This function makes a "best effort".
It skips over any buffers + * it can't get a hash_lock on, and so may not catch all candidates. + * It may also return without evicting as much space as requested. + */ +static void * +arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, + arc_buf_contents_t type) +{ + arc_state_t *evicted_state; + uint64_t bytes_evicted = 0, skipped = 0, missed = 0; + arc_buf_hdr_t *ab, *ab_prev = NULL; + list_t *list = &state->arcs_list[type]; + kmutex_t *hash_lock; + boolean_t have_lock; + void *stolen = NULL; + + ASSERT(state == arc_mru || state == arc_mfu); + + evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; + + mutex_enter(&state->arcs_mtx); + mutex_enter(&evicted_state->arcs_mtx); + + for (ab = list_tail(list); ab; ab = ab_prev) { + ab_prev = list_prev(list, ab); + /* prefetch buffers have a minimum lifespan */ + if (HDR_IO_IN_PROGRESS(ab) || + (spa && ab->b_spa != spa) || + (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) && + ddi_get_lbolt() - ab->b_arc_access < + arc_min_prefetch_lifespan)) { + skipped++; + continue; + } + /* "lookahead" for better eviction candidate */ + if (recycle && ab->b_size != bytes && + ab_prev && ab_prev->b_size == bytes) + continue; + hash_lock = HDR_LOCK(ab); + have_lock = MUTEX_HELD(hash_lock); + if (have_lock || mutex_tryenter(hash_lock)) { + ASSERT3U(refcount_count(&ab->b_refcnt), ==, 0); + ASSERT(ab->b_datacnt > 0); + while (ab->b_buf) { + arc_buf_t *buf = ab->b_buf; + if (!mutex_tryenter(&buf->b_evict_lock)) { + missed += 1; + break; + } + if (buf->b_data) { + bytes_evicted += ab->b_size; + if (recycle && ab->b_type == type && + ab->b_size == bytes && + !HDR_L2_WRITING(ab)) { + stolen = buf->b_data; + recycle = FALSE; + } + } + if (buf->b_efunc) { + mutex_enter(&arc_eviction_mtx); + arc_buf_destroy(buf, + buf->b_data == stolen, FALSE); + ab->b_buf = buf->b_next; + buf->b_hdr = &arc_eviction_hdr; + buf->b_next = arc_eviction_list; + arc_eviction_list = buf; + mutex_exit(&arc_eviction_mtx); + 
mutex_exit(&buf->b_evict_lock); + } else { + mutex_exit(&buf->b_evict_lock); + arc_buf_destroy(buf, + buf->b_data == stolen, TRUE); + } + } + + if (ab->b_l2hdr) { + ARCSTAT_INCR(arcstat_evict_l2_cached, + ab->b_size); + } else { + if (l2arc_write_eligible(ab->b_spa, ab)) { + ARCSTAT_INCR(arcstat_evict_l2_eligible, + ab->b_size); + } else { + ARCSTAT_INCR( + arcstat_evict_l2_ineligible, + ab->b_size); + } + } + + if (ab->b_datacnt == 0) { + arc_change_state(evicted_state, ab, hash_lock); + ASSERT(HDR_IN_HASH_TABLE(ab)); + ab->b_flags |= ARC_IN_HASH_TABLE; + ab->b_flags &= ~ARC_BUF_AVAILABLE; + DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, ab); + } + if (!have_lock) + mutex_exit(hash_lock); + if (bytes >= 0 && bytes_evicted >= bytes) + break; + } else { + missed += 1; + } + } + + mutex_exit(&evicted_state->arcs_mtx); + mutex_exit(&state->arcs_mtx); + + if (bytes_evicted < bytes) + dprintf("only evicted %lld bytes from %x", + (longlong_t)bytes_evicted, state); + + if (skipped) + ARCSTAT_INCR(arcstat_evict_skip, skipped); + + if (missed) + ARCSTAT_INCR(arcstat_mutex_miss, missed); + + /* + * We have just evicted some date into the ghost state, make + * sure we also adjust the ghost state size if necessary. + */ + if (arc_no_grow && + arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size > arc_c) { + int64_t mru_over = arc_anon->arcs_size + arc_mru->arcs_size + + arc_mru_ghost->arcs_size - arc_c; + + if (mru_over > 0 && arc_mru_ghost->arcs_lsize[type] > 0) { + int64_t todelete = + MIN(arc_mru_ghost->arcs_lsize[type], mru_over); + arc_evict_ghost(arc_mru_ghost, NULL, todelete); + } else if (arc_mfu_ghost->arcs_lsize[type] > 0) { + int64_t todelete = MIN(arc_mfu_ghost->arcs_lsize[type], + arc_mru_ghost->arcs_size + + arc_mfu_ghost->arcs_size - arc_c); + arc_evict_ghost(arc_mfu_ghost, NULL, todelete); + } + } + + return (stolen); +} + +/* + * Remove buffers from list until we've removed the specified number of + * bytes. Destroy the buffers that are removed. 
+ */ +static void +arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes) +{ + arc_buf_hdr_t *ab, *ab_prev; + arc_buf_hdr_t marker = { 0 }; + list_t *list = &state->arcs_list[ARC_BUFC_DATA]; + kmutex_t *hash_lock; + uint64_t bytes_deleted = 0; + uint64_t bufs_skipped = 0; + + ASSERT(GHOST_STATE(state)); +top: + mutex_enter(&state->arcs_mtx); + for (ab = list_tail(list); ab; ab = ab_prev) { + ab_prev = list_prev(list, ab); + if (spa && ab->b_spa != spa) + continue; + + /* ignore markers */ + if (ab->b_spa == 0) + continue; + + hash_lock = HDR_LOCK(ab); + /* caller may be trying to modify this buffer, skip it */ + if (MUTEX_HELD(hash_lock)) + continue; + if (mutex_tryenter(hash_lock)) { + ASSERT(!HDR_IO_IN_PROGRESS(ab)); + ASSERT(ab->b_buf == NULL); + ARCSTAT_BUMP(arcstat_deleted); + bytes_deleted += ab->b_size; + + if (ab->b_l2hdr != NULL) { + /* + * This buffer is cached on the 2nd Level ARC; + * don't destroy the header. + */ + arc_change_state(arc_l2c_only, ab, hash_lock); + mutex_exit(hash_lock); + } else { + arc_change_state(arc_anon, ab, hash_lock); + mutex_exit(hash_lock); + arc_hdr_destroy(ab); + } + + DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab); + if (bytes >= 0 && bytes_deleted >= bytes) + break; + } else if (bytes < 0) { + /* + * Insert a list marker and then wait for the + * hash lock to become available. Once its + * available, restart from where we left off. 
+ */ + list_insert_after(list, ab, &marker); + mutex_exit(&state->arcs_mtx); + mutex_enter(hash_lock); + mutex_exit(hash_lock); + mutex_enter(&state->arcs_mtx); + ab_prev = list_prev(list, &marker); + list_remove(list, &marker); + } else + bufs_skipped += 1; + } + mutex_exit(&state->arcs_mtx); + + if (list == &state->arcs_list[ARC_BUFC_DATA] && + (bytes < 0 || bytes_deleted < bytes)) { + list = &state->arcs_list[ARC_BUFC_METADATA]; + goto top; + } + + if (bufs_skipped) { + ARCSTAT_INCR(arcstat_mutex_miss, bufs_skipped); + ASSERT(bytes >= 0); + } + + if (bytes_deleted < bytes) + dprintf("only deleted %lld bytes from %p", + (longlong_t)bytes_deleted, state); +} + +static void +arc_adjust(void) +{ + int64_t adjustment, delta; + + /* + * Adjust MRU size + */ + + adjustment = MIN((int64_t)(arc_size - arc_c), + (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used - + arc_p)); + + if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_DATA] > 0) { + delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_DATA], adjustment); + (void) arc_evict(arc_mru, NULL, delta, FALSE, ARC_BUFC_DATA); + adjustment -= delta; + } + + if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) { + delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment); + (void) arc_evict(arc_mru, NULL, delta, FALSE, + ARC_BUFC_METADATA); + } + + /* + * Adjust MFU size + */ + + adjustment = arc_size - arc_c; + + if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_DATA] > 0) { + delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_DATA]); + (void) arc_evict(arc_mfu, NULL, delta, FALSE, ARC_BUFC_DATA); + adjustment -= delta; + } + + if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) { + int64_t delta = MIN(adjustment, + arc_mfu->arcs_lsize[ARC_BUFC_METADATA]); + (void) arc_evict(arc_mfu, NULL, delta, FALSE, + ARC_BUFC_METADATA); + } + + /* + * Adjust ghost lists + */ + + adjustment = arc_mru->arcs_size + arc_mru_ghost->arcs_size - arc_c; + + if (adjustment > 0 && 
arc_mru_ghost->arcs_size > 0) { + delta = MIN(arc_mru_ghost->arcs_size, adjustment); + arc_evict_ghost(arc_mru_ghost, NULL, delta); + } + + adjustment = + arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size - arc_c; + + if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) { + delta = MIN(arc_mfu_ghost->arcs_size, adjustment); + arc_evict_ghost(arc_mfu_ghost, NULL, delta); + } +} + +static void +arc_do_user_evicts(void) +{ + mutex_enter(&arc_eviction_mtx); + while (arc_eviction_list != NULL) { + arc_buf_t *buf = arc_eviction_list; + arc_eviction_list = buf->b_next; + mutex_enter(&buf->b_evict_lock); + buf->b_hdr = NULL; + mutex_exit(&buf->b_evict_lock); + mutex_exit(&arc_eviction_mtx); + + if (buf->b_efunc != NULL) + VERIFY(buf->b_efunc(buf) == 0); + + buf->b_efunc = NULL; + buf->b_private = NULL; + kmem_cache_free(buf_cache, buf); + mutex_enter(&arc_eviction_mtx); + } + mutex_exit(&arc_eviction_mtx); +} + +/* + * Flush all *evictable* data from the cache for the given spa. + * NOTE: this will not touch "active" (i.e. referenced) data. 
+ */ +void +arc_flush(spa_t *spa) +{ + uint64_t guid = 0; + + if (spa) + guid = spa_guid(spa); + + while (list_head(&arc_mru->arcs_list[ARC_BUFC_DATA])) { + (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA); + if (spa) + break; + } + while (list_head(&arc_mru->arcs_list[ARC_BUFC_METADATA])) { + (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_METADATA); + if (spa) + break; + } + while (list_head(&arc_mfu->arcs_list[ARC_BUFC_DATA])) { + (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_DATA); + if (spa) + break; + } + while (list_head(&arc_mfu->arcs_list[ARC_BUFC_METADATA])) { + (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_METADATA); + if (spa) + break; + } + + arc_evict_ghost(arc_mru_ghost, guid, -1); + arc_evict_ghost(arc_mfu_ghost, guid, -1); + + mutex_enter(&arc_reclaim_thr_lock); + arc_do_user_evicts(); + mutex_exit(&arc_reclaim_thr_lock); + ASSERT(spa || arc_eviction_list == NULL); +} + +void +arc_shrink(void) +{ + if (arc_c > arc_c_min) { + uint64_t to_free; + +#ifdef _KERNEL + to_free = MAX(arc_c >> arc_shrink_shift, ptob(needfree)); +#else + to_free = arc_c >> arc_shrink_shift; +#endif + if (arc_c > arc_c_min + to_free) + atomic_add_64(&arc_c, -to_free); + else + arc_c = arc_c_min; + + atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); + if (arc_c > arc_size) + arc_c = MAX(arc_size, arc_c_min); + if (arc_p > arc_c) + arc_p = (arc_c >> 1); + ASSERT(arc_c >= arc_c_min); + ASSERT((int64_t)arc_p >= 0); + } + + if (arc_size > arc_c) + arc_adjust(); +} + +static int +arc_reclaim_needed(void) +{ + uint64_t extra; + +#ifdef _KERNEL + + if (needfree) + return (1); + + /* + * take 'desfree' extra pages, so we reclaim sooner, rather than later + */ + extra = desfree; + + /* + * check that we're out of range of the pageout scanner. It starts to + * schedule paging if freemem is less than lotsfree and needfree. + * lotsfree is the high-water mark for pageout, and needfree is the + * number of needed free pages. 
We add extra pages here to make sure + * the scanner doesn't start up while we're freeing memory. + */ + if (freemem < lotsfree + needfree + extra) + return (1); + + /* + * check to make sure that swapfs has enough space so that anon + * reservations can still succeed. anon_resvmem() checks that the + * availrmem is greater than swapfs_minfree, and the number of reserved + * swap pages. We also add a bit of extra here just to prevent + * circumstances from getting really dire. + */ + if (availrmem < swapfs_minfree + swapfs_reserve + extra) + return (1); + +#if defined(__i386) + /* + * If we're on an i386 platform, it's possible that we'll exhaust the + * kernel heap space before we ever run out of available physical + * memory. Most checks of the size of the heap_area compare against + * tune.t_minarmem, which is the minimum available real memory that we + * can have in the system. However, this is generally fixed at 25 pages + * which is so low that it's useless. In this comparison, we seek to + * calculate the total heap-size, and reclaim if more than 3/4ths of the + * heap is allocated. (Or, in the calculation, if less than 1/4th is + * free) + */ + if (btop(vmem_size(heap_arena, VMEM_FREE)) < + (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) + return (1); +#endif + +#else + if (spa_get_random(100) == 0) + return (1); +#endif + return (0); +} + +static void +arc_kmem_reap_now(arc_reclaim_strategy_t strat) +{ + size_t i; + kmem_cache_t *prev_cache = NULL; + kmem_cache_t *prev_data_cache = NULL; + extern kmem_cache_t *zio_buf_cache[]; + extern kmem_cache_t *zio_data_buf_cache[]; + +#ifdef _KERNEL + if (arc_meta_used >= arc_meta_limit) { + /* + * We are exceeding our meta-data cache limit. + * Purge some DNLC entries to release holds on meta-data. + */ + dnlc_reduce_cache((void *)(uintptr_t)arc_reduce_dnlc_percent); + } +#if defined(__i386) + /* + * Reclaim unused memory from all kmem caches. 
+ */ + kmem_reap(); +#endif +#endif + + /* + * An aggressive reclamation will shrink the cache size as well as + * reap free buffers from the arc kmem caches. + */ + if (strat == ARC_RECLAIM_AGGR) + arc_shrink(); + + for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { + if (zio_buf_cache[i] != prev_cache) { + prev_cache = zio_buf_cache[i]; + kmem_cache_reap_now(zio_buf_cache[i]); + } + if (zio_data_buf_cache[i] != prev_data_cache) { + prev_data_cache = zio_data_buf_cache[i]; + kmem_cache_reap_now(zio_data_buf_cache[i]); + } + } + kmem_cache_reap_now(buf_cache); + kmem_cache_reap_now(hdr_cache); +} + +static void +arc_reclaim_thread(void) +{ + clock_t growtime = 0; + arc_reclaim_strategy_t last_reclaim = ARC_RECLAIM_CONS; + callb_cpr_t cpr; + + CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG); + + mutex_enter(&arc_reclaim_thr_lock); + while (arc_thread_exit == 0) { + if (arc_reclaim_needed()) { + + if (arc_no_grow) { + if (last_reclaim == ARC_RECLAIM_CONS) { + last_reclaim = ARC_RECLAIM_AGGR; + } else { + last_reclaim = ARC_RECLAIM_CONS; + } + } else { + arc_no_grow = TRUE; + last_reclaim = ARC_RECLAIM_AGGR; + membar_producer(); + } + + /* reset the growth delay for every reclaim */ + growtime = ddi_get_lbolt() + (arc_grow_retry * hz); + + arc_kmem_reap_now(last_reclaim); + arc_warm = B_TRUE; + + } else if (arc_no_grow && ddi_get_lbolt() >= growtime) { + arc_no_grow = FALSE; + } + + arc_adjust(); + + if (arc_eviction_list != NULL) + arc_do_user_evicts(); + + /* block until needed, or one second, whichever is shorter */ + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait(&arc_reclaim_thr_cv, + &arc_reclaim_thr_lock, (ddi_get_lbolt() + hz)); + CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock); + } + + arc_thread_exit = 0; + cv_broadcast(&arc_reclaim_thr_cv); + CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */ + thread_exit(); +} + +/* + * Adapt arc info given the number of bytes we are trying to add and + * the state that we are 
coming from. This function is only called + * when we are adding new content to the cache. + */ +static void +arc_adapt(int bytes, arc_state_t *state) +{ + int mult; + uint64_t arc_p_min = (arc_c >> arc_p_min_shift); + + if (state == arc_l2c_only) + return; + + ASSERT(bytes > 0); + /* + * Adapt the target size of the MRU list: + * - if we just hit in the MRU ghost list, then increase + * the target size of the MRU list. + * - if we just hit in the MFU ghost list, then increase + * the target size of the MFU list by decreasing the + * target size of the MRU list. + */ + if (state == arc_mru_ghost) { + mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ? + 1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size)); + mult = MIN(mult, 10); /* avoid wild arc_p adjustment */ + + arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult); + } else if (state == arc_mfu_ghost) { + uint64_t delta; + + mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ? + 1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size)); + mult = MIN(mult, 10); + + delta = MIN(bytes * mult, arc_p); + arc_p = MAX(arc_p_min, arc_p - delta); + } + ASSERT((int64_t)arc_p >= 0); + + if (arc_reclaim_needed()) { + cv_signal(&arc_reclaim_thr_cv); + return; + } + + if (arc_no_grow) + return; + + if (arc_c >= arc_c_max) + return; + + /* + * If we're within (2 * maxblocksize) bytes of the target + * cache size, increment the target cache size + */ + if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { + atomic_add_64(&arc_c, (int64_t)bytes); + if (arc_c > arc_c_max) + arc_c = arc_c_max; + else if (state == arc_anon) + atomic_add_64(&arc_p, (int64_t)bytes); + if (arc_p > arc_c) + arc_p = arc_c; + } + ASSERT((int64_t)arc_p >= 0); +} + +/* + * Check if the cache has reached its limits and eviction is required + * prior to insert. 
+ */ +static int +arc_evict_needed(arc_buf_contents_t type) +{ + if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) + return (1); + +#ifdef _KERNEL + /* + * If zio data pages are being allocated out of a separate heap segment, + * then enforce that the size of available vmem for this area remains + * above about 1/32nd free. + */ + if (type == ARC_BUFC_DATA && zio_arena != NULL && + vmem_size(zio_arena, VMEM_FREE) < + (vmem_size(zio_arena, VMEM_ALLOC) >> 5)) + return (1); +#endif + + if (arc_reclaim_needed()) + return (1); + + return (arc_size > arc_c); +} + +/* + * The buffer, supplied as the first argument, needs a data block. + * So, if we are at cache max, determine which cache should be victimized. + * We have the following cases: + * + * 1. Insert for MRU, p > sizeof(arc_anon + arc_mru) -> + * In this situation if we're out of space, but the resident size of the MFU is + * under the limit, victimize the MFU cache to satisfy this insertion request. + * + * 2. Insert for MRU, p <= sizeof(arc_anon + arc_mru) -> + * Here, we've used up all of the available space for the MRU, so we need to + * evict from our own cache instead. Evict from the set of resident MRU + * entries. + * + * 3. Insert for MFU (c - p) > sizeof(arc_mfu) -> + * c minus p represents the MFU space in the cache, since p is the size of the + * cache that is dedicated to the MRU. In this situation there's still space on + * the MFU side, so the MRU side needs to be victimized. + * + * 4. Insert for MFU (c - p) < sizeof(arc_mfu) -> + * MFU's resident set is consuming more space than it has been allotted. In + * this situation, we must victimize our own cache, the MFU, for this insertion. + */ +static void +arc_get_data_buf(arc_buf_t *buf) +{ + arc_state_t *state = buf->b_hdr->b_state; + uint64_t size = buf->b_hdr->b_size; + arc_buf_contents_t type = buf->b_hdr->b_type; + + arc_adapt(size, state); + + /* + * We have not yet reached cache maximum size, + * just allocate a new buffer. 
+ */ + if (!arc_evict_needed(type)) { + if (type == ARC_BUFC_METADATA) { + buf->b_data = zio_buf_alloc(size); + arc_space_consume(size, ARC_SPACE_DATA); + } else { + ASSERT(type == ARC_BUFC_DATA); + buf->b_data = zio_data_buf_alloc(size); + ARCSTAT_INCR(arcstat_data_size, size); + atomic_add_64(&arc_size, size); + } + goto out; + } + + /* + * If we are prefetching from the mfu ghost list, this buffer + * will end up on the mru list; so steal space from there. + */ + if (state == arc_mfu_ghost) + state = buf->b_hdr->b_flags & ARC_PREFETCH ? arc_mru : arc_mfu; + else if (state == arc_mru_ghost) + state = arc_mru; + + if (state == arc_mru || state == arc_anon) { + uint64_t mru_used = arc_anon->arcs_size + arc_mru->arcs_size; + state = (arc_mfu->arcs_lsize[type] >= size && + arc_p > mru_used) ? arc_mfu : arc_mru; + } else { + /* MFU cases */ + uint64_t mfu_space = arc_c - arc_p; + state = (arc_mru->arcs_lsize[type] >= size && + mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu; + } + if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) { + if (type == ARC_BUFC_METADATA) { + buf->b_data = zio_buf_alloc(size); + arc_space_consume(size, ARC_SPACE_DATA); + } else { + ASSERT(type == ARC_BUFC_DATA); + buf->b_data = zio_data_buf_alloc(size); + ARCSTAT_INCR(arcstat_data_size, size); + atomic_add_64(&arc_size, size); + } + ARCSTAT_BUMP(arcstat_recycle_miss); + } + ASSERT(buf->b_data != NULL); +out: + /* + * Update the state size. Note that ghost states have a + * "ghost size" and so don't need to be updated. 
+ */ + if (!GHOST_STATE(buf->b_hdr->b_state)) { + arc_buf_hdr_t *hdr = buf->b_hdr; + + atomic_add_64(&hdr->b_state->arcs_size, size); + if (list_link_active(&hdr->b_arc_node)) { + ASSERT(refcount_is_zero(&hdr->b_refcnt)); + atomic_add_64(&hdr->b_state->arcs_lsize[type], size); + } + /* + * If we are growing the cache, and we are adding anonymous + * data, and we have outgrown arc_p, update arc_p + */ + if (arc_size < arc_c && hdr->b_state == arc_anon && + arc_anon->arcs_size + arc_mru->arcs_size > arc_p) + arc_p = MIN(arc_c, arc_p + size); + } +} + +/* + * This routine is called whenever a buffer is accessed. + * NOTE: the hash lock is dropped in this function. + */ +static void +arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) +{ + clock_t now; + + ASSERT(MUTEX_HELD(hash_lock)); + + if (buf->b_state == arc_anon) { + /* + * This buffer is not in the cache, and does not + * appear in our "ghost" list. Add the new buffer + * to the MRU state. + */ + + ASSERT(buf->b_arc_access == 0); + buf->b_arc_access = ddi_get_lbolt(); + DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); + arc_change_state(arc_mru, buf, hash_lock); + + } else if (buf->b_state == arc_mru) { + now = ddi_get_lbolt(); + + /* + * If this buffer is here because of a prefetch, then either: + * - clear the flag if this is a "referencing" read + * (any subsequent access will bump this into the MFU state). + * or + * - move the buffer to the head of the list if this is + * another prefetch (to make it less likely to be evicted). + */ + if ((buf->b_flags & ARC_PREFETCH) != 0) { + if (refcount_count(&buf->b_refcnt) == 0) { + ASSERT(list_link_active(&buf->b_arc_node)); + } else { + buf->b_flags &= ~ARC_PREFETCH; + ARCSTAT_BUMP(arcstat_mru_hits); + } + buf->b_arc_access = now; + return; + } + + /* + * This buffer has been "accessed" only once so far, + * but it is still in the cache. Move it to the MFU + * state. 
+ */ + if (now > buf->b_arc_access + ARC_MINTIME) { + /* + * More than 125ms have passed since we + * instantiated this buffer. Move it to the + * most frequently used state. + */ + buf->b_arc_access = now; + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); + arc_change_state(arc_mfu, buf, hash_lock); + } + ARCSTAT_BUMP(arcstat_mru_hits); + } else if (buf->b_state == arc_mru_ghost) { + arc_state_t *new_state; + /* + * This buffer has been "accessed" recently, but + * was evicted from the cache. Move it to the + * MFU state. + */ + + if (buf->b_flags & ARC_PREFETCH) { + new_state = arc_mru; + if (refcount_count(&buf->b_refcnt) > 0) + buf->b_flags &= ~ARC_PREFETCH; + DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); + } else { + new_state = arc_mfu; + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); + } + + buf->b_arc_access = ddi_get_lbolt(); + arc_change_state(new_state, buf, hash_lock); + + ARCSTAT_BUMP(arcstat_mru_ghost_hits); + } else if (buf->b_state == arc_mfu) { + /* + * This buffer has been accessed more than once and is + * still in the cache. Keep it in the MFU state. + * + * NOTE: an add_reference() that occurred when we did + * the arc_read() will have kicked this off the list. + * If it was a prefetch, we will explicitly move it to + * the head of the list now. + */ + if ((buf->b_flags & ARC_PREFETCH) != 0) { + ASSERT(refcount_count(&buf->b_refcnt) == 0); + ASSERT(list_link_active(&buf->b_arc_node)); + } + ARCSTAT_BUMP(arcstat_mfu_hits); + buf->b_arc_access = ddi_get_lbolt(); + } else if (buf->b_state == arc_mfu_ghost) { + arc_state_t *new_state = arc_mfu; + /* + * This buffer has been accessed more than once but has + * been evicted from the cache. Move it back to the + * MFU state. + */ + + if (buf->b_flags & ARC_PREFETCH) { + /* + * This is a prefetch access... + * move this block back to the MRU state. 
+ */ + ASSERT3U(refcount_count(&buf->b_refcnt), ==, 0); + new_state = arc_mru; + } + + buf->b_arc_access = ddi_get_lbolt(); + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); + arc_change_state(new_state, buf, hash_lock); + + ARCSTAT_BUMP(arcstat_mfu_ghost_hits); + } else if (buf->b_state == arc_l2c_only) { + /* + * This buffer is on the 2nd Level ARC. + */ + + buf->b_arc_access = ddi_get_lbolt(); + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); + arc_change_state(arc_mfu, buf, hash_lock); + } else { + ASSERT(!"invalid arc state"); + } +} + +/* a generic arc_done_func_t which you can use */ +/* ARGSUSED */ +void +arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) +{ + if (zio == NULL || zio->io_error == 0) + bcopy(buf->b_data, arg, buf->b_hdr->b_size); + VERIFY(arc_buf_remove_ref(buf, arg) == 1); +} + +/* a generic arc_done_func_t */ +void +arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) +{ + arc_buf_t **bufp = arg; + if (zio && zio->io_error) { + VERIFY(arc_buf_remove_ref(buf, arg) == 1); + *bufp = NULL; + } else { + *bufp = buf; + ASSERT(buf->b_data); + } +} + +static void +arc_read_done(zio_t *zio) +{ + arc_buf_hdr_t *hdr, *found; + arc_buf_t *buf; + arc_buf_t *abuf; /* buffer we're assigning to callback */ + kmutex_t *hash_lock; + arc_callback_t *callback_list, *acb; + int freeable = FALSE; + + buf = zio->io_private; + hdr = buf->b_hdr; + + /* + * The hdr was inserted into hash-table and removed from lists + * prior to starting I/O. We should find this header, since + * it's in the hash table, and it should be legit since it's + * not possible to evict it during the I/O. The only possible + * reason for it not to be found is if we were freed during the + * read. 
+ */ + found = buf_hash_find(hdr->b_spa, &hdr->b_dva, hdr->b_birth, + &hash_lock); + + ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) || + (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) || + (found == hdr && HDR_L2_READING(hdr))); + + hdr->b_flags &= ~ARC_L2_EVICTED; + if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH)) + hdr->b_flags &= ~ARC_L2CACHE; + + /* byteswap if necessary */ + callback_list = hdr->b_acb; + ASSERT(callback_list != NULL); + if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) { + arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ? + byteswap_uint64_array : + dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap; + func(buf->b_data, hdr->b_size); + } + + arc_cksum_compute(buf, B_FALSE); + + if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) { + /* + * Only call arc_access on anonymous buffers. This is because + * if we've issued an I/O for an evicted buffer, we've already + * called arc_access (to prevent any simultaneous readers from + * getting confused). 
+ */ + arc_access(hdr, hash_lock); + } + + /* create copies of the data buffer for the callers */ + abuf = buf; + for (acb = callback_list; acb; acb = acb->acb_next) { + if (acb->acb_done) { + if (abuf == NULL) + abuf = arc_buf_clone(buf); + acb->acb_buf = abuf; + abuf = NULL; + } + } + hdr->b_acb = NULL; + hdr->b_flags &= ~ARC_IO_IN_PROGRESS; + ASSERT(!HDR_BUF_AVAILABLE(hdr)); + if (abuf == buf) { + ASSERT(buf->b_efunc == NULL); + ASSERT(hdr->b_datacnt == 1); + hdr->b_flags |= ARC_BUF_AVAILABLE; + } + + ASSERT(refcount_is_zero(&hdr->b_refcnt) || callback_list != NULL); + + if (zio->io_error != 0) { + hdr->b_flags |= ARC_IO_ERROR; + if (hdr->b_state != arc_anon) + arc_change_state(arc_anon, hdr, hash_lock); + if (HDR_IN_HASH_TABLE(hdr)) + buf_hash_remove(hdr); + freeable = refcount_is_zero(&hdr->b_refcnt); + } + + /* + * Broadcast before we drop the hash_lock to avoid the possibility + * that the hdr (and hence the cv) might be freed before we get to + * the cv_broadcast(). + */ + cv_broadcast(&hdr->b_cv); + + if (hash_lock) { + mutex_exit(hash_lock); + } else { + /* + * This block was freed while we waited for the read to + * complete. It has been removed from the hash table and + * moved to the anonymous state (so that it won't show up + * in the cache). + */ + ASSERT3P(hdr->b_state, ==, arc_anon); + freeable = refcount_is_zero(&hdr->b_refcnt); + } + + /* execute each callback and free its structure */ + while ((acb = callback_list) != NULL) { + if (acb->acb_done) + acb->acb_done(zio, acb->acb_buf, acb->acb_private); + + if (acb->acb_zio_dummy != NULL) { + acb->acb_zio_dummy->io_error = zio->io_error; + zio_nowait(acb->acb_zio_dummy); + } + + callback_list = acb->acb_next; + kmem_free(acb, sizeof (arc_callback_t)); + } + + if (freeable) + arc_hdr_destroy(hdr); +} + +/* + * "Read" the block at the specified DVA (in bp) via the + * cache. If the block is found in the cache, invoke the provided + * callback immediately and return. 
Note that the `zio' parameter + * in the callback will be NULL in this case, since no IO was + * required. If the block is not in the cache pass the read request + * on to the spa with a substitute callback function, so that the + * requested block will be added to the cache. + * + * If a read request arrives for a block that has a read in-progress, + * either wait for the in-progress read to complete (and return the + * results); or, if this is a read with a "done" func, add a record + * to the read to invoke the "done" func when the read completes, + * and return; or just return. + * + * arc_read_done() will invoke all the requested "done" functions + * for readers of this block. + * + * Normal callers should use arc_read and pass the arc buffer and offset + * for the bp. But if you know you don't need locking, you can use + * arc_read_bp. + */ +int +arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb) +{ + int err; + + if (pbuf == NULL) { + /* + * XXX This happens from traverse callback funcs, for + * the objset_phys_t block. 
+ */ + return (arc_read_nolock(pio, spa, bp, done, private, priority, + zio_flags, arc_flags, zb)); + } + + ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt)); + ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size); + rw_enter(&pbuf->b_data_lock, RW_READER); + + err = arc_read_nolock(pio, spa, bp, done, private, priority, + zio_flags, arc_flags, zb); + rw_exit(&pbuf->b_data_lock); + + return (err); +} + +int +arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb) +{ + arc_buf_hdr_t *hdr; + arc_buf_t *buf; + kmutex_t *hash_lock; + zio_t *rzio; + uint64_t guid = spa_guid(spa); + +top: + hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp), + &hash_lock); + if (hdr && hdr->b_datacnt > 0) { + + *arc_flags |= ARC_CACHED; + + if (HDR_IO_IN_PROGRESS(hdr)) { + + if (*arc_flags & ARC_WAIT) { + cv_wait(&hdr->b_cv, hash_lock); + mutex_exit(hash_lock); + goto top; + } + ASSERT(*arc_flags & ARC_NOWAIT); + + if (done) { + arc_callback_t *acb = NULL; + + acb = kmem_zalloc(sizeof (arc_callback_t), + KM_SLEEP); + acb->acb_done = done; + acb->acb_private = private; + if (pio != NULL) + acb->acb_zio_dummy = zio_null(pio, + spa, NULL, NULL, NULL, zio_flags); + + ASSERT(acb->acb_done != NULL); + acb->acb_next = hdr->b_acb; + hdr->b_acb = acb; + add_reference(hdr, hash_lock, private); + mutex_exit(hash_lock); + return (0); + } + mutex_exit(hash_lock); + return (0); + } + + ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); + + if (done) { + add_reference(hdr, hash_lock, private); + /* + * If this block is already in use, create a new + * copy of the data so that we will be guaranteed + * that arc_release() will always succeed. 
+ */ + buf = hdr->b_buf; + ASSERT(buf); + ASSERT(buf->b_data); + if (HDR_BUF_AVAILABLE(hdr)) { + ASSERT(buf->b_efunc == NULL); + hdr->b_flags &= ~ARC_BUF_AVAILABLE; + } else { + buf = arc_buf_clone(buf); + } + + } else if (*arc_flags & ARC_PREFETCH && + refcount_count(&hdr->b_refcnt) == 0) { + hdr->b_flags |= ARC_PREFETCH; + } + DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); + arc_access(hdr, hash_lock); + if (*arc_flags & ARC_L2CACHE) + hdr->b_flags |= ARC_L2CACHE; + mutex_exit(hash_lock); + ARCSTAT_BUMP(arcstat_hits); + ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), + demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, + data, metadata, hits); + + if (done) + done(NULL, buf, private); + } else { + uint64_t size = BP_GET_LSIZE(bp); + arc_callback_t *acb; + vdev_t *vd = NULL; + uint64_t addr; + boolean_t devw = B_FALSE; + + if (hdr == NULL) { + /* this block is not in the cache */ + arc_buf_hdr_t *exists; + arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp); + buf = arc_buf_alloc(spa, size, private, type); + hdr = buf->b_hdr; + hdr->b_dva = *BP_IDENTITY(bp); + hdr->b_birth = BP_PHYSICAL_BIRTH(bp); + hdr->b_cksum0 = bp->blk_cksum.zc_word[0]; + exists = buf_hash_insert(hdr, &hash_lock); + if (exists) { + /* somebody beat us to the hash insert */ + mutex_exit(hash_lock); + buf_discard_identity(hdr); + (void) arc_buf_remove_ref(buf, private); + goto top; /* restart the IO request */ + } + /* if this is a prefetch, we don't have a reference */ + if (*arc_flags & ARC_PREFETCH) { + (void) remove_reference(hdr, hash_lock, + private); + hdr->b_flags |= ARC_PREFETCH; + } + if (*arc_flags & ARC_L2CACHE) + hdr->b_flags |= ARC_L2CACHE; + if (BP_GET_LEVEL(bp) > 0) + hdr->b_flags |= ARC_INDIRECT; + } else { + /* this block is in the ghost cache */ + ASSERT(GHOST_STATE(hdr->b_state)); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 0); + ASSERT(hdr->b_buf == NULL); + + /* if this is a prefetch, we don't have a reference */ + if (*arc_flags & 
ARC_PREFETCH) + hdr->b_flags |= ARC_PREFETCH; + else + add_reference(hdr, hash_lock, private); + if (*arc_flags & ARC_L2CACHE) + hdr->b_flags |= ARC_L2CACHE; + buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); + buf->b_hdr = hdr; + buf->b_data = NULL; + buf->b_efunc = NULL; + buf->b_private = NULL; + buf->b_next = NULL; + hdr->b_buf = buf; + ASSERT(hdr->b_datacnt == 0); + hdr->b_datacnt = 1; + arc_get_data_buf(buf); + arc_access(hdr, hash_lock); + } + + ASSERT(!GHOST_STATE(hdr->b_state)); + + acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); + acb->acb_done = done; + acb->acb_private = private; + + ASSERT(hdr->b_acb == NULL); + hdr->b_acb = acb; + hdr->b_flags |= ARC_IO_IN_PROGRESS; + + if (HDR_L2CACHE(hdr) && hdr->b_l2hdr != NULL && + (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) { + devw = hdr->b_l2hdr->b_dev->l2ad_writing; + addr = hdr->b_l2hdr->b_daddr; + /* + * Lock out device removal. + */ + if (vdev_is_dead(vd) || + !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER)) + vd = NULL; + } + + mutex_exit(hash_lock); + + ASSERT3U(hdr->b_size, ==, size); + DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp, + uint64_t, size, zbookmark_t *, zb); + ARCSTAT_BUMP(arcstat_misses); + ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), + demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, + data, metadata, misses); + + if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) { + /* + * Read from the L2ARC if the following are true: + * 1. The L2ARC vdev was previously cached. + * 2. This buffer still has L2ARC metadata. + * 3. This buffer isn't currently writing to the L2ARC. + * 4. The L2ARC entry wasn't evicted, which may + * also have invalidated the vdev. + * 5. This isn't prefetch and l2arc_noprefetch is set. 
+ */ + if (hdr->b_l2hdr != NULL && + !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && + !(l2arc_noprefetch && HDR_PREFETCH(hdr))) { + l2arc_read_callback_t *cb; + + DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); + ARCSTAT_BUMP(arcstat_l2_hits); + + cb = kmem_zalloc(sizeof (l2arc_read_callback_t), + KM_SLEEP); + cb->l2rcb_buf = buf; + cb->l2rcb_spa = spa; + cb->l2rcb_bp = *bp; + cb->l2rcb_zb = *zb; + cb->l2rcb_flags = zio_flags; + + /* + * l2arc read. The SCL_L2ARC lock will be + * released by l2arc_read_done(). + */ + rzio = zio_read_phys(pio, vd, addr, size, + buf->b_data, ZIO_CHECKSUM_OFF, + l2arc_read_done, cb, priority, zio_flags | + ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | + ZIO_FLAG_DONT_PROPAGATE | + ZIO_FLAG_DONT_RETRY, B_FALSE); + DTRACE_PROBE2(l2arc__read, vdev_t *, vd, + zio_t *, rzio); + ARCSTAT_INCR(arcstat_l2_read_bytes, size); + + if (*arc_flags & ARC_NOWAIT) { + zio_nowait(rzio); + return (0); + } + + ASSERT(*arc_flags & ARC_WAIT); + if (zio_wait(rzio) == 0) + return (0); + + /* l2arc read error; goto zio_read() */ + } else { + DTRACE_PROBE1(l2arc__miss, + arc_buf_hdr_t *, hdr); + ARCSTAT_BUMP(arcstat_l2_misses); + if (HDR_L2_WRITING(hdr)) + ARCSTAT_BUMP(arcstat_l2_rw_clash); + spa_config_exit(spa, SCL_L2ARC, vd); + } + } else { + if (vd != NULL) + spa_config_exit(spa, SCL_L2ARC, vd); + if (l2arc_ndev != 0) { + DTRACE_PROBE1(l2arc__miss, + arc_buf_hdr_t *, hdr); + ARCSTAT_BUMP(arcstat_l2_misses); + } + } + + rzio = zio_read(pio, spa, bp, buf->b_data, size, + arc_read_done, buf, priority, zio_flags, zb); + + if (*arc_flags & ARC_WAIT) + return (zio_wait(rzio)); + + ASSERT(*arc_flags & ARC_NOWAIT); + zio_nowait(rzio); + } + return (0); +} + +void +arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private) +{ + ASSERT(buf->b_hdr != NULL); + ASSERT(buf->b_hdr->b_state != arc_anon); + ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt) || func == NULL); + ASSERT(buf->b_efunc == NULL); + ASSERT(!HDR_BUF_AVAILABLE(buf->b_hdr)); + + buf->b_efunc = 
func; + buf->b_private = private; +} + +/* + * This is used by the DMU to let the ARC know that a buffer is + * being evicted, so the ARC should clean up. If this arc buf + * is not yet in the evicted state, it will be put there. + * + * Returns 0 if the buf has already been detached from its hdr + * (arc_do_user_evicts() is handling it), and 1 once the buf's + * eviction callback has been run here. + */ +int +arc_buf_evict(arc_buf_t *buf) +{ + arc_buf_hdr_t *hdr; + kmutex_t *hash_lock; + arc_buf_t **bufp; + + mutex_enter(&buf->b_evict_lock); + hdr = buf->b_hdr; + if (hdr == NULL) { + /* + * We are in arc_do_user_evicts(). + */ + ASSERT(buf->b_data == NULL); + mutex_exit(&buf->b_evict_lock); + return (0); + } else if (buf->b_data == NULL) { + arc_buf_t copy = *buf; /* structure assignment */ + /* + * We are on the eviction list; process this buffer now + * but let arc_do_user_evicts() do the reaping. The + * callback is invoked on the stack copy, after + * b_evict_lock has been dropped. + */ + buf->b_efunc = NULL; + mutex_exit(&buf->b_evict_lock); + VERIFY(copy.b_efunc(&copy) == 0); + return (1); + } + hash_lock = HDR_LOCK(hdr); + mutex_enter(hash_lock); + hdr = buf->b_hdr; + ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); + + ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt); + ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); + + /* + * Pull this buffer off of the hdr + */ + bufp = &hdr->b_buf; + while (*bufp != buf) + bufp = &(*bufp)->b_next; + *bufp = buf->b_next; + + ASSERT(buf->b_data != NULL); + arc_buf_destroy(buf, FALSE, FALSE); + + if (hdr->b_datacnt == 0) { + arc_state_t *old_state = hdr->b_state; + arc_state_t *evicted_state; + + ASSERT(hdr->b_buf == NULL); + ASSERT(refcount_is_zero(&hdr->b_refcnt)); + + evicted_state = + (old_state == arc_mru) ? 
arc_mru_ghost : arc_mfu_ghost; + + mutex_enter(&old_state->arcs_mtx); + mutex_enter(&evicted_state->arcs_mtx); + + arc_change_state(evicted_state, hdr, hash_lock); + ASSERT(HDR_IN_HASH_TABLE(hdr)); + hdr->b_flags |= ARC_IN_HASH_TABLE; + hdr->b_flags &= ~ARC_BUF_AVAILABLE; + + mutex_exit(&evicted_state->arcs_mtx); + mutex_exit(&old_state->arcs_mtx); + } + mutex_exit(hash_lock); + mutex_exit(&buf->b_evict_lock); + + VERIFY(buf->b_efunc(buf) == 0); + buf->b_efunc = NULL; + buf->b_private = NULL; + buf->b_hdr = NULL; + buf->b_next = NULL; + kmem_cache_free(buf_cache, buf); + return (1); +} + +/* + * Release this buffer from the cache. This must be done + * after a read and prior to modifying the buffer contents. + * If the buffer has more than one reference, we must make + * a new hdr for the buffer. + */ +void +arc_release(arc_buf_t *buf, void *tag) +{ + arc_buf_hdr_t *hdr; + kmutex_t *hash_lock = NULL; + l2arc_buf_hdr_t *l2hdr; + uint64_t buf_size; + + /* + * It would be nice to assert that if it's DMU metadata (level > + * 0 || it's the dnode file), then it must be syncing context. + * But we don't know that information at this level. + */ + + mutex_enter(&buf->b_evict_lock); + hdr = buf->b_hdr; + + /* this buffer is not on any list */ + ASSERT(refcount_count(&hdr->b_refcnt) > 0); + + if (hdr->b_state == arc_anon) { + /* this buffer is already released */ + ASSERT(buf->b_efunc == NULL); + } else { + hash_lock = HDR_LOCK(hdr); + mutex_enter(hash_lock); + hdr = buf->b_hdr; + ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); + } + + l2hdr = hdr->b_l2hdr; + if (l2hdr) { + mutex_enter(&l2arc_buflist_mtx); + hdr->b_l2hdr = NULL; + buf_size = hdr->b_size; + } + + /* + * Do we have more than one buf? 
+ */ + if (hdr->b_datacnt > 1) { + arc_buf_hdr_t *nhdr; + arc_buf_t **bufp; + uint64_t blksz = hdr->b_size; + uint64_t spa = hdr->b_spa; + arc_buf_contents_t type = hdr->b_type; + uint32_t flags = hdr->b_flags; + + ASSERT(hdr->b_buf != buf || buf->b_next != NULL); + /* + * Pull the data off of this hdr and attach it to + * a new anonymous hdr. + */ + (void) remove_reference(hdr, hash_lock, tag); + bufp = &hdr->b_buf; + while (*bufp != buf) + bufp = &(*bufp)->b_next; + *bufp = buf->b_next; + buf->b_next = NULL; + + ASSERT3U(hdr->b_state->arcs_size, >=, hdr->b_size); + atomic_add_64(&hdr->b_state->arcs_size, -hdr->b_size); + if (refcount_is_zero(&hdr->b_refcnt)) { + uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type]; + ASSERT3U(*size, >=, hdr->b_size); + atomic_add_64(size, -hdr->b_size); + } + hdr->b_datacnt -= 1; + arc_cksum_verify(buf); + + mutex_exit(hash_lock); + + nhdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE); + nhdr->b_size = blksz; + nhdr->b_spa = spa; + nhdr->b_type = type; + nhdr->b_buf = buf; + nhdr->b_state = arc_anon; + nhdr->b_arc_access = 0; + nhdr->b_flags = flags & ARC_L2_WRITING; + nhdr->b_l2hdr = NULL; + nhdr->b_datacnt = 1; + nhdr->b_freeze_cksum = NULL; + (void) refcount_add(&nhdr->b_refcnt, tag); + buf->b_hdr = nhdr; + mutex_exit(&buf->b_evict_lock); + atomic_add_64(&arc_anon->arcs_size, blksz); + } else { + mutex_exit(&buf->b_evict_lock); + ASSERT(refcount_count(&hdr->b_refcnt) == 1); + ASSERT(!list_link_active(&hdr->b_arc_node)); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + if (hdr->b_state != arc_anon) + arc_change_state(arc_anon, hdr, hash_lock); + hdr->b_arc_access = 0; + if (hash_lock) + mutex_exit(hash_lock); + + buf_discard_identity(hdr); + arc_buf_thaw(buf); + } + buf->b_efunc = NULL; + buf->b_private = NULL; + + if (l2hdr) { + list_remove(l2hdr->b_dev->l2ad_buflist, hdr); + kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); + ARCSTAT_INCR(arcstat_l2_size, -buf_size); + mutex_exit(&l2arc_buflist_mtx); + } +} + +/* + * Release this buffer. 
If it does not match the provided BP, fill it + * with that block's contents. + */ +/* ARGSUSED */ +int +arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa, + zbookmark_t *zb) +{ + arc_release(buf, tag); + return (0); +} + +int +arc_released(arc_buf_t *buf) +{ + int released; + + mutex_enter(&buf->b_evict_lock); + released = (buf->b_data != NULL && buf->b_hdr->b_state == arc_anon); + mutex_exit(&buf->b_evict_lock); + return (released); +} + +int +arc_has_callback(arc_buf_t *buf) +{ + int callback; + + mutex_enter(&buf->b_evict_lock); + callback = (buf->b_efunc != NULL); + mutex_exit(&buf->b_evict_lock); + return (callback); +} + +#ifdef ZFS_DEBUG +int +arc_referenced(arc_buf_t *buf) +{ + int referenced; + + mutex_enter(&buf->b_evict_lock); + referenced = (refcount_count(&buf->b_hdr->b_refcnt)); + mutex_exit(&buf->b_evict_lock); + return (referenced); +} +#endif + +static void +arc_write_ready(zio_t *zio) +{ + arc_write_callback_t *callback = zio->io_private; + arc_buf_t *buf = callback->awcb_buf; + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt)); + callback->awcb_ready(zio, buf, callback->awcb_private); + + /* + * If the IO is already in progress, then this is a re-write + * attempt, so we need to thaw and re-compute the cksum. + * It is the responsibility of the callback to handle the + * accounting for any re-write attempt. 
+ */ + if (HDR_IO_IN_PROGRESS(hdr)) { + mutex_enter(&hdr->b_freeze_lock); + if (hdr->b_freeze_cksum != NULL) { + kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t)); + hdr->b_freeze_cksum = NULL; + } + mutex_exit(&hdr->b_freeze_lock); + } + arc_cksum_compute(buf, B_FALSE); + hdr->b_flags |= ARC_IO_IN_PROGRESS; +} + +static void +arc_write_done(zio_t *zio) +{ + arc_write_callback_t *callback = zio->io_private; + arc_buf_t *buf = callback->awcb_buf; + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(hdr->b_acb == NULL); + + if (zio->io_error == 0) { + hdr->b_dva = *BP_IDENTITY(zio->io_bp); + hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp); + hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0]; + } else { + ASSERT(BUF_EMPTY(hdr)); + } + + /* + * If the block to be written was all-zero, we may have + * compressed it away. In this case no write was performed + * so there will be no dva/birth/checksum. The buffer must + * therefore remain anonymous (and uncached). + */ + if (!BUF_EMPTY(hdr)) { + arc_buf_hdr_t *exists; + kmutex_t *hash_lock; + + ASSERT(zio->io_error == 0); + + arc_cksum_verify(buf); + + exists = buf_hash_insert(hdr, &hash_lock); + if (exists) { + /* + * This can only happen if we overwrite for + * sync-to-convergence, because we remove + * buffers from the hash table when we arc_free(). 
+ */ + if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { + if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp)) + panic("bad overwrite, hdr=%p exists=%p", + (void *)hdr, (void *)exists); + ASSERT(refcount_is_zero(&exists->b_refcnt)); + arc_change_state(arc_anon, exists, hash_lock); + mutex_exit(hash_lock); + arc_hdr_destroy(exists); + exists = buf_hash_insert(hdr, &hash_lock); + ASSERT3P(exists, ==, NULL); + } else { + /* Dedup */ + ASSERT(hdr->b_datacnt == 1); + ASSERT(hdr->b_state == arc_anon); + ASSERT(BP_GET_DEDUP(zio->io_bp)); + ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); + } + } + hdr->b_flags &= ~ARC_IO_IN_PROGRESS; + /* if it's not anon, we are doing a scrub */ + if (!exists && hdr->b_state == arc_anon) + arc_access(hdr, hash_lock); + mutex_exit(hash_lock); + } else { + hdr->b_flags &= ~ARC_IO_IN_PROGRESS; + } + + ASSERT(!refcount_is_zero(&hdr->b_refcnt)); + callback->awcb_done(zio, buf, callback->awcb_private); + + kmem_free(callback, sizeof (arc_write_callback_t)); +} + +zio_t * +arc_write(zio_t *pio, spa_t *spa, uint64_t txg, + blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, + arc_done_func_t *ready, arc_done_func_t *done, void *private, + int priority, int zio_flags, const zbookmark_t *zb) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + arc_write_callback_t *callback; + zio_t *zio; + + ASSERT(ready != NULL); + ASSERT(done != NULL); + ASSERT(!HDR_IO_ERROR(hdr)); + ASSERT((hdr->b_flags & ARC_IO_IN_PROGRESS) == 0); + ASSERT(hdr->b_acb == NULL); + if (l2arc) + hdr->b_flags |= ARC_L2CACHE; + callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); + callback->awcb_ready = ready; + callback->awcb_done = done; + callback->awcb_private = private; + callback->awcb_buf = buf; + + zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp, + arc_write_ready, arc_write_done, callback, priority, zio_flags, zb); + + return (zio); +} + +static int +arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg) +{ +#ifdef _KERNEL + uint64_t 
available_memory = ptob(freemem); + static uint64_t page_load = 0; + static uint64_t last_txg = 0; + +#if defined(__i386) + available_memory = + MIN(available_memory, vmem_size(heap_arena, VMEM_FREE)); +#endif + if (available_memory >= zfs_write_limit_max) + return (0); + + if (txg > last_txg) { + last_txg = txg; + page_load = 0; + } + /* + * If we are in pageout, we know that memory is already tight, + * the arc is already going to be evicting, so we just want to + * continue to let page writes occur as quickly as possible. + */ + if (curproc == proc_pageout) { + if (page_load > MAX(ptob(minfree), available_memory) / 4) + return (ERESTART); + /* Note: reserve is inflated, so we deflate */ + page_load += reserve / 8; + return (0); + } else if (page_load > 0 && arc_reclaim_needed()) { + /* memory is low, delay before restarting */ + ARCSTAT_INCR(arcstat_memory_throttle_count, 1); + return (EAGAIN); + } + page_load = 0; + + if (arc_size > arc_c_min) { + uint64_t evictable_memory = + arc_mru->arcs_lsize[ARC_BUFC_DATA] + + arc_mru->arcs_lsize[ARC_BUFC_METADATA] + + arc_mfu->arcs_lsize[ARC_BUFC_DATA] + + arc_mfu->arcs_lsize[ARC_BUFC_METADATA]; + available_memory += MIN(evictable_memory, arc_size - arc_c_min); + } + + if (inflight_data > available_memory / 4) { + ARCSTAT_INCR(arcstat_memory_throttle_count, 1); + return (ERESTART); + } +#endif + return (0); +} + +void +arc_tempreserve_clear(uint64_t reserve) +{ + atomic_add_64(&arc_tempreserve, -reserve); + ASSERT((int64_t)arc_tempreserve >= 0); +} + +int +arc_tempreserve_space(uint64_t reserve, uint64_t txg) +{ + int error; + uint64_t anon_size; + +#ifdef ZFS_DEBUG + /* + * Once in a while, fail for no reason. Everything should cope. 
+ */ + if (spa_get_random(10000) == 0) { + dprintf("forcing random failure\n"); + return (ERESTART); + } +#endif + if (reserve > arc_c/4 && !arc_no_grow) + arc_c = MIN(arc_c_max, reserve * 4); + if (reserve > arc_c) + return (ENOMEM); + + /* + * Don't count loaned bufs as in flight dirty data to prevent long + * network delays from blocking transactions that are ready to be + * assigned to a txg. + */ + anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0); + + /* + * Writes will, almost always, require additional memory allocations + * in order to compress/encrypt/etc the data. We therefor need to + * make sure that there is sufficient available memory for this. + */ + if (error = arc_memory_throttle(reserve, anon_size, txg)) + return (error); + + /* + * Throttle writes when the amount of dirty data in the cache + * gets too large. We try to keep the cache less than half full + * of dirty blocks so that our sync times don't grow too large. + * Note: if two requests come in concurrently, we might let them + * both succeed, when one of them should fail. Not a huge deal. 
+ */ + + if (reserve + arc_tempreserve + anon_size > arc_c / 2 && + anon_size > arc_c / 4) { + dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK " + "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n", + arc_tempreserve>>10, + arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10, + arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10, + reserve>>10, arc_c>>10); + return (ERESTART); + } + atomic_add_64(&arc_tempreserve, reserve); + return (0); +} + +void +arc_init(void) +{ + mutex_init(&arc_reclaim_thr_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL); + + /* Convert seconds to clock ticks */ + arc_min_prefetch_lifespan = 1 * hz; + + /* Start out with 1/8 of all memory */ + arc_c = physmem * PAGESIZE / 8; + +#ifdef _KERNEL + /* + * On architectures where the physical memory can be larger + * than the addressable space (intel in 32-bit mode), we may + * need to limit the cache to 1/8 of VM size. + */ + arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); +#endif + + /* set min cache to 1/32 of all memory, or 64MB, whichever is more */ + arc_c_min = MAX(arc_c / 4, 64<<20); + /* set max to 3/4 of all memory, or all but 1GB, whichever is more */ + if (arc_c * 8 >= 1<<30) + arc_c_max = (arc_c * 8) - (1<<30); + else + arc_c_max = arc_c_min; + arc_c_max = MAX(arc_c * 6, arc_c_max); + + /* + * Allow the tunables to override our calculations if they are + * reasonable (ie. 
over 64MB) + */ + if (zfs_arc_max > 64<<20 && zfs_arc_max < physmem * PAGESIZE) + arc_c_max = zfs_arc_max; + if (zfs_arc_min > 64<<20 && zfs_arc_min <= arc_c_max) + arc_c_min = zfs_arc_min; + + arc_c = arc_c_max; + arc_p = (arc_c >> 1); + + /* limit meta-data to 1/4 of the arc capacity */ + arc_meta_limit = arc_c_max / 4; + + /* Allow the tunable to override if it is reasonable */ + if (zfs_arc_meta_limit > 0 && zfs_arc_meta_limit <= arc_c_max) + arc_meta_limit = zfs_arc_meta_limit; + + if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0) + arc_c_min = arc_meta_limit / 2; + + if (zfs_arc_grow_retry > 0) + arc_grow_retry = zfs_arc_grow_retry; + + if (zfs_arc_shrink_shift > 0) + arc_shrink_shift = zfs_arc_shrink_shift; + + if (zfs_arc_p_min_shift > 0) + arc_p_min_shift = zfs_arc_p_min_shift; + + /* if kmem_flags are set, lets try to use less memory */ + if (kmem_debugging()) + arc_c = arc_c / 2; + if (arc_c < arc_c_min) + arc_c = arc_c_min; + + arc_anon = &ARC_anon; + arc_mru = &ARC_mru; + arc_mru_ghost = &ARC_mru_ghost; + arc_mfu = &ARC_mfu; + arc_mfu_ghost = &ARC_mfu_ghost; + arc_l2c_only = &ARC_l2c_only; + arc_size = 0; + + mutex_init(&arc_anon->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&arc_mru->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&arc_mru_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&arc_mfu->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&arc_mfu_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&arc_l2c_only->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); + + list_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_mru->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, 
b_arc_node)); + list_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + + buf_init(); + + arc_thread_exit = 0; + arc_eviction_list = NULL; + mutex_init(&arc_eviction_mtx, NULL, MUTEX_DEFAULT, NULL); + bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t)); + + arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED, + sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); + + if (arc_ksp != NULL) { + arc_ksp->ks_data = &arc_stats; + kstat_install(arc_ksp); + } + + (void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0, + TS_RUN, minclsyspri); + + arc_dead = FALSE; + arc_warm = B_FALSE; + + if (zfs_write_limit_max == 0) + zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; + else + zfs_write_limit_shift = 0; + mutex_init(&zfs_write_limit_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +arc_fini(void) +{ + mutex_enter(&arc_reclaim_thr_lock); + arc_thread_exit = 1; + while (arc_thread_exit != 0) + cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock); + mutex_exit(&arc_reclaim_thr_lock); + + arc_flush(NULL); + + arc_dead = TRUE; + + if (arc_ksp != NULL) { + kstat_delete(arc_ksp); + arc_ksp = NULL; + } + + mutex_destroy(&arc_eviction_mtx); + mutex_destroy(&arc_reclaim_thr_lock); + cv_destroy(&arc_reclaim_thr_cv); + + list_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]); + 
list_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); + list_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]); + list_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); + list_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]); + list_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); + list_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]); + list_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); + + mutex_destroy(&arc_anon->arcs_mtx); + mutex_destroy(&arc_mru->arcs_mtx); + mutex_destroy(&arc_mru_ghost->arcs_mtx); + mutex_destroy(&arc_mfu->arcs_mtx); + mutex_destroy(&arc_mfu_ghost->arcs_mtx); + mutex_destroy(&arc_l2c_only->arcs_mtx); + + mutex_destroy(&zfs_write_limit_lock); + + buf_fini(); + + ASSERT(arc_loaned_bytes == 0); +} + +/* + * Level 2 ARC + * + * The level 2 ARC (L2ARC) is a cache layer in-between main memory and disk. + * It uses dedicated storage devices to hold cached data, which are populated + * using large infrequent writes. The main role of this cache is to boost + * the performance of random read workloads. The intended L2ARC devices + * include short-stroked disks, solid state disks, and other media with + * substantially faster read latency than disk. + * + * +-----------------------+ + * | ARC | + * +-----------------------+ + * | ^ ^ + * | | | + * l2arc_feed_thread() arc_read() + * | | | + * | l2arc read | + * V | | + * +---------------+ | + * | L2ARC | | + * +---------------+ | + * | ^ | + * l2arc_write() | | + * | | | + * V | | + * +-------+ +-------+ + * | vdev | | vdev | + * | cache | | cache | + * +-------+ +-------+ + * +=========+ .-----. + * : L2ARC : |-_____-| + * : devices : | Disks | + * +=========+ `-_____-' + * + * Read requests are satisfied from the following sources, in order: + * + * 1) ARC + * 2) vdev cache of L2ARC devices + * 3) L2ARC devices + * 4) vdev cache of disks + * 5) disks + * + * Some L2ARC device types exhibit extremely slow write performance. 
+ * To accommodate for this there are some significant differences between + * the L2ARC and traditional cache design: + * + * 1. There is no eviction path from the ARC to the L2ARC. Evictions from + * the ARC behave as usual, freeing buffers and placing headers on ghost + * lists. The ARC does not send buffers to the L2ARC during eviction as + * this would add inflated write latencies for all ARC memory pressure. + * + * 2. The L2ARC attempts to cache data from the ARC before it is evicted. + * It does this by periodically scanning buffers from the eviction-end of + * the MFU and MRU ARC lists, copying them to the L2ARC devices if they are + * not already there. It scans until a headroom of buffers is satisfied, + * which itself is a buffer for ARC eviction. The thread that does this is + * l2arc_feed_thread(), illustrated below; example sizes are included to + * provide a better sense of ratio than this diagram: + * + * head --> tail + * +---------------------+----------+ + * ARC_mfu |:::::#:::::::::::::::|o#o###o###|-->. # already on L2ARC + * +---------------------+----------+ | o L2ARC eligible + * ARC_mru |:#:::::::::::::::::::|#o#ooo####|-->| : ARC buffer + * +---------------------+----------+ | + * 15.9 Gbytes ^ 32 Mbytes | + * headroom | + * l2arc_feed_thread() + * | + * l2arc write hand <--[oooo]--' + * | 8 Mbyte + * | write max + * V + * +==============================+ + * L2ARC dev |####|#|###|###| |####| ... | + * +==============================+ + * 32 Gbytes + * + * 3. If an ARC buffer is copied to the L2ARC but then hit instead of + * evicted, then the L2ARC has cached a buffer much sooner than it probably + * needed to, potentially wasting L2ARC device bandwidth and storage. It is + * safe to say that this is an uncommon case, since buffers at the end of + * the ARC lists have moved there due to inactivity. + * + * 4. If the ARC evicts faster than the L2ARC can maintain a headroom, + * then the L2ARC simply misses copying some buffers. 
This serves as a + * pressure valve to prevent heavy read workloads from both stalling the ARC + * with waits and clogging the L2ARC with writes. This also helps prevent + * the potential for the L2ARC to churn if it attempts to cache content too + * quickly, such as during backups of the entire pool. + * + * 5. After system boot and before the ARC has filled main memory, there are + * no evictions from the ARC and so the tails of the ARC_mfu and ARC_mru + * lists can remain mostly static. Instead of searching from tail of these + * lists as pictured, the l2arc_feed_thread() will search from the list heads + * for eligible buffers, greatly increasing its chance of finding them. + * + * The L2ARC device write speed is also boosted during this time so that + * the L2ARC warms up faster. Since there have been no ARC evictions yet, + * there are no L2ARC reads, and no fear of degrading read performance + * through increased writes. + * + * 6. Writes to the L2ARC devices are grouped and sent in-sequence, so that + * the vdev queue can aggregate them into larger and fewer writes. Each + * device is written to in a rotor fashion, sweeping writes through + * available space then repeating. + * + * 7. The L2ARC does not store dirty content. It never needs to flush + * write buffers back to disk based storage. + * + * 8. If an ARC buffer is written (and dirtied) which also exists in the + * L2ARC, the now stale L2ARC buffer is immediately dropped. 
+ * + * The performance of the L2ARC can be tweaked by a number of tunables, which + * may be necessary for different workloads: + * + * l2arc_write_max max write bytes per interval + * l2arc_write_boost extra write bytes during device warmup + * l2arc_noprefetch skip caching prefetched buffers + * l2arc_headroom number of max device writes to precache + * l2arc_feed_secs seconds between L2ARC writing + * + * Tunables may be removed or added as future performance improvements are + * integrated, and also may become zpool properties. + * + * There are three key functions that control how the L2ARC warms up: + * + * l2arc_write_eligible() check if a buffer is eligible to cache + * l2arc_write_size() calculate how much to write + * l2arc_write_interval() calculate sleep delay between writes + * + * These three functions determine what to write, how much, and how quickly + * to send writes. + */ + +static boolean_t +l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab) +{ + /* + * A buffer is *not* eligible for the L2ARC if it: + * 1. belongs to a different spa. + * 2. is already cached on the L2ARC. + * 3. has an I/O in progress (it may be an incomplete read). + * 4. is flagged not eligible (zfs property). + */ + if (ab->b_spa != spa_guid || ab->b_l2hdr != NULL || + HDR_IO_IN_PROGRESS(ab) || !HDR_L2CACHE(ab)) + return (B_FALSE); + + return (B_TRUE); +} + +static uint64_t +l2arc_write_size(l2arc_dev_t *dev) +{ + uint64_t size; + + size = dev->l2ad_write; + + if (arc_warm == B_FALSE) + size += dev->l2ad_boost; + + return (size); + +} + +static clock_t +l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) +{ + clock_t interval, next, now; + + /* + * If the ARC lists are busy, increase our write rate; if the + * lists are stale, idle back. This is achieved by checking + * how much we previously wrote - if it was more than half of + * what we wanted, schedule the next write much sooner. 
+ */ + if (l2arc_feed_again && wrote > (wanted / 2)) + interval = (hz * l2arc_feed_min_ms) / 1000; + else + interval = hz * l2arc_feed_secs; + + now = ddi_get_lbolt(); + next = MAX(now, MIN(now + interval, began + interval)); + + return (next); +} + +static void +l2arc_hdr_stat_add(void) +{ + ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE + L2HDR_SIZE); + ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE); +} + +static void +l2arc_hdr_stat_remove(void) +{ + ARCSTAT_INCR(arcstat_l2_hdr_size, -(HDR_SIZE + L2HDR_SIZE)); + ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE); +} + +/* + * Cycle through L2ARC devices. This is how L2ARC load balances. + * If a device is returned, this also returns holding the spa config lock. + */ +static l2arc_dev_t * +l2arc_dev_get_next(void) +{ + l2arc_dev_t *first, *next = NULL; + + /* + * Lock out the removal of spas (spa_namespace_lock), then removal + * of cache devices (l2arc_dev_mtx). Once a device has been selected, + * both locks will be dropped and a spa config lock held instead. + */ + mutex_enter(&spa_namespace_lock); + mutex_enter(&l2arc_dev_mtx); + + /* if there are no vdevs, there is nothing to do */ + if (l2arc_ndev == 0) + goto out; + + first = NULL; + next = l2arc_dev_last; + do { + /* loop around the list looking for a non-faulted vdev */ + if (next == NULL) { + next = list_head(l2arc_dev_list); + } else { + next = list_next(l2arc_dev_list, next); + if (next == NULL) + next = list_head(l2arc_dev_list); + } + + /* if we have come back to the start, bail out */ + if (first == NULL) + first = next; + else if (next == first) + break; + + } while (vdev_is_dead(next->l2ad_vdev)); + + /* if we were unable to find any usable vdevs, return NULL */ + if (vdev_is_dead(next->l2ad_vdev)) + next = NULL; + + l2arc_dev_last = next; + +out: + mutex_exit(&l2arc_dev_mtx); + + /* + * Grab the config lock to prevent the 'next' device from being + * removed while we are writing to it. 
+ */ + if (next != NULL) + spa_config_enter(next->l2ad_spa, SCL_L2ARC, next, RW_READER); + mutex_exit(&spa_namespace_lock); + + return (next); +} + +/* + * Free buffers that were tagged for destruction. + */ +static void +l2arc_do_free_on_write() +{ + list_t *buflist; + l2arc_data_free_t *df, *df_prev; + + mutex_enter(&l2arc_free_on_write_mtx); + buflist = l2arc_free_on_write; + + for (df = list_tail(buflist); df; df = df_prev) { + df_prev = list_prev(buflist, df); + ASSERT(df->l2df_data != NULL); + ASSERT(df->l2df_func != NULL); + df->l2df_func(df->l2df_data, df->l2df_size); + list_remove(buflist, df); + kmem_free(df, sizeof (l2arc_data_free_t)); + } + + mutex_exit(&l2arc_free_on_write_mtx); +} + +/* + * A write to a cache device has completed. Update all headers to allow + * reads from these buffers to begin. + */ +static void +l2arc_write_done(zio_t *zio) +{ + l2arc_write_callback_t *cb; + l2arc_dev_t *dev; + list_t *buflist; + arc_buf_hdr_t *head, *ab, *ab_prev; + l2arc_buf_hdr_t *abl2; + kmutex_t *hash_lock; + + cb = zio->io_private; + ASSERT(cb != NULL); + dev = cb->l2wcb_dev; + ASSERT(dev != NULL); + head = cb->l2wcb_head; + ASSERT(head != NULL); + buflist = dev->l2ad_buflist; + ASSERT(buflist != NULL); + DTRACE_PROBE2(l2arc__iodone, zio_t *, zio, + l2arc_write_callback_t *, cb); + + if (zio->io_error != 0) + ARCSTAT_BUMP(arcstat_l2_writes_error); + + mutex_enter(&l2arc_buflist_mtx); + + /* + * All writes completed, or an error was hit. + */ + for (ab = list_prev(buflist, head); ab; ab = ab_prev) { + ab_prev = list_prev(buflist, ab); + + hash_lock = HDR_LOCK(ab); + if (!mutex_tryenter(hash_lock)) { + /* + * This buffer misses out. It may be in a stage + * of eviction. Its ARC_L2_WRITING flag will be + * left set, denying reads to this buffer. + */ + ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss); + continue; + } + + if (zio->io_error != 0) { + /* + * Error - drop L2ARC entry. 
+ */ + list_remove(buflist, ab); + abl2 = ab->b_l2hdr; + ab->b_l2hdr = NULL; + kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); + ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); + } + + /* + * Allow ARC to begin reads to this L2ARC entry. + */ + ab->b_flags &= ~ARC_L2_WRITING; + + mutex_exit(hash_lock); + } + + atomic_inc_64(&l2arc_writes_done); + list_remove(buflist, head); + kmem_cache_free(hdr_cache, head); + mutex_exit(&l2arc_buflist_mtx); + + l2arc_do_free_on_write(); + + kmem_free(cb, sizeof (l2arc_write_callback_t)); +} + +/* + * A read to a cache device completed. Validate buffer contents before + * handing over to the regular ARC routines. + */ +static void +l2arc_read_done(zio_t *zio) +{ + l2arc_read_callback_t *cb; + arc_buf_hdr_t *hdr; + arc_buf_t *buf; + kmutex_t *hash_lock; + int equal; + + ASSERT(zio->io_vd != NULL); + ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); + + spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd); + + cb = zio->io_private; + ASSERT(cb != NULL); + buf = cb->l2rcb_buf; + ASSERT(buf != NULL); + + hash_lock = HDR_LOCK(buf->b_hdr); + mutex_enter(hash_lock); + hdr = buf->b_hdr; + ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); + + /* + * Check this survived the L2ARC journey. + */ + equal = arc_cksum_equal(buf); + if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { + mutex_exit(hash_lock); + zio->io_private = buf; + zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ + zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ + arc_read_done(zio); + } else { + mutex_exit(hash_lock); + /* + * Buffer didn't survive caching. Increment stats and + * reissue to the original storage device. + */ + if (zio->io_error != 0) { + ARCSTAT_BUMP(arcstat_l2_io_error); + } else { + zio->io_error = EIO; + } + if (!equal) + ARCSTAT_BUMP(arcstat_l2_cksum_bad); + + /* + * If there's no waiter, issue an async i/o to the primary + * storage now. If there *is* a waiter, the caller must + * issue the i/o in a context where it's OK to block. 
+ */ + if (zio->io_waiter == NULL) { + zio_t *pio = zio_unique_parent(zio); + + ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); + + zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp, + buf->b_data, zio->io_size, arc_read_done, buf, + zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); + } + } + + kmem_free(cb, sizeof (l2arc_read_callback_t)); +} + +/* + * This is the list priority from which the L2ARC will search for pages to + * cache. This is used within loops (0..3) to cycle through lists in the + * desired order. This order can have a significant effect on cache + * performance. + * + * Currently the metadata lists are hit first, MFU then MRU, followed by + * the data lists. This function returns a locked list, and also returns + * the lock pointer. + */ +static list_t * +l2arc_list_locked(int list_num, kmutex_t **lock) +{ + list_t *list; + + ASSERT(list_num >= 0 && list_num <= 3); + + switch (list_num) { + case 0: + list = &arc_mfu->arcs_list[ARC_BUFC_METADATA]; + *lock = &arc_mfu->arcs_mtx; + break; + case 1: + list = &arc_mru->arcs_list[ARC_BUFC_METADATA]; + *lock = &arc_mru->arcs_mtx; + break; + case 2: + list = &arc_mfu->arcs_list[ARC_BUFC_DATA]; + *lock = &arc_mfu->arcs_mtx; + break; + case 3: + list = &arc_mru->arcs_list[ARC_BUFC_DATA]; + *lock = &arc_mru->arcs_mtx; + break; + } + + ASSERT(!(MUTEX_HELD(*lock))); + mutex_enter(*lock); + return (list); +} + +/* + * Evict buffers from the device write hand to the distance specified in + * bytes. This distance may span populated buffers, it may span nothing. + * This is clearing a region on the L2ARC device ready for writing. + * If the 'all' boolean is set, every buffer is evicted. 
+ */ +static void +l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) +{ + list_t *buflist; + l2arc_buf_hdr_t *abl2; + arc_buf_hdr_t *ab, *ab_prev; + kmutex_t *hash_lock; + uint64_t taddr; + + buflist = dev->l2ad_buflist; + + if (buflist == NULL) + return; + + if (!all && dev->l2ad_first) { + /* + * This is the first sweep through the device. There is + * nothing to evict. + */ + return; + } + + if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) { + /* + * When nearing the end of the device, evict to the end + * before the device write hand jumps to the start. + */ + taddr = dev->l2ad_end; + } else { + taddr = dev->l2ad_hand + distance; + } + DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist, + uint64_t, taddr, boolean_t, all); + +top: + mutex_enter(&l2arc_buflist_mtx); + for (ab = list_tail(buflist); ab; ab = ab_prev) { + ab_prev = list_prev(buflist, ab); + + hash_lock = HDR_LOCK(ab); + if (!mutex_tryenter(hash_lock)) { + /* + * Missed the hash lock. Retry. + */ + ARCSTAT_BUMP(arcstat_l2_evict_lock_retry); + mutex_exit(&l2arc_buflist_mtx); + mutex_enter(hash_lock); + mutex_exit(hash_lock); + goto top; + } + + if (HDR_L2_WRITE_HEAD(ab)) { + /* + * We hit a write head node. Leave it for + * l2arc_write_done(). + */ + list_remove(buflist, ab); + mutex_exit(hash_lock); + continue; + } + + if (!all && ab->b_l2hdr != NULL && + (ab->b_l2hdr->b_daddr > taddr || + ab->b_l2hdr->b_daddr < dev->l2ad_hand)) { + /* + * We've evicted to the target address, + * or the end of the device. + */ + mutex_exit(hash_lock); + break; + } + + if (HDR_FREE_IN_PROGRESS(ab)) { + /* + * Already on the path to destruction. + */ + mutex_exit(hash_lock); + continue; + } + + if (ab->b_state == arc_l2c_only) { + ASSERT(!HDR_L2_READING(ab)); + /* + * This doesn't exist in the ARC. Destroy. + * arc_hdr_destroy() will call list_remove() + * and decrement arcstat_l2_size. 
+ */ + arc_change_state(arc_anon, ab, hash_lock); + arc_hdr_destroy(ab); + } else { + /* + * Invalidate issued or about to be issued + * reads, since we may be about to write + * over this location. + */ + if (HDR_L2_READING(ab)) { + ARCSTAT_BUMP(arcstat_l2_evict_reading); + ab->b_flags |= ARC_L2_EVICTED; + } + + /* + * Tell ARC this no longer exists in L2ARC. + */ + if (ab->b_l2hdr != NULL) { + abl2 = ab->b_l2hdr; + ab->b_l2hdr = NULL; + kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); + ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); + } + list_remove(buflist, ab); + + /* + * This may have been leftover after a + * failed write. + */ + ab->b_flags &= ~ARC_L2_WRITING; + } + mutex_exit(hash_lock); + } + mutex_exit(&l2arc_buflist_mtx); + + vdev_space_update(dev->l2ad_vdev, -(taddr - dev->l2ad_evict), 0, 0); + dev->l2ad_evict = taddr; +} + +/* + * Find and write ARC buffers to the L2ARC device. + * + * An ARC_L2_WRITING flag is set so that the L2ARC buffers are not valid + * for reading until they have completed writing. + */ +static uint64_t +l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) +{ + arc_buf_hdr_t *ab, *ab_prev, *head; + l2arc_buf_hdr_t *hdrl2; + list_t *list; + uint64_t passed_sz, write_sz, buf_sz, headroom; + void *buf_data; + kmutex_t *hash_lock, *list_lock; + boolean_t have_lock, full; + l2arc_write_callback_t *cb; + zio_t *pio, *wzio; + uint64_t guid = spa_guid(spa); + + ASSERT(dev->l2ad_vdev != NULL); + + pio = NULL; + write_sz = 0; + full = B_FALSE; + head = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE); + head->b_flags |= ARC_L2_WRITE_HEAD; + + /* + * Copy buffers for L2ARC writing. + */ + mutex_enter(&l2arc_buflist_mtx); + for (int try = 0; try <= 3; try++) { + list = l2arc_list_locked(try, &list_lock); + passed_sz = 0; + + /* + * L2ARC fast warmup. + * + * Until the ARC is warm and starts to evict, read from the + * head of the ARC lists rather than the tail. 
+ */ + headroom = target_sz * l2arc_headroom; + if (arc_warm == B_FALSE) + ab = list_head(list); + else + ab = list_tail(list); + + for (; ab; ab = ab_prev) { + if (arc_warm == B_FALSE) + ab_prev = list_next(list, ab); + else + ab_prev = list_prev(list, ab); + + hash_lock = HDR_LOCK(ab); + have_lock = MUTEX_HELD(hash_lock); + if (!have_lock && !mutex_tryenter(hash_lock)) { + /* + * Skip this buffer rather than waiting. + */ + continue; + } + + passed_sz += ab->b_size; + if (passed_sz > headroom) { + /* + * Searched too far. + */ + mutex_exit(hash_lock); + break; + } + + if (!l2arc_write_eligible(guid, ab)) { + mutex_exit(hash_lock); + continue; + } + + if ((write_sz + ab->b_size) > target_sz) { + full = B_TRUE; + mutex_exit(hash_lock); + break; + } + + if (pio == NULL) { + /* + * Insert a dummy header on the buflist so + * l2arc_write_done() can find where the + * write buffers begin without searching. + */ + list_insert_head(dev->l2ad_buflist, head); + + cb = kmem_alloc( + sizeof (l2arc_write_callback_t), KM_SLEEP); + cb->l2wcb_dev = dev; + cb->l2wcb_head = head; + pio = zio_root(spa, l2arc_write_done, cb, + ZIO_FLAG_CANFAIL); + } + + /* + * Create and add a new L2ARC header. + */ + hdrl2 = kmem_zalloc(sizeof (l2arc_buf_hdr_t), KM_SLEEP); + hdrl2->b_dev = dev; + hdrl2->b_daddr = dev->l2ad_hand; + + ab->b_flags |= ARC_L2_WRITING; + ab->b_l2hdr = hdrl2; + list_insert_head(dev->l2ad_buflist, ab); + buf_data = ab->b_buf->b_data; + buf_sz = ab->b_size; + + /* + * Compute and store the buffer cksum before + * writing. On debug the cksum is verified first. 
+ */ + arc_cksum_verify(ab->b_buf); + arc_cksum_compute(ab->b_buf, B_TRUE); + + mutex_exit(hash_lock); + + wzio = zio_write_phys(pio, dev->l2ad_vdev, + dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, + NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, + ZIO_FLAG_CANFAIL, B_FALSE); + + DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, + zio_t *, wzio); + (void) zio_nowait(wzio); + + /* + * Keep the clock hand suitably device-aligned. + */ + buf_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); + + write_sz += buf_sz; + dev->l2ad_hand += buf_sz; + } + + mutex_exit(list_lock); + + if (full == B_TRUE) + break; + } + mutex_exit(&l2arc_buflist_mtx); + + if (pio == NULL) { + ASSERT3U(write_sz, ==, 0); + kmem_cache_free(hdr_cache, head); + return (0); + } + + ASSERT3U(write_sz, <=, target_sz); + ARCSTAT_BUMP(arcstat_l2_writes_sent); + ARCSTAT_INCR(arcstat_l2_write_bytes, write_sz); + ARCSTAT_INCR(arcstat_l2_size, write_sz); + vdev_space_update(dev->l2ad_vdev, write_sz, 0, 0); + + /* + * Bump device hand to the device start if it is approaching the end. + * l2arc_evict() will already have evicted ahead for this case. + */ + if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { + vdev_space_update(dev->l2ad_vdev, + dev->l2ad_end - dev->l2ad_hand, 0, 0); + dev->l2ad_hand = dev->l2ad_start; + dev->l2ad_evict = dev->l2ad_start; + dev->l2ad_first = B_FALSE; + } + + dev->l2ad_writing = B_TRUE; + (void) zio_wait(pio); + dev->l2ad_writing = B_FALSE; + + return (write_sz); +} + +/* + * This thread feeds the L2ARC at regular intervals. This is the beating + * heart of the L2ARC. 
+ */ +static void +l2arc_feed_thread(void) +{ + callb_cpr_t cpr; + l2arc_dev_t *dev; + spa_t *spa; + uint64_t size, wrote; + clock_t begin, next = ddi_get_lbolt(); + + CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG); + + mutex_enter(&l2arc_feed_thr_lock); + + while (l2arc_thread_exit == 0) { + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, + next); + CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); + next = ddi_get_lbolt() + hz; + + /* + * Quick check for L2ARC devices. + */ + mutex_enter(&l2arc_dev_mtx); + if (l2arc_ndev == 0) { + mutex_exit(&l2arc_dev_mtx); + continue; + } + mutex_exit(&l2arc_dev_mtx); + begin = ddi_get_lbolt(); + + /* + * This selects the next l2arc device to write to, and in + * doing so the next spa to feed from: dev->l2ad_spa. This + * will return NULL if there are now no l2arc devices or if + * they are all faulted. + * + * If a device is returned, its spa's config lock is also + * held to prevent device removal. l2arc_dev_get_next() + * will grab and release l2arc_dev_mtx. + */ + if ((dev = l2arc_dev_get_next()) == NULL) + continue; + + spa = dev->l2ad_spa; + ASSERT(spa != NULL); + + /* + * If the pool is read-only then force the feed thread to + * sleep a little longer. + */ + if (!spa_writeable(spa)) { + next = ddi_get_lbolt() + 5 * l2arc_feed_secs * hz; + spa_config_exit(spa, SCL_L2ARC, dev); + continue; + } + + /* + * Avoid contributing to memory pressure. + */ + if (arc_reclaim_needed()) { + ARCSTAT_BUMP(arcstat_l2_abort_lowmem); + spa_config_exit(spa, SCL_L2ARC, dev); + continue; + } + + ARCSTAT_BUMP(arcstat_l2_feeds); + + size = l2arc_write_size(dev); + + /* + * Evict L2ARC buffers that will be overwritten. + */ + l2arc_evict(dev, size, B_FALSE); + + /* + * Write ARC buffers. + */ + wrote = l2arc_write_buffers(spa, dev, size); + + /* + * Calculate interval between writes. 
+ */ + next = l2arc_write_interval(begin, size, wrote); + spa_config_exit(spa, SCL_L2ARC, dev); + } + + l2arc_thread_exit = 0; + cv_broadcast(&l2arc_feed_thr_cv); + CALLB_CPR_EXIT(&cpr); /* drops l2arc_feed_thr_lock */ + thread_exit(); +} + +boolean_t +l2arc_vdev_present(vdev_t *vd) +{ + l2arc_dev_t *dev; + + mutex_enter(&l2arc_dev_mtx); + for (dev = list_head(l2arc_dev_list); dev != NULL; + dev = list_next(l2arc_dev_list, dev)) { + if (dev->l2ad_vdev == vd) + break; + } + mutex_exit(&l2arc_dev_mtx); + + return (dev != NULL); +} + +/* + * Add a vdev for use by the L2ARC. By this point the spa has already + * validated the vdev and opened it. + */ +void +l2arc_add_vdev(spa_t *spa, vdev_t *vd) +{ + l2arc_dev_t *adddev; + + ASSERT(!l2arc_vdev_present(vd)); + + /* + * Create a new l2arc device entry. + */ + adddev = kmem_zalloc(sizeof (l2arc_dev_t), KM_SLEEP); + adddev->l2ad_spa = spa; + adddev->l2ad_vdev = vd; + adddev->l2ad_write = l2arc_write_max; + adddev->l2ad_boost = l2arc_write_boost; + adddev->l2ad_start = VDEV_LABEL_START_SIZE; + adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd); + adddev->l2ad_hand = adddev->l2ad_start; + adddev->l2ad_evict = adddev->l2ad_start; + adddev->l2ad_first = B_TRUE; + adddev->l2ad_writing = B_FALSE; + ASSERT3U(adddev->l2ad_write, >, 0); + + /* + * This is a list of all ARC buffers that are still valid on the + * device. + */ + adddev->l2ad_buflist = kmem_zalloc(sizeof (list_t), KM_SLEEP); + list_create(adddev->l2ad_buflist, sizeof (arc_buf_hdr_t), + offsetof(arc_buf_hdr_t, b_l2node)); + + vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand); + + /* + * Add device to global list + */ + mutex_enter(&l2arc_dev_mtx); + list_insert_head(l2arc_dev_list, adddev); + atomic_inc_64(&l2arc_ndev); + mutex_exit(&l2arc_dev_mtx); +} + +/* + * Remove a vdev from the L2ARC. 
+ */ +void +l2arc_remove_vdev(vdev_t *vd) +{ + l2arc_dev_t *dev, *nextdev, *remdev = NULL; + + /* + * Find the device by vdev + */ + mutex_enter(&l2arc_dev_mtx); + for (dev = list_head(l2arc_dev_list); dev; dev = nextdev) { + nextdev = list_next(l2arc_dev_list, dev); + if (vd == dev->l2ad_vdev) { + remdev = dev; + break; + } + } + ASSERT(remdev != NULL); + + /* + * Remove device from global list + */ + list_remove(l2arc_dev_list, remdev); + l2arc_dev_last = NULL; /* may have been invalidated */ + atomic_dec_64(&l2arc_ndev); + mutex_exit(&l2arc_dev_mtx); + + /* + * Clear all buflists and ARC references. L2ARC device flush. + */ + l2arc_evict(remdev, 0, B_TRUE); + list_destroy(remdev->l2ad_buflist); + kmem_free(remdev->l2ad_buflist, sizeof (list_t)); + kmem_free(remdev, sizeof (l2arc_dev_t)); +} + +void +l2arc_init(void) +{ + l2arc_thread_exit = 0; + l2arc_ndev = 0; + l2arc_writes_sent = 0; + l2arc_writes_done = 0; + + mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL); + + l2arc_dev_list = &L2ARC_dev_list; + l2arc_free_on_write = &L2ARC_free_on_write; + list_create(l2arc_dev_list, sizeof (l2arc_dev_t), + offsetof(l2arc_dev_t, l2ad_node)); + list_create(l2arc_free_on_write, sizeof (l2arc_data_free_t), + offsetof(l2arc_data_free_t, l2df_list_node)); +} + +void +l2arc_fini(void) +{ + /* + * This is called from dmu_fini(), which is called from spa_fini(); + * Because of this, we can assume that all l2arc devices have + * already been removed when the pools themselves were removed. 
+ */ + + l2arc_do_free_on_write(); + + mutex_destroy(&l2arc_feed_thr_lock); + cv_destroy(&l2arc_feed_thr_cv); + mutex_destroy(&l2arc_dev_mtx); + mutex_destroy(&l2arc_buflist_mtx); + mutex_destroy(&l2arc_free_on_write_mtx); + + list_destroy(l2arc_dev_list); + list_destroy(l2arc_free_on_write); +} + +void +l2arc_start(void) +{ + if (!(spa_mode_global & FWRITE)) + return; + + (void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0, + TS_RUN, minclsyspri); +} + +void +l2arc_stop(void) +{ + if (!(spa_mode_global & FWRITE)) + return; + + mutex_enter(&l2arc_feed_thr_lock); + cv_signal(&l2arc_feed_thr_cv); /* kick thread out of startup */ + l2arc_thread_exit = 1; + while (l2arc_thread_exit != 0) + cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock); + mutex_exit(&l2arc_feed_thr_lock); +} diff --git a/uts/common/fs/zfs/bplist.c b/uts/common/fs/zfs/bplist.c new file mode 100644 index 000000000000..066ccc6b1e05 --- /dev/null +++ b/uts/common/fs/zfs/bplist.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/bplist.h> +#include <sys/zfs_context.h> + + +/* + * Initialize bpl's lock and its list of bplist_entry_t entries. + */ +void +bplist_create(bplist_t *bpl) +{ + mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&bpl->bpl_list, sizeof (bplist_entry_t), + offsetof(bplist_entry_t, bpe_node)); +} + +/* + * Tear down the list and the lock that bplist_create() set up. + */ +void +bplist_destroy(bplist_t *bpl) +{ + list_destroy(&bpl->bpl_list); + mutex_destroy(&bpl->bpl_lock); +} + +/* + * Allocate an entry holding a copy of *bp and add it to the tail of + * the list. The copy and the insertion are done under bpl_lock. + */ +void +bplist_append(bplist_t *bpl, const blkptr_t *bp) +{ + bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_SLEEP); + + mutex_enter(&bpl->bpl_lock); + bpe->bpe_blk = *bp; + list_insert_tail(&bpl->bpl_list, bpe); + mutex_exit(&bpl->bpl_lock); +} + +/* + * Drain the list: take each entry off the head in turn, call + * func(arg, &bpe->bpe_blk, tx) on it with bpl_lock dropped, then free + * the entry. The lock is retaken before the list is looked at again, + * so the loop ends only when the list is seen empty under the lock. + */ +void +bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx) +{ + bplist_entry_t *bpe; + + mutex_enter(&bpl->bpl_lock); + while ((bpe = list_head(&bpl->bpl_list)) != NULL) { + list_remove(&bpl->bpl_list, bpe); + mutex_exit(&bpl->bpl_lock); + func(arg, &bpe->bpe_blk, tx); + kmem_free(bpe, sizeof (*bpe)); + mutex_enter(&bpl->bpl_lock); + } + mutex_exit(&bpl->bpl_lock); +} diff --git a/uts/common/fs/zfs/bpobj.c b/uts/common/fs/zfs/bpobj.c new file mode 100644 index 000000000000..72be31235607 --- /dev/null +++ b/uts/common/fs/zfs/bpobj.c @@ -0,0 +1,495 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/bpobj.h> +#include <sys/zfs_context.h> +#include <sys/refcount.h> + +uint64_t +bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) +{ + int size; + + if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT) + size = BPOBJ_SIZE_V0; + else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) + size = BPOBJ_SIZE_V1; + else + size = sizeof (bpobj_phys_t); + + return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize, + DMU_OT_BPOBJ_HDR, size, tx)); +} + +void +bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) +{ + int64_t i; + bpobj_t bpo; + dmu_object_info_t doi; + int epb; + dmu_buf_t *dbuf = NULL; + + VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); + + mutex_enter(&bpo.bpo_lock); + + if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0) + goto out; + + VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi)); + epb = doi.doi_data_block_size / sizeof (uint64_t); + + for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { + uint64_t *objarray; + uint64_t offset, blkoff; + + offset = i * sizeof (uint64_t); + blkoff = P2PHASE(i, epb); + + if (dbuf == NULL || dbuf->db_offset > offset) { + if (dbuf) + dmu_buf_rele(dbuf, FTAG); + VERIFY3U(0, ==, dmu_buf_hold(os, + bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0)); + } + + ASSERT3U(offset, >=, dbuf->db_offset); + ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); + + objarray = dbuf->db_data; + bpobj_free(os, objarray[blkoff], tx); + } + if (dbuf) { + dmu_buf_rele(dbuf, FTAG); + dbuf = NULL; + } + VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx)); + +out: + mutex_exit(&bpo.bpo_lock); + bpobj_close(&bpo); + + VERIFY3U(0, ==, 
dmu_object_free(os, obj, tx)); +} + +int +bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) +{ + dmu_object_info_t doi; + int err; + + err = dmu_object_info(os, object, &doi); + if (err) + return (err); + + bzero(bpo, sizeof (*bpo)); + mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); + + ASSERT(bpo->bpo_dbuf == NULL); + ASSERT(bpo->bpo_phys == NULL); + ASSERT(object != 0); + ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); + + err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf); + if (err) + return (err); + + bpo->bpo_os = os; + bpo->bpo_object = object; + bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; + bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); + bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); + bpo->bpo_phys = bpo->bpo_dbuf->db_data; + return (0); +} + +void +bpobj_close(bpobj_t *bpo) +{ + /* Lame workaround for closing a bpobj that was never opened. */ + if (bpo->bpo_object == 0) + return; + + dmu_buf_rele(bpo->bpo_dbuf, bpo); + if (bpo->bpo_cached_dbuf != NULL) + dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); + bpo->bpo_dbuf = NULL; + bpo->bpo_phys = NULL; + bpo->bpo_cached_dbuf = NULL; + bpo->bpo_object = 0; + + mutex_destroy(&bpo->bpo_lock); +} + +static int +bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, + boolean_t free) +{ + dmu_object_info_t doi; + int epb; + int64_t i; + int err = 0; + dmu_buf_t *dbuf = NULL; + + mutex_enter(&bpo->bpo_lock); + + if (free) + dmu_buf_will_dirty(bpo->bpo_dbuf, tx); + + for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) { + blkptr_t *bparray; + blkptr_t *bp; + uint64_t offset, blkoff; + + offset = i * sizeof (blkptr_t); + blkoff = P2PHASE(i, bpo->bpo_epb); + + if (dbuf == NULL || dbuf->db_offset > offset) { + if (dbuf) + dmu_buf_rele(dbuf, FTAG); + err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset, + FTAG, &dbuf, 0); + if (err) + break; + } + + ASSERT3U(offset, >=, dbuf->db_offset); + 
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); + + bparray = dbuf->db_data; + bp = &bparray[blkoff]; + err = func(arg, bp, tx); + if (err) + break; + if (free) { + bpo->bpo_phys->bpo_bytes -= + bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); + ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); + if (bpo->bpo_havecomp) { + bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp); + bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp); + } + bpo->bpo_phys->bpo_num_blkptrs--; + ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0); + } + } + if (dbuf) { + dmu_buf_rele(dbuf, FTAG); + dbuf = NULL; + } + if (free) { + i++; + VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, + i * sizeof (blkptr_t), -1ULL, tx)); + } + if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) + goto out; + + ASSERT(bpo->bpo_havecomp); + err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi); + if (err) { + mutex_exit(&bpo->bpo_lock); + return (err); + } + epb = doi.doi_data_block_size / sizeof (uint64_t); + + for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { + uint64_t *objarray; + uint64_t offset, blkoff; + bpobj_t sublist; + uint64_t used_before, comp_before, uncomp_before; + uint64_t used_after, comp_after, uncomp_after; + + offset = i * sizeof (uint64_t); + blkoff = P2PHASE(i, epb); + + if (dbuf == NULL || dbuf->db_offset > offset) { + if (dbuf) + dmu_buf_rele(dbuf, FTAG); + err = dmu_buf_hold(bpo->bpo_os, + bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0); + if (err) + break; + } + + ASSERT3U(offset, >=, dbuf->db_offset); + ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); + + objarray = dbuf->db_data; + err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]); + if (err) + break; + if (free) { + err = bpobj_space(&sublist, + &used_before, &comp_before, &uncomp_before); + if (err) + break; + } + err = bpobj_iterate_impl(&sublist, func, arg, tx, free); + if (free) { + VERIFY3U(0, ==, bpobj_space(&sublist, + &used_after, &comp_after, &uncomp_after)); + 
bpo->bpo_phys->bpo_bytes -= used_before - used_after; + ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); + bpo->bpo_phys->bpo_comp -= comp_before - comp_after; + bpo->bpo_phys->bpo_uncomp -= + uncomp_before - uncomp_after; + } + + bpobj_close(&sublist); + if (err) + break; + if (free) { + err = dmu_object_free(bpo->bpo_os, + objarray[blkoff], tx); + if (err) + break; + bpo->bpo_phys->bpo_num_subobjs--; + ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0); + } + } + if (dbuf) { + dmu_buf_rele(dbuf, FTAG); + dbuf = NULL; + } + if (free) { + VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, + bpo->bpo_phys->bpo_subobjs, + (i + 1) * sizeof (uint64_t), -1ULL, tx)); + } + +out: + /* If there are no entries, there should be no bytes. */ + ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 || + (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) || + bpo->bpo_phys->bpo_bytes == 0); + + mutex_exit(&bpo->bpo_lock); + return (err); +} + +/* + * Iterate and remove the entries. If func returns nonzero, iteration + * will stop and that entry will not be removed. + */ +int +bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) +{ + return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE)); +} + +/* + * Iterate the entries. If func returns nonzero, iteration will stop. + */ +int +bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) +{ + return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE)); +} + +void +bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) +{ + bpobj_t subbpo; + uint64_t used, comp, uncomp, subsubobjs; + + ASSERT(bpo->bpo_havesubobj); + ASSERT(bpo->bpo_havecomp); + + VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); + VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); + + if (used == 0) { + /* No point in having an empty subobj. 
*/ + bpobj_close(&subbpo); + bpobj_free(bpo->bpo_os, subobj, tx); + return; + } + + dmu_buf_will_dirty(bpo->bpo_dbuf, tx); + if (bpo->bpo_phys->bpo_subobjs == 0) { + bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, + DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); + } + + mutex_enter(&bpo->bpo_lock); + dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), + sizeof (subobj), &subobj, tx); + bpo->bpo_phys->bpo_num_subobjs++; + + /* + * If subobj has only one block of subobjs, then move subobj's + * subobjs to bpo's subobj list directly. This reduces + * recursion in bpobj_iterate due to nested subobjs. + */ + subsubobjs = subbpo.bpo_phys->bpo_subobjs; + if (subsubobjs != 0) { + dmu_object_info_t doi; + + VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi)); + if (doi.doi_max_offset == doi.doi_data_block_size) { + dmu_buf_t *subdb; + uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs; + + VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs, + 0, FTAG, &subdb, 0)); + dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), + numsubsub * sizeof (subobj), subdb->db_data, tx); + dmu_buf_rele(subdb, FTAG); + bpo->bpo_phys->bpo_num_subobjs += numsubsub; + + dmu_buf_will_dirty(subbpo.bpo_dbuf, tx); + subbpo.bpo_phys->bpo_subobjs = 0; + VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os, + subsubobjs, tx)); + } + } + bpo->bpo_phys->bpo_bytes += used; + bpo->bpo_phys->bpo_comp += comp; + bpo->bpo_phys->bpo_uncomp += uncomp; + mutex_exit(&bpo->bpo_lock); + + bpobj_close(&subbpo); +} + +void +bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx) +{ + blkptr_t stored_bp = *bp; + uint64_t offset; + int blkoff; + blkptr_t *bparray; + + ASSERT(!BP_IS_HOLE(bp)); + + /* We never need the fill count. 
*/ + stored_bp.blk_fill = 0; + + /* The bpobj will compress better if we can leave off the checksum */ + if (!BP_GET_DEDUP(bp)) + bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum)); + + mutex_enter(&bpo->bpo_lock); + + offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp); + blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb); + + if (bpo->bpo_cached_dbuf == NULL || + offset < bpo->bpo_cached_dbuf->db_offset || + offset >= bpo->bpo_cached_dbuf->db_offset + + bpo->bpo_cached_dbuf->db_size) { + if (bpo->bpo_cached_dbuf) + dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); + VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, + offset, bpo, &bpo->bpo_cached_dbuf, 0)); + } + + dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx); + bparray = bpo->bpo_cached_dbuf->db_data; + bparray[blkoff] = stored_bp; + + dmu_buf_will_dirty(bpo->bpo_dbuf, tx); + bpo->bpo_phys->bpo_num_blkptrs++; + bpo->bpo_phys->bpo_bytes += + bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); + if (bpo->bpo_havecomp) { + bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp); + bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp); + } + mutex_exit(&bpo->bpo_lock); +} + +struct space_range_arg { + spa_t *spa; + uint64_t mintxg; + uint64_t maxtxg; + uint64_t used; + uint64_t comp; + uint64_t uncomp; +}; + +/* ARGSUSED */ +static int +space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + struct space_range_arg *sra = arg; + + if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { + sra->used += bp_get_dsize_sync(sra->spa, bp); + sra->comp += BP_GET_PSIZE(bp); + sra->uncomp += BP_GET_UCSIZE(bp); + } + return (0); +} + +int +bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + mutex_enter(&bpo->bpo_lock); + + *usedp = bpo->bpo_phys->bpo_bytes; + if (bpo->bpo_havecomp) { + *compp = bpo->bpo_phys->bpo_comp; + *uncompp = bpo->bpo_phys->bpo_uncomp; + mutex_exit(&bpo->bpo_lock); + return (0); + } else { + mutex_exit(&bpo->bpo_lock); + return 
(bpobj_space_range(bpo, 0, UINT64_MAX, + usedp, compp, uncompp)); + } +} + +/* + * Return the amount of space in the bpobj which is: + * mintxg < blk_birth <= maxtxg + */ +int +bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + struct space_range_arg sra = { 0 }; + int err; + + /* + * As an optimization, if they want the whole txg range, just + * get bpo_bytes rather than iterating over the bps. + */ + if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp) + return (bpobj_space(bpo, usedp, compp, uncompp)); + + sra.spa = dmu_objset_spa(bpo->bpo_os); + sra.mintxg = mintxg; + sra.maxtxg = maxtxg; + + err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL); + *usedp = sra.used; + *compp = sra.comp; + *uncompp = sra.uncomp; + return (err); +} diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c new file mode 100644 index 000000000000..9c4e0296db2b --- /dev/null +++ b/uts/common/fs/zfs/dbuf.c @@ -0,0 +1,2707 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dbuf.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dmu_tx.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/dmu_zfetch.h> +#include <sys/sa.h> +#include <sys/sa_impl.h> + +static void dbuf_destroy(dmu_buf_impl_t *db); +static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); + +/* + * Global data structures and functions for the dbuf cache. + */ +static kmem_cache_t *dbuf_cache; + +/* ARGSUSED */ +static int +dbuf_cons(void *vdb, void *unused, int kmflag) +{ + dmu_buf_impl_t *db = vdb; + bzero(db, sizeof (dmu_buf_impl_t)); + + mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); + cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); + refcount_create(&db->db_holds); + return (0); +} + +/* ARGSUSED */ +static void +dbuf_dest(void *vdb, void *unused) +{ + dmu_buf_impl_t *db = vdb; + mutex_destroy(&db->db_mtx); + cv_destroy(&db->db_changed); + refcount_destroy(&db->db_holds); +} + +/* + * dbuf hash table routines + */ +static dbuf_hash_table_t dbuf_hash_table; + +static uint64_t dbuf_hash_count; + +/* + * Fold (os, obj, lvl, blkid) into a 64-bit hash value using + * zfs_crc64_table. Callers mask the result with hash_table_mask to get + * a bucket index. + */ +static uint64_t +dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid) +{ + uintptr_t osv = (uintptr_t)os; + uint64_t crc = -1ULL; + + ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (lvl)) & 0xFF]; + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF]; + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF]; + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF]; + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 0)) & 0xFF]; + crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 8)) & 0xFF]; + + crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16); + + return (crc); +} + +/* + * No trailing semicolon here: every caller terminates the statement + * itself, and the macro has to stay usable inside a larger expression. + */ +#define DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid) + 
+#define DBUF_EQUAL(dbuf, os, obj, level, blkid) \ + ((dbuf)->db.db_object == (obj) && \ + (dbuf)->db_objset == (os) && \ + (dbuf)->db_level == (level) && \ + (dbuf)->db_blkid == (blkid)) + +/* + * Look up the dbuf for (dn's objset, dn's object, level, blkid) in the + * hash table. Entries in the DB_EVICTING state are skipped. + * + * Returns the matching dbuf with its db_mtx held, or NULL if there is + * none. The hash bucket mutex is dropped before returning in both cases. + */ +dmu_buf_impl_t * +dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) +{ + dbuf_hash_table_t *h = &dbuf_hash_table; + objset_t *os = dn->dn_objset; + uint64_t obj = dn->dn_object; + uint64_t hv = DBUF_HASH(os, obj, level, blkid); + uint64_t idx = hv & h->hash_table_mask; + dmu_buf_impl_t *db; + + mutex_enter(DBUF_HASH_MUTEX(h, idx)); + for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) { + if (DBUF_EQUAL(db, os, obj, level, blkid)) { + mutex_enter(&db->db_mtx); + if (db->db_state != DB_EVICTING) { + mutex_exit(DBUF_HASH_MUTEX(h, idx)); + return (db); + } + mutex_exit(&db->db_mtx); + } + } + mutex_exit(DBUF_HASH_MUTEX(h, idx)); + return (NULL); +} + +/* + * Insert an entry into the hash table. If there is already an element + * equal to db in the hash table (and not being evicted), then the + * already existing element will be returned with its db_mtx held and + * the new element will not be inserted. + * Otherwise db is inserted and NULL is returned with db->db_mtx held. 
+ */ +static dmu_buf_impl_t * +dbuf_hash_insert(dmu_buf_impl_t *db) +{ + dbuf_hash_table_t *h = &dbuf_hash_table; + objset_t *os = db->db_objset; + uint64_t obj = db->db.db_object; + int level = db->db_level; + uint64_t blkid = db->db_blkid; + uint64_t hv = DBUF_HASH(os, obj, level, blkid); + uint64_t idx = hv & h->hash_table_mask; + dmu_buf_impl_t *dbf; + + mutex_enter(DBUF_HASH_MUTEX(h, idx)); + for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) { + if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { + mutex_enter(&dbf->db_mtx); + if (dbf->db_state != DB_EVICTING) { + mutex_exit(DBUF_HASH_MUTEX(h, idx)); + return (dbf); + } + mutex_exit(&dbf->db_mtx); + } + } + + mutex_enter(&db->db_mtx); + db->db_hash_next = h->hash_table[idx]; + h->hash_table[idx] = db; + mutex_exit(DBUF_HASH_MUTEX(h, idx)); + atomic_add_64(&dbuf_hash_count, 1); + + return (NULL); +} + +/* + * Remove an entry from the hash table. The db must have no remaining + * holds and must be in the DB_EVICTING state; both are asserted, as is + * the fact that db is actually present on its hash chain. + */ +static void +dbuf_hash_remove(dmu_buf_impl_t *db) +{ + dbuf_hash_table_t *h = &dbuf_hash_table; + uint64_t hv = DBUF_HASH(db->db_objset, db->db.db_object, + db->db_level, db->db_blkid); + uint64_t idx = hv & h->hash_table_mask; + dmu_buf_impl_t *dbf, **dbp; + + /* + * We mustn't hold db_mtx to maintain lock ordering: + * DBUF_HASH_MUTEX > db_mtx. + */ 
+ ASSERT(refcount_is_zero(&db->db_holds)); + ASSERT(db->db_state == DB_EVICTING); + ASSERT(!MUTEX_HELD(&db->db_mtx)); + + mutex_enter(DBUF_HASH_MUTEX(h, idx)); + dbp = &h->hash_table[idx]; + while ((dbf = *dbp) != db) { + /* + * db must be on this chain. Check that we have not run off + * the end before following dbf to the next link. + */ + ASSERT(dbf != NULL); + dbp = &dbf->db_hash_next; + } + *dbp = db->db_hash_next; + db->db_hash_next = NULL; + mutex_exit(DBUF_HASH_MUTEX(h, idx)); + atomic_add_64(&dbuf_hash_count, -1); +} + +static arc_evict_func_t dbuf_do_evict; + +static void +dbuf_evict_user(dmu_buf_impl_t *db) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (db->db_level != 0 || db->db_evict_func == NULL) + return; + + if (db->db_user_data_ptr_ptr) + *db->db_user_data_ptr_ptr = db->db.db_data; + db->db_evict_func(&db->db, db->db_user_ptr); + db->db_user_ptr = NULL; + db->db_user_data_ptr_ptr = NULL; + db->db_evict_func = NULL; +} + +boolean_t +dbuf_is_metadata(dmu_buf_impl_t *db) +{ + if (db->db_level > 0) { + return (B_TRUE); + } else { + boolean_t is_metadata; + + DB_DNODE_ENTER(db); + is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata; + DB_DNODE_EXIT(db); + + return (is_metadata); + } +} + +void +dbuf_evict(dmu_buf_impl_t *db) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(db->db_buf == NULL); + ASSERT(db->db_data_pending == NULL); + + dbuf_clear(db); + dbuf_destroy(db); +} + +void +dbuf_init(void) +{ + uint64_t hsize = 1ULL << 16; + dbuf_hash_table_t *h = &dbuf_hash_table; + int i; + + /* + * The hash table is big enough to fill all of physical memory + * with an average 4K block size. The table will take up + * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers). 
+ */ + while (hsize * 4096 < physmem * PAGESIZE) + hsize <<= 1; + +retry: + h->hash_table_mask = hsize - 1; + h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); + if (h->hash_table == NULL) { + /* XXX - we should really return an error instead of assert */ + ASSERT(hsize > (1ULL << 10)); + hsize >>= 1; + goto retry; + } + + dbuf_cache = kmem_cache_create("dmu_buf_impl_t", + sizeof (dmu_buf_impl_t), + 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); + + for (i = 0; i < DBUF_MUTEXES; i++) + mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); +} + +void +dbuf_fini(void) +{ + dbuf_hash_table_t *h = &dbuf_hash_table; + int i; + + for (i = 0; i < DBUF_MUTEXES; i++) + mutex_destroy(&h->hash_mutexes[i]); + kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); + kmem_cache_destroy(dbuf_cache); +} + +/* + * Other stuff. + */ + +#ifdef ZFS_DEBUG +static void +dbuf_verify(dmu_buf_impl_t *db) +{ + dnode_t *dn; + dbuf_dirty_record_t *dr; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY)) + return; + + ASSERT(db->db_objset != NULL); + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + if (dn == NULL) { + ASSERT(db->db_parent == NULL); + ASSERT(db->db_blkptr == NULL); + } else { + ASSERT3U(db->db.db_object, ==, dn->dn_object); + ASSERT3P(db->db_objset, ==, dn->dn_objset); + ASSERT3U(db->db_level, <, dn->dn_nlevels); + ASSERT(db->db_blkid == DMU_BONUS_BLKID || + db->db_blkid == DMU_SPILL_BLKID || + !list_is_empty(&dn->dn_dbufs)); + } + if (db->db_blkid == DMU_BONUS_BLKID) { + ASSERT(dn != NULL); + ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); + ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID); + } else if (db->db_blkid == DMU_SPILL_BLKID) { + ASSERT(dn != NULL); + ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); + ASSERT3U(db->db.db_offset, ==, 0); + } else { + ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); + } + + for (dr = db->db_data_pending; dr != NULL; dr = dr->dr_next) + ASSERT(dr->dr_dbuf == db); + + 
for (dr = db->db_last_dirty; dr != NULL; dr = dr->dr_next) + ASSERT(dr->dr_dbuf == db); + + /* + * We can't assert that db_size matches dn_datablksz because it + * can be momentarily different when another thread is doing + * dnode_set_blksz(). + */ + if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) { + dr = db->db_data_pending; + /* + * It should only be modified in syncing context, so + * make sure we only have one copy of the data. + */ + ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf); + } + + /* verify db->db_blkptr */ + if (db->db_blkptr) { + if (db->db_parent == dn->dn_dbuf) { + /* db is pointed to by the dnode */ + /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ + if (DMU_OBJECT_IS_SPECIAL(db->db.db_object)) + ASSERT(db->db_parent == NULL); + else + ASSERT(db->db_parent != NULL); + if (db->db_blkid != DMU_SPILL_BLKID) + ASSERT3P(db->db_blkptr, ==, + &dn->dn_phys->dn_blkptr[db->db_blkid]); + } else { + /* db is pointed to by an indirect block */ + int epb = db->db_parent->db.db_size >> SPA_BLKPTRSHIFT; + ASSERT3U(db->db_parent->db_level, ==, db->db_level+1); + ASSERT3U(db->db_parent->db.db_object, ==, + db->db.db_object); + /* + * dnode_grow_indblksz() can make this fail if we don't + * have the struct_rwlock. XXX indblksz no longer + * grows. safe to do this now? + */ + if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) { + ASSERT3P(db->db_blkptr, ==, + ((blkptr_t *)db->db_parent->db.db_data + + db->db_blkid % epb)); + } + } + } + if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) && + (db->db_buf == NULL || db->db_buf->b_data) && + db->db.db_data && db->db_blkid != DMU_BONUS_BLKID && + db->db_state != DB_FILL && !dn->dn_free_txg) { + /* + * If the blkptr isn't set but they have nonzero data, + * it had better be dirty, otherwise we'll lose that + * data when we evict this buffer. 
+ */ + if (db->db_dirtycnt == 0) { + uint64_t *buf = db->db.db_data; + int i; + + for (i = 0; i < db->db.db_size >> 3; i++) { + ASSERT(buf[i] == 0); + } + } + } + DB_DNODE_EXIT(db); +} +#endif + +static void +dbuf_update_data(dmu_buf_impl_t *db) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + if (db->db_level == 0 && db->db_user_data_ptr_ptr) { + ASSERT(!refcount_is_zero(&db->db_holds)); + *db->db_user_data_ptr_ptr = db->db.db_data; + } +} + +static void +dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf)); + db->db_buf = buf; + if (buf != NULL) { + ASSERT(buf->b_data != NULL); + db->db.db_data = buf->b_data; + if (!arc_released(buf)) + arc_set_callback(buf, dbuf_do_evict, db); + dbuf_update_data(db); + } else { + dbuf_evict_user(db); + db->db.db_data = NULL; + if (db->db_state != DB_NOFILL) + db->db_state = DB_UNCACHED; + } +} + +/* + * Loan out an arc_buf for read. Return the loaned arc_buf. + */ +arc_buf_t * +dbuf_loan_arcbuf(dmu_buf_impl_t *db) +{ + arc_buf_t *abuf; + + mutex_enter(&db->db_mtx); + if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) { + int blksz = db->db.db_size; + spa_t *spa; + + mutex_exit(&db->db_mtx); + DB_GET_SPA(&spa, db); + abuf = arc_loan_buf(spa, blksz); + bcopy(db->db.db_data, abuf->b_data, blksz); + } else { + abuf = db->db_buf; + arc_loan_inuse_buf(abuf, db); + dbuf_set_data(db, NULL); + mutex_exit(&db->db_mtx); + } + return (abuf); +} + +uint64_t +dbuf_whichblock(dnode_t *dn, uint64_t offset) +{ + if (dn->dn_datablkshift) { + return (offset >> dn->dn_datablkshift); + } else { + ASSERT3U(offset, <, dn->dn_datablksz); + return (0); + } +} + +static void +dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) +{ + dmu_buf_impl_t *db = vdb; + + mutex_enter(&db->db_mtx); + ASSERT3U(db->db_state, ==, DB_READ); + /* + * All reads are synchronous, so we must have a hold on the dbuf + */ + ASSERT(refcount_count(&db->db_holds) > 0); + 
ASSERT(db->db_buf == NULL); + ASSERT(db->db.db_data == NULL); + if (db->db_level == 0 && db->db_freed_in_flight) { + /* we were freed in flight; disregard any error */ + arc_release(buf, db); + bzero(buf->b_data, db->db.db_size); + arc_buf_freeze(buf); + db->db_freed_in_flight = FALSE; + dbuf_set_data(db, buf); + db->db_state = DB_CACHED; + } else if (zio == NULL || zio->io_error == 0) { + dbuf_set_data(db, buf); + db->db_state = DB_CACHED; + } else { + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT3P(db->db_buf, ==, NULL); + VERIFY(arc_buf_remove_ref(buf, db) == 1); + db->db_state = DB_UNCACHED; + } + cv_broadcast(&db->db_changed); + dbuf_rele_and_unlock(db, NULL); +} + +static void +dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) +{ + dnode_t *dn; + spa_t *spa; + zbookmark_t zb; + uint32_t aflags = ARC_NOWAIT; + arc_buf_t *pbuf; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + ASSERT(!refcount_is_zero(&db->db_holds)); + /* We need the struct_rwlock to prevent db_blkptr from changing. */ + ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(db->db_state == DB_UNCACHED); + ASSERT(db->db_buf == NULL); + + if (db->db_blkid == DMU_BONUS_BLKID) { + int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); + + ASSERT3U(bonuslen, <=, db->db.db_size); + db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); + arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); + if (bonuslen < DN_MAX_BONUSLEN) + bzero(db->db.db_data, DN_MAX_BONUSLEN); + if (bonuslen) + bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); + DB_DNODE_EXIT(db); + dbuf_update_data(db); + db->db_state = DB_CACHED; + mutex_exit(&db->db_mtx); + return; + } + + /* + * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() + * processes the delete record and clears the bp while we are waiting + * for the dn_mtx (resulting in a "no" from block_freed). 
+ */ + if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) || + (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) || + BP_IS_HOLE(db->db_blkptr)))) { + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + + dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa, + db->db.db_size, db, type)); + DB_DNODE_EXIT(db); + bzero(db->db.db_data, db->db.db_size); + db->db_state = DB_CACHED; + *flags |= DB_RF_CACHED; + mutex_exit(&db->db_mtx); + return; + } + + spa = dn->dn_objset->os_spa; + DB_DNODE_EXIT(db); + + db->db_state = DB_READ; + mutex_exit(&db->db_mtx); + + if (DBUF_IS_L2CACHEABLE(db)) + aflags |= ARC_L2CACHE; + + SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ? + db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, + db->db.db_object, db->db_level, db->db_blkid); + + dbuf_add_ref(db, NULL); + /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */ + + if (db->db_parent) + pbuf = db->db_parent->db_buf; + else + pbuf = db->db_objset->os_phys_buf; + + (void) dsl_read(zio, spa, db->db_blkptr, pbuf, + dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, + (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, + &aflags, &zb); + if (aflags & ARC_CACHED) + *flags |= DB_RF_CACHED; +} + +int +dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) +{ + int err = 0; + int havepzio = (zio != NULL); + int prefetch; + dnode_t *dn; + + /* + * We don't have to hold the mutex to check db_state because it + * can't be freed while we have a hold on the buffer. 
+ */ + ASSERT(!refcount_is_zero(&db->db_holds)); + + if (db->db_state == DB_NOFILL) + return (EIO); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + if ((flags & DB_RF_HAVESTRUCT) == 0) + rw_enter(&dn->dn_struct_rwlock, RW_READER); + + prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && + (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && + DBUF_IS_CACHEABLE(db); + + mutex_enter(&db->db_mtx); + if (db->db_state == DB_CACHED) { + mutex_exit(&db->db_mtx); + if (prefetch) + dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, + db->db.db_size, TRUE); + if ((flags & DB_RF_HAVESTRUCT) == 0) + rw_exit(&dn->dn_struct_rwlock); + DB_DNODE_EXIT(db); + } else if (db->db_state == DB_UNCACHED) { + spa_t *spa = dn->dn_objset->os_spa; + + if (zio == NULL) + zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + dbuf_read_impl(db, zio, &flags); + + /* dbuf_read_impl has dropped db_mtx for us */ + + if (prefetch) + dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, + db->db.db_size, flags & DB_RF_CACHED); + + if ((flags & DB_RF_HAVESTRUCT) == 0) + rw_exit(&dn->dn_struct_rwlock); + DB_DNODE_EXIT(db); + + if (!havepzio) + err = zio_wait(zio); + } else { + mutex_exit(&db->db_mtx); + if (prefetch) + dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, + db->db.db_size, TRUE); + if ((flags & DB_RF_HAVESTRUCT) == 0) + rw_exit(&dn->dn_struct_rwlock); + DB_DNODE_EXIT(db); + + mutex_enter(&db->db_mtx); + if ((flags & DB_RF_NEVERWAIT) == 0) { + while (db->db_state == DB_READ || + db->db_state == DB_FILL) { + ASSERT(db->db_state == DB_READ || + (flags & DB_RF_HAVESTRUCT) == 0); + cv_wait(&db->db_changed, &db->db_mtx); + } + if (db->db_state == DB_UNCACHED) + err = EIO; + } + mutex_exit(&db->db_mtx); + } + + ASSERT(err || havepzio || db->db_state == DB_CACHED); + return (err); +} + +static void +dbuf_noread(dmu_buf_impl_t *db) +{ + ASSERT(!refcount_is_zero(&db->db_holds)); + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + mutex_enter(&db->db_mtx); + while (db->db_state == DB_READ || db->db_state == DB_FILL) + 
cv_wait(&db->db_changed, &db->db_mtx); + if (db->db_state == DB_UNCACHED) { + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + spa_t *spa; + + ASSERT(db->db_buf == NULL); + ASSERT(db->db.db_data == NULL); + DB_GET_SPA(&spa, db); + dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); + db->db_state = DB_FILL; + } else if (db->db_state == DB_NOFILL) { + dbuf_set_data(db, NULL); + } else { + ASSERT3U(db->db_state, ==, DB_CACHED); + } + mutex_exit(&db->db_mtx); +} + +/* + * This is our just-in-time copy function. It makes a copy of + * buffers that have been modified in a previous transaction + * group before we modify them in the current active group. + * + * This function is used in two places: when we are dirtying a + * buffer for the first time in a txg, and when we are freeing + * a range in a dnode that includes this buffer. + * + * Note that when we are called from dbuf_free_range() we do + * not put a hold on the buffer; we just traverse the active + * dbuf list for the dnode. + */ +static void +dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) +{ + dbuf_dirty_record_t *dr = db->db_last_dirty; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(db->db.db_data != NULL); + ASSERT(db->db_level == 0); + ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); + + if (dr == NULL || + (dr->dt.dl.dr_data != + ((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf))) + return; + + /* + * If the last dirty record for this dbuf has not yet synced + * and it's referencing the dbuf data, either: + * reset the reference to point to a new copy, + * or (if there are no active holders) + * just null out the current db_data pointer.
+ */ + ASSERT(dr->dr_txg >= txg - 2); + if (db->db_blkid == DMU_BONUS_BLKID) { + /* Note that the data bufs here are zio_bufs */ + dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); + arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); + bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); + } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { + int size = db->db.db_size; + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + spa_t *spa; + + DB_GET_SPA(&spa, db); + dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); + bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); + } else { + dbuf_set_data(db, NULL); + } +} + +void +dbuf_unoverride(dbuf_dirty_record_t *dr) +{ + dmu_buf_impl_t *db = dr->dr_dbuf; + blkptr_t *bp = &dr->dt.dl.dr_overridden_by; + uint64_t txg = dr->dr_txg; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC); + ASSERT(db->db_level == 0); + + if (db->db_blkid == DMU_BONUS_BLKID || + dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) + return; + + ASSERT(db->db_data_pending != dr); + + /* free this block */ + if (!BP_IS_HOLE(bp)) { + spa_t *spa; + + DB_GET_SPA(&spa, db); + zio_free(spa, txg, bp); + } + dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; + /* + * Release the already-written buffer, so we leave it in + * a consistent dirty state. Note that all callers are + * modifying the buffer, so they will immediately do + * another (redundant) arc_release(). Therefore, leave + * the buf thawed to save the effort of freezing & + * immediately re-thawing it. + */ + arc_release(dr->dt.dl.dr_data, db); +} + +/* + * Evict (if it's unreferenced) or clear (if it's referenced) any level-0 + * data blocks in the free range, so that any future readers will find + * empty blocks. Also, if we happen across any level-1 dbufs in the + * range that have not already been marked dirty, mark them dirty so + * they stay in memory.
+ */ +void +dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db, *db_next; + uint64_t txg = tx->tx_txg; + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + uint64_t first_l1 = start >> epbs; + uint64_t last_l1 = end >> epbs; + + if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) { + end = dn->dn_maxblkid; + last_l1 = end >> epbs; + } + dprintf_dnode(dn, "start=%llu end=%llu\n", start, end); + mutex_enter(&dn->dn_dbufs_mtx); + for (db = list_head(&dn->dn_dbufs); db; db = db_next) { + db_next = list_next(&dn->dn_dbufs, db); + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + + if (db->db_level == 1 && + db->db_blkid >= first_l1 && db->db_blkid <= last_l1) { + mutex_enter(&db->db_mtx); + if (db->db_last_dirty && + db->db_last_dirty->dr_txg < txg) { + dbuf_add_ref(db, FTAG); + mutex_exit(&db->db_mtx); + dbuf_will_dirty(db, tx); + dbuf_rele(db, FTAG); + } else { + mutex_exit(&db->db_mtx); + } + } + + if (db->db_level != 0) + continue; + dprintf_dbuf(db, "found buf %s\n", ""); + if (db->db_blkid < start || db->db_blkid > end) + continue; + + /* found a level 0 buffer in the range */ + if (dbuf_undirty(db, tx)) + continue; + + mutex_enter(&db->db_mtx); + if (db->db_state == DB_UNCACHED || + db->db_state == DB_NOFILL || + db->db_state == DB_EVICTING) { + ASSERT(db->db.db_data == NULL); + mutex_exit(&db->db_mtx); + continue; + } + if (db->db_state == DB_READ || db->db_state == DB_FILL) { + /* will be handled in dbuf_read_done or dbuf_rele */ + db->db_freed_in_flight = TRUE; + mutex_exit(&db->db_mtx); + continue; + } + if (refcount_count(&db->db_holds) == 0) { + ASSERT(db->db_buf); + dbuf_clear(db); + continue; + } + /* The dbuf is referenced */ + + if (db->db_last_dirty != NULL) { + dbuf_dirty_record_t *dr = db->db_last_dirty; + + if (dr->dr_txg == txg) { + /* + * This buffer is "in-use", re-adjust the file + * size to reflect that this buffer may + * contain new data when we sync. 
+ */ + if (db->db_blkid != DMU_SPILL_BLKID && + db->db_blkid > dn->dn_maxblkid) + dn->dn_maxblkid = db->db_blkid; + dbuf_unoverride(dr); + } else { + /* + * This dbuf is not dirty in the open context. + * Either uncache it (if it's not referenced in + * the open context) or reset its contents to + * empty. + */ + dbuf_fix_old_data(db, txg); + } + } + /* clear the contents if it's cached */ + if (db->db_state == DB_CACHED) { + ASSERT(db->db.db_data != NULL); + arc_release(db->db_buf, db); + bzero(db->db.db_data, db->db.db_size); + arc_buf_freeze(db->db_buf); + } + + mutex_exit(&db->db_mtx); + } + mutex_exit(&dn->dn_dbufs_mtx); +} + +static int +dbuf_block_freeable(dmu_buf_impl_t *db) +{ + dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; + uint64_t birth_txg = 0; + + /* + * We don't need any locking to protect db_blkptr: + * If it's syncing, then db_last_dirty will be set + * so we'll ignore db_blkptr. + */ + ASSERT(MUTEX_HELD(&db->db_mtx)); + if (db->db_last_dirty) + birth_txg = db->db_last_dirty->dr_txg; + else if (db->db_blkptr) + birth_txg = db->db_blkptr->blk_birth; + + /* + * If we don't exist or are in a snapshot, we can't be freed. + * Don't pass the bp to dsl_dataset_block_freeable() since we + * are holding the db_mtx lock and might deadlock if we are + * prefetching a dedup-ed block. + */ + if (birth_txg) + return (ds == NULL || + dsl_dataset_block_freeable(ds, NULL, birth_txg)); + else + return (FALSE); +} + +void +dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) +{ + arc_buf_t *buf, *obuf; + int osize = db->db.db_size; + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + dnode_t *dn; + + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + /* XXX does *this* func really need the lock?
*/ + ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); + + /* + * This call to dbuf_will_dirty() with the dn_struct_rwlock held + * is OK, because there can be no other references to the db + * when we are changing its size, so no concurrent DB_FILL can + * be happening. + */ + /* + * XXX we should be doing a dbuf_read, checking the return + * value and returning that up to our callers + */ + dbuf_will_dirty(db, tx); + + /* create the data buffer for the new block */ + buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type); + + /* copy old block data to the new block */ + obuf = db->db_buf; + bcopy(obuf->b_data, buf->b_data, MIN(osize, size)); + /* zero the remainder */ + if (size > osize) + bzero((uint8_t *)buf->b_data + osize, size - osize); + + mutex_enter(&db->db_mtx); + dbuf_set_data(db, buf); + VERIFY(arc_buf_remove_ref(obuf, db) == 1); + db->db.db_size = size; + + if (db->db_level == 0) { + ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); + db->db_last_dirty->dt.dl.dr_data = buf; + } + mutex_exit(&db->db_mtx); + + dnode_willuse_space(dn, size-osize, tx); + DB_DNODE_EXIT(db); +} + +void +dbuf_release_bp(dmu_buf_impl_t *db) +{ + objset_t *os; + zbookmark_t zb; + + DB_GET_OBJSET(&os, db); + ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); + ASSERT(arc_released(os->os_phys_buf) || + list_link_active(&os->os_dsl_dataset->ds_synced_link)); + ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf)); + + zb.zb_objset = os->os_dsl_dataset ? 
+ os->os_dsl_dataset->ds_object : 0; + zb.zb_object = db->db.db_object; + zb.zb_level = db->db_level; + zb.zb_blkid = db->db_blkid; + (void) arc_release_bp(db->db_buf, db, + db->db_blkptr, os->os_spa, &zb); +} + +dbuf_dirty_record_t * +dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) +{ + dnode_t *dn; + objset_t *os; + dbuf_dirty_record_t **drp, *dr; + int drop_struct_lock = FALSE; + boolean_t do_free_accounting = B_FALSE; + int txgoff = tx->tx_txg & TXG_MASK; + + ASSERT(tx->tx_txg != 0); + ASSERT(!refcount_is_zero(&db->db_holds)); + DMU_TX_DIRTY_BUF(tx, db); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + /* + * Shouldn't dirty a regular buffer in syncing context. Private + * objects may be dirtied in syncing context, but only if they + * were already pre-dirtied in open context. + */ + ASSERT(!dmu_tx_is_syncing(tx) || + BP_IS_HOLE(dn->dn_objset->os_rootbp) || + DMU_OBJECT_IS_SPECIAL(dn->dn_object) || + dn->dn_objset->os_dsl_dataset == NULL); + /* + * We make this assert for private objects as well, but after we + * check if we're already dirty. They are allowed to re-dirty + * in syncing context. + */ + ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || + dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == + (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); + + mutex_enter(&db->db_mtx); + /* + * XXX make this true for indirects too? The problem is that + * transactions created with dmu_tx_create_assigned() from + * syncing context don't bother holding ahead. + */ + ASSERT(db->db_level != 0 || + db->db_state == DB_CACHED || db->db_state == DB_FILL || + db->db_state == DB_NOFILL); + + mutex_enter(&dn->dn_mtx); + /* + * Don't set dirtyctx to SYNC if we're just modifying this as we + * initialize the objset. + */ + if (dn->dn_dirtyctx == DN_UNDIRTIED && + !BP_IS_HOLE(dn->dn_objset->os_rootbp)) { + dn->dn_dirtyctx = + (dmu_tx_is_syncing(tx) ? 
DN_DIRTY_SYNC : DN_DIRTY_OPEN); + ASSERT(dn->dn_dirtyctx_firstset == NULL); + dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); + } + mutex_exit(&dn->dn_mtx); + + if (db->db_blkid == DMU_SPILL_BLKID) + dn->dn_have_spill = B_TRUE; + + /* + * If this buffer is already dirty, we're done. + */ + drp = &db->db_last_dirty; + ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg || + db->db.db_object == DMU_META_DNODE_OBJECT); + while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) + drp = &dr->dr_next; + if (dr && dr->dr_txg == tx->tx_txg) { + DB_DNODE_EXIT(db); + + if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) { + /* + * If this buffer has already been written out, + * we now need to reset its state. + */ + dbuf_unoverride(dr); + if (db->db.db_object != DMU_META_DNODE_OBJECT && + db->db_state != DB_NOFILL) + arc_buf_thaw(db->db_buf); + } + mutex_exit(&db->db_mtx); + return (dr); + } + + /* + * Only valid if not already dirty. + */ + ASSERT(dn->dn_object == 0 || + dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == + (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); + + ASSERT3U(dn->dn_nlevels, >, db->db_level); + ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) || + dn->dn_phys->dn_nlevels > db->db_level || + dn->dn_next_nlevels[txgoff] > db->db_level || + dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level || + dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level); + + /* + * We should only be dirtying in syncing context if it's the + * mos or we're initializing the os or it's a special object. + * However, we are allowed to dirty in syncing context provided + * we already dirtied it in open context. Hence we must make + * this assertion only if we're not already dirty. 
+ */ + os = dn->dn_objset; + ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || + os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); + ASSERT(db->db.db_size != 0); + + dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); + + if (db->db_blkid != DMU_BONUS_BLKID) { + /* + * Update the accounting. + * Note: we delay "free accounting" until after we drop + * the db_mtx. This keeps us from grabbing other locks + * (and possibly deadlocking) in bp_get_dsize() while + * also holding the db_mtx. + */ + dnode_willuse_space(dn, db->db.db_size, tx); + do_free_accounting = dbuf_block_freeable(db); + } + + /* + * If this buffer is dirty in an old transaction group we need + * to make a copy of it so that the changes we make in this + * transaction group won't leak out when we sync the older txg. + */ + dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); + if (db->db_level == 0) { + void *data_old = db->db_buf; + + if (db->db_state != DB_NOFILL) { + if (db->db_blkid == DMU_BONUS_BLKID) { + dbuf_fix_old_data(db, tx->tx_txg); + data_old = db->db.db_data; + } else if (db->db.db_object != DMU_META_DNODE_OBJECT) { + /* + * Release the data buffer from the cache so + * that we can modify it without impacting + * possible other users of this cached data + * block. Note that indirect blocks and + * private objects are not released until the + * syncing state (since they are only modified + * then). + */ + arc_release(db->db_buf, db); + dbuf_fix_old_data(db, tx->tx_txg); + data_old = db->db_buf; + } + ASSERT(data_old != NULL); + } + dr->dt.dl.dr_data = data_old; + } else { + mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL); + list_create(&dr->dt.di.dr_children, + sizeof (dbuf_dirty_record_t), + offsetof(dbuf_dirty_record_t, dr_dirty_node)); + } + dr->dr_dbuf = db; + dr->dr_txg = tx->tx_txg; + dr->dr_next = *drp; + *drp = dr; + + /* + * We could have been freed_in_flight between the dbuf_noread + * and dbuf_dirty. 
We win, as though the dbuf_noread() had + * happened after the free. + */ + if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && + db->db_blkid != DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, db->db_blkid, 1, tx); + mutex_exit(&dn->dn_mtx); + db->db_freed_in_flight = FALSE; + } + + /* + * This buffer is now part of this txg + */ + dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg); + db->db_dirtycnt += 1; + ASSERT3U(db->db_dirtycnt, <=, 3); + + mutex_exit(&db->db_mtx); + + if (db->db_blkid == DMU_BONUS_BLKID || + db->db_blkid == DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + ASSERT(!list_link_active(&dr->dr_dirty_node)); + list_insert_tail(&dn->dn_dirty_records[txgoff], dr); + mutex_exit(&dn->dn_mtx); + dnode_setdirty(dn, tx); + DB_DNODE_EXIT(db); + return (dr); + } else if (do_free_accounting) { + blkptr_t *bp = db->db_blkptr; + int64_t willfree = (bp && !BP_IS_HOLE(bp)) ? + bp_get_dsize(os->os_spa, bp) : db->db.db_size; + /* + * This is only a guess -- if the dbuf is dirty + * in a previous txg, we don't know how much + * space it will use on disk yet. We should + * really have the struct_rwlock to access + * db_blkptr, but since this is just a guess, + * it's OK if we get an odd answer. 
+ */ + ddt_prefetch(os->os_spa, bp); + dnode_willuse_space(dn, -willfree, tx); + } + + if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { + rw_enter(&dn->dn_struct_rwlock, RW_READER); + drop_struct_lock = TRUE; + } + + if (db->db_level == 0) { + dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); + ASSERT(dn->dn_maxblkid >= db->db_blkid); + } + + if (db->db_level+1 < dn->dn_nlevels) { + dmu_buf_impl_t *parent = db->db_parent; + dbuf_dirty_record_t *di; + int parent_held = FALSE; + + if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + + parent = dbuf_hold_level(dn, db->db_level+1, + db->db_blkid >> epbs, FTAG); + ASSERT(parent != NULL); + parent_held = TRUE; + } + if (drop_struct_lock) + rw_exit(&dn->dn_struct_rwlock); + ASSERT3U(db->db_level+1, ==, parent->db_level); + di = dbuf_dirty(parent, tx); + if (parent_held) + dbuf_rele(parent, FTAG); + + mutex_enter(&db->db_mtx); + /* possible race with dbuf_undirty() */ + if (db->db_last_dirty == dr || + dn->dn_object == DMU_META_DNODE_OBJECT) { + mutex_enter(&di->dt.di.dr_mtx); + ASSERT3U(di->dr_txg, ==, tx->tx_txg); + ASSERT(!list_link_active(&dr->dr_dirty_node)); + list_insert_tail(&di->dt.di.dr_children, dr); + mutex_exit(&di->dt.di.dr_mtx); + dr->dr_parent = di; + } + mutex_exit(&db->db_mtx); + } else { + ASSERT(db->db_level+1 == dn->dn_nlevels); + ASSERT(db->db_blkid < dn->dn_nblkptr); + ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf); + mutex_enter(&dn->dn_mtx); + ASSERT(!list_link_active(&dr->dr_dirty_node)); + list_insert_tail(&dn->dn_dirty_records[txgoff], dr); + mutex_exit(&dn->dn_mtx); + if (drop_struct_lock) + rw_exit(&dn->dn_struct_rwlock); + } + + dnode_setdirty(dn, tx); + DB_DNODE_EXIT(db); + return (dr); +} + +static int +dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) +{ + dnode_t *dn; + uint64_t txg = tx->tx_txg; + dbuf_dirty_record_t *dr, **drp; + + ASSERT(txg != 0); + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + + 
mutex_enter(&db->db_mtx); + /* + * If this buffer is not dirty, we're done. + */ + for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) + if (dr->dr_txg <= txg) + break; + if (dr == NULL || dr->dr_txg < txg) { + mutex_exit(&db->db_mtx); + return (0); + } + ASSERT(dr->dr_txg == txg); + ASSERT(dr->dr_dbuf == db); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + /* + * If this buffer is currently held, we cannot undirty + * it, since one of the current holders may be in the + * middle of an update. Note that users of dbuf_undirty() + * should not place a hold on the dbuf before the call. + */ + if (refcount_count(&db->db_holds) > db->db_dirtycnt) { + mutex_exit(&db->db_mtx); + /* Make sure we don't toss this buffer at sync phase */ + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, db->db_blkid, 1, tx); + mutex_exit(&dn->dn_mtx); + DB_DNODE_EXIT(db); + return (0); + } + + dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); + + ASSERT(db->db.db_size != 0); + + /* XXX would be nice to fix up dn_towrite_space[] */ + + *drp = dr->dr_next; + + if (dr->dr_parent) { + mutex_enter(&dr->dr_parent->dt.di.dr_mtx); + list_remove(&dr->dr_parent->dt.di.dr_children, dr); + mutex_exit(&dr->dr_parent->dt.di.dr_mtx); + } else if (db->db_level+1 == dn->dn_nlevels) { + ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); + mutex_enter(&dn->dn_mtx); + list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); + mutex_exit(&dn->dn_mtx); + } + DB_DNODE_EXIT(db); + + if (db->db_level == 0) { + if (db->db_state != DB_NOFILL) { + dbuf_unoverride(dr); + + ASSERT(db->db_buf != NULL); + ASSERT(dr->dt.dl.dr_data != NULL); + if (dr->dt.dl.dr_data != db->db_buf) + VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, + db) == 1); + } + } else { + ASSERT(db->db_buf != NULL); + ASSERT(list_head(&dr->dt.di.dr_children) == NULL); + mutex_destroy(&dr->dt.di.dr_mtx); + list_destroy(&dr->dt.di.dr_children); + } + kmem_free(dr, sizeof (dbuf_dirty_record_t)); + + 
ASSERT(db->db_dirtycnt > 0); + db->db_dirtycnt -= 1; + + if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { + arc_buf_t *buf = db->db_buf; + + ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); + dbuf_set_data(db, NULL); + VERIFY(arc_buf_remove_ref(buf, db) == 1); + dbuf_evict(db); + return (1); + } + + mutex_exit(&db->db_mtx); + return (0); +} + +#pragma weak dmu_buf_will_dirty = dbuf_will_dirty +void +dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) +{ + int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; + + ASSERT(tx->tx_txg != 0); + ASSERT(!refcount_is_zero(&db->db_holds)); + + DB_DNODE_ENTER(db); + if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) + rf |= DB_RF_HAVESTRUCT; + DB_DNODE_EXIT(db); + (void) dbuf_read(db, NULL, rf); + (void) dbuf_dirty(db, tx); +} + +void +dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + db->db_state = DB_NOFILL; + + dmu_buf_will_fill(db_fake, tx); +} + +void +dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT(tx->tx_txg != 0); + ASSERT(db->db_level == 0); + ASSERT(!refcount_is_zero(&db->db_holds)); + + ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT || + dmu_tx_private_ok(tx)); + + dbuf_noread(db); + (void) dbuf_dirty(db, tx); +} + +#pragma weak dmu_buf_fill_done = dbuf_fill_done +/* ARGSUSED */ +void +dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx) +{ + mutex_enter(&db->db_mtx); + DBUF_VERIFY(db); + + if (db->db_state == DB_FILL) { + if (db->db_level == 0 && db->db_freed_in_flight) { + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + /* we were freed while filling */ + /* XXX dbuf_undirty? 
*/ + bzero(db->db.db_data, db->db.db_size); + db->db_freed_in_flight = FALSE; + } + db->db_state = DB_CACHED; + cv_broadcast(&db->db_changed); + } + mutex_exit(&db->db_mtx); +} + +/* + * Directly assign a provided arc buf to a given dbuf if it's not referenced + * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf. + */ +void +dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) +{ + ASSERT(!refcount_is_zero(&db->db_holds)); + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT(db->db_level == 0); + ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA); + ASSERT(buf != NULL); + ASSERT(arc_buf_size(buf) == db->db.db_size); + ASSERT(tx->tx_txg != 0); + + arc_return_buf(buf, db); + ASSERT(arc_released(buf)); + + mutex_enter(&db->db_mtx); + + while (db->db_state == DB_READ || db->db_state == DB_FILL) + cv_wait(&db->db_changed, &db->db_mtx); + + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); + + if (db->db_state == DB_CACHED && + refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { + mutex_exit(&db->db_mtx); + (void) dbuf_dirty(db, tx); + bcopy(buf->b_data, db->db.db_data, db->db.db_size); + VERIFY(arc_buf_remove_ref(buf, db) == 1); + xuio_stat_wbuf_copied(); + return; + } + + xuio_stat_wbuf_nocopy(); + if (db->db_state == DB_CACHED) { + dbuf_dirty_record_t *dr = db->db_last_dirty; + + ASSERT(db->db_buf != NULL); + if (dr != NULL && dr->dr_txg == tx->tx_txg) { + ASSERT(dr->dt.dl.dr_data == db->db_buf); + if (!arc_released(db->db_buf)) { + ASSERT(dr->dt.dl.dr_override_state == + DR_OVERRIDDEN); + arc_release(db->db_buf, db); + } + dr->dt.dl.dr_data = buf; + VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { + arc_release(db->db_buf, db); + VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + } + db->db_buf = NULL; + } + ASSERT(db->db_buf == NULL); + dbuf_set_data(db, buf); + db->db_state = DB_FILL; + mutex_exit(&db->db_mtx); + (void) dbuf_dirty(db, tx); + 
dbuf_fill_done(db, tx); +} + +/* + * "Clear" the contents of this dbuf. This will mark the dbuf + * EVICTING and clear *most* of its references. Unfortunetely, + * when we are not holding the dn_dbufs_mtx, we can't clear the + * entry in the dn_dbufs list. We have to wait until dbuf_destroy() + * in this case. For callers from the DMU we will usually see: + * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy() + * For the arc callback, we will usually see: + * dbuf_do_evict()->dbuf_clear();dbuf_destroy() + * Sometimes, though, we will get a mix of these two: + * DMU: dbuf_clear()->arc_buf_evict() + * ARC: dbuf_do_evict()->dbuf_destroy() + */ +void +dbuf_clear(dmu_buf_impl_t *db) +{ + dnode_t *dn; + dmu_buf_impl_t *parent = db->db_parent; + dmu_buf_impl_t *dndb; + int dbuf_gone = FALSE; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(refcount_is_zero(&db->db_holds)); + + dbuf_evict_user(db); + + if (db->db_state == DB_CACHED) { + ASSERT(db->db.db_data != NULL); + if (db->db_blkid == DMU_BONUS_BLKID) { + zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); + arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); + } + db->db.db_data = NULL; + db->db_state = DB_UNCACHED; + } + + ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); + ASSERT(db->db_data_pending == NULL); + + db->db_state = DB_EVICTING; + db->db_blkptr = NULL; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + dndb = dn->dn_dbuf; + if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { + list_remove(&dn->dn_dbufs, db); + (void) atomic_dec_32_nv(&dn->dn_dbufs_count); + membar_producer(); + DB_DNODE_EXIT(db); + /* + * Decrementing the dbuf count means that the hold corresponding + * to the removed dbuf is no longer discounted in dnode_move(), + * so the dnode cannot be moved until after we release the hold. + * The membar_producer() ensures visibility of the decremented + * value in dnode_move(), since DB_DNODE_EXIT doesn't actually + * release any lock. 
+ */ + dnode_rele(dn, db); + db->db_dnode_handle = NULL; + } else { + DB_DNODE_EXIT(db); + } + + if (db->db_buf) + dbuf_gone = arc_buf_evict(db->db_buf); + + if (!dbuf_gone) + mutex_exit(&db->db_mtx); + + /* + * If this dbuf is referenced from an indirect dbuf, + * decrement the ref count on the indirect dbuf. + */ + if (parent && parent != dndb) + dbuf_rele(parent, db); +} + +static int +dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, + dmu_buf_impl_t **parentp, blkptr_t **bpp) +{ + int nlevels, epbs; + + *parentp = NULL; + *bpp = NULL; + + ASSERT(blkid != DMU_BONUS_BLKID); + + if (blkid == DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + if (dn->dn_have_spill && + (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) + *bpp = &dn->dn_phys->dn_spill; + else + *bpp = NULL; + dbuf_add_ref(dn->dn_dbuf, NULL); + *parentp = dn->dn_dbuf; + mutex_exit(&dn->dn_mtx); + return (0); + } + + if (dn->dn_phys->dn_nlevels == 0) + nlevels = 1; + else + nlevels = dn->dn_phys->dn_nlevels; + + epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + + ASSERT3U(level * epbs, <, 64); + ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + if (level >= nlevels || + (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { + /* the buffer has no parent yet */ + return (ENOENT); + } else if (level < nlevels-1) { + /* this block is referenced from an indirect block */ + int err = dbuf_hold_impl(dn, level+1, + blkid >> epbs, fail_sparse, NULL, parentp); + if (err) + return (err); + err = dbuf_read(*parentp, NULL, + (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL)); + if (err) { + dbuf_rele(*parentp, NULL); + *parentp = NULL; + return (err); + } + *bpp = ((blkptr_t *)(*parentp)->db.db_data) + + (blkid & ((1ULL << epbs) - 1)); + return (0); + } else { + /* the block is referenced from the dnode */ + ASSERT3U(level, ==, nlevels-1); + ASSERT(dn->dn_phys->dn_nblkptr == 0 || + blkid < dn->dn_phys->dn_nblkptr); + if (dn->dn_dbuf) { + dbuf_add_ref(dn->dn_dbuf, NULL); + *parentp = 
dn->dn_dbuf; + } + *bpp = &dn->dn_phys->dn_blkptr[blkid]; + return (0); + } +} + +static dmu_buf_impl_t * +dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, + dmu_buf_impl_t *parent, blkptr_t *blkptr) +{ + objset_t *os = dn->dn_objset; + dmu_buf_impl_t *db, *odb; + + ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + ASSERT(dn->dn_type != DMU_OT_NONE); + + db = kmem_cache_alloc(dbuf_cache, KM_SLEEP); + + db->db_objset = os; + db->db.db_object = dn->dn_object; + db->db_level = level; + db->db_blkid = blkid; + db->db_last_dirty = NULL; + db->db_dirtycnt = 0; + db->db_dnode_handle = dn->dn_handle; + db->db_parent = parent; + db->db_blkptr = blkptr; + + db->db_user_ptr = NULL; + db->db_user_data_ptr_ptr = NULL; + db->db_evict_func = NULL; + db->db_immediate_evict = 0; + db->db_freed_in_flight = 0; + + if (blkid == DMU_BONUS_BLKID) { + ASSERT3P(parent, ==, dn->dn_dbuf); + db->db.db_size = DN_MAX_BONUSLEN - + (dn->dn_nblkptr-1) * sizeof (blkptr_t); + ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); + db->db.db_offset = DMU_BONUS_BLKID; + db->db_state = DB_UNCACHED; + /* the bonus dbuf is not placed in the hash table */ + arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); + return (db); + } else if (blkid == DMU_SPILL_BLKID) { + db->db.db_size = (blkptr != NULL) ? + BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE; + db->db.db_offset = 0; + } else { + int blocksize = + db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz; + db->db.db_size = blocksize; + db->db.db_offset = db->db_blkid * blocksize; + } + + /* + * Hold the dn_dbufs_mtx while we get the new dbuf + * in the hash table *and* added to the dbufs list. + * This prevents a possible deadlock with someone + * trying to look up this dbuf before its added to the + * dn_dbufs list. 
+ */ + mutex_enter(&dn->dn_dbufs_mtx); + db->db_state = DB_EVICTING; + if ((odb = dbuf_hash_insert(db)) != NULL) { + /* someone else inserted it first */ + kmem_cache_free(dbuf_cache, db); + mutex_exit(&dn->dn_dbufs_mtx); + return (odb); + } + list_insert_head(&dn->dn_dbufs, db); + db->db_state = DB_UNCACHED; + mutex_exit(&dn->dn_dbufs_mtx); + arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); + + if (parent && parent != dn->dn_dbuf) + dbuf_add_ref(parent, db); + + ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || + refcount_count(&dn->dn_holds) > 0); + (void) refcount_add(&dn->dn_holds, db); + (void) atomic_inc_32_nv(&dn->dn_dbufs_count); + + dprintf_dbuf(db, "db=%p\n", db); + + return (db); +} + +static int +dbuf_do_evict(void *private) +{ + arc_buf_t *buf = private; + dmu_buf_impl_t *db = buf->b_private; + + if (!MUTEX_HELD(&db->db_mtx)) + mutex_enter(&db->db_mtx); + + ASSERT(refcount_is_zero(&db->db_holds)); + + if (db->db_state != DB_EVICTING) { + ASSERT(db->db_state == DB_CACHED); + DBUF_VERIFY(db); + db->db_buf = NULL; + dbuf_evict(db); + } else { + mutex_exit(&db->db_mtx); + dbuf_destroy(db); + } + return (0); +} + +static void +dbuf_destroy(dmu_buf_impl_t *db) +{ + ASSERT(refcount_is_zero(&db->db_holds)); + + if (db->db_blkid != DMU_BONUS_BLKID) { + /* + * If this dbuf is still on the dn_dbufs list, + * remove it from that list. + */ + if (db->db_dnode_handle != NULL) { + dnode_t *dn; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + mutex_enter(&dn->dn_dbufs_mtx); + list_remove(&dn->dn_dbufs, db); + (void) atomic_dec_32_nv(&dn->dn_dbufs_count); + mutex_exit(&dn->dn_dbufs_mtx); + DB_DNODE_EXIT(db); + /* + * Decrementing the dbuf count means that the hold + * corresponding to the removed dbuf is no longer + * discounted in dnode_move(), so the dnode cannot be + * moved until after we release the hold. 
+ */ + dnode_rele(dn, db); + db->db_dnode_handle = NULL; + } + dbuf_hash_remove(db); + } + db->db_parent = NULL; + db->db_buf = NULL; + + ASSERT(!list_link_active(&db->db_link)); + ASSERT(db->db.db_data == NULL); + ASSERT(db->db_hash_next == NULL); + ASSERT(db->db_blkptr == NULL); + ASSERT(db->db_data_pending == NULL); + + kmem_cache_free(dbuf_cache, db); + arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); +} + +void +dbuf_prefetch(dnode_t *dn, uint64_t blkid) +{ + dmu_buf_impl_t *db = NULL; + blkptr_t *bp = NULL; + + ASSERT(blkid != DMU_BONUS_BLKID); + ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + + if (dnode_block_freed(dn, blkid)) + return; + + /* dbuf_find() returns with db_mtx held */ + if (db = dbuf_find(dn, 0, blkid)) { + /* + * This dbuf is already in the cache. We assume that + * it is already CACHED, or else about to be either + * read or filled. + */ + mutex_exit(&db->db_mtx); + return; + } + + if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { + if (bp && !BP_IS_HOLE(bp)) { + int priority = dn->dn_type == DMU_OT_DDT_ZAP ? + ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ; + arc_buf_t *pbuf; + dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; + uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; + zbookmark_t zb; + + SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, + dn->dn_object, 0, blkid); + + if (db) + pbuf = db->db_buf; + else + pbuf = dn->dn_objset->os_phys_buf; + + (void) dsl_read(NULL, dn->dn_objset->os_spa, + bp, pbuf, NULL, NULL, priority, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &aflags, &zb); + } + if (db) + dbuf_rele(db, NULL); + } +} + +/* + * Returns with db_holds incremented, and db_mtx not held. + * Note: dn_struct_rwlock must be held. 
+ */ +int +dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, + void *tag, dmu_buf_impl_t **dbp) +{ + dmu_buf_impl_t *db, *parent = NULL; + + ASSERT(blkid != DMU_BONUS_BLKID); + ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + ASSERT3U(dn->dn_nlevels, >, level); + + *dbp = NULL; +top: + /* dbuf_find() returns with db_mtx held */ + db = dbuf_find(dn, level, blkid); + + if (db == NULL) { + blkptr_t *bp = NULL; + int err; + + ASSERT3P(parent, ==, NULL); + err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); + if (fail_sparse) { + if (err == 0 && bp && BP_IS_HOLE(bp)) + err = ENOENT; + if (err) { + if (parent) + dbuf_rele(parent, NULL); + return (err); + } + } + if (err && err != ENOENT) + return (err); + db = dbuf_create(dn, level, blkid, parent, bp); + } + + if (db->db_buf && refcount_is_zero(&db->db_holds)) { + arc_buf_add_ref(db->db_buf, db); + if (db->db_buf->b_data == NULL) { + dbuf_clear(db); + if (parent) { + dbuf_rele(parent, NULL); + parent = NULL; + } + goto top; + } + ASSERT3P(db->db.db_data, ==, db->db_buf->b_data); + } + + ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf)); + + /* + * If this buffer is currently syncing out, and we are are + * still referencing it from db_data, we need to make a copy + * of it in case we decide we want to dirty it again in this txg. 
+ */ + if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && + dn->dn_object != DMU_META_DNODE_OBJECT && + db->db_state == DB_CACHED && db->db_data_pending) { + dbuf_dirty_record_t *dr = db->db_data_pending; + + if (dr->dt.dl.dr_data == db->db_buf) { + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + + dbuf_set_data(db, + arc_buf_alloc(dn->dn_objset->os_spa, + db->db.db_size, db, type)); + bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data, + db->db.db_size); + } + } + + (void) refcount_add(&db->db_holds, tag); + dbuf_update_data(db); + DBUF_VERIFY(db); + mutex_exit(&db->db_mtx); + + /* NOTE: we can't rele the parent until after we drop the db_mtx */ + if (parent) + dbuf_rele(parent, NULL); + + ASSERT3P(DB_DNODE(db), ==, dn); + ASSERT3U(db->db_blkid, ==, blkid); + ASSERT3U(db->db_level, ==, level); + *dbp = db; + + return (0); +} + +dmu_buf_impl_t * +dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag) +{ + dmu_buf_impl_t *db; + int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db); + return (err ? NULL : db); +} + +dmu_buf_impl_t * +dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) +{ + dmu_buf_impl_t *db; + int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); + return (err ? 
NULL : db); +} + +void +dbuf_create_bonus(dnode_t *dn) +{ + ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); + + ASSERT(dn->dn_bonus == NULL); + dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL); +} + +int +dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dnode_t *dn; + + if (db->db_blkid != DMU_SPILL_BLKID) + return (ENOTSUP); + if (blksz == 0) + blksz = SPA_MINBLOCKSIZE; + if (blksz > SPA_MAXBLOCKSIZE) + blksz = SPA_MAXBLOCKSIZE; + else + blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dbuf_new_size(db, blksz, tx); + rw_exit(&dn->dn_struct_rwlock); + DB_DNODE_EXIT(db); + + return (0); +} + +void +dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx) +{ + dbuf_free_range(dn, DMU_SPILL_BLKID, DMU_SPILL_BLKID, tx); +} + +#pragma weak dmu_buf_add_ref = dbuf_add_ref +void +dbuf_add_ref(dmu_buf_impl_t *db, void *tag) +{ + int64_t holds = refcount_add(&db->db_holds, tag); + ASSERT(holds > 1); +} + +/* + * If you call dbuf_rele() you had better not be referencing the dnode handle + * unless you have some other direct or indirect hold on the dnode. (An indirect + * hold is a hold on one of the dnode's dbufs, including the bonus buffer.) + * Without that, the dbuf_rele() could lead to a dnode_rele() followed by the + * dnode's parent dbuf evicting its dnode handles. + */ +#pragma weak dmu_buf_rele = dbuf_rele +void +dbuf_rele(dmu_buf_impl_t *db, void *tag) +{ + mutex_enter(&db->db_mtx); + dbuf_rele_and_unlock(db, tag); +} + +/* + * dbuf_rele() for an already-locked dbuf. This is necessary to allow + * db_dirtycnt and db_holds to be updated atomically. 
+ */ +void +dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) +{ + int64_t holds; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + DBUF_VERIFY(db); + + /* + * Remove the reference to the dbuf before removing its hold on the + * dnode so we can guarantee in dnode_move() that a referenced bonus + * buffer has a corresponding dnode hold. + */ + holds = refcount_remove(&db->db_holds, tag); + ASSERT(holds >= 0); + + /* + * We can't freeze indirects if there is a possibility that they + * may be modified in the current syncing context. + */ + if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0)) + arc_buf_freeze(db->db_buf); + + if (holds == db->db_dirtycnt && + db->db_level == 0 && db->db_immediate_evict) + dbuf_evict_user(db); + + if (holds == 0) { + if (db->db_blkid == DMU_BONUS_BLKID) { + mutex_exit(&db->db_mtx); + + /* + * If the dnode moves here, we cannot cross this barrier + * until the move completes. + */ + DB_DNODE_ENTER(db); + (void) atomic_dec_32_nv(&DB_DNODE(db)->dn_dbufs_count); + DB_DNODE_EXIT(db); + /* + * The bonus buffer's dnode hold is no longer discounted + * in dnode_move(). The dnode cannot move until after + * the dnode_rele(). + */ + dnode_rele(DB_DNODE(db), db); + } else if (db->db_buf == NULL) { + /* + * This is a special case: we never associated this + * dbuf with any data allocated from the ARC. + */ + ASSERT(db->db_state == DB_UNCACHED || + db->db_state == DB_NOFILL); + dbuf_evict(db); + } else if (arc_released(db->db_buf)) { + arc_buf_t *buf = db->db_buf; + /* + * This dbuf has anonymous data associated with it. 
+ */ + dbuf_set_data(db, NULL); + VERIFY(arc_buf_remove_ref(buf, db) == 1); + dbuf_evict(db); + } else { + VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); + if (!DBUF_IS_CACHEABLE(db)) + dbuf_clear(db); + else + mutex_exit(&db->db_mtx); + } + } else { + mutex_exit(&db->db_mtx); + } +} + +#pragma weak dmu_buf_refcount = dbuf_refcount +uint64_t +dbuf_refcount(dmu_buf_impl_t *db) +{ + return (refcount_count(&db->db_holds)); +} + +void * +dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, + dmu_buf_evict_func_t *evict_func) +{ + return (dmu_buf_update_user(db_fake, NULL, user_ptr, + user_data_ptr_ptr, evict_func)); +} + +void * +dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, + dmu_buf_evict_func_t *evict_func) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + db->db_immediate_evict = TRUE; + return (dmu_buf_update_user(db_fake, NULL, user_ptr, + user_data_ptr_ptr, evict_func)); +} + +void * +dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, + void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + ASSERT(db->db_level == 0); + + ASSERT((user_ptr == NULL) == (evict_func == NULL)); + + mutex_enter(&db->db_mtx); + + if (db->db_user_ptr == old_user_ptr) { + db->db_user_ptr = user_ptr; + db->db_user_data_ptr_ptr = user_data_ptr_ptr; + db->db_evict_func = evict_func; + + dbuf_update_data(db); + } else { + old_user_ptr = db->db_user_ptr; + } + + mutex_exit(&db->db_mtx); + return (old_user_ptr); +} + +void * +dmu_buf_get_user(dmu_buf_t *db_fake) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + ASSERT(!refcount_is_zero(&db->db_holds)); + + return (db->db_user_ptr); +} + +boolean_t +dmu_buf_freeable(dmu_buf_t *dbuf) +{ + boolean_t res = B_FALSE; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; + + if (db->db_blkptr) + res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset, + db->db_blkptr, db->db_blkptr->blk_birth); + 
+ return (res); +} + +static void +dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) +{ + /* ASSERT(dmu_tx_is_syncing(tx) */ + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (db->db_blkptr != NULL) + return; + + if (db->db_blkid == DMU_SPILL_BLKID) { + db->db_blkptr = &dn->dn_phys->dn_spill; + BP_ZERO(db->db_blkptr); + return; + } + if (db->db_level == dn->dn_phys->dn_nlevels-1) { + /* + * This buffer was allocated at a time when there was + * no available blkptrs from the dnode, or it was + * inappropriate to hook it in (i.e., nlevels mis-match). + */ + ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); + ASSERT(db->db_parent == NULL); + db->db_parent = dn->dn_dbuf; + db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid]; + DBUF_VERIFY(db); + } else { + dmu_buf_impl_t *parent = db->db_parent; + int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; + + ASSERT(dn->dn_phys->dn_nlevels > 1); + if (parent == NULL) { + mutex_exit(&db->db_mtx); + rw_enter(&dn->dn_struct_rwlock, RW_READER); + (void) dbuf_hold_impl(dn, db->db_level+1, + db->db_blkid >> epbs, FALSE, db, &parent); + rw_exit(&dn->dn_struct_rwlock); + mutex_enter(&db->db_mtx); + db->db_parent = parent; + } + db->db_blkptr = (blkptr_t *)parent->db.db_data + + (db->db_blkid & ((1ULL << epbs) - 1)); + DBUF_VERIFY(db); + } +} + +static void +dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = dr->dr_dbuf; + dnode_t *dn; + zio_t *zio; + + ASSERT(dmu_tx_is_syncing(tx)); + + dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); + + mutex_enter(&db->db_mtx); + + ASSERT(db->db_level > 0); + DBUF_VERIFY(db); + + if (db->db_buf == NULL) { + mutex_exit(&db->db_mtx); + (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); + mutex_enter(&db->db_mtx); + } + ASSERT3U(db->db_state, ==, DB_CACHED); + ASSERT(db->db_buf != NULL); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); + dbuf_check_blkptr(dn, db); + DB_DNODE_EXIT(db); + + 
db->db_data_pending = dr; + + mutex_exit(&db->db_mtx); + dbuf_write(dr, db->db_buf, tx); + + zio = dr->dr_zio; + mutex_enter(&dr->dt.di.dr_mtx); + dbuf_sync_list(&dr->dt.di.dr_children, tx); + ASSERT(list_head(&dr->dt.di.dr_children) == NULL); + mutex_exit(&dr->dt.di.dr_mtx); + zio_nowait(zio); +} + +static void +dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) +{ + arc_buf_t **datap = &dr->dt.dl.dr_data; + dmu_buf_impl_t *db = dr->dr_dbuf; + dnode_t *dn; + objset_t *os; + uint64_t txg = tx->tx_txg; + + ASSERT(dmu_tx_is_syncing(tx)); + + dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); + + mutex_enter(&db->db_mtx); + /* + * To be synced, we must be dirtied. But we + * might have been freed after the dirty. + */ + if (db->db_state == DB_UNCACHED) { + /* This buffer has been freed since it was dirtied */ + ASSERT(db->db.db_data == NULL); + } else if (db->db_state == DB_FILL) { + /* This buffer was freed and is now being re-filled */ + ASSERT(db->db.db_data != dr->dt.dl.dr_data); + } else { + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); + } + DBUF_VERIFY(db); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + if (db->db_blkid == DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + dn->dn_phys->dn_flags |= DNODE_FLAG_SPILL_BLKPTR; + mutex_exit(&dn->dn_mtx); + } + + /* + * If this is a bonus buffer, simply copy the bonus data into the + * dnode. It will be written out when the dnode is synced (and it + * will be synced, since it must have been dirty for dbuf_sync to + * be called). 
+ */ + if (db->db_blkid == DMU_BONUS_BLKID) { + dbuf_dirty_record_t **drp; + + ASSERT(*datap != NULL); + ASSERT3U(db->db_level, ==, 0); + ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); + bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); + DB_DNODE_EXIT(db); + + if (*datap != db->db.db_data) { + zio_buf_free(*datap, DN_MAX_BONUSLEN); + arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); + } + db->db_data_pending = NULL; + drp = &db->db_last_dirty; + while (*drp != dr) + drp = &(*drp)->dr_next; + ASSERT(dr->dr_next == NULL); + ASSERT(dr->dr_dbuf == db); + *drp = dr->dr_next; + kmem_free(dr, sizeof (dbuf_dirty_record_t)); + ASSERT(db->db_dirtycnt > 0); + db->db_dirtycnt -= 1; + dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); + return; + } + + os = dn->dn_objset; + + /* + * This function may have dropped the db_mtx lock allowing a dmu_sync + * operation to sneak in. As a result, we need to ensure that we + * don't check the dr_override_state until we have returned from + * dbuf_check_blkptr. + */ + dbuf_check_blkptr(dn, db); + + /* + * If this buffer is in the middle of an immediate write, + * wait for the synchronous IO to complete. + */ + while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { + ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); + cv_wait(&db->db_changed, &db->db_mtx); + ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); + } + + if (db->db_state != DB_NOFILL && + dn->dn_object != DMU_META_DNODE_OBJECT && + refcount_count(&db->db_holds) > 1 && + dr->dt.dl.dr_override_state != DR_OVERRIDDEN && + *datap == db->db_buf) { + /* + * If this buffer is currently "in use" (i.e., there + * are active holds and db_data still references it), + * then make a copy before we start the write so that + * any modifications from the open txg will not leak + * into this write. + * + * NOTE: this copy does not need to be made for + * objects only modified in the syncing context (e.g. + * DNONE_DNODE blocks). 
+ */ + int blksz = arc_buf_size(*datap); + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + *datap = arc_buf_alloc(os->os_spa, blksz, db, type); + bcopy(db->db.db_data, (*datap)->b_data, blksz); + } + db->db_data_pending = dr; + + mutex_exit(&db->db_mtx); + + dbuf_write(dr, *datap, tx); + + ASSERT(!list_link_active(&dr->dr_dirty_node)); + if (dn->dn_object == DMU_META_DNODE_OBJECT) { + list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr); + DB_DNODE_EXIT(db); + } else { + /* + * Although zio_nowait() does not "wait for an IO", it does + * initiate the IO. If this is an empty write it seems plausible + * that the IO could actually be completed before the nowait + * returns. We need to DB_DNODE_EXIT() first in case + * zio_nowait() invalidates the dbuf. + */ + DB_DNODE_EXIT(db); + zio_nowait(dr->dr_zio); + } +} + +void +dbuf_sync_list(list_t *list, dmu_tx_t *tx) +{ + dbuf_dirty_record_t *dr; + + while (dr = list_head(list)) { + if (dr->dr_zio != NULL) { + /* + * If we find an already initialized zio then we + * are processing the meta-dnode, and we have finished. + * The dbufs for all dnodes are put back on the list + * during processing, so that we can zio_wait() + * these IOs after initiating all child IOs. 
+ */ + ASSERT3U(dr->dr_dbuf->db.db_object, ==, + DMU_META_DNODE_OBJECT); + break; + } + list_remove(list, dr); + if (dr->dr_dbuf->db_level > 0) + dbuf_sync_indirect(dr, tx); + else + dbuf_sync_leaf(dr, tx); + } +} + +/* ARGSUSED */ +static void +dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) +{ + dmu_buf_impl_t *db = vdb; + dnode_t *dn; + blkptr_t *bp = zio->io_bp; + blkptr_t *bp_orig = &zio->io_bp_orig; + spa_t *spa = zio->io_spa; + int64_t delta; + uint64_t fill = 0; + int i; + + ASSERT(db->db_blkptr == bp); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + delta = bp_get_dsize_sync(spa, bp) - bp_get_dsize_sync(spa, bp_orig); + dnode_diduse_space(dn, delta - zio->io_prev_space_delta); + zio->io_prev_space_delta = delta; + + if (BP_IS_HOLE(bp)) { + ASSERT(bp->blk_fill == 0); + DB_DNODE_EXIT(db); + return; + } + + ASSERT((db->db_blkid != DMU_SPILL_BLKID && + BP_GET_TYPE(bp) == dn->dn_type) || + (db->db_blkid == DMU_SPILL_BLKID && + BP_GET_TYPE(bp) == dn->dn_bonustype)); + ASSERT(BP_GET_LEVEL(bp) == db->db_level); + + mutex_enter(&db->db_mtx); + +#ifdef ZFS_DEBUG + if (db->db_blkid == DMU_SPILL_BLKID) { + ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR); + ASSERT(!(BP_IS_HOLE(db->db_blkptr)) && + db->db_blkptr == &dn->dn_phys->dn_spill); + } +#endif + + if (db->db_level == 0) { + mutex_enter(&dn->dn_mtx); + if (db->db_blkid > dn->dn_phys->dn_maxblkid && + db->db_blkid != DMU_SPILL_BLKID) + dn->dn_phys->dn_maxblkid = db->db_blkid; + mutex_exit(&dn->dn_mtx); + + if (dn->dn_type == DMU_OT_DNODE) { + dnode_phys_t *dnp = db->db.db_data; + for (i = db->db.db_size >> DNODE_SHIFT; i > 0; + i--, dnp++) { + if (dnp->dn_type != DMU_OT_NONE) + fill++; + } + } else { + fill = 1; + } + } else { + blkptr_t *ibp = db->db.db_data; + ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); + for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) { + if (BP_IS_HOLE(ibp)) + continue; + fill += ibp->blk_fill; + } + } + DB_DNODE_EXIT(db); + + bp->blk_fill = 
fill; + + mutex_exit(&db->db_mtx); +} + +/* ARGSUSED */ +static void +dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) +{ + dmu_buf_impl_t *db = vdb; + blkptr_t *bp = zio->io_bp; + blkptr_t *bp_orig = &zio->io_bp_orig; + uint64_t txg = zio->io_txg; + dbuf_dirty_record_t **drp, *dr; + + ASSERT3U(zio->io_error, ==, 0); + ASSERT(db->db_blkptr == bp); + + if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { + ASSERT(BP_EQUAL(bp, bp_orig)); + } else { + objset_t *os; + dsl_dataset_t *ds; + dmu_tx_t *tx; + + DB_GET_OBJSET(&os, db); + ds = os->os_dsl_dataset; + tx = os->os_synctx; + + (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); + dsl_dataset_block_born(ds, bp, tx); + } + + mutex_enter(&db->db_mtx); + + DBUF_VERIFY(db); + + drp = &db->db_last_dirty; + while ((dr = *drp) != db->db_data_pending) + drp = &dr->dr_next; + ASSERT(!list_link_active(&dr->dr_dirty_node)); + ASSERT(dr->dr_txg == txg); + ASSERT(dr->dr_dbuf == db); + ASSERT(dr->dr_next == NULL); + *drp = dr->dr_next; + +#ifdef ZFS_DEBUG + if (db->db_blkid == DMU_SPILL_BLKID) { + dnode_t *dn; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR); + ASSERT(!(BP_IS_HOLE(db->db_blkptr)) && + db->db_blkptr == &dn->dn_phys->dn_spill); + DB_DNODE_EXIT(db); + } +#endif + + if (db->db_level == 0) { + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); + if (db->db_state != DB_NOFILL) { + if (dr->dt.dl.dr_data != db->db_buf) + VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, + db) == 1); + else if (!arc_released(db->db_buf)) + arc_set_callback(db->db_buf, dbuf_do_evict, db); + } + } else { + dnode_t *dn; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + ASSERT(list_head(&dr->dt.di.dr_children) == NULL); + ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); + if (!BP_IS_HOLE(db->db_blkptr)) { + int epbs = + dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; + ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, + db->db.db_size); + 
ASSERT3U(dn->dn_phys->dn_maxblkid + >> (db->db_level * epbs), >=, db->db_blkid); + arc_set_callback(db->db_buf, dbuf_do_evict, db); + } + DB_DNODE_EXIT(db); + mutex_destroy(&dr->dt.di.dr_mtx); + list_destroy(&dr->dt.di.dr_children); + } + kmem_free(dr, sizeof (dbuf_dirty_record_t)); + + cv_broadcast(&db->db_changed); + ASSERT(db->db_dirtycnt > 0); + db->db_dirtycnt -= 1; + db->db_data_pending = NULL; + dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); +} + +static void +dbuf_write_nofill_ready(zio_t *zio) +{ + dbuf_write_ready(zio, NULL, zio->io_private); +} + +static void +dbuf_write_nofill_done(zio_t *zio) +{ + dbuf_write_done(zio, NULL, zio->io_private); +} + +static void +dbuf_write_override_ready(zio_t *zio) +{ + dbuf_dirty_record_t *dr = zio->io_private; + dmu_buf_impl_t *db = dr->dr_dbuf; + + dbuf_write_ready(zio, NULL, db); +} + +static void +dbuf_write_override_done(zio_t *zio) +{ + dbuf_dirty_record_t *dr = zio->io_private; + dmu_buf_impl_t *db = dr->dr_dbuf; + blkptr_t *obp = &dr->dt.dl.dr_overridden_by; + + mutex_enter(&db->db_mtx); + if (!BP_EQUAL(zio->io_bp, obp)) { + if (!BP_IS_HOLE(obp)) + dsl_free(spa_get_dsl(zio->io_spa), zio->io_txg, obp); + arc_release(dr->dt.dl.dr_data, db); + } + mutex_exit(&db->db_mtx); + + dbuf_write_done(zio, NULL, db); +} + +static void +dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = dr->dr_dbuf; + dnode_t *dn; + objset_t *os; + dmu_buf_impl_t *parent = db->db_parent; + uint64_t txg = tx->tx_txg; + zbookmark_t zb; + zio_prop_t zp; + zio_t *zio; + int wp_flag = 0; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + os = dn->dn_objset; + + if (db->db_state != DB_NOFILL) { + if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) { + /* + * Private object buffers are released here rather + * than in dbuf_dirty() since they are only modified + * in the syncing context and we don't want the + * overhead of making multiple copies of the data. 
+ */ + if (BP_IS_HOLE(db->db_blkptr)) { + arc_buf_thaw(data); + } else { + dbuf_release_bp(db); + } + } + } + + if (parent != dn->dn_dbuf) { + ASSERT(parent && parent->db_data_pending); + ASSERT(db->db_level == parent->db_level-1); + ASSERT(arc_released(parent->db_buf)); + zio = parent->db_data_pending->dr_zio; + } else { + ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 && + db->db_blkid != DMU_SPILL_BLKID) || + (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0)); + if (db->db_blkid != DMU_SPILL_BLKID) + ASSERT3P(db->db_blkptr, ==, + &dn->dn_phys->dn_blkptr[db->db_blkid]); + zio = dn->dn_zio; + } + + ASSERT(db->db_level == 0 || data == db->db_buf); + ASSERT3U(db->db_blkptr->blk_birth, <=, txg); + ASSERT(zio); + + SET_BOOKMARK(&zb, os->os_dsl_dataset ? + os->os_dsl_dataset->ds_object : DMU_META_OBJSET, + db->db.db_object, db->db_level, db->db_blkid); + + if (db->db_blkid == DMU_SPILL_BLKID) + wp_flag = WP_SPILL; + wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0; + + dmu_write_policy(os, dn, db->db_level, wp_flag, &zp); + DB_DNODE_EXIT(db); + + if (db->db_level == 0 && dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { + ASSERT(db->db_state != DB_NOFILL); + dr->dr_zio = zio_write(zio, os->os_spa, txg, + db->db_blkptr, data->b_data, arc_buf_size(data), &zp, + dbuf_write_override_ready, dbuf_write_override_done, dr, + ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); + mutex_enter(&db->db_mtx); + dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; + zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by, + dr->dt.dl.dr_copies); + mutex_exit(&db->db_mtx); + } else if (db->db_state == DB_NOFILL) { + ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF); + dr->dr_zio = zio_write(zio, os->os_spa, txg, + db->db_blkptr, NULL, db->db.db_size, &zp, + dbuf_write_nofill_ready, dbuf_write_nofill_done, db, + ZIO_PRIORITY_ASYNC_WRITE, + ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb); + } else { + ASSERT(arc_released(data)); + dr->dr_zio = arc_write(zio, os->os_spa, txg, + 
db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db), &zp, + dbuf_write_ready, dbuf_write_done, db, + ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); + } +} diff --git a/uts/common/fs/zfs/ddt.c b/uts/common/fs/zfs/ddt.c new file mode 100644 index 000000000000..718331496765 --- /dev/null +++ b/uts/common/fs/zfs/ddt.c @@ -0,0 +1,1146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/zio.h> +#include <sys/ddt.h> +#include <sys/zap.h> +#include <sys/dmu_tx.h> +#include <sys/arc.h> +#include <sys/dsl_pool.h> +#include <sys/zio_checksum.h> +#include <sys/zio_compress.h> +#include <sys/dsl_scan.h> + +/* + * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 
+ */ +int zfs_dedup_prefetch = 1; + +static const ddt_ops_t *ddt_ops[DDT_TYPES] = { + &ddt_zap_ops, +}; + +static const char *ddt_class_name[DDT_CLASSES] = { + "ditto", + "duplicate", + "unique", +}; + +static void +ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + dmu_tx_t *tx) +{ + spa_t *spa = ddt->ddt_spa; + objset_t *os = ddt->ddt_os; + uint64_t *objectp = &ddt->ddt_object[type][class]; + boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup; + char name[DDT_NAMELEN]; + + ddt_object_name(ddt, type, class, name); + + ASSERT(*objectp == 0); + VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); + ASSERT(*objectp != 0); + + VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, + sizeof (uint64_t), 1, objectp, tx) == 0); + + VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, + sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), + &ddt->ddt_histogram[type][class], tx) == 0); +} + +static void +ddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + dmu_tx_t *tx) +{ + spa_t *spa = ddt->ddt_spa; + objset_t *os = ddt->ddt_os; + uint64_t *objectp = &ddt->ddt_object[type][class]; + char name[DDT_NAMELEN]; + + ddt_object_name(ddt, type, class, name); + + ASSERT(*objectp != 0); + ASSERT(ddt_object_count(ddt, type, class) == 0); + ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); + VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); + VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); + VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); + bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); + + *objectp = 0; +} + +static int +ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) +{ + ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; + dmu_object_info_t doi; + char name[DDT_NAMELEN]; + int error; + + ddt_object_name(ddt, type, class, name); + + error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, 
name, + sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); + + if (error) + return (error); + + error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, + sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), + &ddt->ddt_histogram[type][class]); + + /* + * Seed the cached statistics. + */ + VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); + + ddo->ddo_count = ddt_object_count(ddt, type, class); + ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; + ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; + + ASSERT(error == 0); + return (error); +} + +static void +ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + dmu_tx_t *tx) +{ + ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; + dmu_object_info_t doi; + char name[DDT_NAMELEN]; + + ddt_object_name(ddt, type, class, name); + + VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, + sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), + &ddt->ddt_histogram[type][class], tx) == 0); + + /* + * Cache DDT statistics; this is the only time they'll change. 
+ */ + VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); + + ddo->ddo_count = ddt_object_count(ddt, type, class); + ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; + ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; +} + +static int +ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + ddt_entry_t *dde) +{ + if (!ddt_object_exists(ddt, type, class)) + return (ENOENT); + + return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, + ddt->ddt_object[type][class], dde)); +} + +static void +ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + ddt_entry_t *dde) +{ + if (!ddt_object_exists(ddt, type, class)) + return; + + ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, + ddt->ddt_object[type][class], dde); +} + +int +ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + ddt_entry_t *dde, dmu_tx_t *tx) +{ + ASSERT(ddt_object_exists(ddt, type, class)); + + return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, + ddt->ddt_object[type][class], dde, tx)); +} + +static int +ddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + ddt_entry_t *dde, dmu_tx_t *tx) +{ + ASSERT(ddt_object_exists(ddt, type, class)); + + return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, + ddt->ddt_object[type][class], dde, tx)); +} + +int +ddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + uint64_t *walk, ddt_entry_t *dde) +{ + ASSERT(ddt_object_exists(ddt, type, class)); + + return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, + ddt->ddt_object[type][class], dde, walk)); +} + +uint64_t +ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class) +{ + ASSERT(ddt_object_exists(ddt, type, class)); + + return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, + ddt->ddt_object[type][class])); +} + +int +ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + dmu_object_info_t *doi) +{ + if (!ddt_object_exists(ddt, type, class)) + return (ENOENT); + + return (dmu_object_info(ddt->ddt_os, 
ddt->ddt_object[type][class], + doi)); +} + +boolean_t +ddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) +{ + return (!!ddt->ddt_object[type][class]); +} + +void +ddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + char *name) +{ + (void) sprintf(name, DMU_POOL_DDT, + zio_checksum_table[ddt->ddt_checksum].ci_name, + ddt_ops[type]->ddt_op_name, ddt_class_name[class]); +} + +void +ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) +{ + ASSERT(txg != 0); + + for (int d = 0; d < SPA_DVAS_PER_BP; d++) + bp->blk_dva[d] = ddp->ddp_dva[d]; + BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); +} + +void +ddt_bp_create(enum zio_checksum checksum, + const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) +{ + BP_ZERO(bp); + + if (ddp != NULL) + ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); + + bp->blk_cksum = ddk->ddk_cksum; + bp->blk_fill = 1; + + BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); + BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); + BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); + BP_SET_CHECKSUM(bp, checksum); + BP_SET_TYPE(bp, DMU_OT_DEDUP); + BP_SET_LEVEL(bp, 0); + BP_SET_DEDUP(bp, 0); + BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); +} + +void +ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) +{ + ddk->ddk_cksum = bp->blk_cksum; + ddk->ddk_prop = 0; + + DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); + DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); + DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); +} + +void +ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) +{ + ASSERT(ddp->ddp_phys_birth == 0); + + for (int d = 0; d < SPA_DVAS_PER_BP; d++) + ddp->ddp_dva[d] = bp->blk_dva[d]; + ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); +} + +void +ddt_phys_clear(ddt_phys_t *ddp) +{ + bzero(ddp, sizeof (*ddp)); +} + +void +ddt_phys_addref(ddt_phys_t *ddp) +{ + ddp->ddp_refcnt++; +} + +void +ddt_phys_decref(ddt_phys_t *ddp) +{ + ASSERT((int64_t)ddp->ddp_refcnt > 0); + ddp->ddp_refcnt--; +} + +void +ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) +{ + 
blkptr_t blk; + + ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); + ddt_phys_clear(ddp); + zio_free(ddt->ddt_spa, txg, &blk); +} + +ddt_phys_t * +ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) +{ + ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; + + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && + BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) + return (ddp); + } + return (NULL); +} + +uint64_t +ddt_phys_total_refcnt(const ddt_entry_t *dde) +{ + uint64_t refcnt = 0; + + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) + refcnt += dde->dde_phys[p].ddp_refcnt; + + return (refcnt); +} + +static void +ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) +{ + spa_t *spa = ddt->ddt_spa; + ddt_phys_t *ddp = dde->dde_phys; + ddt_key_t *ddk = &dde->dde_key; + uint64_t lsize = DDK_GET_LSIZE(ddk); + uint64_t psize = DDK_GET_PSIZE(ddk); + + bzero(dds, sizeof (*dds)); + + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + uint64_t dsize = 0; + uint64_t refcnt = ddp->ddp_refcnt; + + if (ddp->ddp_phys_birth == 0) + continue; + + for (int d = 0; d < SPA_DVAS_PER_BP; d++) + dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); + + dds->dds_blocks += 1; + dds->dds_lsize += lsize; + dds->dds_psize += psize; + dds->dds_dsize += dsize; + + dds->dds_ref_blocks += refcnt; + dds->dds_ref_lsize += lsize * refcnt; + dds->dds_ref_psize += psize * refcnt; + dds->dds_ref_dsize += dsize * refcnt; + } +} + +void +ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) +{ + const uint64_t *s = (const uint64_t *)src; + uint64_t *d = (uint64_t *)dst; + uint64_t *d_end = (uint64_t *)(dst + 1); + + ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ + + while (d < d_end) + *d++ += (*s++ ^ neg) - neg; +} + +static void +ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) +{ + ddt_stat_t dds; + ddt_histogram_t *ddh; + int bucket; + + ddt_stat_generate(ddt, dde, &dds); + + bucket = 
highbit(dds.dds_ref_blocks) - 1; + ASSERT(bucket >= 0); + + ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; + + ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); +} + +void +ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) +{ + for (int h = 0; h < 64; h++) + ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); +} + +void +ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) +{ + bzero(dds, sizeof (*dds)); + + for (int h = 0; h < 64; h++) + ddt_stat_add(dds, &ddh->ddh_stat[h], 0); +} + +boolean_t +ddt_histogram_empty(const ddt_histogram_t *ddh) +{ + const uint64_t *s = (const uint64_t *)ddh; + const uint64_t *s_end = (const uint64_t *)(ddh + 1); + + while (s < s_end) + if (*s++ != 0) + return (B_FALSE); + + return (B_TRUE); +} + +void +ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) +{ + /* Sum the statistics we cached in ddt_object_sync(). */ + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; + class++) { + ddt_object_t *ddo = + &ddt->ddt_object_stats[type][class]; + ddo_total->ddo_count += ddo->ddo_count; + ddo_total->ddo_dspace += ddo->ddo_dspace; + ddo_total->ddo_mspace += ddo->ddo_mspace; + } + } + } + + /* ... and compute the averages. 
*/ + if (ddo_total->ddo_count != 0) { + ddo_total->ddo_dspace /= ddo_total->ddo_count; + ddo_total->ddo_mspace /= ddo_total->ddo_count; + } +} + +void +ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) +{ + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; + class++) { + ddt_histogram_add(ddh, + &ddt->ddt_histogram_cache[type][class]); + } + } + } +} + +void +ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) +{ + ddt_histogram_t *ddh_total; + + ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); + ddt_get_dedup_histogram(spa, ddh_total); + ddt_histogram_stat(dds_total, ddh_total); + kmem_free(ddh_total, sizeof (ddt_histogram_t)); +} + +uint64_t +ddt_get_dedup_dspace(spa_t *spa) +{ + ddt_stat_t dds_total = { 0 }; + + ddt_get_dedup_stats(spa, &dds_total); + return (dds_total.dds_ref_dsize - dds_total.dds_dsize); +} + +uint64_t +ddt_get_pool_dedup_ratio(spa_t *spa) +{ + ddt_stat_t dds_total = { 0 }; + + ddt_get_dedup_stats(spa, &dds_total); + if (dds_total.dds_dsize == 0) + return (100); + + return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); +} + +int +ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) +{ + spa_t *spa = ddt->ddt_spa; + uint64_t total_refcnt = 0; + uint64_t ditto = spa->spa_dedup_ditto; + int total_copies = 0; + int desired_copies = 0; + + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; + zio_t *zio = dde->dde_lead_zio[p]; + uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ + if (zio != NULL) + refcnt += zio->io_parent_count; /* pending refs */ + if (ddp == ddp_willref) + refcnt++; /* caller's ref */ + if (refcnt != 0) { + total_refcnt += refcnt; + total_copies += p; + } + } + + if (ditto == 0 || ditto > UINT32_MAX) + ditto = UINT32_MAX; + + if (total_refcnt >= 1) + 
desired_copies++; + if (total_refcnt >= ditto) + desired_copies++; + if (total_refcnt >= ditto * ditto) + desired_copies++; + + return (MAX(desired_copies, total_copies) - total_copies); +} + +int +ddt_ditto_copies_present(ddt_entry_t *dde) +{ + ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; + dva_t *dva = ddp->ddp_dva; + int copies = 0 - DVA_GET_GANG(dva); + + for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) + if (DVA_IS_VALID(dva)) + copies++; + + ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); + + return (copies); +} + +size_t +ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) +{ + uchar_t *version = dst++; + int cpfunc = ZIO_COMPRESS_ZLE; + zio_compress_info_t *ci = &zio_compress_table[cpfunc]; + size_t c_len; + + ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ + + c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); + + if (c_len == s_len) { + cpfunc = ZIO_COMPRESS_OFF; + bcopy(src, dst, s_len); + } + + *version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc; + + return (c_len + 1); +} + +void +ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) +{ + uchar_t version = *src++; + int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; + zio_compress_info_t *ci = &zio_compress_table[cpfunc]; + + if (ci->ci_decompress != NULL) + (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); + else + bcopy(src, dst, d_len); + + if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK) + byteswap_uint64_array(dst, d_len); +} + +ddt_t * +ddt_select_by_checksum(spa_t *spa, enum zio_checksum c) +{ + return (spa->spa_ddt[c]); +} + +ddt_t * +ddt_select(spa_t *spa, const blkptr_t *bp) +{ + return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); +} + +void +ddt_enter(ddt_t *ddt) +{ + mutex_enter(&ddt->ddt_lock); +} + +void +ddt_exit(ddt_t *ddt) +{ + mutex_exit(&ddt->ddt_lock); +} + +static ddt_entry_t * +ddt_alloc(const ddt_key_t *ddk) +{ + ddt_entry_t *dde; + + dde = kmem_zalloc(sizeof 
/*
 * Find the in-core DDT entry for bp's key, optionally creating it, and make
 * sure its on-disk contents have been loaded.
 *
 * The caller must hold ddt->ddt_lock.  The lock is dropped while the
 * on-disk objects are searched and re-taken before returning, so the tree
 * may change across this call.
 *
 * Returns the entry, or NULL if it is not in the tree and 'add' is false.
 */
ddt_entry_t *
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
{
	ddt_entry_t *dde, dde_search;
	enum ddt_type type;
	enum ddt_class class;
	avl_index_t where;
	int error;

	ASSERT(MUTEX_HELD(&ddt->ddt_lock));

	/* Only the key of dde_search is initialized; it is a search probe. */
	ddt_key_fill(&dde_search.dde_key, bp);

	dde = avl_find(&ddt->ddt_tree, &dde_search, &where);
	if (dde == NULL) {
		if (!add)
			return (NULL);
		dde = ddt_alloc(&dde_search.dde_key);
		avl_insert(&ddt->ddt_tree, dde, where);
	}

	/* Another thread is loading this entry; wait for its broadcast. */
	while (dde->dde_loading)
		cv_wait(&dde->dde_cv, &ddt->ddt_lock);

	if (dde->dde_loaded)
		return (dde);

	/* Claim the load before dropping the lock so others wait on us. */
	dde->dde_loading = B_TRUE;

	ddt_exit(ddt);

	error = ENOENT;

	/* Stop at the first object that yields anything other than ENOENT. */
	for (type = 0; type < DDT_TYPES; type++) {
		for (class = 0; class < DDT_CLASSES; class++) {
			error = ddt_object_lookup(ddt, type, class, dde);
			if (error != ENOENT)
				break;
		}
		if (error != ENOENT)
			break;
	}

	ASSERT(error == 0 || error == ENOENT);

	ddt_enter(ddt);

	ASSERT(dde->dde_loaded == B_FALSE);
	ASSERT(dde->dde_loading == B_TRUE);

	dde->dde_type = type;	/* will be DDT_TYPES if no entry found */
	dde->dde_class = class;	/* will be DDT_CLASSES if no entry found */
	dde->dde_loaded = B_TRUE;
	dde->dde_loading = B_FALSE;

	/*
	 * The entry was found on disk: subtract its current contribution
	 * from the histogram (neg == -1ULL); ddt_sync_entry() adds the
	 * updated contribution back when the entry is synced.
	 */
	if (error == 0)
		ddt_stat_update(ddt, dde, -1ULL);

	cv_broadcast(&dde->dde_cv);

	return (dde);
}
ddt_entry_t dde; + + if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) + return; + + /* + * We only remove the DDT once all tables are empty and only + * prefetch dedup blocks when there are entries in the DDT. + * Thus no locking is required as the DDT can't disappear on us. + */ + ddt = ddt_select(spa, bp); + ddt_key_fill(&dde.dde_key, bp); + + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { + ddt_object_prefetch(ddt, type, class, &dde); + } + } +} + +int +ddt_entry_compare(const void *x1, const void *x2) +{ + const ddt_entry_t *dde1 = x1; + const ddt_entry_t *dde2 = x2; + const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; + const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; + + for (int i = 0; i < DDT_KEY_WORDS; i++) { + if (u1[i] < u2[i]) + return (-1); + if (u1[i] > u2[i]) + return (1); + } + + return (0); +} + +static ddt_t * +ddt_table_alloc(spa_t *spa, enum zio_checksum c) +{ + ddt_t *ddt; + + ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); + + mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); + avl_create(&ddt->ddt_tree, ddt_entry_compare, + sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); + avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, + sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); + ddt->ddt_checksum = c; + ddt->ddt_spa = spa; + ddt->ddt_os = spa->spa_meta_objset; + + return (ddt); +} + +static void +ddt_table_free(ddt_t *ddt) +{ + ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); + ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); + avl_destroy(&ddt->ddt_tree); + avl_destroy(&ddt->ddt_repair_tree); + mutex_destroy(&ddt->ddt_lock); + kmem_free(ddt, sizeof (*ddt)); +} + +void +ddt_create(spa_t *spa) +{ + spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; + + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) + spa->spa_ddt[c] = ddt_table_alloc(spa, c); +} + +int +ddt_load(spa_t *spa) +{ + int error; + + ddt_create(spa); + + error = 
zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, + &spa->spa_ddt_stat_object); + + if (error) + return (error == ENOENT ? 0 : error); + + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; + class++) { + error = ddt_object_load(ddt, type, class); + if (error != 0 && error != ENOENT) + return (error); + } + } + + /* + * Seed the cached histograms. + */ + bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, + sizeof (ddt->ddt_histogram)); + } + + return (0); +} + +void +ddt_unload(spa_t *spa) +{ + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + if (spa->spa_ddt[c]) { + ddt_table_free(spa->spa_ddt[c]); + spa->spa_ddt[c] = NULL; + } + } +} + +boolean_t +ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) +{ + ddt_t *ddt; + ddt_entry_t dde; + + if (!BP_GET_DEDUP(bp)) + return (B_FALSE); + + if (max_class == DDT_CLASS_UNIQUE) + return (B_TRUE); + + ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; + + ddt_key_fill(&dde.dde_key, bp); + + for (enum ddt_type type = 0; type < DDT_TYPES; type++) + for (enum ddt_class class = 0; class <= max_class; class++) + if (ddt_object_lookup(ddt, type, class, &dde) == 0) + return (B_TRUE); + + return (B_FALSE); +} + +ddt_entry_t * +ddt_repair_start(ddt_t *ddt, const blkptr_t *bp) +{ + ddt_key_t ddk; + ddt_entry_t *dde; + + ddt_key_fill(&ddk, bp); + + dde = ddt_alloc(&ddk); + + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { + /* + * We can only do repair if there are multiple copies + * of the block. For anything in the UNIQUE class, + * there's definitely only one copy, so don't even try. 
/*
 * Rewrite the known-good data held by a repair entry over the copies
 * described by the current DDT entry.
 *
 *   ddt  - table the entries belong to (supplies the checksum function)
 *   dde  - entry as currently found by ddt_repair_start()
 *   rdde - repair entry carrying dde_repair_data; it is freed by
 *          ddt_repair_entry_done() when the zio created here completes
 *   rio  - parent zio the repair I/O is attached to
 */
static void
ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
{
	ddt_phys_t *ddp = dde->dde_phys;
	ddt_phys_t *rddp = rdde->dde_phys;
	ddt_key_t *ddk = &dde->dde_key;
	ddt_key_t *rddk = &rdde->dde_key;
	zio_t *zio;
	blkptr_t blk;

	/* Null zio that parents the rewrites; its done callback frees rdde. */
	zio = zio_null(rio, rio->io_spa, NULL,
	    ddt_repair_entry_done, rdde, rio->io_flags);

	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) {
		/*
		 * Only rewrite a slot that is in use and still describes
		 * the same physical block (same birth, same DVAs) as when
		 * the repair data was captured.
		 */
		if (ddp->ddp_phys_birth == 0 ||
		    ddp->ddp_phys_birth != rddp->ddp_phys_birth ||
		    bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva)))
			continue;
		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
		zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk,
		    rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL,
		    ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL));
	}

	/* Issued after the children have been attached to it. */
	zio_nowait(zio);
}
ddt_repair_done(ddt, dde); + ddt_enter(ddt); + } + ddt_exit(ddt); +} + +static void +ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) +{ + dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; + ddt_phys_t *ddp = dde->dde_phys; + ddt_key_t *ddk = &dde->dde_key; + enum ddt_type otype = dde->dde_type; + enum ddt_type ntype = DDT_TYPE_CURRENT; + enum ddt_class oclass = dde->dde_class; + enum ddt_class nclass; + uint64_t total_refcnt = 0; + + ASSERT(dde->dde_loaded); + ASSERT(!dde->dde_loading); + + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + ASSERT(dde->dde_lead_zio[p] == NULL); + ASSERT((int64_t)ddp->ddp_refcnt >= 0); + if (ddp->ddp_phys_birth == 0) { + ASSERT(ddp->ddp_refcnt == 0); + continue; + } + if (p == DDT_PHYS_DITTO) { + if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) + ddt_phys_free(ddt, ddk, ddp, txg); + continue; + } + if (ddp->ddp_refcnt == 0) + ddt_phys_free(ddt, ddk, ddp, txg); + total_refcnt += ddp->ddp_refcnt; + } + + if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) + nclass = DDT_CLASS_DITTO; + else if (total_refcnt > 1) + nclass = DDT_CLASS_DUPLICATE; + else + nclass = DDT_CLASS_UNIQUE; + + if (otype != DDT_TYPES && + (otype != ntype || oclass != nclass || total_refcnt == 0)) { + VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); + ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); + } + + if (total_refcnt != 0) { + dde->dde_type = ntype; + dde->dde_class = nclass; + ddt_stat_update(ddt, dde, 0); + if (!ddt_object_exists(ddt, ntype, nclass)) + ddt_object_create(ddt, ntype, nclass, tx); + VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); + + /* + * If the class changes, the order that we scan this bp + * changes. If it decreases, we could miss it, so + * scan it right now. (This covers both class changing + * while we are doing ddt_walk(), and when we are + * traversing.) 
/*
 * Resumable iteration over every on-disk DDT entry in the pool.
 *
 * 'ddb' is both the starting position and the saved state: the walk resumes
 * at (ddb_class, ddb_type, ddb_checksum, ddb_cursor) and updates those
 * fields in place.  Iteration order is class (outermost), then type, then
 * checksum (innermost).
 *
 * On each step dde->dde_type and dde->dde_class are set from the bookmark.
 * Returns 0 with *dde filled in for the next entry, ENOENT once every
 * object has been exhausted, or any other error from ddt_object_walk().
 */
int
ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde)
{
	do {
		do {
			do {
				ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum];
				/* A missing object reads as "exhausted". */
				int error = ENOENT;
				if (ddt_object_exists(ddt, ddb->ddb_type,
				    ddb->ddb_class)) {
					error = ddt_object_walk(ddt,
					    ddb->ddb_type, ddb->ddb_class,
					    &ddb->ddb_cursor, dde);
				}
				dde->dde_type = ddb->ddb_type;
				dde->dde_class = ddb->ddb_class;
				if (error == 0)
					return (0);
				if (error != ENOENT)
					return (error);
				/* Start the next object from its beginning. */
				ddb->ddb_cursor = 0;
			} while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS);
			ddb->ddb_checksum = 0;
		} while (++ddb->ddb_type < DDT_TYPES);
		ddb->ddb_type = 0;
	} while (++ddb->ddb_class < DDT_CLASSES);

	return (ENOENT);
}
 */

/*
 * ZAP-backed implementation of the dedup-table (DDT) object operations.
 * Each entry is stored in a ZAP object under a uint64-array key
 * (DDT_KEY_WORDS words taken from dde_key); the value is the entry's
 * dde_phys array run through ddt_compress().
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/ddt.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
#include <util/sscanf.h>

/* Block shifts handed to zap_create_flags() for leaf and indirect blocks. */
int ddt_zap_leaf_blockshift = 12;
int ddt_zap_indirect_blockshift = 12;

/*
 * Create the ZAP object backing a DDT and store its object number in
 * *objectp.  When 'prehash' is set, ZAP_FLAG_PRE_HASHED_KEY is added to
 * the creation flags.
 *
 * Returns 0 on success, or ENOTSUP if zap_create_flags() yields object 0.
 */
static int
ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
{
	zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;

	if (prehash)
		flags |= ZAP_FLAG_PRE_HASHED_KEY;

	*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
	    ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
	    DMU_OT_NONE, 0, tx);

	return (*objectp == 0 ? ENOTSUP : 0);
}

/*
 * Destroy the ZAP object; returns whatever zap_destroy() returns.
 */
static int
ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	return (zap_destroy(os, object, tx));
}

/*
 * Look up the entry keyed by dde->dde_key and decompress its stored
 * value into dde->dde_phys.
 *
 * Returns 0 on success, or the error from zap_length_uint64() /
 * zap_lookup_uint64().
 */
static int
ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
{
	/* one byte larger than dde_phys; same size ddt_zap_update() uses */
	uchar_t cbuf[sizeof (dde->dde_phys) + 1];
	uint64_t one, csize;
	int error;

	/* query the stored integer size ('one') and count ('csize') first */
	error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
	    DDT_KEY_WORDS, &one, &csize);
	if (error)
		return (error);

	/*
	 * NOTE(review): the bound on csize is enforced only by ASSERT, so
	 * it is presumably unchecked in non-DEBUG builds -- confirm.
	 */
	ASSERT(one == 1);
	ASSERT(csize <= sizeof (cbuf));

	error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key,
	    DDT_KEY_WORDS, 1, csize, cbuf);
	if (error)
		return (error);

	ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys));

	return (0);
}

/*
 * Issue a ZAP prefetch for the entry keyed by dde->dde_key; the result
 * of zap_prefetch_uint64() is deliberately ignored.
 */
static void
ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
{
	(void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
	    DDT_KEY_WORDS);
}

/*
 * Compress dde->dde_phys and store it under dde->dde_key as 'csize'
 * one-byte integers.  Returns the result of zap_update_uint64().
 */
static int
ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
{
	uchar_t cbuf[sizeof (dde->dde_phys) + 1];
	uint64_t csize;

	csize = ddt_compress(dde->dde_phys, cbuf,
	    sizeof (dde->dde_phys), sizeof (cbuf));

	return (zap_update_uint64(os, object, (uint64_t *)&dde->dde_key,
	    DDT_KEY_WORDS, 1, csize, cbuf, tx));
}

/*
 * Remove the entry keyed by dde->dde_key.  Returns the result of
 * zap_remove_uint64().
 */
static int
ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
{
	return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key,
	    DDT_KEY_WORDS, tx));
}

/*
 * Fetch the entry at the serialized cursor position *walk into 'dde'
 * and advance *walk to the following position.
 *
 * Returns 0 when an entry was produced; otherwise the error from
 * zap_cursor_retrieve() (in which case *walk is left unchanged) or from
 * zap_lookup_uint64().
 */
static int
ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
{
	zap_cursor_t zc;
	zap_attribute_t za;
	int error;

	zap_cursor_init_serialized(&zc, os, object, *walk);
	if ((error = zap_cursor_retrieve(&zc, &za)) == 0) {
		uchar_t cbuf[sizeof (dde->dde_phys) + 1];
		uint64_t csize = za.za_num_integers;
		ASSERT(za.za_integer_length == 1);
		/* za_name carries the raw uint64 key words, not a string */
		error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name,
		    DDT_KEY_WORDS, 1, csize, cbuf);
		ASSERT(error == 0);
		if (error == 0) {
			ddt_decompress(cbuf, dde->dde_phys, csize,
			    sizeof (dde->dde_phys));
			dde->dde_key = *(ddt_key_t *)za.za_name;
		}
		/* the cursor advances even if the lookup above failed */
		zap_cursor_advance(&zc);
		*walk = zap_cursor_serialize(&zc);
	}
	zap_cursor_fini(&zc);
	return (error);
}

/*
 * Return the number of entries in the ZAP object.  A zap_count()
 * failure trips VERIFY.
 */
static uint64_t
ddt_zap_count(objset_t *os, uint64_t object)
{
	uint64_t count = 0;

	VERIFY(zap_count(os, object, &count) == 0);

	return (count);
}

/* Operations vector exporting the ZAP implementation under the name "zap". */
const ddt_ops_t ddt_zap_ops = {
	"zap",
	ddt_zap_create,
	ddt_zap_destroy,
	ddt_zap_lookup,
	ddt_zap_prefetch,
	ddt_zap_update,
	ddt_zap_remove,
	ddt_zap_walk,
	ddt_zap_count,
};
diff --git a/uts/common/fs/zfs/dmu.c b/uts/common/fs/zfs/dmu.c
new file mode 100644
index 000000000000..39234eba53b2
--- /dev/null
+++ b/uts/common/fs/zfs/dmu.c
@@ -0,0 +1,1764 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_tx.h> +#include <sys/dbuf.h> +#include <sys/dnode.h> +#include <sys/zfs_context.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_traverse.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_prop.h> +#include <sys/dmu_zfetch.h> +#include <sys/zfs_ioctl.h> +#include <sys/zap.h> +#include <sys/zio_checksum.h> +#include <sys/sa.h> +#ifdef _KERNEL +#include <sys/vmsystm.h> +#include <sys/zfs_znode.h> +#endif + +const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { + { byteswap_uint8_array, TRUE, "unallocated" }, + { zap_byteswap, TRUE, "object directory" }, + { byteswap_uint64_array, TRUE, "object array" }, + { byteswap_uint8_array, TRUE, "packed nvlist" }, + { byteswap_uint64_array, TRUE, "packed nvlist size" }, + { byteswap_uint64_array, TRUE, "bpobj" }, + { byteswap_uint64_array, TRUE, "bpobj header" }, + { byteswap_uint64_array, TRUE, "SPA space map header" }, + { byteswap_uint64_array, TRUE, "SPA space map" }, + { byteswap_uint64_array, TRUE, "ZIL intent log" }, + { dnode_buf_byteswap, TRUE, "DMU dnode" }, + { dmu_objset_byteswap, TRUE, "DMU objset" }, + { byteswap_uint64_array, TRUE, "DSL directory" }, + { zap_byteswap, TRUE, "DSL directory child map"}, + { zap_byteswap, TRUE, "DSL dataset snap map" }, + { zap_byteswap, TRUE, "DSL props" }, + { byteswap_uint64_array, TRUE, "DSL dataset" }, + { zfs_znode_byteswap, TRUE, "ZFS znode" }, + { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" }, + { byteswap_uint8_array, FALSE, "ZFS plain file" }, + { zap_byteswap, TRUE, "ZFS directory" }, + { 
zap_byteswap, TRUE, "ZFS master node" }, + { zap_byteswap, TRUE, "ZFS delete queue" }, + { byteswap_uint8_array, FALSE, "zvol object" }, + { zap_byteswap, TRUE, "zvol prop" }, + { byteswap_uint8_array, FALSE, "other uint8[]" }, + { byteswap_uint64_array, FALSE, "other uint64[]" }, + { zap_byteswap, TRUE, "other ZAP" }, + { zap_byteswap, TRUE, "persistent error log" }, + { byteswap_uint8_array, TRUE, "SPA history" }, + { byteswap_uint64_array, TRUE, "SPA history offsets" }, + { zap_byteswap, TRUE, "Pool properties" }, + { zap_byteswap, TRUE, "DSL permissions" }, + { zfs_acl_byteswap, TRUE, "ZFS ACL" }, + { byteswap_uint8_array, TRUE, "ZFS SYSACL" }, + { byteswap_uint8_array, TRUE, "FUID table" }, + { byteswap_uint64_array, TRUE, "FUID table size" }, + { zap_byteswap, TRUE, "DSL dataset next clones"}, + { zap_byteswap, TRUE, "scan work queue" }, + { zap_byteswap, TRUE, "ZFS user/group used" }, + { zap_byteswap, TRUE, "ZFS user/group quota" }, + { zap_byteswap, TRUE, "snapshot refcount tags"}, + { zap_byteswap, TRUE, "DDT ZAP algorithm" }, + { zap_byteswap, TRUE, "DDT statistics" }, + { byteswap_uint8_array, TRUE, "System attributes" }, + { zap_byteswap, TRUE, "SA master node" }, + { zap_byteswap, TRUE, "SA attr registration" }, + { zap_byteswap, TRUE, "SA attr layouts" }, + { zap_byteswap, TRUE, "scan translations" }, + { byteswap_uint8_array, FALSE, "deduplicated block" }, + { zap_byteswap, TRUE, "DSL deadlist map" }, + { byteswap_uint64_array, TRUE, "DSL deadlist map hdr" }, + { zap_byteswap, TRUE, "DSL dir clones" }, + { byteswap_uint64_array, TRUE, "bpobj subobj" }, +}; + +int +dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, + void *tag, dmu_buf_t **dbp, int flags) +{ + dnode_t *dn; + uint64_t blkid; + dmu_buf_impl_t *db; + int err; + int db_flags = DB_RF_CANFAIL; + + if (flags & DMU_READ_NO_PREFETCH) + db_flags |= DB_RF_NOPREFETCH; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + blkid = dbuf_whichblock(dn, offset); + 
rw_enter(&dn->dn_struct_rwlock, RW_READER); + db = dbuf_hold(dn, blkid, tag); + rw_exit(&dn->dn_struct_rwlock); + if (db == NULL) { + err = EIO; + } else { + err = dbuf_read(db, NULL, db_flags); + if (err) { + dbuf_rele(db, tag); + db = NULL; + } + } + + dnode_rele(dn, FTAG); + *dbp = &db->db; /* NULL db plus first field offset is NULL */ + return (err); +} + +int +dmu_bonus_max(void) +{ + return (DN_MAX_BONUSLEN); +} + +int +dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dnode_t *dn; + int error; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + if (dn->dn_bonus != db) { + error = EINVAL; + } else if (newsize < 0 || newsize > db_fake->db_size) { + error = EINVAL; + } else { + dnode_setbonuslen(dn, newsize, tx); + error = 0; + } + + DB_DNODE_EXIT(db); + return (error); +} + +int +dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dnode_t *dn; + int error; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + if (type > DMU_OT_NUMTYPES) { + error = EINVAL; + } else if (dn->dn_bonus != db) { + error = EINVAL; + } else { + dnode_setbonus_type(dn, type, tx); + error = 0; + } + + DB_DNODE_EXIT(db); + return (error); +} + +dmu_object_type_t +dmu_get_bonustype(dmu_buf_t *db_fake) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dnode_t *dn; + dmu_object_type_t type; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + type = dn->dn_bonustype; + DB_DNODE_EXIT(db); + + return (type); +} + +int +dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + dnode_t *dn; + int error; + + error = dnode_hold(os, object, FTAG, &dn); + dbuf_rm_spill(dn, tx); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dnode_rm_spill(dn, tx); + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + return (error); +} + +/* + * returns ENOENT, EIO, or 0. 
 */
/*
 * Hold the bonus buffer of 'object' under 'tag', creating the bonus dbuf
 * if the dnode does not have one yet, and return it through *dbp.
 * The only failure reported is the one from dnode_hold(); the subsequent
 * dbuf_read() is issued with DB_RF_MUST_SUCCEED and wrapped in VERIFY.
 */
int
dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
{
	dnode_t *dn;
	dmu_buf_impl_t *db;
	int error;

	error = dnode_hold(os, object, FTAG, &dn);
	if (error)
		return (error);

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	if (dn->dn_bonus == NULL) {
		/*
		 * Drop the reader lock and retake it as writer, then
		 * re-check dn_bonus since it may have been created while
		 * the lock was not held.
		 */
		rw_exit(&dn->dn_struct_rwlock);
		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
		if (dn->dn_bonus == NULL)
			dbuf_create_bonus(dn);
	}
	db = dn->dn_bonus;

	/* as long as the bonus buf is held, the dnode will be held */
	if (refcount_add(&db->db_holds, tag) == 1) {
		VERIFY(dnode_add_ref(dn, db));
		(void) atomic_inc_32_nv(&dn->dn_dbufs_count);
	}

	/*
	 * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's
	 * hold and incrementing the dbuf count to ensure that dnode_move() sees
	 * a dnode hold for every dbuf.
	 */
	rw_exit(&dn->dn_struct_rwlock);

	/* the FTAG hold is no longer needed once the dbuf holds the dnode */
	dnode_rele(dn, FTAG);

	VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH));

	*dbp = &db->db;
	return (0);
}

/*
 * returns ENOENT, EIO, or 0.
 *
 * This interface will allocate a blank spill dbuf when a spill blk
 * doesn't already exist on the dnode.
 *
 * if you only want to find an already existing spill db, then
 * dmu_spill_hold_existing() should be used.
+ */ +int +dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp) +{ + dmu_buf_impl_t *db = NULL; + int err; + + if ((flags & DB_RF_HAVESTRUCT) == 0) + rw_enter(&dn->dn_struct_rwlock, RW_READER); + + db = dbuf_hold(dn, DMU_SPILL_BLKID, tag); + + if ((flags & DB_RF_HAVESTRUCT) == 0) + rw_exit(&dn->dn_struct_rwlock); + + ASSERT(db != NULL); + err = dbuf_read(db, NULL, flags); + if (err == 0) + *dbp = &db->db; + else + dbuf_rele(db, tag); + return (err); +} + +int +dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; + dnode_t *dn; + int err; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) { + err = EINVAL; + } else { + rw_enter(&dn->dn_struct_rwlock, RW_READER); + + if (!dn->dn_have_spill) { + err = ENOENT; + } else { + err = dmu_spill_hold_by_dnode(dn, + DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp); + } + + rw_exit(&dn->dn_struct_rwlock); + } + + DB_DNODE_EXIT(db); + return (err); +} + +int +dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; + dnode_t *dn; + int err; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + err = dmu_spill_hold_by_dnode(dn, DB_RF_CANFAIL, tag, dbp); + DB_DNODE_EXIT(db); + + return (err); +} + +/* + * Note: longer-term, we should modify all of the dmu_buf_*() interfaces + * to take a held dnode rather than <os, object> -- the lookup is wasteful, + * and can induce severe lock contention when writing to several files + * whose dnodes are in the same block. 
 */
/*
 * Hold every data buffer of 'dn' that overlaps [offset, offset+length)
 * under 'tag' and, when 'read' is nonzero, read them in.
 *
 * On success returns 0, stores a kmem-allocated array of the held
 * buffers in *dbpp and its length in *numbufsp; the caller releases it
 * with dmu_buf_rele_array().  On failure the holds and the array are
 * released here and EIO or the zio_wait() error is returned.
 */
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
    int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
	dsl_pool_t *dp = NULL;
	dmu_buf_t **dbp;
	uint64_t blkid, nblks, i;
	uint32_t dbuf_flags;
	int err;
	zio_t *zio;
	hrtime_t start;

	ASSERT(length <= DMU_MAX_ACCESS);

	dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
	if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
		dbuf_flags |= DB_RF_NOPREFETCH;

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	if (dn->dn_datablkshift) {
		/* power-of-two block size: count blocks the range touches */
		int blkshift = dn->dn_datablkshift;
		nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
		    P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
	} else {
		/* no block shift: the range must fit in dn_datablksz bytes */
		if (offset + length > dn->dn_datablksz) {
			zfs_panic_recover("zfs: accessing past end of object "
			    "%llx/%llx (size=%u access=%llu+%llu)",
			    (longlong_t)dn->dn_objset->
			    os_dsl_dataset->ds_object,
			    (longlong_t)dn->dn_object, dn->dn_datablksz,
			    (longlong_t)offset, (longlong_t)length);
			rw_exit(&dn->dn_struct_rwlock);
			return (EIO);
		}
		nblks = 1;
	}
	/* zero-filled so a partial array can be released safely */
	dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);

	if (dn->dn_objset->os_dsl_dataset)
		dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool;
	/* 'start' is only set, and only read below, in sync context */
	if (dp && dsl_pool_sync_context(dp))
		start = gethrtime();
	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
	blkid = dbuf_whichblock(dn, offset);
	for (i = 0; i < nblks; i++) {
		dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
		if (db == NULL) {
			rw_exit(&dn->dn_struct_rwlock);
			dmu_buf_rele_array(dbp, nblks, tag);
			zio_nowait(zio);
			return (EIO);
		}
		/* initiate async i/o */
		if (read) {
			(void) dbuf_read(db, zio, dbuf_flags);
		}
		dbp[i] = &db->db;
	}
	rw_exit(&dn->dn_struct_rwlock);

	/* wait for async i/o */
	err = zio_wait(zio);
	/* track read overhead when we are in sync context */
	if (dp && dsl_pool_sync_context(dp))
		dp->dp_read_overhead += gethrtime() - start;
	if (err) {
		dmu_buf_rele_array(dbp, nblks, tag);
		return (err);
	}

	/* wait for other io to complete */
	if (read) {
		for (i = 0; i < nblks; i++) {
			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
			mutex_enter(&db->db_mtx);
			while (db->db_state == DB_READ ||
			    db->db_state == DB_FILL)
				cv_wait(&db->db_changed, &db->db_mtx);
			/* still uncached after the wait: report EIO */
			if (db->db_state == DB_UNCACHED)
				err = EIO;
			mutex_exit(&db->db_mtx);
			if (err) {
				dmu_buf_rele_array(dbp, nblks, tag);
				return (err);
			}
		}
	}

	*numbufsp = nblks;
	*dbpp = dbp;
	return (0);
}

/*
 * <os, object> front end to dmu_buf_hold_array_by_dnode(): holds the
 * dnode for the duration of the call and passes DMU_READ_PREFETCH.
 * Returns the dnode_hold() error or the callee's result.
 */
static int
dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
{
	dnode_t *dn;
	int err;

	err = dnode_hold(os, object, FTAG, &dn);
	if (err)
		return (err);

	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
	    numbufsp, dbpp, DMU_READ_PREFETCH);

	dnode_rele(dn, FTAG);

	return (err);
}

/*
 * Same as dmu_buf_hold_array(), but the dnode is obtained from an
 * existing buffer 'db_fake' of the object instead of <os, object>.
 */
int
dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	dnode_t *dn;
	int err;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
	    numbufsp, dbpp, DMU_READ_PREFETCH);
	DB_DNODE_EXIT(db);

	return (err);
}

/*
 * Release every non-NULL buffer in the 'numbufs'-element array under
 * 'tag' and free the array itself.  A zero-length array is a no-op.
 */
void
dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
{
	int i;
	dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake;

	if (numbufs == 0)
		return;

	for (i = 0; i < numbufs; i++) {
		/* entries may be NULL when the array was only partly filled */
		if (dbp[i])
			dbuf_rele(dbp[i], tag);
	}

	kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs);
}

/*
 * Prefetch the blocks of 'object' covering [offset, offset+len).
 * With len == 0 the block of the meta dnode that holds the object's
 * dnode is prefetched instead.  Does nothing when zfs_prefetch_disable
 * is set; errors are not reported.
 */
void
dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
{
	dnode_t *dn;
	uint64_t blkid;
	int nblks, i, err;

	if (zfs_prefetch_disable)
		return;

	if (len == 0) {  /* they're interested in the bonus buffer */
		dn = DMU_META_DNODE(os);

		if (object == 0 || object >= DN_MAX_OBJECT)
			return;

		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
		dbuf_prefetch(dn, blkid);
		rw_exit(&dn->dn_struct_rwlock);
		return;
	}

	/*
	 * XXX - Note, if the dnode for the requested object is not
	 * already cached, we will do a *synchronous* read in the
	 * dnode_hold() call.  The same is true for any indirects.
	 */
	err = dnode_hold(os, object, FTAG, &dn);
	if (err != 0)
		return;

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	if (dn->dn_datablkshift) {
		int blkshift = dn->dn_datablkshift;
		nblks = (P2ROUNDUP(offset+len, 1<<blkshift) -
		    P2ALIGN(offset, 1<<blkshift)) >> blkshift;
	} else {
		/* no block shift: one block if offset lies inside it, else none */
		nblks = (offset < dn->dn_datablksz);
	}

	if (nblks != 0) {
		blkid = dbuf_whichblock(dn, offset);
		for (i = 0; i < nblks; i++)
			dbuf_prefetch(dn, blkid+i);
	}

	rw_exit(&dn->dn_struct_rwlock);

	dnode_rele(dn, FTAG);
}

/*
 * Get the next "chunk" of file data to free.  We traverse the file from
 * the end so that the file gets shorter over time (if we crash in the
 * middle, this will leave us in a better state).  We find allocated file
 * data by simply searching the allocated level 1 indirects.
 */
/*
 * On entry *start is the end of the range still to be freed and 'limit'
 * its beginning.  On return *start has been moved down to where the next
 * chunk to free begins (never below 'limit').
 * Returns 0, or an error from dnode_next_offset() other than ESRCH.
 */
static int
get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit)
{
	uint64_t len = *start - limit;
	uint64_t blkcnt = 0;
	uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1));
	uint64_t iblkrange =
	    dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT);

	ASSERT(limit <= *start);

	/* small enough to free in one go */
	if (len <= iblkrange * maxblks) {
		*start = limit;
		return (0);
	}
	ASSERT(ISP2(iblkrange));

	while (*start > limit && blkcnt < maxblks) {
		int err;

		/* find next allocated L1 indirect */
		err = dnode_next_offset(dn,
		    DNODE_FIND_BACKWARDS, start, 2, 1, 0);

		/* if there are no more, then we are done */
		if (err == ESRCH) {
			*start = limit;
			return (0);
		} else if (err) {
			return (err);
		}
		blkcnt += 1;

		/* reset offset to end of "next" block back */
		*start = P2ALIGN(*start, iblkrange);
		if (*start <= limit)
			*start = limit;
		else
			*start -= 1;
	}
	return (0);
}

/*
 * Free [offset, offset+length) of 'dn' from the end backwards, one
 * get_next_chunk() chunk per transaction.  length == DMU_OBJECT_END
 * means "to the end of the object".  With 'free_dnode' set the dnode
 * itself is freed in the transaction that reaches offset 0.
 * Returns 0, or the first error from get_next_chunk()/dmu_tx_assign().
 */
static int
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
    uint64_t length, boolean_t free_dnode)
{
	dmu_tx_t *tx;
	uint64_t object_size, start, end, len;
	boolean_t trunc = (length == DMU_OBJECT_END);
	int align, err;

	align = 1 << dn->dn_datablkshift;
	ASSERT(align > 0);
	/* align == 1 means dn_datablkshift is 0: use dn_datablksz as size */
	object_size = align == 1 ? dn->dn_datablksz :
	    (dn->dn_maxblkid + 1) << dn->dn_datablkshift;

	/* clamp the range to the object size */
	end = offset + length;
	if (trunc || end > object_size)
		end = object_size;
	if (end <= offset)
		return (0);
	length = end - offset;

	while (length) {
		start = end;
		/* assert(offset <= start) */
		err = get_next_chunk(dn, &start, offset);
		if (err)
			return (err);
		len = trunc ? DMU_OBJECT_END : end - start;

		tx = dmu_tx_create(os);
		dmu_tx_hold_free(tx, dn->dn_object, start, len);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			dmu_tx_abort(tx);
			return (err);
		}

		dnode_free_range(dn, start, trunc ? -1 : len, tx);

		if (start == 0 && free_dnode) {
			ASSERT(trunc);
			dnode_free(dn, tx);
		}

		length -= end - start;

		dmu_tx_commit(tx);
		end = start;
	}
	return (0);
}

/*
 * Free a byte range of 'object' in as many transactions as needed;
 * the object itself is kept.  Returns the dnode_hold() error or the
 * result of dmu_free_long_range_impl().
 */
int
dmu_free_long_range(objset_t *os, uint64_t object,
    uint64_t offset, uint64_t length)
{
	dnode_t *dn;
	int err;

	err = dnode_hold(os, object, FTAG, &dn);
	if (err != 0)
		return (err);
	err = dmu_free_long_range_impl(os, dn, offset, length, FALSE);
	dnode_rele(dn, FTAG);
	return (err);
}

/*
 * Free all data of 'object' and the object itself.  A dnode with a
 * single level is freed in one transaction; otherwise the work is
 * delegated to dmu_free_long_range_impl() with free_dnode set.
 */
int
dmu_free_object(objset_t *os, uint64_t object)
{
	dnode_t *dn;
	dmu_tx_t *tx;
	int err;

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
	    FTAG, &dn);
	if (err != 0)
		return (err);
	if (dn->dn_nlevels == 1) {
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, object);
		dmu_tx_hold_free(tx, dn->dn_object, 0, DMU_OBJECT_END);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err == 0) {
			dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
			dnode_free(dn, tx);
			dmu_tx_commit(tx);
		} else {
			dmu_tx_abort(tx);
		}
	} else {
		err = dmu_free_long_range_impl(os, dn, 0, DMU_OBJECT_END, TRUE);
	}
	dnode_rele(dn, FTAG);
	return (err);
}

/*
 * Free [offset, offset+size) of 'object' within the caller's
 * transaction 'tx'.  size == -1ULL is accepted by the assertion below.
 * Returns the dnode_hold() error, else 0.
 */
int
dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, dmu_tx_t *tx)
{
	dnode_t *dn;
	int err = dnode_hold(os, object, FTAG, &dn);
	if (err)
		return (err);
	ASSERT(offset < UINT64_MAX);
	ASSERT(size == -1ULL || size <= UINT64_MAX - offset);
	dnode_free_range(dn, offset, size, tx);
	dnode_rele(dn, FTAG);
	return (0);
}

/*
 * Copy 'size' bytes starting at 'offset' of 'object' into 'buf'.
 * 'flags' is passed through to dmu_buf_hold_array_by_dnode().
 * Returns 0, or the first error from dnode_hold() or
 * dmu_buf_hold_array_by_dnode().
 */
int
dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    void *buf, uint32_t flags)
{
	dnode_t *dn;
	dmu_buf_t **dbp;
	int numbufs, err;

	err = dnode_hold(os, object, FTAG, &dn);
	if (err)
		return (err);

	/*
	 * Deal with odd block sizes, where there can't be data past the first
	 * block.  If we ever do the tail block optimization, we will need to
	 * handle that here as well.
	 */
	if (dn->dn_maxblkid == 0) {
		int newsz = offset > dn->dn_datablksz ? 0 :
		    MIN(size, dn->dn_datablksz - offset);
		/* zero-fill the part of buf that lies past the block */
		bzero((char *)buf + newsz, size - newsz);
		size = newsz;
	}

	while (size > 0) {
		uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
		int i;

		/*
		 * NB: we could do this block-at-a-time, but it's nice
		 * to be reading in parallel.
		 */
		err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
		    TRUE, FTAG, &numbufs, &dbp, flags);
		if (err)
			break;

		for (i = 0; i < numbufs; i++) {
			int tocpy;
			int bufoff;
			dmu_buf_t *db = dbp[i];

			ASSERT(size > 0);

			/* offset within this buffer, and bytes to take from it */
			bufoff = offset - db->db_offset;
			tocpy = (int)MIN(db->db_size - bufoff, size);

			bcopy((char *)db->db_data + bufoff, buf, tocpy);

			offset += tocpy;
			size -= tocpy;
			buf = (char *)buf + tocpy;
		}
		dmu_buf_rele_array(dbp, numbufs, FTAG);
	}
	dnode_rele(dn, FTAG);
	return (err);
}

/*
 * Copy 'size' bytes from 'buf' into 'object' at 'offset' under
 * transaction 'tx'.  Failure to hold the buffers trips VERIFY.
 */
void
dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    const void *buf, dmu_tx_t *tx)
{
	dmu_buf_t **dbp;
	int numbufs, i;

	if (size == 0)
		return;

	VERIFY(0 == dmu_buf_hold_array(os, object, offset, size,
	    FALSE, FTAG, &numbufs, &dbp));

	for (i = 0; i < numbufs; i++) {
		int tocpy;
		int bufoff;
		dmu_buf_t *db = dbp[i];

		ASSERT(size > 0);

		bufoff = offset - db->db_offset;
		tocpy = (int)MIN(db->db_size - bufoff, size);

		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);

		/* whole-buffer overwrite uses the fill path, else dirty it */
		if (tocpy == db->db_size)
			dmu_buf_will_fill(db, tx);
		else
			dmu_buf_will_dirty(db, tx);

		bcopy(buf, (char *)db->db_data + bufoff, tocpy);

		if (tocpy == db->db_size)
			dmu_buf_fill_done(db, tx);

		offset += tocpy;
		size -= tocpy;
		buf = (char *)buf + tocpy;
	}
	dmu_buf_rele_array(dbp, numbufs, FTAG);
}

/*
 * Hold every buffer covering [offset, offset+size) of 'object' and call
 * dmu_buf_will_not_fill() on each under 'tx'; no data is copied.
 */
void
dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    dmu_tx_t *tx)
{
	dmu_buf_t **dbp;
	int numbufs, i;

	if (size == 0)
		return;

	VERIFY(0 == dmu_buf_hold_array(os, object, offset, size,
	    FALSE, FTAG, &numbufs, &dbp));

	for (i = 0; i < numbufs; i++) {
		dmu_buf_t *db = dbp[i];

		dmu_buf_will_not_fill(db, tx);
	}
	dmu_buf_rele_array(dbp, numbufs, FTAG);
}

/*
 * DMU support for xuio
 */
kstat_t *xuio_ksp = NULL;

/*
 * Allocate the iovec array and the private dmu_xuio_t (with room for
 * 'nblk' arc buffers) for 'xuio', and add 'nblk' to the on-loan read or
 * write counter depending on XUIO_XUZC_RW().  Always returns 0.
 */
int
dmu_xuio_init(xuio_t *xuio, int nblk)
{
	dmu_xuio_t *priv;
	uio_t *uio = &xuio->xu_uio;

	uio->uio_iovcnt = nblk;
	uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_SLEEP);

	priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
	priv->cnt = nblk;
	priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
	/* remember the iovec array so dmu_xuio_fini() can free it */
	priv->iovp = uio->uio_iov;
	XUIO_XUZC_PRIV(xuio) = priv;

	if (XUIO_XUZC_RW(xuio) == UIO_READ)
		XUIOSTAT_INCR(xuiostat_onloan_rbuf, nblk);
	else
		XUIOSTAT_INCR(xuiostat_onloan_wbuf, nblk);

	return (0);
}

/*
 * Undo dmu_xuio_init(): free the iovec array, the buffer array and the
 * private structure, and subtract the count from the on-loan counter.
 */
void
dmu_xuio_fini(xuio_t *xuio)
{
	dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
	int nblk = priv->cnt;

	kmem_free(priv->iovp, nblk * sizeof (iovec_t));
	kmem_free(priv->bufs, nblk * sizeof (arc_buf_t *));
	kmem_free(priv, sizeof (dmu_xuio_t));

	if (XUIO_XUZC_RW(xuio) == UIO_READ)
		XUIOSTAT_INCR(xuiostat_onloan_rbuf, -nblk);
	else
		XUIOSTAT_INCR(xuiostat_onloan_wbuf, -nblk);
}

/*
 * Initialize iov[priv->next] and priv->bufs[priv->next] with { off, n, abuf }
 * and increase priv->next by 1.
+ */ +int +dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n) +{ + struct iovec *iov; + uio_t *uio = &xuio->xu_uio; + dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); + int i = priv->next++; + + ASSERT(i < priv->cnt); + ASSERT(off + n <= arc_buf_size(abuf)); + iov = uio->uio_iov + i; + iov->iov_base = (char *)abuf->b_data + off; + iov->iov_len = n; + priv->bufs[i] = abuf; + return (0); +} + +int +dmu_xuio_cnt(xuio_t *xuio) +{ + dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); + return (priv->cnt); +} + +arc_buf_t * +dmu_xuio_arcbuf(xuio_t *xuio, int i) +{ + dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); + + ASSERT(i < priv->cnt); + return (priv->bufs[i]); +} + +void +dmu_xuio_clear(xuio_t *xuio, int i) +{ + dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); + + ASSERT(i < priv->cnt); + priv->bufs[i] = NULL; +} + +static void +xuio_stat_init(void) +{ + xuio_ksp = kstat_create("zfs", 0, "xuio_stats", "misc", + KSTAT_TYPE_NAMED, sizeof (xuio_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + if (xuio_ksp != NULL) { + xuio_ksp->ks_data = &xuio_stats; + kstat_install(xuio_ksp); + } +} + +static void +xuio_stat_fini(void) +{ + if (xuio_ksp != NULL) { + kstat_delete(xuio_ksp); + xuio_ksp = NULL; + } +} + +void +xuio_stat_wbuf_copied() +{ + XUIOSTAT_BUMP(xuiostat_wbuf_copied); +} + +void +xuio_stat_wbuf_nocopy() +{ + XUIOSTAT_BUMP(xuiostat_wbuf_nocopy); +} + +#ifdef _KERNEL +int +dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +{ + dmu_buf_t **dbp; + int numbufs, i, err; + xuio_t *xuio = NULL; + + /* + * NB: we could do this block-at-a-time, but it's nice + * to be reading in parallel. 
+ */ + err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, + &numbufs, &dbp); + if (err) + return (err); + + if (uio->uio_extflg == UIO_XUIO) + xuio = (xuio_t *)uio; + + for (i = 0; i < numbufs; i++) { + int tocpy; + int bufoff; + dmu_buf_t *db = dbp[i]; + + ASSERT(size > 0); + + bufoff = uio->uio_loffset - db->db_offset; + tocpy = (int)MIN(db->db_size - bufoff, size); + + if (xuio) { + dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db; + arc_buf_t *dbuf_abuf = dbi->db_buf; + arc_buf_t *abuf = dbuf_loan_arcbuf(dbi); + err = dmu_xuio_add(xuio, abuf, bufoff, tocpy); + if (!err) { + uio->uio_resid -= tocpy; + uio->uio_loffset += tocpy; + } + + if (abuf == dbuf_abuf) + XUIOSTAT_BUMP(xuiostat_rbuf_nocopy); + else + XUIOSTAT_BUMP(xuiostat_rbuf_copied); + } else { + err = uiomove((char *)db->db_data + bufoff, tocpy, + UIO_READ, uio); + } + if (err) + break; + + size -= tocpy; + } + dmu_buf_rele_array(dbp, numbufs, FTAG); + + return (err); +} + +static int +dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) +{ + dmu_buf_t **dbp; + int numbufs; + int err = 0; + int i; + + err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, + FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); + if (err) + return (err); + + for (i = 0; i < numbufs; i++) { + int tocpy; + int bufoff; + dmu_buf_t *db = dbp[i]; + + ASSERT(size > 0); + + bufoff = uio->uio_loffset - db->db_offset; + tocpy = (int)MIN(db->db_size - bufoff, size); + + ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); + + if (tocpy == db->db_size) + dmu_buf_will_fill(db, tx); + else + dmu_buf_will_dirty(db, tx); + + /* + * XXX uiomove could block forever (eg. nfs-backed + * pages). There needs to be a uiolockdown() function + * to lock the pages in memory, so that uiomove won't + * block. 
+ */ + err = uiomove((char *)db->db_data + bufoff, tocpy, + UIO_WRITE, uio); + + if (tocpy == db->db_size) + dmu_buf_fill_done(db, tx); + + if (err) + break; + + size -= tocpy; + } + + dmu_buf_rele_array(dbp, numbufs, FTAG); + return (err); +} + +int +dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, + dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + err = dmu_write_uio_dnode(dn, uio, size, tx); + DB_DNODE_EXIT(db); + + return (err); +} + +int +dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + + err = dmu_write_uio_dnode(dn, uio, size, tx); + + dnode_rele(dn, FTAG); + + return (err); +} + +int +dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + page_t *pp, dmu_tx_t *tx) +{ + dmu_buf_t **dbp; + int numbufs, i; + int err; + + if (size == 0) + return (0); + + err = dmu_buf_hold_array(os, object, offset, size, + FALSE, FTAG, &numbufs, &dbp); + if (err) + return (err); + + for (i = 0; i < numbufs; i++) { + int tocpy, copied, thiscpy; + int bufoff; + dmu_buf_t *db = dbp[i]; + caddr_t va; + + ASSERT(size > 0); + ASSERT3U(db->db_size, >=, PAGESIZE); + + bufoff = offset - db->db_offset; + tocpy = (int)MIN(db->db_size - bufoff, size); + + ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); + + if (tocpy == db->db_size) + dmu_buf_will_fill(db, tx); + else + dmu_buf_will_dirty(db, tx); + + for (copied = 0; copied < tocpy; copied += PAGESIZE) { + ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); + thiscpy = MIN(PAGESIZE, tocpy - copied); + va = zfs_map_page(pp, S_READ); + bcopy(va, (char *)db->db_data + bufoff, thiscpy); + zfs_unmap_page(pp, va); + pp = pp->p_next; + bufoff += PAGESIZE; + } + + if (tocpy == db->db_size) + 
dmu_buf_fill_done(db, tx); + + offset += tocpy; + size -= tocpy; + } + dmu_buf_rele_array(dbp, numbufs, FTAG); + return (err); +} +#endif + +/* + * Allocate a loaned anonymous arc buffer. + */ +arc_buf_t * +dmu_request_arcbuf(dmu_buf_t *handle, int size) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; + spa_t *spa; + + DB_GET_SPA(&spa, db); + return (arc_loan_buf(spa, size)); +} + +/* + * Free a loaned arc buffer. + */ +void +dmu_return_arcbuf(arc_buf_t *buf) +{ + arc_return_buf(buf, FTAG); + VERIFY(arc_buf_remove_ref(buf, FTAG) == 1); +} + +/* + * When possible directly assign passed loaned arc buffer to a dbuf. + * If this is not possible copy the contents of passed arc buf via + * dmu_write(). + */ +void +dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, + dmu_tx_t *tx) +{ + dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; + dnode_t *dn; + dmu_buf_impl_t *db; + uint32_t blksz = (uint32_t)arc_buf_size(buf); + uint64_t blkid; + + DB_DNODE_ENTER(dbuf); + dn = DB_DNODE(dbuf); + rw_enter(&dn->dn_struct_rwlock, RW_READER); + blkid = dbuf_whichblock(dn, offset); + VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); + rw_exit(&dn->dn_struct_rwlock); + DB_DNODE_EXIT(dbuf); + + if (offset == db->db.db_offset && blksz == db->db.db_size) { + dbuf_assign_arcbuf(db, buf, tx); + dbuf_rele(db, FTAG); + } else { + objset_t *os; + uint64_t object; + + DB_DNODE_ENTER(dbuf); + dn = DB_DNODE(dbuf); + os = dn->dn_objset; + object = dn->dn_object; + DB_DNODE_EXIT(dbuf); + + dbuf_rele(db, FTAG); + dmu_write(os, object, offset, blksz, buf->b_data, tx); + dmu_return_arcbuf(buf); + XUIOSTAT_BUMP(xuiostat_wbuf_copied); + } +} + +typedef struct { + dbuf_dirty_record_t *dsa_dr; + dmu_sync_cb_t *dsa_done; + zgd_t *dsa_zgd; + dmu_tx_t *dsa_tx; +} dmu_sync_arg_t; + +/* ARGSUSED */ +static void +dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) +{ + dmu_sync_arg_t *dsa = varg; + dmu_buf_t *db = dsa->dsa_zgd->zgd_db; + blkptr_t *bp = zio->io_bp; + + if (zio->io_error 
== 0) { + if (BP_IS_HOLE(bp)) { + /* + * A block of zeros may compress to a hole, but the + * block size still needs to be known for replay. + */ + BP_SET_LSIZE(bp, db->db_size); + } else { + ASSERT(BP_GET_LEVEL(bp) == 0); + bp->blk_fill = 1; + } + } +} + +static void +dmu_sync_late_arrival_ready(zio_t *zio) +{ + dmu_sync_ready(zio, NULL, zio->io_private); +} + +/* ARGSUSED */ +static void +dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) +{ + dmu_sync_arg_t *dsa = varg; + dbuf_dirty_record_t *dr = dsa->dsa_dr; + dmu_buf_impl_t *db = dr->dr_dbuf; + + mutex_enter(&db->db_mtx); + ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC); + if (zio->io_error == 0) { + dr->dt.dl.dr_overridden_by = *zio->io_bp; + dr->dt.dl.dr_override_state = DR_OVERRIDDEN; + dr->dt.dl.dr_copies = zio->io_prop.zp_copies; + if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) + BP_ZERO(&dr->dt.dl.dr_overridden_by); + } else { + dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; + } + cv_broadcast(&db->db_changed); + mutex_exit(&db->db_mtx); + + dsa->dsa_done(dsa->dsa_zgd, zio->io_error); + + kmem_free(dsa, sizeof (*dsa)); +} + +static void +dmu_sync_late_arrival_done(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + dmu_sync_arg_t *dsa = zio->io_private; + + if (zio->io_error == 0 && !BP_IS_HOLE(bp)) { + ASSERT(zio->io_bp->blk_birth == zio->io_txg); + ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa)); + zio_free(zio->io_spa, zio->io_txg, zio->io_bp); + } + + dmu_tx_commit(dsa->dsa_tx); + + dsa->dsa_done(dsa->dsa_zgd, zio->io_error); + + kmem_free(dsa, sizeof (*dsa)); +} + +static int +dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, + zio_prop_t *zp, zbookmark_t *zb) +{ + dmu_sync_arg_t *dsa; + dmu_tx_t *tx; + + tx = dmu_tx_create(os); + dmu_tx_hold_space(tx, zgd->zgd_db->db_size); + if (dmu_tx_assign(tx, TXG_WAIT) != 0) { + dmu_tx_abort(tx); + return (EIO); /* Make zl_get_data do txg_waited_synced() */ + } + + dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); + 
dsa->dsa_dr = NULL; + dsa->dsa_done = done; + dsa->dsa_zgd = zgd; + dsa->dsa_tx = tx; + + zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, + zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, + dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa, + ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); + + return (0); +} + +/* + * Intent log support: sync the block associated with db to disk. + * N.B. and XXX: the caller is responsible for making sure that the + * data isn't changing while dmu_sync() is writing it. + * + * Return values: + * + * EEXIST: this txg has already been synced, so there's nothing to do. + * The caller should not log the write. + * + * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. + * The caller should not log the write. + * + * EALREADY: this block is already in the process of being synced. + * The caller should track its progress (somehow). + * + * EIO: could not do the I/O. + * The caller should do a txg_wait_synced(). + * + * 0: the I/O has been initiated. + * The caller should log this blkptr in the done callback. + * It is possible that the I/O will fail, in which case + * the error will be reported to the done callback and + * propagated to pio from zio_done(). + */ +int +dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) +{ + blkptr_t *bp = zgd->zgd_bp; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db; + objset_t *os = db->db_objset; + dsl_dataset_t *ds = os->os_dsl_dataset; + dbuf_dirty_record_t *dr; + dmu_sync_arg_t *dsa; + zbookmark_t zb; + zio_prop_t zp; + dnode_t *dn; + + ASSERT(pio != NULL); + ASSERT(BP_IS_HOLE(bp)); + ASSERT(txg != 0); + + SET_BOOKMARK(&zb, ds->ds_object, + db->db.db_object, db->db_level, db->db_blkid); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp); + DB_DNODE_EXIT(db); + + /* + * If we're frozen (running ziltest), we always need to generate a bp. 
+ */ + if (txg > spa_freeze_txg(os->os_spa)) + return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb)); + + /* + * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf() + * and us. If we determine that this txg is not yet syncing, + * but it begins to sync a moment later, that's OK because the + * sync thread will block in dbuf_sync_leaf() until we drop db_mtx. + */ + mutex_enter(&db->db_mtx); + + if (txg <= spa_last_synced_txg(os->os_spa)) { + /* + * This txg has already synced. There's nothing to do. + */ + mutex_exit(&db->db_mtx); + return (EEXIST); + } + + if (txg <= spa_syncing_txg(os->os_spa)) { + /* + * This txg is currently syncing, so we can't mess with + * the dirty record anymore; just write a new log block. + */ + mutex_exit(&db->db_mtx); + return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb)); + } + + dr = db->db_last_dirty; + while (dr && dr->dr_txg != txg) + dr = dr->dr_next; + + if (dr == NULL) { + /* + * There's no dr for this dbuf, so it must have been freed. + * There's no need to log writes to freed blocks, so we're done. + */ + mutex_exit(&db->db_mtx); + return (ENOENT); + } + + ASSERT(dr->dr_txg == txg); + if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC || + dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { + /* + * We have already issued a sync write for this buffer, + * or this buffer has already been synced. It could not + * have been dirtied since, or we would have cleared the state. 
+ */ + mutex_exit(&db->db_mtx); + return (EALREADY); + } + + ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); + dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; + mutex_exit(&db->db_mtx); + + dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); + dsa->dsa_dr = dr; + dsa->dsa_done = done; + dsa->dsa_zgd = zgd; + dsa->dsa_tx = NULL; + + zio_nowait(arc_write(pio, os->os_spa, txg, + bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), &zp, + dmu_sync_ready, dmu_sync_done, dsa, + ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); + + return (0); +} + +int +dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + err = dnode_set_blksz(dn, size, ibs, tx); + dnode_rele(dn, FTAG); + return (err); +} + +void +dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, + dmu_tx_t *tx) +{ + dnode_t *dn; + + /* XXX assumes dnode_hold will not get an i/o error */ + (void) dnode_hold(os, object, FTAG, &dn); + ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); + dn->dn_checksum = checksum; + dnode_setdirty(dn, tx); + dnode_rele(dn, FTAG); +} + +void +dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, + dmu_tx_t *tx) +{ + dnode_t *dn; + + /* XXX assumes dnode_hold will not get an i/o error */ + (void) dnode_hold(os, object, FTAG, &dn); + ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); + dn->dn_compress = compress; + dnode_setdirty(dn, tx); + dnode_rele(dn, FTAG); +} + +int zfs_mdcomp_disable = 0; + +void +dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) +{ + dmu_object_type_t type = dn ? 
dn->dn_type : DMU_OT_OBJSET; + boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata || + (wp & WP_SPILL)); + enum zio_checksum checksum = os->os_checksum; + enum zio_compress compress = os->os_compress; + enum zio_checksum dedup_checksum = os->os_dedup_checksum; + boolean_t dedup; + boolean_t dedup_verify = os->os_dedup_verify; + int copies = os->os_copies; + + /* + * Determine checksum setting. + */ + if (ismd) { + /* + * Metadata always gets checksummed. If the data + * checksum is multi-bit correctable, and it's not a + * ZBT-style checksum, then it's suitable for metadata + * as well. Otherwise, the metadata checksum defaults + * to fletcher4. + */ + if (zio_checksum_table[checksum].ci_correctable < 1 || + zio_checksum_table[checksum].ci_eck) + checksum = ZIO_CHECKSUM_FLETCHER_4; + } else { + checksum = zio_checksum_select(dn->dn_checksum, checksum); + } + + /* + * Determine compression setting. + */ + if (ismd) { + /* + * XXX -- we should design a compression algorithm + * that specializes in arrays of bps. + */ + compress = zfs_mdcomp_disable ? ZIO_COMPRESS_EMPTY : + ZIO_COMPRESS_LZJB; + } else { + compress = zio_compress_select(dn->dn_compress, compress); + } + + /* + * Determine dedup setting. If we are in dmu_sync(), we won't + * actually dedup now because that's all done in syncing context; + * but we do want to use the dedup checksum. If the checksum is not + * strong enough to ensure unique signatures, force dedup_verify. + */ + dedup = (!ismd && dedup_checksum != ZIO_CHECKSUM_OFF); + if (dedup) { + checksum = dedup_checksum; + if (!zio_checksum_table[checksum].ci_dedup) + dedup_verify = 1; + } + + if (wp & WP_DMU_SYNC) + dedup = 0; + + if (wp & WP_NOFILL) { + ASSERT(!ismd && level == 0); + checksum = ZIO_CHECKSUM_OFF; + compress = ZIO_COMPRESS_OFF; + dedup = B_FALSE; + } + + zp->zp_checksum = checksum; + zp->zp_compress = compress; + zp->zp_type = (wp & WP_SPILL) ? 
dn->dn_bonustype : type; + zp->zp_level = level; + zp->zp_copies = MIN(copies + ismd, spa_max_replication(os->os_spa)); + zp->zp_dedup = dedup; + zp->zp_dedup_verify = dedup && dedup_verify; +} + +int +dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) +{ + dnode_t *dn; + int i, err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + /* + * Sync any current changes before + * we go trundling through the block pointers. + */ + for (i = 0; i < TXG_SIZE; i++) { + if (list_link_active(&dn->dn_dirty_link[i])) + break; + } + if (i != TXG_SIZE) { + dnode_rele(dn, FTAG); + txg_wait_synced(dmu_objset_pool(os), 0); + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + } + + err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); + dnode_rele(dn, FTAG); + + return (err); +} + +void +dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) +{ + dnode_phys_t *dnp; + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + mutex_enter(&dn->dn_mtx); + + dnp = dn->dn_phys; + + doi->doi_data_block_size = dn->dn_datablksz; + doi->doi_metadata_block_size = dn->dn_indblkshift ? + 1ULL << dn->dn_indblkshift : 0; + doi->doi_type = dn->dn_type; + doi->doi_bonus_type = dn->dn_bonustype; + doi->doi_bonus_size = dn->dn_bonuslen; + doi->doi_indirection = dn->dn_nlevels; + doi->doi_checksum = dn->dn_checksum; + doi->doi_compress = dn->dn_compress; + doi->doi_physical_blocks_512 = (DN_USED_BYTES(dnp) + 256) >> 9; + doi->doi_max_offset = (dnp->dn_maxblkid + 1) * dn->dn_datablksz; + doi->doi_fill_count = 0; + for (int i = 0; i < dnp->dn_nblkptr; i++) + doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill; + + mutex_exit(&dn->dn_mtx); + rw_exit(&dn->dn_struct_rwlock); +} + +/* + * Get information on a DMU object. + * If doi is NULL, just indicates whether the object exists. 
+ */ +int +dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) +{ + dnode_t *dn; + int err = dnode_hold(os, object, FTAG, &dn); + + if (err) + return (err); + + if (doi != NULL) + dmu_object_info_from_dnode(dn, doi); + + dnode_rele(dn, FTAG); + return (0); +} + +/* + * As above, but faster; can be used when you have a held dbuf in hand. + */ +void +dmu_object_info_from_db(dmu_buf_t *db_fake, dmu_object_info_t *doi) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + DB_DNODE_ENTER(db); + dmu_object_info_from_dnode(DB_DNODE(db), doi); + DB_DNODE_EXIT(db); +} + +/* + * Faster still when you only care about the size. + * This is specifically optimized for zfs_getattr(). + */ +void +dmu_object_size_from_db(dmu_buf_t *db_fake, uint32_t *blksize, + u_longlong_t *nblk512) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dnode_t *dn; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + + *blksize = dn->dn_datablksz; + /* add 1 for dnode space */ + *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> + SPA_MINBLOCKSHIFT) + 1; + DB_DNODE_EXIT(db); +} + +void +byteswap_uint64_array(void *vbuf, size_t size) +{ + uint64_t *buf = vbuf; + size_t count = size >> 3; + int i; + + ASSERT((size & 7) == 0); + + for (i = 0; i < count; i++) + buf[i] = BSWAP_64(buf[i]); +} + +void +byteswap_uint32_array(void *vbuf, size_t size) +{ + uint32_t *buf = vbuf; + size_t count = size >> 2; + int i; + + ASSERT((size & 3) == 0); + + for (i = 0; i < count; i++) + buf[i] = BSWAP_32(buf[i]); +} + +void +byteswap_uint16_array(void *vbuf, size_t size) +{ + uint16_t *buf = vbuf; + size_t count = size >> 1; + int i; + + ASSERT((size & 1) == 0); + + for (i = 0; i < count; i++) + buf[i] = BSWAP_16(buf[i]); +} + +/* ARGSUSED */ +void +byteswap_uint8_array(void *vbuf, size_t size) +{ +} + +void +dmu_init(void) +{ + zfs_dbgmsg_init(); + sa_cache_init(); + xuio_stat_init(); + dmu_objset_init(); + dnode_init(); + dbuf_init(); + zfetch_init(); + arc_init(); + l2arc_init(); +} + 
+void +dmu_fini(void) +{ + l2arc_fini(); + arc_fini(); + zfetch_fini(); + dbuf_fini(); + dnode_fini(); + dmu_objset_fini(); + xuio_stat_fini(); + sa_cache_fini(); + zfs_dbgmsg_fini(); +} diff --git a/uts/common/fs/zfs/dmu_diff.c b/uts/common/fs/zfs/dmu_diff.c new file mode 100644 index 000000000000..22340ebc5374 --- /dev/null +++ b/uts/common/fs/zfs/dmu_diff.c @@ -0,0 +1,221 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_tx.h> +#include <sys/dbuf.h> +#include <sys/dnode.h> +#include <sys/zfs_context.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_traverse.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> +#include <sys/zfs_ioctl.h> +#include <sys/zap.h> +#include <sys/zio_checksum.h> +#include <sys/zfs_znode.h> + +struct diffarg { + struct vnode *da_vp; /* file to which we are reporting */ + offset_t *da_offp; + int da_err; /* error that stopped diff search */ + dmu_diff_record_t da_ddr; +}; + +static int +write_record(struct diffarg *da) +{ + ssize_t resid; /* have to get resid to get detailed errno */ + + if (da->da_ddr.ddr_type == DDR_NONE) { + da->da_err = 0; + return (0); + } + + da->da_err = vn_rdwr(UIO_WRITE, da->da_vp, (caddr_t)&da->da_ddr, + sizeof (da->da_ddr), 0, UIO_SYSSPACE, FAPPEND, + RLIM64_INFINITY, CRED(), &resid); + *da->da_offp += sizeof (da->da_ddr); + return (da->da_err); +} + +static int +report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last) +{ + ASSERT(first <= last); + if (da->da_ddr.ddr_type != DDR_FREE || + first != da->da_ddr.ddr_last + 1) { + if (write_record(da) != 0) + return (da->da_err); + da->da_ddr.ddr_type = DDR_FREE; + da->da_ddr.ddr_first = first; + da->da_ddr.ddr_last = last; + return (0); + } + da->da_ddr.ddr_last = last; + return (0); +} + +static int +report_dnode(struct diffarg *da, uint64_t object, dnode_phys_t *dnp) +{ + ASSERT(dnp != NULL); + if (dnp->dn_type == DMU_OT_NONE) + return (report_free_dnode_range(da, object, object)); + + if (da->da_ddr.ddr_type != DDR_INUSE || + object != da->da_ddr.ddr_last + 1) { + if (write_record(da) != 0) + return (da->da_err); + da->da_ddr.ddr_type = DDR_INUSE; + da->da_ddr.ddr_first = da->da_ddr.ddr_last = object; + return (0); + } + da->da_ddr.ddr_last = object; + return (0); +} + +#define DBP_SPAN(dnp, level) \ + 
(((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ + (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) + +/* ARGSUSED */ +static int +diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + struct diffarg *da = arg; + int err = 0; + + if (issig(JUSTLOOKING) && issig(FORREAL)) + return (EINTR); + + if (zb->zb_object != DMU_META_DNODE_OBJECT) + return (0); + + if (bp == NULL) { + uint64_t span = DBP_SPAN(dnp, zb->zb_level); + uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; + + err = report_free_dnode_range(da, dnobj, + dnobj + (span >> DNODE_SHIFT) - 1); + if (err) + return (err); + } else if (zb->zb_level == 0) { + dnode_phys_t *blk; + arc_buf_t *abuf; + uint32_t aflags = ARC_WAIT; + int blksz = BP_GET_LSIZE(bp); + int i; + + if (dsl_read(NULL, spa, bp, pbuf, + arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) + return (EIO); + + blk = abuf->b_data; + for (i = 0; i < blksz >> DNODE_SHIFT; i++) { + uint64_t dnobj = (zb->zb_blkid << + (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; + err = report_dnode(da, dnobj, blk+i); + if (err) + break; + } + (void) arc_buf_remove_ref(abuf, &abuf); + if (err) + return (err); + /* Don't care about the data blocks */ + return (TRAVERSE_VISIT_NO_CHILDREN); + } + return (0); +} + +int +dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp) +{ + struct diffarg da; + dsl_dataset_t *ds = tosnap->os_dsl_dataset; + dsl_dataset_t *fromds = fromsnap->os_dsl_dataset; + dsl_dataset_t *findds; + dsl_dataset_t *relds; + int err = 0; + + /* make certain we are looking at snapshots */ + if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds)) + return (EINVAL); + + /* fromsnap must be earlier and from the same lineage as tosnap */ + if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg) + return (EXDEV); + + relds = NULL; + findds = ds; + + while (fromds->ds_dir != 
findds->ds_dir) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (!dsl_dir_is_clone(findds->ds_dir)) { + if (relds) + dsl_dataset_rele(relds, FTAG); + return (EXDEV); + } + + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds); + rw_exit(&dp->dp_config_rwlock); + + if (relds) + dsl_dataset_rele(relds, FTAG); + + if (err) + return (EXDEV); + + relds = findds; + } + + if (relds) + dsl_dataset_rele(relds, FTAG); + + da.da_vp = vp; + da.da_offp = offp; + da.da_ddr.ddr_type = DDR_NONE; + da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0; + da.da_err = 0; + + err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg, + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da); + + if (err) { + da.da_err = err; + } else { + /* we set the da.da_err we return as side-effect */ + (void) write_record(&da); + } + + return (da.da_err); +} diff --git a/uts/common/fs/zfs/dmu_object.c b/uts/common/fs/zfs/dmu_object.c new file mode 100644 index 000000000000..8dff46048902 --- /dev/null +++ b/uts/common/fs/zfs/dmu_object.c @@ -0,0 +1,196 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_tx.h> +#include <sys/dnode.h> + +uint64_t +dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + uint64_t object; + uint64_t L2_dnode_count = DNODES_PER_BLOCK << + (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT); + dnode_t *dn = NULL; + int restarted = B_FALSE; + + mutex_enter(&os->os_obj_lock); + for (;;) { + object = os->os_obj_next; + /* + * Each time we polish off an L2 bp worth of dnodes + * (2^13 objects), move to another L2 bp that's still + * reasonably sparse (at most 1/4 full). Look from the + * beginning once, but after that keep looking from here. + * If we can't find one, just keep going from here. + */ + if (P2PHASE(object, L2_dnode_count) == 0) { + uint64_t offset = restarted ? object << DNODE_SHIFT : 0; + int error = dnode_next_offset(DMU_META_DNODE(os), + DNODE_FIND_HOLE, + &offset, 2, DNODES_PER_BLOCK >> 2, 0); + restarted = B_TRUE; + if (error == 0) + object = offset >> DNODE_SHIFT; + } + os->os_obj_next = ++object; + + /* + * XXX We should check for an i/o error here and return + * up to our caller. Actually we should pre-read it in + * dmu_tx_assign(), but there is currently no mechanism + * to do so. 
+ */ + (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, + FTAG, &dn); + if (dn) + break; + + if (dmu_object_next(os, &object, B_TRUE, 0) == 0) + os->os_obj_next = object - 1; + } + + dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); + dnode_rele(dn, FTAG); + + mutex_exit(&os->os_obj_lock); + + dmu_tx_add_new_object(tx, os, object); + return (object); +} + +int +dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, + int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx)) + return (EBADF); + + err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn); + if (err) + return (err); + dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); + dnode_rele(dn, FTAG); + + dmu_tx_add_new_object(tx, os, object); + return (0); +} + +int +dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, + int blocksize, dmu_object_type_t bonustype, int bonuslen) +{ + dnode_t *dn; + dmu_tx_t *tx; + int nblkptr; + int err; + + if (object == DMU_META_DNODE_OBJECT) + return (EBADF); + + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, + FTAG, &dn); + if (err) + return (err); + + if (dn->dn_type == ot && dn->dn_datablksz == blocksize && + dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) { + /* nothing is changing, this is a noop */ + dnode_rele(dn, FTAG); + return (0); + } + + if (bonustype == DMU_OT_SA) { + nblkptr = 1; + } else { + nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); + } + + /* + * If we are losing blkptrs or changing the block size this must + * be a new file instance. We must clear out the previous file + * contents before we can change this type of metadata in the dnode. 
+ */ + if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) { + err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END); + if (err) + goto out; + } + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, object); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + goto out; + } + + dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx); + + dmu_tx_commit(tx); +out: + dnode_rele(dn, FTAG); + + return (err); +} + +int +dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); + + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, + FTAG, &dn); + if (err) + return (err); + + ASSERT(dn->dn_type != DMU_OT_NONE); + dnode_free_range(dn, 0, DMU_OBJECT_END, tx); + dnode_free(dn, tx); + dnode_rele(dn, FTAG); + + return (0); +} + +int +dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg) +{ + uint64_t offset = (*objectp + 1) << DNODE_SHIFT; + int error; + + error = dnode_next_offset(DMU_META_DNODE(os), + (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg); + + *objectp = offset >> DNODE_SHIFT; + + return (error); +} diff --git a/uts/common/fs/zfs/dmu_objset.c b/uts/common/fs/zfs/dmu_objset.c new file mode 100644 index 000000000000..7caebd979f02 --- /dev/null +++ b/uts/common/fs/zfs/dmu_objset.c @@ -0,0 +1,1789 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#include <sys/cred.h> +#include <sys/zfs_context.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_deleg.h> +#include <sys/dnode.h> +#include <sys/dbuf.h> +#include <sys/zvol.h> +#include <sys/dmu_tx.h> +#include <sys/zap.h> +#include <sys/zil.h> +#include <sys/dmu_impl.h> +#include <sys/zfs_ioctl.h> +#include <sys/sa.h> +#include <sys/zfs_onexit.h> + +/* + * Needed to close a window in dnode_move() that allows the objset to be freed + * before it can be safely accessed. 
+ */ +krwlock_t os_lock; + +void +dmu_objset_init(void) +{ + rw_init(&os_lock, NULL, RW_DEFAULT, NULL); +} + +void +dmu_objset_fini(void) +{ + rw_destroy(&os_lock); +} + +spa_t * +dmu_objset_spa(objset_t *os) +{ + return (os->os_spa); +} + +zilog_t * +dmu_objset_zil(objset_t *os) +{ + return (os->os_zil); +} + +dsl_pool_t * +dmu_objset_pool(objset_t *os) +{ + dsl_dataset_t *ds; + + if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir) + return (ds->ds_dir->dd_pool); + else + return (spa_get_dsl(os->os_spa)); +} + +dsl_dataset_t * +dmu_objset_ds(objset_t *os) +{ + return (os->os_dsl_dataset); +} + +dmu_objset_type_t +dmu_objset_type(objset_t *os) +{ + return (os->os_phys->os_type); +} + +void +dmu_objset_name(objset_t *os, char *buf) +{ + dsl_dataset_name(os->os_dsl_dataset, buf); +} + +uint64_t +dmu_objset_id(objset_t *os) +{ + dsl_dataset_t *ds = os->os_dsl_dataset; + + return (ds ? ds->ds_object : 0); +} + +uint64_t +dmu_objset_syncprop(objset_t *os) +{ + return (os->os_sync); +} + +uint64_t +dmu_objset_logbias(objset_t *os) +{ + return (os->os_logbias); +} + +static void +checksum_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance should have been done by now. + */ + ASSERT(newval != ZIO_CHECKSUM_INHERIT); + + os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); +} + +static void +compression_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval != ZIO_COMPRESS_INHERIT); + + os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); +} + +static void +copies_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. 
+ */ + ASSERT(newval > 0); + ASSERT(newval <= spa_max_replication(os->os_spa)); + + os->os_copies = newval; +} + +static void +dedup_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + spa_t *spa = os->os_spa; + enum zio_checksum checksum; + + /* + * Inheritance should have been done by now. + */ + ASSERT(newval != ZIO_CHECKSUM_INHERIT); + + checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF); + + os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK; + os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY); +} + +static void +primary_cache_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || + newval == ZFS_CACHE_METADATA); + + os->os_primary_cache = newval; +} + +static void +secondary_cache_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || + newval == ZFS_CACHE_METADATA); + + os->os_secondary_cache = newval; +} + +static void +sync_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. 
+ */ + ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS || + newval == ZFS_SYNC_DISABLED); + + os->os_sync = newval; + if (os->os_zil) + zil_set_sync(os->os_zil, newval); +} + +static void +logbias_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + ASSERT(newval == ZFS_LOGBIAS_LATENCY || + newval == ZFS_LOGBIAS_THROUGHPUT); + os->os_logbias = newval; + if (os->os_zil) + zil_set_logbias(os->os_zil, newval); +} + +void +dmu_objset_byteswap(void *buf, size_t size) +{ + objset_phys_t *osp = buf; + + ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t)); + dnode_byteswap(&osp->os_meta_dnode); + byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); + osp->os_type = BSWAP_64(osp->os_type); + osp->os_flags = BSWAP_64(osp->os_flags); + if (size == sizeof (objset_phys_t)) { + dnode_byteswap(&osp->os_userused_dnode); + dnode_byteswap(&osp->os_groupused_dnode); + } +} + +int +dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, + objset_t **osp) +{ + objset_t *os; + int i, err; + + ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); + + os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); + os->os_dsl_dataset = ds; + os->os_spa = spa; + os->os_rootbp = bp; + if (!BP_IS_HOLE(os->os_rootbp)) { + uint32_t aflags = ARC_WAIT; + zbookmark_t zb; + SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + + if (DMU_OS_IS_L2CACHEABLE(os)) + aflags |= ARC_L2CACHE; + + dprintf_bp(os->os_rootbp, "reading %s", ""); + /* + * XXX when bprewrite scrub can change the bp, + * and this is called from dmu_objset_open_ds_os, the bp + * could change, and we'll need a lock. 
+ */ + err = dsl_read_nolock(NULL, spa, os->os_rootbp, + arc_getbuf_func, &os->os_phys_buf, + ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); + if (err) { + kmem_free(os, sizeof (objset_t)); + /* convert checksum errors into IO errors */ + if (err == ECKSUM) + err = EIO; + return (err); + } + + /* Increase the blocksize if we are permitted. */ + if (spa_version(spa) >= SPA_VERSION_USERSPACE && + arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { + arc_buf_t *buf = arc_buf_alloc(spa, + sizeof (objset_phys_t), &os->os_phys_buf, + ARC_BUFC_METADATA); + bzero(buf->b_data, sizeof (objset_phys_t)); + bcopy(os->os_phys_buf->b_data, buf->b_data, + arc_buf_size(os->os_phys_buf)); + (void) arc_buf_remove_ref(os->os_phys_buf, + &os->os_phys_buf); + os->os_phys_buf = buf; + } + + os->os_phys = os->os_phys_buf->b_data; + os->os_flags = os->os_phys->os_flags; + } else { + int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? + sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; + os->os_phys_buf = arc_buf_alloc(spa, size, + &os->os_phys_buf, ARC_BUFC_METADATA); + os->os_phys = os->os_phys_buf->b_data; + bzero(os->os_phys, size); + } + + /* + * Note: the changed_cb will be called once before the register + * func returns, thus changing the checksum/compression from the + * default (fletcher2/off). Snapshots don't need to know about + * checksum/compression/copies. 
+ */ + if (ds) { + err = dsl_prop_register(ds, "primarycache", + primary_cache_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "secondarycache", + secondary_cache_changed_cb, os); + if (!dsl_dataset_is_snapshot(ds)) { + if (err == 0) + err = dsl_prop_register(ds, "checksum", + checksum_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "compression", + compression_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "copies", + copies_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "dedup", + dedup_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "logbias", + logbias_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "sync", + sync_changed_cb, os); + } + if (err) { + VERIFY(arc_buf_remove_ref(os->os_phys_buf, + &os->os_phys_buf) == 1); + kmem_free(os, sizeof (objset_t)); + return (err); + } + } else if (ds == NULL) { + /* It's the meta-objset. */ + os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; + os->os_compress = ZIO_COMPRESS_LZJB; + os->os_copies = spa_max_replication(spa); + os->os_dedup_checksum = ZIO_CHECKSUM_OFF; + os->os_dedup_verify = 0; + os->os_logbias = 0; + os->os_sync = 0; + os->os_primary_cache = ZFS_CACHE_ALL; + os->os_secondary_cache = ZFS_CACHE_ALL; + } + + if (ds == NULL || !dsl_dataset_is_snapshot(ds)) + os->os_zil_header = os->os_phys->os_zil_header; + os->os_zil = zil_alloc(os, &os->os_zil_header); + + for (i = 0; i < TXG_SIZE; i++) { + list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), + offsetof(dnode_t, dn_dirty_link[i])); + list_create(&os->os_free_dnodes[i], sizeof (dnode_t), + offsetof(dnode_t, dn_dirty_link[i])); + } + list_create(&os->os_dnodes, sizeof (dnode_t), + offsetof(dnode_t, dn_link)); + list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), + offsetof(dmu_buf_impl_t, db_link)); + + mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&os->os_user_ptr_lock, NULL, 
MUTEX_DEFAULT, NULL); + + DMU_META_DNODE(os) = dnode_special_open(os, + &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, + &os->os_meta_dnode); + if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { + DMU_USERUSED_DNODE(os) = dnode_special_open(os, + &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, + &os->os_userused_dnode); + DMU_GROUPUSED_DNODE(os) = dnode_special_open(os, + &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, + &os->os_groupused_dnode); + } + + /* + * We should be the only thread trying to do this because we + * have ds_opening_lock + */ + if (ds) { + mutex_enter(&ds->ds_lock); + ASSERT(ds->ds_objset == NULL); + ds->ds_objset = os; + mutex_exit(&ds->ds_lock); + } + + *osp = os; + return (0); +} + +int +dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) +{ + int err = 0; + + mutex_enter(&ds->ds_opening_lock); + *osp = ds->ds_objset; + if (*osp == NULL) { + err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), + ds, dsl_dataset_get_blkptr(ds), osp); + } + mutex_exit(&ds->ds_opening_lock); + return (err); +} + +/* called from zpl */ +int +dmu_objset_hold(const char *name, void *tag, objset_t **osp) +{ + dsl_dataset_t *ds; + int err; + + err = dsl_dataset_hold(name, tag, &ds); + if (err) + return (err); + + err = dmu_objset_from_ds(ds, osp); + if (err) + dsl_dataset_rele(ds, tag); + + return (err); +} + +/* called from zpl */ +int +dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, void *tag, objset_t **osp) +{ + dsl_dataset_t *ds; + int err; + + err = dsl_dataset_own(name, B_FALSE, tag, &ds); + if (err) + return (err); + + err = dmu_objset_from_ds(ds, osp); + if (err) { + dsl_dataset_disown(ds, tag); + } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { + dmu_objset_disown(*osp, tag); + return (EINVAL); + } else if (!readonly && dsl_dataset_is_snapshot(ds)) { + dmu_objset_disown(*osp, tag); + return (EROFS); + } + return (err); +} + +void +dmu_objset_rele(objset_t *os, void *tag) 
+{ + dsl_dataset_rele(os->os_dsl_dataset, tag); +} + +void +dmu_objset_disown(objset_t *os, void *tag) +{ + dsl_dataset_disown(os->os_dsl_dataset, tag); +} + +int +dmu_objset_evict_dbufs(objset_t *os) +{ + dnode_t *dn; + + mutex_enter(&os->os_lock); + + /* process the mdn last, since the other dnodes have holds on it */ + list_remove(&os->os_dnodes, DMU_META_DNODE(os)); + list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os)); + + /* + * Find the first dnode with holds. We have to do this dance + * because dnode_add_ref() only works if you already have a + * hold. If there are no holds then it has no dbufs so OK to + * skip. + */ + for (dn = list_head(&os->os_dnodes); + dn && !dnode_add_ref(dn, FTAG); + dn = list_next(&os->os_dnodes, dn)) + continue; + + while (dn) { + dnode_t *next_dn = dn; + + do { + next_dn = list_next(&os->os_dnodes, next_dn); + } while (next_dn && !dnode_add_ref(next_dn, FTAG)); + + mutex_exit(&os->os_lock); + dnode_evict_dbufs(dn); + dnode_rele(dn, FTAG); + mutex_enter(&os->os_lock); + dn = next_dn; + } + dn = list_head(&os->os_dnodes); + mutex_exit(&os->os_lock); + return (dn != DMU_META_DNODE(os)); +} + +void +dmu_objset_evict(objset_t *os) +{ + dsl_dataset_t *ds = os->os_dsl_dataset; + + for (int t = 0; t < TXG_SIZE; t++) + ASSERT(!dmu_objset_is_dirty(os, t)); + + if (ds) { + if (!dsl_dataset_is_snapshot(ds)) { + VERIFY(0 == dsl_prop_unregister(ds, "checksum", + checksum_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "compression", + compression_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "copies", + copies_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "dedup", + dedup_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "logbias", + logbias_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "sync", + sync_changed_cb, os)); + } + VERIFY(0 == dsl_prop_unregister(ds, "primarycache", + primary_cache_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", + secondary_cache_changed_cb, 
os)); + } + + if (os->os_sa) + sa_tear_down(os); + + /* + * We should need only a single pass over the dnode list, since + * nothing can be added to the list at this point. + */ + (void) dmu_objset_evict_dbufs(os); + + dnode_special_close(&os->os_meta_dnode); + if (DMU_USERUSED_DNODE(os)) { + dnode_special_close(&os->os_userused_dnode); + dnode_special_close(&os->os_groupused_dnode); + } + zil_free(os->os_zil); + + ASSERT3P(list_head(&os->os_dnodes), ==, NULL); + + VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); + + /* + * This is a barrier to prevent the objset from going away in + * dnode_move() until we can safely ensure that the objset is still in + * use. We consider the objset valid before the barrier and invalid + * after the barrier. + */ + rw_enter(&os_lock, RW_READER); + rw_exit(&os_lock); + + mutex_destroy(&os->os_lock); + mutex_destroy(&os->os_obj_lock); + mutex_destroy(&os->os_user_ptr_lock); + kmem_free(os, sizeof (objset_t)); +} + +timestruc_t +dmu_objset_snap_cmtime(objset_t *os) +{ + return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); +} + +/* called from dsl for meta-objset */ +objset_t * +dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, + dmu_objset_type_t type, dmu_tx_t *tx) +{ + objset_t *os; + dnode_t *mdn; + + ASSERT(dmu_tx_is_syncing(tx)); + if (ds != NULL) + VERIFY(0 == dmu_objset_from_ds(ds, &os)); + else + VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os)); + + mdn = DMU_META_DNODE(os); + + dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, + DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); + + /* + * We don't want to have to increase the meta-dnode's nlevels + * later, because then we could do it in quiescing context while + * we are also accessing it in open context. + * + * This precaution is not necessary for the MOS (ds == NULL), + * because the MOS is only updated in syncing context.
+ * This is most fortunate: the MOS is the only objset that + * needs to be synced multiple times as spa_sync() iterates + * to convergence, so minimizing its dn_nlevels matters. + */ + if (ds != NULL) { + int levels = 1; + + /* + * Determine the number of levels necessary for the meta-dnode + * to contain DN_MAX_OBJECT dnodes. + */ + while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + + (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < + DN_MAX_OBJECT * sizeof (dnode_phys_t)) + levels++; + + mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = + mdn->dn_nlevels = levels; + } + + ASSERT(type != DMU_OST_NONE); + ASSERT(type != DMU_OST_ANY); + ASSERT(type < DMU_OST_NUMTYPES); + os->os_phys->os_type = type; + if (dmu_objset_userused_enabled(os)) { + os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; + os->os_flags = os->os_phys->os_flags; + } + + dsl_dataset_dirty(ds, tx); + + return (os); +} + +struct oscarg { + void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); + void *userarg; + dsl_dataset_t *clone_origin; + const char *lastname; + dmu_objset_type_t type; + uint64_t flags; + cred_t *cr; +}; + +/*ARGSUSED*/ +static int +dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct oscarg *oa = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; + int err; + uint64_t ddobj; + + err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, + oa->lastname, sizeof (uint64_t), 1, &ddobj); + if (err != ENOENT) + return (err ? err : EEXIST); + + if (oa->clone_origin != NULL) { + /* You can't clone across pools. */ + if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) + return (EXDEV); + + /* You can only clone snapshots, not the head datasets. 
*/ + if (!dsl_dataset_is_snapshot(oa->clone_origin)) + return (EINVAL); + } + + return (0); +} + +static void +dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + spa_t *spa = dd->dd_pool->dp_spa; + struct oscarg *oa = arg2; + uint64_t obj; + + ASSERT(dmu_tx_is_syncing(tx)); + + obj = dsl_dataset_create_sync(dd, oa->lastname, + oa->clone_origin, oa->flags, oa->cr, tx); + + if (oa->clone_origin == NULL) { + dsl_pool_t *dp = dd->dd_pool; + dsl_dataset_t *ds; + blkptr_t *bp; + objset_t *os; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + ASSERT(BP_IS_HOLE(bp)); + + os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + + if (oa->userfunc) + oa->userfunc(os, oa->userarg, oa->cr, tx); + dsl_dataset_rele(ds, FTAG); + } + + spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); +} + +int +dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, + void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) +{ + dsl_dir_t *pdd; + const char *tail; + int err = 0; + struct oscarg oa = { 0 }; + + ASSERT(strchr(name, '@') == NULL); + err = dsl_dir_open(name, FTAG, &pdd, &tail); + if (err) + return (err); + if (tail == NULL) { + dsl_dir_close(pdd, FTAG); + return (EEXIST); + } + + oa.userfunc = func; + oa.userarg = arg; + oa.lastname = tail; + oa.type = type; + oa.flags = flags; + oa.cr = CRED(); + + err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, + dmu_objset_create_sync, pdd, &oa, 5); + dsl_dir_close(pdd, FTAG); + return (err); +} + +int +dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) +{ + dsl_dir_t *pdd; + const char *tail; + int err = 0; + struct oscarg oa = { 0 }; + + ASSERT(strchr(name, '@') == NULL); + err = dsl_dir_open(name, FTAG, &pdd, &tail); + if (err) + return (err); + if (tail == NULL) { + dsl_dir_close(pdd, FTAG); + return (EEXIST); + } + + oa.lastname = tail; + 
oa.clone_origin = clone_origin; + oa.flags = flags; + oa.cr = CRED(); + + err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, + dmu_objset_create_sync, pdd, &oa, 5); + dsl_dir_close(pdd, FTAG); + return (err); +} + +int +dmu_objset_destroy(const char *name, boolean_t defer) +{ + dsl_dataset_t *ds; + int error; + + error = dsl_dataset_own(name, B_TRUE, FTAG, &ds); + if (error == 0) { + error = dsl_dataset_destroy(ds, FTAG, defer); + /* dsl_dataset_destroy() closes the ds. */ + } + + return (error); +} + +struct snaparg { + dsl_sync_task_group_t *dstg; + char *snapname; + char *htag; + char failed[MAXPATHLEN]; + boolean_t recursive; + boolean_t needsuspend; + boolean_t temporary; + nvlist_t *props; + struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ + dsl_dataset_t *newds; +}; + +static int +snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + objset_t *os = arg1; + struct snaparg *sn = arg2; + int error; + + /* The props have already been checked by zfs_check_userprops(). */ + + error = dsl_dataset_snapshot_check(os->os_dsl_dataset, + sn->snapname, tx); + if (error) + return (error); + + if (sn->temporary) { + /* + * Ideally we would just call + * dsl_dataset_user_hold_check() and + * dsl_dataset_destroy_check() here. However the + * dataset we want to hold and destroy is the snapshot + * that we just confirmed we can create, but it won't + * exist until after these checks are run. Do any + * checks we can here and if more checks are added to + * those routines in the future, similar checks may be + * necessary here. + */ + if (spa_version(os->os_spa) < SPA_VERSION_USERREFS) + return (ENOTSUP); + /* + * Not checking number of tags because the tag will be + * unique, as it will be the only tag. 
+ */ + if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + return (E2BIG); + + sn->ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); + sn->ha->temphold = B_TRUE; + sn->ha->htag = sn->htag; + } + return (error); +} + +static void +snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + objset_t *os = arg1; + dsl_dataset_t *ds = os->os_dsl_dataset; + struct snaparg *sn = arg2; + + dsl_dataset_snapshot_sync(ds, sn->snapname, tx); + + if (sn->props) { + dsl_props_arg_t pa; + pa.pa_props = sn->props; + pa.pa_source = ZPROP_SRC_LOCAL; + dsl_props_set_sync(ds->ds_prev, &pa, tx); + } + + if (sn->temporary) { + struct dsl_ds_destroyarg da; + + dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); + kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); + sn->ha = NULL; + sn->newds = ds->ds_prev; + + da.ds = ds->ds_prev; + da.defer = B_TRUE; + dsl_dataset_destroy_sync(&da, FTAG, tx); + } +} + +static int +dmu_objset_snapshot_one(const char *name, void *arg) +{ + struct snaparg *sn = arg; + objset_t *os; + int err; + char *cp; + + /* + * If the objset starts with a '%', then ignore it unless it was + * explicitly named (ie, not recursive). These hidden datasets + * are always inconsistent, and by not opening them here, we can + * avoid a race with dsl_dir_destroy_check(). + */ + cp = strrchr(name, '/'); + if (cp && cp[1] == '%' && sn->recursive) + return (0); + + (void) strcpy(sn->failed, name); + + /* + * Check permissions if we are doing a recursive snapshot. The + * permission checks for the starting dataset have already been + * performed in zfs_secpolicy_snapshot() + */ + if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) + return (err); + + err = dmu_objset_hold(name, sn, &os); + if (err != 0) + return (err); + + /* + * If the objset is in an inconsistent state (eg, in the process + * of being destroyed), don't snapshot it. 
As with %hidden + * datasets, we return EBUSY if this name was explicitly + * requested (ie, not recursive), and otherwise ignore it. + */ + if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { + dmu_objset_rele(os, sn); + return (sn->recursive ? 0 : EBUSY); + } + + if (sn->needsuspend) { + err = zil_suspend(dmu_objset_zil(os)); + if (err) { + dmu_objset_rele(os, sn); + return (err); + } + } + dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, + os, sn, 3); + + return (0); +} + +int +dmu_objset_snapshot(char *fsname, char *snapname, char *tag, + nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) +{ + dsl_sync_task_t *dst; + struct snaparg sn; + spa_t *spa; + minor_t minor; + int err; + + (void) strcpy(sn.failed, fsname); + + err = spa_open(fsname, &spa, FTAG); + if (err) + return (err); + + if (temporary) { + if (cleanup_fd < 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { + spa_close(spa, FTAG); + return (err); + } + } + + sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + sn.snapname = snapname; + sn.htag = tag; + sn.props = props; + sn.recursive = recursive; + sn.needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + sn.temporary = temporary; + sn.ha = NULL; + sn.newds = NULL; + + if (recursive) { + err = dmu_objset_find(fsname, + dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); + } else { + err = dmu_objset_snapshot_one(fsname, &sn); + } + + if (err == 0) + err = dsl_sync_task_group_wait(sn.dstg); + + for (dst = list_head(&sn.dstg->dstg_tasks); dst; + dst = list_next(&sn.dstg->dstg_tasks, dst)) { + objset_t *os = dst->dst_arg1; + dsl_dataset_t *ds = os->os_dsl_dataset; + if (dst->dst_err) { + dsl_dataset_name(ds, sn.failed); + } else if (temporary) { + dsl_register_onexit_hold_cleanup(sn.newds, tag, minor); + } + if (sn.needsuspend) + zil_resume(dmu_objset_zil(os)); + dmu_objset_rele(os, &sn); + } + + if (err) + (void) strcpy(fsname, 
sn.failed); + if (temporary) + zfs_onexit_fd_rele(cleanup_fd); + dsl_sync_task_group_destroy(sn.dstg); + spa_close(spa, FTAG); + return (err); +} + +static void +dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) +{ + dnode_t *dn; + + while (dn = list_head(list)) { + ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); + ASSERT(dn->dn_dbuf->db_data_pending); + /* + * Initialize dn_zio outside dnode_sync() because the + * meta-dnode needs to set it outside dnode_sync(). + */ + dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio; + ASSERT(dn->dn_zio); + + ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); + list_remove(list, dn); + + if (newlist) { + (void) dnode_add_ref(dn, newlist); + list_insert_tail(newlist, dn); + } + + dnode_sync(dn, tx); + } +} + +/* ARGSUSED */ +static void +dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) +{ + blkptr_t *bp = zio->io_bp; + objset_t *os = arg; + dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; + + ASSERT(bp == os->os_rootbp); + ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); + ASSERT(BP_GET_LEVEL(bp) == 0); + + /* + * Update rootbp fill count: it should be the number of objects + * allocated in the object set (not counting the "special" + * objects that are stored in the objset_phys_t -- the meta + * dnode and user/group accounting objects).
+ */ + bp->blk_fill = 0; + for (int i = 0; i < dnp->dn_nblkptr; i++) + bp->blk_fill += dnp->dn_blkptr[i].blk_fill; +} + +/* ARGSUSED */ +static void +dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) +{ + blkptr_t *bp = zio->io_bp; + blkptr_t *bp_orig = &zio->io_bp_orig; + objset_t *os = arg; + + if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { + ASSERT(BP_EQUAL(bp, bp_orig)); + } else { + dsl_dataset_t *ds = os->os_dsl_dataset; + dmu_tx_t *tx = os->os_synctx; + + (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); + dsl_dataset_block_born(ds, bp, tx); + } +} + +/* called from dsl */ +void +dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) +{ + int txgoff; + zbookmark_t zb; + zio_prop_t zp; + zio_t *zio; + list_t *list; + list_t *newlist = NULL; + dbuf_dirty_record_t *dr; + + dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); + + ASSERT(dmu_tx_is_syncing(tx)); + /* XXX the write_done callback should really give us the tx... */ + os->os_synctx = tx; + + if (os->os_dsl_dataset == NULL) { + /* + * This is the MOS. If we have upgraded, + * spa_max_replication() could change, so reset + * os_copies here. + */ + os->os_copies = spa_max_replication(os->os_spa); + } + + /* + * Create the root block IO + */ + SET_BOOKMARK(&zb, os->os_dsl_dataset ? 
+ os->os_dsl_dataset->ds_object : DMU_META_OBJSET, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf, + os->os_rootbp, os->os_spa, &zb)); + + dmu_write_policy(os, NULL, 0, 0, &zp); + + zio = arc_write(pio, os->os_spa, tx->tx_txg, + os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, + dmu_objset_write_ready, dmu_objset_write_done, os, + ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); + + /* + * Sync special dnodes - the parent IO for the sync is the root block + */ + DMU_META_DNODE(os)->dn_zio = zio; + dnode_sync(DMU_META_DNODE(os), tx); + + os->os_phys->os_flags = os->os_flags; + + if (DMU_USERUSED_DNODE(os) && + DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) { + DMU_USERUSED_DNODE(os)->dn_zio = zio; + dnode_sync(DMU_USERUSED_DNODE(os), tx); + DMU_GROUPUSED_DNODE(os)->dn_zio = zio; + dnode_sync(DMU_GROUPUSED_DNODE(os), tx); + } + + txgoff = tx->tx_txg & TXG_MASK; + + if (dmu_objset_userused_enabled(os)) { + newlist = &os->os_synced_dnodes; + /* + * We must create the list here because it uses the + * dn_dirty_link[] of this txg. + */ + list_create(newlist, sizeof (dnode_t), + offsetof(dnode_t, dn_dirty_link[txgoff])); + } + + dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); + dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); + + list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; + while (dr = list_head(list)) { + ASSERT(dr->dr_dbuf->db_level == 0); + list_remove(list, dr); + if (dr->dr_zio) + zio_nowait(dr->dr_zio); + } + /* + * Free intent log blocks up to this tx. 
+ */ + zil_sync(os->os_zil, tx); + os->os_phys->os_zil_header = os->os_zil_header; + zio_nowait(zio); +} + +boolean_t +dmu_objset_is_dirty(objset_t *os, uint64_t txg) +{ + return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) || + !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK])); +} + +boolean_t +dmu_objset_is_dirty_anywhere(objset_t *os) +{ + for (int t = 0; t < TXG_SIZE; t++) + if (dmu_objset_is_dirty(os, t)) + return (B_TRUE); + return (B_FALSE); +} + +static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES]; + +void +dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb) +{ + used_cbs[ost] = cb; +} + +boolean_t +dmu_objset_userused_enabled(objset_t *os) +{ + return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && + used_cbs[os->os_phys->os_type] != NULL && + DMU_USERUSED_DNODE(os) != NULL); +} + +static void +do_userquota_update(objset_t *os, uint64_t used, uint64_t flags, + uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx) +{ + if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) { + int64_t delta = DNODE_SIZE + used; + if (subtract) + delta = -delta; + VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT, + user, delta, tx)); + VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT, + group, delta, tx)); + } +} + +void +dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) +{ + dnode_t *dn; + list_t *list = &os->os_synced_dnodes; + + ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); + + while (dn = list_head(list)) { + int flags; + ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); + ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || + dn->dn_phys->dn_flags & + DNODE_FLAG_USERUSED_ACCOUNTED); + + /* Allocate the user/groupused objects if necessary. 
*/ + if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { + VERIFY(0 == zap_create_claim(os, + DMU_USERUSED_OBJECT, + DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); + VERIFY(0 == zap_create_claim(os, + DMU_GROUPUSED_OBJECT, + DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); + } + + /* + * We intentionally modify the zap object even if the + * net delta is zero. Otherwise + * the block of the zap obj could be shared between + * datasets but need to be different between them after + * a bprewrite. + */ + + flags = dn->dn_id_flags; + ASSERT(flags); + if (flags & DN_ID_OLD_EXIST) { + do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags, + dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx); + } + if (flags & DN_ID_NEW_EXIST) { + do_userquota_update(os, DN_USED_BYTES(dn->dn_phys), + dn->dn_phys->dn_flags, dn->dn_newuid, + dn->dn_newgid, B_FALSE, tx); + } + + mutex_enter(&dn->dn_mtx); + dn->dn_oldused = 0; + dn->dn_oldflags = 0; + if (dn->dn_id_flags & DN_ID_NEW_EXIST) { + dn->dn_olduid = dn->dn_newuid; + dn->dn_oldgid = dn->dn_newgid; + dn->dn_id_flags |= DN_ID_OLD_EXIST; + if (dn->dn_bonuslen == 0) + dn->dn_id_flags |= DN_ID_CHKED_SPILL; + else + dn->dn_id_flags |= DN_ID_CHKED_BONUS; + } + dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); + mutex_exit(&dn->dn_mtx); + + list_remove(list, dn); + dnode_rele(dn, list); + } +} + +/* + * Returns a pointer to data to find uid/gid from + * + * If a dirty record for transaction group that is syncing can't + * be found then NULL is returned. In the NULL case it is assumed + * the uid/gid aren't changing. 
+ */ +static void * +dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx) +{ + dbuf_dirty_record_t *dr, **drp; + void *data; + + if (db->db_dirtycnt == 0) + return (db->db.db_data); /* Nothing is changing */ + + for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) + if (dr->dr_txg == tx->tx_txg) + break; + + if (dr == NULL) { + data = NULL; + } else { + dnode_t *dn; + + DB_DNODE_ENTER(dr->dr_dbuf); + dn = DB_DNODE(dr->dr_dbuf); + + if (dn->dn_bonuslen == 0 && + dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID) + data = dr->dt.dl.dr_data->b_data; + else + data = dr->dt.dl.dr_data; + + DB_DNODE_EXIT(dr->dr_dbuf); + } + + return (data); +} + +void +dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) +{ + objset_t *os = dn->dn_objset; + void *data = NULL; + dmu_buf_impl_t *db = NULL; + uint64_t *user, *group; + int flags = dn->dn_id_flags; + int error; + boolean_t have_spill = B_FALSE; + + if (!dmu_objset_userused_enabled(dn->dn_objset)) + return; + + if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| + DN_ID_CHKED_SPILL))) + return; + + if (before && dn->dn_bonuslen != 0) + data = DN_BONUS(dn->dn_phys); + else if (!before && dn->dn_bonuslen != 0) { + if (dn->dn_bonus) { + db = dn->dn_bonus; + mutex_enter(&db->db_mtx); + data = dmu_objset_userquota_find_data(db, tx); + } else { + data = DN_BONUS(dn->dn_phys); + } + } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) { + int rf = 0; + + if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) + rf |= DB_RF_HAVESTRUCT; + error = dmu_spill_hold_by_dnode(dn, + rf | DB_RF_MUST_SUCCEED, + FTAG, (dmu_buf_t **)&db); + ASSERT(error == 0); + mutex_enter(&db->db_mtx); + data = (before) ? 
db->db.db_data : + dmu_objset_userquota_find_data(db, tx); + have_spill = B_TRUE; + } else { + mutex_enter(&dn->dn_mtx); + dn->dn_id_flags |= DN_ID_CHKED_BONUS; + mutex_exit(&dn->dn_mtx); + return; + } + + if (before) { + ASSERT(data); + user = &dn->dn_olduid; + group = &dn->dn_oldgid; + } else if (data) { + user = &dn->dn_newuid; + group = &dn->dn_newgid; + } + + /* + * Must always call the callback in case the object + * type has changed and that type isn't an object type to track + */ + error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data, + user, group); + + /* + * Preserve existing uid/gid when the callback can't determine + * what the new uid/gid are and the callback returned EEXIST. + * The EEXIST error tells us to just use the existing uid/gid. + * If we don't know what the old values are then just assign + * them to 0, since that is a new file being created. + */ + if (!before && data == NULL && error == EEXIST) { + if (flags & DN_ID_OLD_EXIST) { + dn->dn_newuid = dn->dn_olduid; + dn->dn_newgid = dn->dn_oldgid; + } else { + dn->dn_newuid = 0; + dn->dn_newgid = 0; + } + error = 0; + } + + if (db) + mutex_exit(&db->db_mtx); + + mutex_enter(&dn->dn_mtx); + if (error == 0 && before) + dn->dn_id_flags |= DN_ID_OLD_EXIST; + if (error == 0 && !before) + dn->dn_id_flags |= DN_ID_NEW_EXIST; + + if (have_spill) { + dn->dn_id_flags |= DN_ID_CHKED_SPILL; + } else { + dn->dn_id_flags |= DN_ID_CHKED_BONUS; + } + mutex_exit(&dn->dn_mtx); + if (have_spill) + dmu_buf_rele((dmu_buf_t *)db, FTAG); +} + +boolean_t +dmu_objset_userspace_present(objset_t *os) +{ + return (os->os_phys->os_flags & + OBJSET_FLAG_USERACCOUNTING_COMPLETE); +} + +int +dmu_objset_userspace_upgrade(objset_t *os) +{ + uint64_t obj; + int err = 0; + + if (dmu_objset_userspace_present(os)) + return (0); + if (!dmu_objset_userused_enabled(os)) + return (ENOTSUP); + if (dmu_objset_is_snapshot(os)) + return (EINVAL); + + /* + * We simply need to mark every object dirty, so that it will be + * synced 
out and now accounted. If this is called + * concurrently, or if we already did some work before crashing, + * that's fine, since we track each object's accounted state + * independently. + */ + + for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { + dmu_tx_t *tx; + dmu_buf_t *db; + int objerr; + + if (issig(JUSTLOOKING) && issig(FORREAL)) + return (EINTR); + + objerr = dmu_bonus_hold(os, obj, FTAG, &db); + if (objerr) + continue; + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, obj); + objerr = dmu_tx_assign(tx, TXG_WAIT); + if (objerr) { + dmu_tx_abort(tx); + continue; + } + dmu_buf_will_dirty(db, tx); + dmu_buf_rele(db, FTAG); + dmu_tx_commit(tx); + } + + os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; + txg_wait_synced(dmu_objset_pool(os), 0); + return (0); +} + +void +dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, + uint64_t *usedobjsp, uint64_t *availobjsp) +{ + dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp, + usedobjsp, availobjsp); +} + +uint64_t +dmu_objset_fsid_guid(objset_t *os) +{ + return (dsl_dataset_fsid_guid(os->os_dsl_dataset)); +} + +void +dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) +{ + stat->dds_type = os->os_phys->os_type; + if (os->os_dsl_dataset) + dsl_dataset_fast_stat(os->os_dsl_dataset, stat); +} + +void +dmu_objset_stats(objset_t *os, nvlist_t *nv) +{ + ASSERT(os->os_dsl_dataset || + os->os_phys->os_type == DMU_OST_META); + + if (os->os_dsl_dataset != NULL) + dsl_dataset_stats(os->os_dsl_dataset, nv); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, + os->os_phys->os_type); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING, + dmu_objset_userspace_present(os)); +} + +int +dmu_objset_is_snapshot(objset_t *os) +{ + if (os->os_dsl_dataset != NULL) + return (dsl_dataset_is_snapshot(os->os_dsl_dataset)); + else + return (B_FALSE); +} + +int +dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, + boolean_t *conflict) +{ + dsl_dataset_t 
*ds = os->os_dsl_dataset; + uint64_t ignored; + + if (ds->ds_phys->ds_snapnames_zapobj == 0) + return (ENOENT); + + return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST, + real, maxlen, conflict)); +} + +int +dmu_snapshot_list_next(objset_t *os, int namelen, char *name, + uint64_t *idp, uint64_t *offp, boolean_t *case_conflict) +{ + dsl_dataset_t *ds = os->os_dsl_dataset; + zap_cursor_t cursor; + zap_attribute_t attr; + + if (ds->ds_phys->ds_snapnames_zapobj == 0) + return (ENOENT); + + zap_cursor_init_serialized(&cursor, + ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, *offp); + + if (zap_cursor_retrieve(&cursor, &attr) != 0) { + zap_cursor_fini(&cursor); + return (ENOENT); + } + + if (strlen(attr.za_name) + 1 > namelen) { + zap_cursor_fini(&cursor); + return (ENAMETOOLONG); + } + + (void) strcpy(name, attr.za_name); + if (idp) + *idp = attr.za_first_integer; + if (case_conflict) + *case_conflict = attr.za_normalization_conflict; + zap_cursor_advance(&cursor); + *offp = zap_cursor_serialize(&cursor); + zap_cursor_fini(&cursor); + + return (0); +} + +int +dmu_dir_list_next(objset_t *os, int namelen, char *name, + uint64_t *idp, uint64_t *offp) +{ + dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; + zap_cursor_t cursor; + zap_attribute_t attr; + + /* there is no next dir on a snapshot! 
*/ + if (os->os_dsl_dataset->ds_object != + dd->dd_phys->dd_head_dataset_obj) + return (ENOENT); + + zap_cursor_init_serialized(&cursor, + dd->dd_pool->dp_meta_objset, + dd->dd_phys->dd_child_dir_zapobj, *offp); + + if (zap_cursor_retrieve(&cursor, &attr) != 0) { + zap_cursor_fini(&cursor); + return (ENOENT); + } + + if (strlen(attr.za_name) + 1 > namelen) { + zap_cursor_fini(&cursor); + return (ENAMETOOLONG); + } + + (void) strcpy(name, attr.za_name); + if (idp) + *idp = attr.za_first_integer; + zap_cursor_advance(&cursor); + *offp = zap_cursor_serialize(&cursor); + zap_cursor_fini(&cursor); + + return (0); +} + +struct findarg { + int (*func)(const char *, void *); + void *arg; +}; + +/* ARGSUSED */ +static int +findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +{ + struct findarg *fa = arg; + return (fa->func(dsname, fa->arg)); +} + +/* + * Find all objsets under name, and for each, call 'func(child_name, arg)'. + * Perhaps change all callers to use dmu_objset_find_spa()? + */ +int +dmu_objset_find(char *name, int func(const char *, void *), void *arg, + int flags) +{ + struct findarg fa; + fa.func = func; + fa.arg = arg; + return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); +} + +/* + * Find all objsets under name, call func on each + */ +int +dmu_objset_find_spa(spa_t *spa, const char *name, + int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) +{ + dsl_dir_t *dd; + dsl_pool_t *dp; + dsl_dataset_t *ds; + zap_cursor_t zc; + zap_attribute_t *attr; + char *child; + uint64_t thisobj; + int err; + + if (name == NULL) + name = spa_name(spa); + err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); + if (err) + return (err); + + /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ + if (dd->dd_myname[0] == '$') { + dsl_dir_close(dd, FTAG); + return (0); + } + + thisobj = dd->dd_phys->dd_head_dataset_obj; + attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + dp = dd->dd_pool; + + /* + * Iterate over all children. 
+ */ + if (flags & DS_FIND_CHILDREN) { + for (zap_cursor_init(&zc, dp->dp_meta_objset, + dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT(attr->za_integer_length == sizeof (uint64_t)); + ASSERT(attr->za_num_integers == 1); + + child = kmem_asprintf("%s/%s", name, attr->za_name); + err = dmu_objset_find_spa(spa, child, func, arg, flags); + strfree(child); + if (err) + break; + } + zap_cursor_fini(&zc); + + if (err) { + dsl_dir_close(dd, FTAG); + kmem_free(attr, sizeof (zap_attribute_t)); + return (err); + } + } + + /* + * Iterate over all snapshots. + */ + if (flags & DS_FIND_SNAPSHOTS) { + if (!dsl_pool_sync_context(dp)) + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); + if (!dsl_pool_sync_context(dp)) + rw_exit(&dp->dp_config_rwlock); + + if (err == 0) { + uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + dsl_dataset_rele(ds, FTAG); + + for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT(attr->za_integer_length == + sizeof (uint64_t)); + ASSERT(attr->za_num_integers == 1); + + child = kmem_asprintf("%s@%s", + name, attr->za_name); + err = func(spa, attr->za_first_integer, + child, arg); + strfree(child); + if (err) + break; + } + zap_cursor_fini(&zc); + } + } + + dsl_dir_close(dd, FTAG); + kmem_free(attr, sizeof (zap_attribute_t)); + + if (err) + return (err); + + /* + * Apply to self if appropriate. 
+ */ + err = func(spa, thisobj, name, arg); + return (err); +} + +/* ARGSUSED */ +int +dmu_objset_prefetch(const char *name, void *arg) +{ + dsl_dataset_t *ds; + + if (dsl_dataset_hold(name, FTAG, &ds)) + return (0); + + if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { + mutex_enter(&ds->ds_opening_lock); + if (ds->ds_objset == NULL) { + uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; + zbookmark_t zb; + + SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, + ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + + (void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds), + &ds->ds_phys->ds_bp, NULL, NULL, + ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &aflags, &zb); + } + mutex_exit(&ds->ds_opening_lock); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + +void +dmu_objset_set_user(objset_t *os, void *user_ptr) +{ + ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); + os->os_user_ptr = user_ptr; +} + +void * +dmu_objset_get_user(objset_t *os) +{ + ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); + return (os->os_user_ptr); +} diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c new file mode 100644 index 000000000000..e47d533a44f4 --- /dev/null +++ b/uts/common/fs/zfs/dmu_send.c @@ -0,0 +1,1606 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_tx.h> +#include <sys/dbuf.h> +#include <sys/dnode.h> +#include <sys/zfs_context.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_traverse.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> +#include <sys/zfs_ioctl.h> +#include <sys/zap.h> +#include <sys/zio_checksum.h> +#include <sys/zfs_znode.h> +#include <zfs_fletcher.h> +#include <sys/avl.h> +#include <sys/ddt.h> +#include <sys/zfs_onexit.h> + +static char *dmu_recv_tag = "dmu_recv_tag"; + +/* + * The list of data whose inclusion in a send stream can be pending from + * one call to backup_cb to another. Multiple calls to dump_free() and + * dump_freeobjects() can be aggregated into a single DRR_FREE or + * DRR_FREEOBJECTS replay record. 
+ */ +typedef enum { + PENDING_NONE, + PENDING_FREE, + PENDING_FREEOBJECTS +} pendop_t; + +struct backuparg { + dmu_replay_record_t *drr; + vnode_t *vp; + offset_t *off; + objset_t *os; + zio_cksum_t zc; + uint64_t toguid; + int err; + pendop_t pending_op; +}; + +static int +dump_bytes(struct backuparg *ba, void *buf, int len) +{ + ssize_t resid; /* have to get resid to get detailed errno */ + ASSERT3U(len % 8, ==, 0); + + fletcher_4_incremental_native(buf, len, &ba->zc); + ba->err = vn_rdwr(UIO_WRITE, ba->vp, + (caddr_t)buf, len, + 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); + *ba->off += len; + return (ba->err); +} + +static int +dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, + uint64_t length) +{ + struct drr_free *drrf = &(ba->drr->drr_u.drr_free); + + /* + * If there is a pending op, but it's not PENDING_FREE, push it out, + * since free block aggregation can only be done for blocks of the + * same type (i.e., DRR_FREE records can only be aggregated with + * other DRR_FREE records. DRR_FREEOBJECTS records can only be + * aggregated with other DRR_FREEOBJECTS records. + */ + if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + + if (ba->pending_op == PENDING_FREE) { + /* + * There should never be a PENDING_FREE if length is -1 + * (because dump_dnode is the only place where this + * function is called with a -1, and only after flushing + * any pending record). + */ + ASSERT(length != -1ULL); + /* + * Check to see whether this free block can be aggregated + * with pending one. + */ + if (drrf->drr_object == object && drrf->drr_offset + + drrf->drr_length == offset) { + drrf->drr_length += length; + return (0); + } else { + /* not a continuation. 
Push out pending record */ + if (dump_bytes(ba, ba->drr, + sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + } + /* create a FREE record and make it pending */ + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_FREE; + drrf->drr_object = object; + drrf->drr_offset = offset; + drrf->drr_length = length; + drrf->drr_toguid = ba->toguid; + if (length == -1ULL) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + } else { + ba->pending_op = PENDING_FREE; + } + + return (0); +} + +static int +dump_data(struct backuparg *ba, dmu_object_type_t type, + uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) +{ + struct drr_write *drrw = &(ba->drr->drr_u.drr_write); + + + /* + * If there is any kind of pending aggregation (currently either + * a grouping of free objects or free blocks), push it out to + * the stream, since aggregation can't be done across operations + * of different types. 
+ */ + if (ba->pending_op != PENDING_NONE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + /* write a DATA record */ + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_WRITE; + drrw->drr_object = object; + drrw->drr_type = type; + drrw->drr_offset = offset; + drrw->drr_length = blksz; + drrw->drr_toguid = ba->toguid; + drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); + if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) + drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; + DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); + DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); + DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); + drrw->drr_key.ddk_cksum = bp->blk_cksum; + + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + if (dump_bytes(ba, data, blksz) != 0) + return (EINTR); + return (0); +} + +static int +dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) +{ + struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); + + if (ba->pending_op != PENDING_NONE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + + /* write a SPILL record */ + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_SPILL; + drrs->drr_object = object; + drrs->drr_length = blksz; + drrs->drr_toguid = ba->toguid; + + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) + return (EINTR); + if (dump_bytes(ba, data, blksz)) + return (EINTR); + return (0); +} + +static int +dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) +{ + struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); + + /* + * If there is a pending op, but it's not PENDING_FREEOBJECTS, + * push it out, since free block aggregation can only be done for + * blocks of the same type (i.e., DRR_FREE records can only be + * aggregated with other 
DRR_FREE records. DRR_FREEOBJECTS records + * can only be aggregated with other DRR_FREEOBJECTS records. + */ + if (ba->pending_op != PENDING_NONE && + ba->pending_op != PENDING_FREEOBJECTS) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + if (ba->pending_op == PENDING_FREEOBJECTS) { + /* + * See whether this free object array can be aggregated + * with pending one + */ + if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { + drrfo->drr_numobjs += numobjs; + return (0); + } else { + /* can't be aggregated. Push out pending record */ + if (dump_bytes(ba, ba->drr, + sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + } + + /* write a FREEOBJECTS record */ + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_FREEOBJECTS; + drrfo->drr_firstobj = firstobj; + drrfo->drr_numobjs = numobjs; + drrfo->drr_toguid = ba->toguid; + + ba->pending_op = PENDING_FREEOBJECTS; + + return (0); +} + +static int +dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) +{ + struct drr_object *drro = &(ba->drr->drr_u.drr_object); + + if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) + return (dump_freeobjects(ba, object, 1)); + + if (ba->pending_op != PENDING_NONE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + ba->pending_op = PENDING_NONE; + } + + /* write an OBJECT record */ + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_OBJECT; + drro->drr_object = object; + drro->drr_type = dnp->dn_type; + drro->drr_bonustype = dnp->dn_bonustype; + drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; + drro->drr_bonuslen = dnp->dn_bonuslen; + drro->drr_checksumtype = dnp->dn_checksum; + drro->drr_compress = dnp->dn_compress; + drro->drr_toguid = ba->toguid; + + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + + if (dump_bytes(ba, 
DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) + return (EINTR); + + /* free anything past the end of the file */ + if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * + (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) + return (EINTR); + if (ba->err) + return (EINTR); + return (0); +} + +#define BP_SPAN(dnp, level) \ + (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ + (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) + +/* ARGSUSED */ +static int +backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + struct backuparg *ba = arg; + dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; + int err = 0; + + if (issig(JUSTLOOKING) && issig(FORREAL)) + return (EINTR); + + if (zb->zb_object != DMU_META_DNODE_OBJECT && + DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { + return (0); + } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { + uint64_t span = BP_SPAN(dnp, zb->zb_level); + uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; + err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); + } else if (bp == NULL) { + uint64_t span = BP_SPAN(dnp, zb->zb_level); + err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); + } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { + return (0); + } else if (type == DMU_OT_DNODE) { + dnode_phys_t *blk; + int i; + int blksz = BP_GET_LSIZE(bp); + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf; + + if (dsl_read(NULL, spa, bp, pbuf, + arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) + return (EIO); + + blk = abuf->b_data; + for (i = 0; i < blksz >> DNODE_SHIFT; i++) { + uint64_t dnobj = (zb->zb_blkid << + (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; + err = dump_dnode(ba, dnobj, blk+i); + if (err) + break; + } + (void) arc_buf_remove_ref(abuf, &abuf); + } else if (type == DMU_OT_SA) { + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf; + int blksz = BP_GET_LSIZE(bp); + + if 
(arc_read_nolock(NULL, spa, bp, + arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) + return (EIO); + + err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); + (void) arc_buf_remove_ref(abuf, &abuf); + } else { /* it's a level-0 block of a regular object */ + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf; + int blksz = BP_GET_LSIZE(bp); + + if (dsl_read(NULL, spa, bp, pbuf, + arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) + return (EIO); + + err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, + blksz, bp, abuf->b_data); + (void) arc_buf_remove_ref(abuf, &abuf); + } + + ASSERT(err == 0 || err == EINTR); + return (err); +} + +int +dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + vnode_t *vp, offset_t *off) +{ + dsl_dataset_t *ds = tosnap->os_dsl_dataset; + dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; + dmu_replay_record_t *drr; + struct backuparg ba; + int err; + uint64_t fromtxg = 0; + + /* tosnap must be a snapshot */ + if (ds->ds_phys->ds_next_snap_obj == 0) + return (EINVAL); + + /* fromsnap must be an earlier snapshot from the same fs as tosnap */ + if (fromds && (ds->ds_dir != fromds->ds_dir || + fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + return (EXDEV); + + if (fromorigin) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (fromsnap) + return (EINVAL); + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + } else { + fromorigin = B_FALSE; + } + } + + + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); + drr->drr_type = DRR_BEGIN; + drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; + DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, + DMU_SUBSTREAM); + +#ifdef _KERNEL + if (dmu_objset_type(tosnap) 
== DMU_OST_ZFS) { + uint64_t version; + if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) + return (EINVAL); + if (version == ZPL_VERSION_SA) { + DMU_SET_FEATUREFLAGS( + drr->drr_u.drr_begin.drr_versioninfo, + DMU_BACKUP_FEATURE_SA_SPILL); + } + } +#endif + + drr->drr_u.drr_begin.drr_creation_time = + ds->ds_phys->ds_creation_time; + drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; + if (fromorigin) + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; + drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; + if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; + + if (fromds) + drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; + dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); + + if (fromds) + fromtxg = fromds->ds_phys->ds_creation_txg; + if (fromorigin) + dsl_dataset_rele(fromds, FTAG); + + ba.drr = drr; + ba.vp = vp; + ba.os = tosnap; + ba.off = off; + ba.toguid = ds->ds_phys->ds_guid; + ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); + ba.pending_op = PENDING_NONE; + + if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { + kmem_free(drr, sizeof (dmu_replay_record_t)); + return (ba.err); + } + + err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, + backup_cb, &ba); + + if (ba.pending_op != PENDING_NONE) + if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) + err = EINTR; + + if (err) { + if (err == EINTR && ba.err) + err = ba.err; + kmem_free(drr, sizeof (dmu_replay_record_t)); + return (err); + } + + bzero(drr, sizeof (dmu_replay_record_t)); + drr->drr_type = DRR_END; + drr->drr_u.drr_end.drr_checksum = ba.zc; + drr->drr_u.drr_end.drr_toguid = ba.toguid; + + if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { + kmem_free(drr, sizeof (dmu_replay_record_t)); + return (ba.err); + } + + kmem_free(drr, sizeof (dmu_replay_record_t)); + + return (0); +} + +struct recvbeginsyncarg { + const char *tofs; + const char *tosnap; + dsl_dataset_t 
*origin; + uint64_t fromguid; + dmu_objset_type_t type; + void *tag; + boolean_t force; + uint64_t dsflags; + char clonelastname[MAXNAMELEN]; + dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ + cred_t *cr; +}; + +/* ARGSUSED */ +static int +recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct recvbeginsyncarg *rbsa = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; + uint64_t val; + int err; + + err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, + strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); + + if (err != ENOENT) + return (err ? err : EEXIST); + + if (rbsa->origin) { + /* make sure it's a snap in the same pool */ + if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) + return (EXDEV); + if (!dsl_dataset_is_snapshot(rbsa->origin)) + return (EINVAL); + if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) + return (ENODEV); + } + + return (0); +} + +static void +recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct recvbeginsyncarg *rbsa = arg2; + uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; + uint64_t dsobj; + + /* Create and open new dataset. 
*/ + dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, + rbsa->origin, flags, rbsa->cr, tx); + VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, + B_TRUE, dmu_recv_tag, &rbsa->ds)); + + if (rbsa->origin == NULL) { + (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, + rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); + } + + spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, + dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); +} + +/* ARGSUSED */ +static int +recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvbeginsyncarg *rbsa = arg2; + int err; + uint64_t val; + + /* must not have any changes since most recent snapshot */ + if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) + return (ETXTBSY); + + /* new snapshot name must not exist */ + err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + + if (rbsa->fromguid) { + /* if incremental, most recent snapshot must match fromguid */ + if (ds->ds_prev == NULL) + return (ENODEV); + + /* + * most recent snapshot must match fromguid, or there are no + * changes since the fromguid one + */ + if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { + uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; + uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; + while (obj != 0) { + dsl_dataset_t *snap; + err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + obj, FTAG, &snap); + if (err) + return (ENODEV); + if (snap->ds_phys->ds_creation_txg < birth) { + dsl_dataset_rele(snap, FTAG); + return (ENODEV); + } + if (snap->ds_phys->ds_guid == rbsa->fromguid) { + dsl_dataset_rele(snap, FTAG); + break; /* it's ok */ + } + obj = snap->ds_phys->ds_prev_snap_obj; + dsl_dataset_rele(snap, FTAG); + } + if (obj == 0) + return (ENODEV); + } + } else { + /* if full, most recent snapshot must be $ORIGIN */ + if 
(ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) + return (ENODEV); + } + + /* temporary clone name must not exist */ + err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_dir->dd_phys->dd_child_dir_zapobj, + rbsa->clonelastname, 8, 1, &val); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + + return (0); +} + +/* ARGSUSED */ +static void +recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ohds = arg1; + struct recvbeginsyncarg *rbsa = arg2; + dsl_pool_t *dp = ohds->ds_dir->dd_pool; + dsl_dataset_t *cds; + uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; + uint64_t dsobj; + + /* create and open the temporary clone */ + dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, + ohds->ds_prev, flags, rbsa->cr, tx); + VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); + + /* + * If we actually created a non-clone, we need to create the + * objset in our new dataset. + */ + if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { + (void) dmu_objset_create_impl(dp->dp_spa, + cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); + } + + rbsa->ds = cds; + + spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, + dp->dp_spa, tx, "dataset = %lld", dsobj); +} + +static boolean_t +dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) +{ + int featureflags; + + featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + + /* Verify pool version supports SA if SA_SPILL feature set */ + return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && + (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); +} + +/* + * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() + * succeeds; otherwise we will leak the holds on the datasets. 
+ */ +int +dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, + boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) +{ + int err = 0; + boolean_t byteswap; + struct recvbeginsyncarg rbsa = { 0 }; + uint64_t versioninfo; + int flags; + dsl_dataset_t *ds; + + if (drrb->drr_magic == DMU_BACKUP_MAGIC) + byteswap = FALSE; + else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + byteswap = TRUE; + else + return (EINVAL); + + rbsa.tofs = tofs; + rbsa.tosnap = tosnap; + rbsa.origin = origin ? origin->os_dsl_dataset : NULL; + rbsa.fromguid = drrb->drr_fromguid; + rbsa.type = drrb->drr_type; + rbsa.tag = FTAG; + rbsa.dsflags = 0; + rbsa.cr = CRED(); + versioninfo = drrb->drr_versioninfo; + flags = drrb->drr_flags; + + if (byteswap) { + rbsa.type = BSWAP_32(rbsa.type); + rbsa.fromguid = BSWAP_64(rbsa.fromguid); + versioninfo = BSWAP_64(versioninfo); + flags = BSWAP_32(flags); + } + + if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || + rbsa.type >= DMU_OST_NUMTYPES || + ((flags & DRR_FLAG_CLONE) && origin == NULL)) + return (EINVAL); + + if (flags & DRR_FLAG_CI_DATA) + rbsa.dsflags = DS_FLAG_CI_DATASET; + + bzero(drc, sizeof (dmu_recv_cookie_t)); + drc->drc_drrb = drrb; + drc->drc_tosnap = tosnap; + drc->drc_top_ds = top_ds; + drc->drc_force = force; + + /* + * Process the begin in syncing context. 
+ */ + + /* open the dataset we are logically receiving into */ + err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); + if (err == 0) { + if (dmu_recv_verify_features(ds, drrb)) { + dsl_dataset_rele(ds, dmu_recv_tag); + return (ENOTSUP); + } + /* target fs already exists; recv into temp clone */ + + /* Can't recv a clone into an existing fs */ + if (flags & DRR_FLAG_CLONE) { + dsl_dataset_rele(ds, dmu_recv_tag); + return (EINVAL); + } + + /* must not have an incremental recv already in progress */ + if (!mutex_tryenter(&ds->ds_recvlock)) { + dsl_dataset_rele(ds, dmu_recv_tag); + return (EBUSY); + } + + /* tmp clone name is: tofs/%tosnap" */ + (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), + "%%%s", tosnap); + rbsa.force = force; + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_existing_check, recv_existing_sync, ds, &rbsa, 5); + if (err) { + mutex_exit(&ds->ds_recvlock); + dsl_dataset_rele(ds, dmu_recv_tag); + return (err); + } + drc->drc_logical_ds = ds; + drc->drc_real_ds = rbsa.ds; + } else if (err == ENOENT) { + /* target fs does not exist; must be a full backup or clone */ + char *cp; + + /* + * If it's a non-clone incremental, we are missing the + * target fs, so fail the recv. 
+ */ + if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) + return (ENOENT); + + /* Open the parent of tofs */ + cp = strrchr(tofs, '/'); + *cp = '\0'; + err = dsl_dataset_hold(tofs, FTAG, &ds); + *cp = '/'; + if (err) + return (err); + + if (dmu_recv_verify_features(ds, drrb)) { + dsl_dataset_rele(ds, FTAG); + return (ENOTSUP); + } + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); + dsl_dataset_rele(ds, FTAG); + if (err) + return (err); + drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; + drc->drc_newfs = B_TRUE; + } + + return (err); +} + +struct restorearg { + int err; + int byteswap; + vnode_t *vp; + char *buf; + uint64_t voff; + int bufsize; /* amount of memory allocated for buf */ + zio_cksum_t cksum; + avl_tree_t *guid_to_ds_map; +}; + +typedef struct guid_map_entry { + uint64_t guid; + dsl_dataset_t *gme_ds; + avl_node_t avlnode; +} guid_map_entry_t; + +static int +guid_compare(const void *arg1, const void *arg2) +{ + const guid_map_entry_t *gmep1 = arg1; + const guid_map_entry_t *gmep2 = arg2; + + if (gmep1->guid < gmep2->guid) + return (-1); + else if (gmep1->guid > gmep2->guid) + return (1); + return (0); +} + +/* + * This function is a callback used by dmu_objset_find() (which + * enumerates the object sets) to build an avl tree that maps guids + * to datasets. The resulting table is used when processing DRR_WRITE_BYREF + * send stream records. These records, which are used in dedup'ed + * streams, do not contain data themselves, but refer to a copy + * of the data block that has already been written because it was + * earlier in the stream. That previous copy is identified by the + * guid of the dataset with the referenced data. 
 */
int
find_ds_by_guid(const char *name, void *arg)
{
	avl_tree_t *guid_map = arg;
	dsl_dataset_t *ds, *snapds;
	guid_map_entry_t *gmep;
	dsl_pool_t *dp;
	int err;
	uint64_t lastobj, firstobj;

	/* a dataset that cannot be held is ignored, not treated as an error */
	if (dsl_dataset_hold(name, FTAG, &ds) != 0)
		return (0);

	dp = ds->ds_dir->dd_pool;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	/* walk back from the newest snapshot until the origin is reached */
	firstobj = ds->ds_dir->dd_phys->dd_origin_obj;
	lastobj = ds->ds_phys->ds_prev_snap_obj;

	while (lastobj != firstobj) {
		/*
		 * The hold is tagged with the map itself;
		 * free_guid_map_onexit() releases it with the same tag.
		 */
		err = dsl_dataset_hold_obj(dp, lastobj, guid_map, &snapds);
		if (err) {
			/*
			 * Stop walking this dataset's snapshots and move
			 * on.  It's not clear why this would ever happen,
			 * but the remainder of the snapshot stream can be
			 * processed.
			 */
			rw_exit(&dp->dp_config_rwlock);
			dsl_dataset_rele(ds, FTAG);
			return (0);
		}

		gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
		gmep->guid = snapds->ds_phys->ds_guid;
		gmep->gme_ds = snapds;
		avl_add(guid_map, gmep);
		lastobj = snapds->ds_phys->ds_prev_snap_obj;
	}

	rw_exit(&dp->dp_config_rwlock);
	dsl_dataset_rele(ds, FTAG);

	return (0);
}

/*
 * Tear down a guid map built by find_ds_by_guid(): release every dataset
 * hold (tagged with the tree pointer), free each entry, then destroy and
 * free the tree itself.
 */
static void
free_guid_map_onexit(void *arg)
{
	avl_tree_t *ca = arg;
	void *cookie = NULL;
	guid_map_entry_t *gmep;

	while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
		dsl_dataset_rele(gmep->gme_ds, ca);
		kmem_free(gmep, sizeof (guid_map_entry_t));
	}
	avl_destroy(ca);
	kmem_free(ca, sizeof (avl_tree_t));
}

/*
 * Read exactly 'len' bytes of the stream from ra->vp into ra->buf,
 * advancing ra->voff.  Returns NULL with ra->err set if a read fails or
 * makes no progress.
 */
static void *
restore_read(struct restorearg *ra, int len)
{
	void *rv;
	int done = 0;

	/* some things will require 8-byte alignment, so everything must */
	ASSERT3U(len % 8, ==, 0);

	while (done < len) {
		ssize_t resid;

		ra->err = vn_rdwr(UIO_READ, ra->vp,
		    (caddr_t)ra->buf + done, len - done,
		    ra->voff, UIO_SYSSPACE, FAPPEND,
		    RLIM64_INFINITY, CRED(), &resid);

		/* nothing was transferred: fail with EINVAL rather than spin */
		if (resid == len - done)
			ra->err = EINVAL;
		ra->voff += len - done - resid;
		done = len - resid;
		if (ra->err)
			return (NULL);
	}

	ASSERT3U(done, ==, len);
	rv = ra->buf;
	if
(ra->byteswap) + fletcher_4_incremental_byteswap(rv, len, &ra->cksum); + else + fletcher_4_incremental_native(rv, len, &ra->cksum); + return (rv); +} + +static void +backup_byteswap(dmu_replay_record_t *drr) +{ +#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) +#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) + drr->drr_type = BSWAP_32(drr->drr_type); + drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); + switch (drr->drr_type) { + case DRR_BEGIN: + DO64(drr_begin.drr_magic); + DO64(drr_begin.drr_versioninfo); + DO64(drr_begin.drr_creation_time); + DO32(drr_begin.drr_type); + DO32(drr_begin.drr_flags); + DO64(drr_begin.drr_toguid); + DO64(drr_begin.drr_fromguid); + break; + case DRR_OBJECT: + DO64(drr_object.drr_object); + /* DO64(drr_object.drr_allocation_txg); */ + DO32(drr_object.drr_type); + DO32(drr_object.drr_bonustype); + DO32(drr_object.drr_blksz); + DO32(drr_object.drr_bonuslen); + DO64(drr_object.drr_toguid); + break; + case DRR_FREEOBJECTS: + DO64(drr_freeobjects.drr_firstobj); + DO64(drr_freeobjects.drr_numobjs); + DO64(drr_freeobjects.drr_toguid); + break; + case DRR_WRITE: + DO64(drr_write.drr_object); + DO32(drr_write.drr_type); + DO64(drr_write.drr_offset); + DO64(drr_write.drr_length); + DO64(drr_write.drr_toguid); + DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); + DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); + DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); + DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); + DO64(drr_write.drr_key.ddk_prop); + break; + case DRR_WRITE_BYREF: + DO64(drr_write_byref.drr_object); + DO64(drr_write_byref.drr_offset); + DO64(drr_write_byref.drr_length); + DO64(drr_write_byref.drr_toguid); + DO64(drr_write_byref.drr_refguid); + DO64(drr_write_byref.drr_refobject); + DO64(drr_write_byref.drr_refoffset); + DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); + DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); + DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); + 
DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); + DO64(drr_write_byref.drr_key.ddk_prop); + break; + case DRR_FREE: + DO64(drr_free.drr_object); + DO64(drr_free.drr_offset); + DO64(drr_free.drr_length); + DO64(drr_free.drr_toguid); + break; + case DRR_SPILL: + DO64(drr_spill.drr_object); + DO64(drr_spill.drr_length); + DO64(drr_spill.drr_toguid); + break; + case DRR_END: + DO64(drr_end.drr_checksum.zc_word[0]); + DO64(drr_end.drr_checksum.zc_word[1]); + DO64(drr_end.drr_checksum.zc_word[2]); + DO64(drr_end.drr_checksum.zc_word[3]); + DO64(drr_end.drr_toguid); + break; + } +#undef DO64 +#undef DO32 +} + +static int +restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) +{ + int err; + dmu_tx_t *tx; + void *data = NULL; + + if (drro->drr_type == DMU_OT_NONE || + drro->drr_type >= DMU_OT_NUMTYPES || + drro->drr_bonustype >= DMU_OT_NUMTYPES || + drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || + drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || + P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || + drro->drr_blksz < SPA_MINBLOCKSIZE || + drro->drr_blksz > SPA_MAXBLOCKSIZE || + drro->drr_bonuslen > DN_MAX_BONUSLEN) { + return (EINVAL); + } + + err = dmu_object_info(os, drro->drr_object, NULL); + + if (err != 0 && err != ENOENT) + return (EINVAL); + + if (drro->drr_bonuslen) { + data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); + if (ra->err) + return (ra->err); + } + + if (err == ENOENT) { + /* currently free, want to be allocated */ + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); + } + err = dmu_object_claim(os, drro->drr_object, + drro->drr_type, drro->drr_blksz, + drro->drr_bonustype, drro->drr_bonuslen, tx); + dmu_tx_commit(tx); + } else { + /* currently allocated, want to be allocated */ + err = dmu_object_reclaim(os, drro->drr_object, + drro->drr_type, drro->drr_blksz, + drro->drr_bonustype, drro->drr_bonuslen); + } + if (err) { 
+ return (EINVAL); + } + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, drro->drr_object); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); + } + + dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, + tx); + dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); + + if (data != NULL) { + dmu_buf_t *db; + + VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); + dmu_buf_will_dirty(db, tx); + + ASSERT3U(db->db_size, >=, drro->drr_bonuslen); + bcopy(data, db->db_data, drro->drr_bonuslen); + if (ra->byteswap) { + dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, + drro->drr_bonuslen); + } + dmu_buf_rele(db, FTAG); + } + dmu_tx_commit(tx); + return (0); +} + +/* ARGSUSED */ +static int +restore_freeobjects(struct restorearg *ra, objset_t *os, + struct drr_freeobjects *drrfo) +{ + uint64_t obj; + + if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) + return (EINVAL); + + for (obj = drrfo->drr_firstobj; + obj < drrfo->drr_firstobj + drrfo->drr_numobjs; + (void) dmu_object_next(os, &obj, FALSE, 0)) { + int err; + + if (dmu_object_info(os, obj, NULL) != 0) + continue; + + err = dmu_free_object(os, obj); + if (err) + return (err); + } + return (0); +} + +static int +restore_write(struct restorearg *ra, objset_t *os, + struct drr_write *drrw) +{ + dmu_tx_t *tx; + void *data; + int err; + + if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || + drrw->drr_type >= DMU_OT_NUMTYPES) + return (EINVAL); + + data = restore_read(ra, drrw->drr_length); + if (data == NULL) + return (ra->err); + + if (dmu_object_info(os, drrw->drr_object, NULL) != 0) + return (EINVAL); + + tx = dmu_tx_create(os); + + dmu_tx_hold_write(tx, drrw->drr_object, + drrw->drr_offset, drrw->drr_length); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); + } + if (ra->byteswap) + dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); + dmu_write(os, 
drrw->drr_object, + drrw->drr_offset, drrw->drr_length, data, tx); + dmu_tx_commit(tx); + return (0); +} + +/* + * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed + * streams to refer to a copy of the data that is already on the + * system because it came in earlier in the stream. This function + * finds the earlier copy of the data, and uses that copy instead of + * data from the stream to fulfill this write. + */ +static int +restore_write_byref(struct restorearg *ra, objset_t *os, + struct drr_write_byref *drrwbr) +{ + dmu_tx_t *tx; + int err; + guid_map_entry_t gmesrch; + guid_map_entry_t *gmep; + avl_index_t where; + objset_t *ref_os = NULL; + dmu_buf_t *dbp; + + if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) + return (EINVAL); + + /* + * If the GUID of the referenced dataset is different from the + * GUID of the target dataset, find the referenced dataset. + */ + if (drrwbr->drr_toguid != drrwbr->drr_refguid) { + gmesrch.guid = drrwbr->drr_refguid; + if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, + &where)) == NULL) { + return (EINVAL); + } + if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) + return (EINVAL); + } else { + ref_os = os; + } + + if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, + drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) + return (err); + + tx = dmu_tx_create(os); + + dmu_tx_hold_write(tx, drrwbr->drr_object, + drrwbr->drr_offset, drrwbr->drr_length); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); + } + dmu_write(os, drrwbr->drr_object, + drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + dmu_buf_rele(dbp, FTAG); + dmu_tx_commit(tx); + return (0); +} + +static int +restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) +{ + dmu_tx_t *tx; + void *data; + dmu_buf_t *db, *db_spill; + int err; + + if (drrs->drr_length < SPA_MINBLOCKSIZE || + drrs->drr_length > SPA_MAXBLOCKSIZE) + return (EINVAL); + + data = restore_read(ra, 
drrs->drr_length); + if (data == NULL) + return (ra->err); + + if (dmu_object_info(os, drrs->drr_object, NULL) != 0) + return (EINVAL); + + VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); + if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { + dmu_buf_rele(db, FTAG); + return (err); + } + + tx = dmu_tx_create(os); + + dmu_tx_hold_spill(tx, db->db_object); + + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_buf_rele(db, FTAG); + dmu_buf_rele(db_spill, FTAG); + dmu_tx_abort(tx); + return (err); + } + dmu_buf_will_dirty(db_spill, tx); + + if (db_spill->db_size < drrs->drr_length) + VERIFY(0 == dbuf_spill_set_blksz(db_spill, + drrs->drr_length, tx)); + bcopy(data, db_spill->db_data, drrs->drr_length); + + dmu_buf_rele(db, FTAG); + dmu_buf_rele(db_spill, FTAG); + + dmu_tx_commit(tx); + return (0); +} + +/* ARGSUSED */ +static int +restore_free(struct restorearg *ra, objset_t *os, + struct drr_free *drrf) +{ + int err; + + if (drrf->drr_length != -1ULL && + drrf->drr_offset + drrf->drr_length < drrf->drr_offset) + return (EINVAL); + + if (dmu_object_info(os, drrf->drr_object, NULL) != 0) + return (EINVAL); + + err = dmu_free_long_range(os, drrf->drr_object, + drrf->drr_offset, drrf->drr_length); + return (err); +} + +/* + * NB: callers *must* call dmu_recv_end() if this succeeds. 
+ */ +int +dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, + int cleanup_fd, uint64_t *action_handlep) +{ + struct restorearg ra = { 0 }; + dmu_replay_record_t *drr; + objset_t *os; + zio_cksum_t pcksum; + int featureflags; + + if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + ra.byteswap = TRUE; + + { + /* compute checksum of drr_begin record */ + dmu_replay_record_t *drr; + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); + + drr->drr_type = DRR_BEGIN; + drr->drr_u.drr_begin = *drc->drc_drrb; + if (ra.byteswap) { + fletcher_4_incremental_byteswap(drr, + sizeof (dmu_replay_record_t), &ra.cksum); + } else { + fletcher_4_incremental_native(drr, + sizeof (dmu_replay_record_t), &ra.cksum); + } + kmem_free(drr, sizeof (dmu_replay_record_t)); + } + + if (ra.byteswap) { + struct drr_begin *drrb = drc->drc_drrb; + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); + drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); + } + + ra.vp = vp; + ra.voff = *voffp; + ra.bufsize = 1<<20; + ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); + + /* these were verified in dmu_recv_begin */ + ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == + DMU_SUBSTREAM); + ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); + + /* + * Open the objset we are modifying. 
+ */ + VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); + + ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); + + featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); + + /* if this stream is dedup'ed, set up the avl tree for guid mapping */ + if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { + minor_t minor; + + if (cleanup_fd == -1) { + ra.err = EBADF; + goto out; + } + ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (ra.err) { + cleanup_fd = -1; + goto out; + } + + if (*action_handlep == 0) { + ra.guid_to_ds_map = + kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); + avl_create(ra.guid_to_ds_map, guid_compare, + sizeof (guid_map_entry_t), + offsetof(guid_map_entry_t, avlnode)); + (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, + (void *)ra.guid_to_ds_map, + DS_FIND_CHILDREN); + ra.err = zfs_onexit_add_cb(minor, + free_guid_map_onexit, ra.guid_to_ds_map, + action_handlep); + if (ra.err) + goto out; + } else { + ra.err = zfs_onexit_cb_data(minor, *action_handlep, + (void **)&ra.guid_to_ds_map); + if (ra.err) + goto out; + } + } + + /* + * Read records and process them. + */ + pcksum = ra.cksum; + while (ra.err == 0 && + NULL != (drr = restore_read(&ra, sizeof (*drr)))) { + if (issig(JUSTLOOKING) && issig(FORREAL)) { + ra.err = EINTR; + goto out; + } + + if (ra.byteswap) + backup_byteswap(drr); + + switch (drr->drr_type) { + case DRR_OBJECT: + { + /* + * We need to make a copy of the record header, + * because restore_{object,write} may need to + * restore_read(), which will invalidate drr. 
+ */ + struct drr_object drro = drr->drr_u.drr_object; + ra.err = restore_object(&ra, os, &drro); + break; + } + case DRR_FREEOBJECTS: + { + struct drr_freeobjects drrfo = + drr->drr_u.drr_freeobjects; + ra.err = restore_freeobjects(&ra, os, &drrfo); + break; + } + case DRR_WRITE: + { + struct drr_write drrw = drr->drr_u.drr_write; + ra.err = restore_write(&ra, os, &drrw); + break; + } + case DRR_WRITE_BYREF: + { + struct drr_write_byref drrwbr = + drr->drr_u.drr_write_byref; + ra.err = restore_write_byref(&ra, os, &drrwbr); + break; + } + case DRR_FREE: + { + struct drr_free drrf = drr->drr_u.drr_free; + ra.err = restore_free(&ra, os, &drrf); + break; + } + case DRR_END: + { + struct drr_end drre = drr->drr_u.drr_end; + /* + * We compare against the *previous* checksum + * value, because the stored checksum is of + * everything before the DRR_END record. + */ + if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) + ra.err = ECKSUM; + goto out; + } + case DRR_SPILL: + { + struct drr_spill drrs = drr->drr_u.drr_spill; + ra.err = restore_spill(&ra, os, &drrs); + break; + } + default: + ra.err = EINVAL; + goto out; + } + pcksum = ra.cksum; + } + ASSERT(ra.err != 0); + +out: + if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) + zfs_onexit_fd_rele(cleanup_fd); + + if (ra.err != 0) { + /* + * destroy what we created, so we don't leave it in the + * inconsistent restoring state. 
+ */ + txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); + + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, + B_FALSE); + if (drc->drc_real_ds != drc->drc_logical_ds) { + mutex_exit(&drc->drc_logical_ds->ds_recvlock); + dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); + } + } + + kmem_free(ra.buf, ra.bufsize); + *voffp = ra.voff; + return (ra.err); +} + +struct recvendsyncarg { + char *tosnap; + uint64_t creation_time; + uint64_t toguid; +}; + +static int +recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvendsyncarg *resa = arg2; + + return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); +} + +static void +recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvendsyncarg *resa = arg2; + + dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); + + /* set snapshot's creation time and guid */ + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; + ds->ds_prev->ds_phys->ds_guid = resa->toguid; + ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; +} + +static int +dmu_recv_existing_end(dmu_recv_cookie_t *drc) +{ + struct recvendsyncarg resa; + dsl_dataset_t *ds = drc->drc_logical_ds; + int err; + + /* + * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() + * expects it to have a ds_user_ptr (and zil), but clone_swap() + * can close it. 
+ */ + txg_wait_synced(ds->ds_dir->dd_pool, 0); + + if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { + err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, + drc->drc_force); + if (err) + goto out; + } else { + mutex_exit(&ds->ds_recvlock); + dsl_dataset_rele(ds, dmu_recv_tag); + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, + B_FALSE); + return (EBUSY); + } + + resa.creation_time = drc->drc_drrb->drr_creation_time; + resa.toguid = drc->drc_drrb->drr_toguid; + resa.tosnap = drc->drc_tosnap; + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_end_check, recv_end_sync, ds, &resa, 3); + if (err) { + /* swap back */ + (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); + } + +out: + mutex_exit(&ds->ds_recvlock); + dsl_dataset_disown(ds, dmu_recv_tag); + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + return (err); +} + +static int +dmu_recv_new_end(dmu_recv_cookie_t *drc) +{ + struct recvendsyncarg resa; + dsl_dataset_t *ds = drc->drc_logical_ds; + int err; + + /* + * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() + * expects it to have a ds_user_ptr (and zil), but clone_swap() + * can close it. 
+ */ + txg_wait_synced(ds->ds_dir->dd_pool, 0); + + resa.creation_time = drc->drc_drrb->drr_creation_time; + resa.toguid = drc->drc_drrb->drr_toguid; + resa.tosnap = drc->drc_tosnap; + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_end_check, recv_end_sync, ds, &resa, 3); + if (err) { + /* clean up the fs we just recv'd into */ + (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); + } else { + /* release the hold from dmu_recv_begin */ + dsl_dataset_disown(ds, dmu_recv_tag); + } + return (err); +} + +int +dmu_recv_end(dmu_recv_cookie_t *drc) +{ + if (drc->drc_logical_ds != drc->drc_real_ds) + return (dmu_recv_existing_end(drc)); + else + return (dmu_recv_new_end(drc)); +} diff --git a/uts/common/fs/zfs/dmu_traverse.c b/uts/common/fs/zfs/dmu_traverse.c new file mode 100644 index 000000000000..023f90e12e34 --- /dev/null +++ b/uts/common/fs/zfs/dmu_traverse.c @@ -0,0 +1,482 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_traverse.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_pool.h> +#include <sys/dnode.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/dmu_impl.h> +#include <sys/sa.h> +#include <sys/sa_impl.h> +#include <sys/callb.h> + +int zfs_pd_blks_max = 100; + +typedef struct prefetch_data { + kmutex_t pd_mtx; + kcondvar_t pd_cv; + int pd_blks_max; + int pd_blks_fetched; + int pd_flags; + boolean_t pd_cancel; + boolean_t pd_exited; +} prefetch_data_t; + +typedef struct traverse_data { + spa_t *td_spa; + uint64_t td_objset; + blkptr_t *td_rootbp; + uint64_t td_min_txg; + int td_flags; + prefetch_data_t *td_pfd; + blkptr_cb_t *td_func; + void *td_arg; +} traverse_data_t; + +static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, + arc_buf_t *buf, uint64_t objset, uint64_t object); + +static int +traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) +{ + traverse_data_t *td = arg; + zbookmark_t zb; + + if (bp->blk_birth == 0) + return (0); + + if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa)) + return (0); + + SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, + bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); + + (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg); + + return (0); +} + +static int +traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) +{ + traverse_data_t *td = arg; + + if (lrc->lrc_txtype == TX_WRITE) { + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + zbookmark_t zb; + + if (bp->blk_birth == 0) + return (0); + + if (claim_txg == 0 || bp->blk_birth < claim_txg) + return (0); + + SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, + ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); + + (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, + td->td_arg); + } + return (0); +} + +static void +traverse_zil(traverse_data_t 
*td, zil_header_t *zh) +{ + uint64_t claim_txg = zh->zh_claim_txg; + zilog_t *zilog; + + /* + * We only want to visit blocks that have been claimed but not yet + * replayed; plus, in read-only mode, blocks that are already stable. + */ + if (claim_txg == 0 && spa_writeable(td->td_spa)) + return; + + zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); + + (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, + claim_txg); + + zil_free(zilog); +} + +static int +traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, + arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb) +{ + zbookmark_t czb; + int err = 0, lasterr = 0; + arc_buf_t *buf = NULL; + prefetch_data_t *pd = td->td_pfd; + boolean_t hard = td->td_flags & TRAVERSE_HARD; + + if (bp->blk_birth == 0) { + err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp, + td->td_arg); + return (err); + } + + if (bp->blk_birth <= td->td_min_txg) + return (0); + + if (pd && !pd->pd_exited && + ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) || + BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) { + mutex_enter(&pd->pd_mtx); + ASSERT(pd->pd_blks_fetched >= 0); + while (pd->pd_blks_fetched == 0 && !pd->pd_exited) + cv_wait(&pd->pd_cv, &pd->pd_mtx); + pd->pd_blks_fetched--; + cv_broadcast(&pd->pd_cv); + mutex_exit(&pd->pd_mtx); + } + + if (td->td_flags & TRAVERSE_PRE) { + err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp, + td->td_arg); + if (err == TRAVERSE_VISIT_NO_CHILDREN) + return (0); + if (err) + return (err); + } + + if (BP_GET_LEVEL(bp) > 0) { + uint32_t flags = ARC_WAIT; + int i; + blkptr_t *cbp; + int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + + err = dsl_read(NULL, td->td_spa, bp, pbuf, + arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + if (err) + return (err); + + /* recursively visitbp() blocks below this */ + cbp = buf->b_data; + for (i = 0; i < epb; i++, cbp++) { + SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, + zb->zb_level - 1, + 
zb->zb_blkid * epb + i); + err = traverse_visitbp(td, dnp, buf, cbp, &czb); + if (err) { + if (!hard) + break; + lasterr = err; + } + } + } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { + uint32_t flags = ARC_WAIT; + int i; + int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; + + err = dsl_read(NULL, td->td_spa, bp, pbuf, + arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + if (err) + return (err); + + /* recursively visitbp() blocks below this */ + dnp = buf->b_data; + for (i = 0; i < epb; i++, dnp++) { + err = traverse_dnode(td, dnp, buf, zb->zb_objset, + zb->zb_blkid * epb + i); + if (err) { + if (!hard) + break; + lasterr = err; + } + } + } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { + uint32_t flags = ARC_WAIT; + objset_phys_t *osp; + dnode_phys_t *dnp; + + err = dsl_read_nolock(NULL, td->td_spa, bp, + arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + if (err) + return (err); + + osp = buf->b_data; + dnp = &osp->os_meta_dnode; + err = traverse_dnode(td, dnp, buf, zb->zb_objset, + DMU_META_DNODE_OBJECT); + if (err && hard) { + lasterr = err; + err = 0; + } + if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { + dnp = &osp->os_userused_dnode; + err = traverse_dnode(td, dnp, buf, zb->zb_objset, + DMU_USERUSED_OBJECT); + } + if (err && hard) { + lasterr = err; + err = 0; + } + if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { + dnp = &osp->os_groupused_dnode; + err = traverse_dnode(td, dnp, buf, zb->zb_objset, + DMU_GROUPUSED_OBJECT); + } + } + + if (buf) + (void) arc_buf_remove_ref(buf, &buf); + + if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) { + err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp, + td->td_arg); + } + + return (err != 0 ? 
err : lasterr); +} + +static int +traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, + arc_buf_t *buf, uint64_t objset, uint64_t object) +{ + int j, err = 0, lasterr = 0; + zbookmark_t czb; + boolean_t hard = (td->td_flags & TRAVERSE_HARD); + + for (j = 0; j < dnp->dn_nblkptr; j++) { + SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); + err = traverse_visitbp(td, dnp, buf, + (blkptr_t *)&dnp->dn_blkptr[j], &czb); + if (err) { + if (!hard) + break; + lasterr = err; + } + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + SET_BOOKMARK(&czb, objset, + object, 0, DMU_SPILL_BLKID); + err = traverse_visitbp(td, dnp, buf, + (blkptr_t *)&dnp->dn_spill, &czb); + if (err) { + if (!hard) + return (err); + lasterr = err; + } + } + return (err != 0 ? err : lasterr); +} + +/* ARGSUSED */ +static int +traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, + arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, + void *arg) +{ + prefetch_data_t *pfd = arg; + uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; + + ASSERT(pfd->pd_blks_fetched >= 0); + if (pfd->pd_cancel) + return (EINTR); + + if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) || + BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) || + BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) + return (0); + + mutex_enter(&pfd->pd_mtx); + while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max) + cv_wait(&pfd->pd_cv, &pfd->pd_mtx); + pfd->pd_blks_fetched++; + cv_broadcast(&pfd->pd_cv); + mutex_exit(&pfd->pd_mtx); + + (void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL, + ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &aflags, zb); + + return (0); +} + +static void +traverse_prefetch_thread(void *arg) +{ + traverse_data_t *td_main = arg; + traverse_data_t td = *td_main; + zbookmark_t czb; + + td.td_func = traverse_prefetcher; + td.td_arg = td_main->td_pfd; + td.td_pfd = NULL; + + SET_BOOKMARK(&czb, td.td_objset, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, 
ZB_ROOT_BLKID); + (void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb); + + mutex_enter(&td_main->td_pfd->pd_mtx); + td_main->td_pfd->pd_exited = B_TRUE; + cv_broadcast(&td_main->td_pfd->pd_cv); + mutex_exit(&td_main->td_pfd->pd_mtx); +} + +/* + * NB: dataset must not be changing on-disk (eg, is a snapshot or we are + * in syncing context). + */ +static int +traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp, + uint64_t txg_start, int flags, blkptr_cb_t func, void *arg) +{ + traverse_data_t td; + prefetch_data_t pd = { 0 }; + zbookmark_t czb; + int err; + + td.td_spa = spa; + td.td_objset = ds ? ds->ds_object : 0; + td.td_rootbp = rootbp; + td.td_min_txg = txg_start; + td.td_func = func; + td.td_arg = arg; + td.td_pfd = &pd; + td.td_flags = flags; + + pd.pd_blks_max = zfs_pd_blks_max; + pd.pd_flags = flags; + mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL); + cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL); + + /* See comment on ZIL traversal in dsl_scan_visitds. */ + if (ds != NULL && !dsl_dataset_is_snapshot(ds)) { + objset_t *os; + + err = dmu_objset_from_ds(ds, &os); + if (err) + return (err); + + traverse_zil(&td, &os->os_zil_header); + } + + if (!(flags & TRAVERSE_PREFETCH) || + 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread, + &td, TQ_NOQUEUE)) + pd.pd_exited = B_TRUE; + + SET_BOOKMARK(&czb, td.td_objset, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + err = traverse_visitbp(&td, NULL, NULL, rootbp, &czb); + + mutex_enter(&pd.pd_mtx); + pd.pd_cancel = B_TRUE; + cv_broadcast(&pd.pd_cv); + while (!pd.pd_exited) + cv_wait(&pd.pd_cv, &pd.pd_mtx); + mutex_exit(&pd.pd_mtx); + + mutex_destroy(&pd.pd_mtx); + cv_destroy(&pd.pd_cv); + + return (err); +} + +/* + * NB: dataset must not be changing on-disk (eg, is a snapshot or we are + * in syncing context). 
+ */ +int +traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, + blkptr_cb_t func, void *arg) +{ + return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, + &ds->ds_phys->ds_bp, txg_start, flags, func, arg)); +} + +/* + * NB: pool must not be changing on-disk (eg, from zdb or sync context). + */ +int +traverse_pool(spa_t *spa, uint64_t txg_start, int flags, + blkptr_cb_t func, void *arg) +{ + int err, lasterr = 0; + uint64_t obj; + dsl_pool_t *dp = spa_get_dsl(spa); + objset_t *mos = dp->dp_meta_objset; + boolean_t hard = (flags & TRAVERSE_HARD); + + /* visit the MOS */ + err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa), + txg_start, flags, func, arg); + if (err) + return (err); + + /* visit each dataset */ + for (obj = 1; err == 0 || (err != ESRCH && hard); + err = dmu_object_next(mos, &obj, FALSE, txg_start)) { + dmu_object_info_t doi; + + err = dmu_object_info(mos, obj, &doi); + if (err) { + if (!hard) + return (err); + lasterr = err; + continue; + } + + if (doi.doi_type == DMU_OT_DSL_DATASET) { + dsl_dataset_t *ds; + uint64_t txg = txg_start; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (err) { + if (!hard) + return (err); + lasterr = err; + continue; + } + if (ds->ds_phys->ds_prev_snap_txg > txg) + txg = ds->ds_phys->ds_prev_snap_txg; + err = traverse_dataset(ds, txg, flags, func, arg); + dsl_dataset_rele(ds, FTAG); + if (err) { + if (!hard) + return (err); + lasterr = err; + } + } + } + if (err == ESRCH) + err = 0; + return (err != 0 ? err : lasterr); +} diff --git a/uts/common/fs/zfs/dmu_tx.c b/uts/common/fs/zfs/dmu_tx.c new file mode 100644 index 000000000000..bd5c71a2265e --- /dev/null +++ b/uts/common/fs/zfs/dmu_tx.c @@ -0,0 +1,1382 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dbuf.h> +#include <sys/dmu_tx.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */ +#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */ +#include <sys/dsl_pool.h> +#include <sys/zap_impl.h> /* for fzap_default_block_shift */ +#include <sys/spa.h> +#include <sys/sa.h> +#include <sys/sa_impl.h> +#include <sys/zfs_context.h> +#include <sys/varargs.h> + +typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn, + uint64_t arg1, uint64_t arg2); + + +dmu_tx_t * +dmu_tx_create_dd(dsl_dir_t *dd) +{ + dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); + tx->tx_dir = dd; + if (dd) + tx->tx_pool = dd->dd_pool; + list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), + offsetof(dmu_tx_hold_t, txh_node)); + list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t), + offsetof(dmu_tx_callback_t, dcb_node)); +#ifdef ZFS_DEBUG + refcount_create(&tx->tx_space_written); + refcount_create(&tx->tx_space_freed); +#endif + return (tx); +} + +dmu_tx_t * +dmu_tx_create(objset_t *os) +{ + dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir); + tx->tx_objset = os; + tx->tx_lastsnap_txg = 
dsl_dataset_prev_snap_txg(os->os_dsl_dataset); + return (tx); +} + +dmu_tx_t * +dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg) +{ + dmu_tx_t *tx = dmu_tx_create_dd(NULL); + + ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg); + tx->tx_pool = dp; + tx->tx_txg = txg; + tx->tx_anyobj = TRUE; + + return (tx); +} + +int +dmu_tx_is_syncing(dmu_tx_t *tx) +{ + return (tx->tx_anyobj); +} + +int +dmu_tx_private_ok(dmu_tx_t *tx) +{ + return (tx->tx_anyobj); +} + +static dmu_tx_hold_t * +dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object, + enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2) +{ + dmu_tx_hold_t *txh; + dnode_t *dn = NULL; + int err; + + if (object != DMU_NEW_OBJECT) { + err = dnode_hold(os, object, tx, &dn); + if (err) { + tx->tx_err = err; + return (NULL); + } + + if (err == 0 && tx->tx_txg != 0) { + mutex_enter(&dn->dn_mtx); + /* + * dn->dn_assigned_txg == tx->tx_txg doesn't pose a + * problem, but there's no way for it to happen (for + * now, at least). + */ + ASSERT(dn->dn_assigned_txg == 0); + dn->dn_assigned_txg = tx->tx_txg; + (void) refcount_add(&dn->dn_tx_holds, tx); + mutex_exit(&dn->dn_mtx); + } + } + + txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP); + txh->txh_tx = tx; + txh->txh_dnode = dn; +#ifdef ZFS_DEBUG + txh->txh_type = type; + txh->txh_arg1 = arg1; + txh->txh_arg2 = arg2; +#endif + list_insert_tail(&tx->tx_holds, txh); + + return (txh); +} + +void +dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object) +{ + /* + * If we're syncing, they can manipulate any object anyhow, and + * the hold on the dnode_t can cause problems. 
+ */ + if (!dmu_tx_is_syncing(tx)) { + (void) dmu_tx_hold_object_impl(tx, os, + object, THT_NEWOBJECT, 0, 0); + } +} + +static int +dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) +{ + int err; + dmu_buf_impl_t *db; + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + db = dbuf_hold_level(dn, level, blkid, FTAG); + rw_exit(&dn->dn_struct_rwlock); + if (db == NULL) + return (EIO); + err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH); + dbuf_rele(db, FTAG); + return (err); +} + +static void +dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db, + int level, uint64_t blkid, boolean_t freeable, uint64_t *history) +{ + objset_t *os = dn->dn_objset; + dsl_dataset_t *ds = os->os_dsl_dataset; + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + dmu_buf_impl_t *parent = NULL; + blkptr_t *bp = NULL; + uint64_t space; + + if (level >= dn->dn_nlevels || history[level] == blkid) + return; + + history[level] = blkid; + + space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift); + + if (db == NULL || db == dn->dn_dbuf) { + ASSERT(level != 0); + db = NULL; + } else { + ASSERT(DB_DNODE(db) == dn); + ASSERT(db->db_level == level); + ASSERT(db->db.db_size == space); + ASSERT(db->db_blkid == blkid); + bp = db->db_blkptr; + parent = db->db_parent; + } + + freeable = (bp && (freeable || + dsl_dataset_block_freeable(ds, bp, bp->blk_birth))); + + if (freeable) + txh->txh_space_tooverwrite += space; + else + txh->txh_space_towrite += space; + if (bp) + txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp); + + dmu_tx_count_twig(txh, dn, parent, level + 1, + blkid >> epbs, freeable, history); +} + +/* ARGSUSED */ +static void +dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) +{ + dnode_t *dn = txh->txh_dnode; + uint64_t start, end, i; + int min_bs, max_bs, min_ibs, max_ibs, epbs, bits; + int err = 0; + + if (len == 0) + return; + + min_bs = SPA_MINBLOCKSHIFT; + max_bs = SPA_MAXBLOCKSHIFT; + min_ibs = 
DN_MIN_INDBLKSHIFT; + max_ibs = DN_MAX_INDBLKSHIFT; + + if (dn) { + uint64_t history[DN_MAX_LEVELS]; + int nlvls = dn->dn_nlevels; + int delta; + + /* + * For i/o error checking, read the first and last level-0 + * blocks (if they are not aligned), and all the level-1 blocks. + */ + if (dn->dn_maxblkid == 0) { + delta = dn->dn_datablksz; + start = (off < dn->dn_datablksz) ? 0 : 1; + end = (off+len <= dn->dn_datablksz) ? 0 : 1; + if (start == 0 && (off > 0 || len < dn->dn_datablksz)) { + err = dmu_tx_check_ioerr(NULL, dn, 0, 0); + if (err) + goto out; + delta -= off; + } + } else { + zio_t *zio = zio_root(dn->dn_objset->os_spa, + NULL, NULL, ZIO_FLAG_CANFAIL); + + /* first level-0 block */ + start = off >> dn->dn_datablkshift; + if (P2PHASE(off, dn->dn_datablksz) || + len < dn->dn_datablksz) { + err = dmu_tx_check_ioerr(zio, dn, 0, start); + if (err) + goto out; + } + + /* last level-0 block */ + end = (off+len-1) >> dn->dn_datablkshift; + if (end != start && end <= dn->dn_maxblkid && + P2PHASE(off+len, dn->dn_datablksz)) { + err = dmu_tx_check_ioerr(zio, dn, 0, end); + if (err) + goto out; + } + + /* level-1 blocks */ + if (nlvls > 1) { + int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + for (i = (start>>shft)+1; i < end>>shft; i++) { + err = dmu_tx_check_ioerr(zio, dn, 1, i); + if (err) + goto out; + } + } + + err = zio_wait(zio); + if (err) + goto out; + delta = P2NPHASE(off, dn->dn_datablksz); + } + + if (dn->dn_maxblkid > 0) { + /* + * The blocksize can't change, + * so we can make a more precise estimate. + */ + ASSERT(dn->dn_datablkshift != 0); + min_bs = max_bs = dn->dn_datablkshift; + min_ibs = max_ibs = dn->dn_indblkshift; + } else if (dn->dn_indblkshift > max_ibs) { + /* + * This ensures that if we reduce DN_MAX_INDBLKSHIFT, + * the code will still work correctly on older pools. + */ + min_ibs = max_ibs = dn->dn_indblkshift; + } + + /* + * If this write is not off the end of the file + * we need to account for overwrites/unref. 
+ */ + if (start <= dn->dn_maxblkid) { + for (int l = 0; l < DN_MAX_LEVELS; l++) + history[l] = -1ULL; + } + while (start <= dn->dn_maxblkid) { + dmu_buf_impl_t *db; + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db); + rw_exit(&dn->dn_struct_rwlock); + + if (err) { + txh->txh_tx->tx_err = err; + return; + } + + dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE, + history); + dbuf_rele(db, FTAG); + if (++start > end) { + /* + * Account for new indirects appearing + * before this IO gets assigned into a txg. + */ + bits = 64 - min_bs; + epbs = min_ibs - SPA_BLKPTRSHIFT; + for (bits -= epbs * (nlvls - 1); + bits >= 0; bits -= epbs) + txh->txh_fudge += 1ULL << max_ibs; + goto out; + } + off += delta; + if (len >= delta) + len -= delta; + delta = dn->dn_datablksz; + } + } + + /* + * 'end' is the last thing we will access, not one past. + * This way we won't overflow when accessing the last byte. + */ + start = P2ALIGN(off, 1ULL << max_bs); + end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1; + txh->txh_space_towrite += end - start + 1; + + start >>= min_bs; + end >>= min_bs; + + epbs = min_ibs - SPA_BLKPTRSHIFT; + + /* + * The object contains at most 2^(64 - min_bs) blocks, + * and each indirect level maps 2^epbs. + */ + for (bits = 64 - min_bs; bits >= 0; bits -= epbs) { + start >>= epbs; + end >>= epbs; + ASSERT3U(end, >=, start); + txh->txh_space_towrite += (end - start + 1) << max_ibs; + if (start != 0) { + /* + * We also need a new blkid=0 indirect block + * to reference any existing file data. 
+ */ + txh->txh_space_towrite += 1ULL << max_ibs; + } + } + +out: + if (txh->txh_space_towrite + txh->txh_space_tooverwrite > + 2 * DMU_MAX_ACCESS) + err = EFBIG; + + if (err) + txh->txh_tx->tx_err = err; +} + +static void +dmu_tx_count_dnode(dmu_tx_hold_t *txh) +{ + dnode_t *dn = txh->txh_dnode; + dnode_t *mdn = DMU_META_DNODE(txh->txh_tx->tx_objset); + uint64_t space = mdn->dn_datablksz + + ((mdn->dn_nlevels-1) << mdn->dn_indblkshift); + + if (dn && dn->dn_dbuf->db_blkptr && + dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, + dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) { + txh->txh_space_tooverwrite += space; + txh->txh_space_tounref += space; + } else { + txh->txh_space_towrite += space; + if (dn && dn->dn_dbuf->db_blkptr) + txh->txh_space_tounref += space; + } +} + +void +dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len) +{ + dmu_tx_hold_t *txh; + + ASSERT(tx->tx_txg == 0); + ASSERT(len < DMU_MAX_ACCESS); + ASSERT(len == 0 || UINT64_MAX - off >= len - 1); + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, + object, THT_WRITE, off, len); + if (txh == NULL) + return; + + dmu_tx_count_write(txh, off, len); + dmu_tx_count_dnode(txh); +} + +static void +dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) +{ + uint64_t blkid, nblks, lastblk; + uint64_t space = 0, unref = 0, skipped = 0; + dnode_t *dn = txh->txh_dnode; + dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; + spa_t *spa = txh->txh_tx->tx_pool->dp_spa; + int epbs; + + if (dn->dn_nlevels == 0) + return; + + /* + * The struct_rwlock protects us against dn_nlevels + * changing, in case (against all odds) we manage to dirty & + * sync out the changes after we check for being dirty. + * Also, dbuf_hold_impl() wants us to have the struct_rwlock. 
+ */ + rw_enter(&dn->dn_struct_rwlock, RW_READER); + epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + if (dn->dn_maxblkid == 0) { + if (off == 0 && len >= dn->dn_datablksz) { + blkid = 0; + nblks = 1; + } else { + rw_exit(&dn->dn_struct_rwlock); + return; + } + } else { + blkid = off >> dn->dn_datablkshift; + nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift; + + if (blkid >= dn->dn_maxblkid) { + rw_exit(&dn->dn_struct_rwlock); + return; + } + if (blkid + nblks > dn->dn_maxblkid) + nblks = dn->dn_maxblkid - blkid; + + } + if (dn->dn_nlevels == 1) { + int i; + for (i = 0; i < nblks; i++) { + blkptr_t *bp = dn->dn_phys->dn_blkptr; + ASSERT3U(blkid + i, <, dn->dn_nblkptr); + bp += blkid + i; + if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) { + dprintf_bp(bp, "can free old%s", ""); + space += bp_get_dsize(spa, bp); + } + unref += BP_GET_ASIZE(bp); + } + nblks = 0; + } + + /* + * Add in memory requirements of higher-level indirects. + * This assumes a worst-possible scenario for dn_nlevels. + */ + { + uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs); + int level = (dn->dn_nlevels > 1) ? 
2 : 1; + + while (level++ < DN_MAX_LEVELS) { + txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift; + blkcnt = 1 + (blkcnt >> epbs); + } + ASSERT(blkcnt <= dn->dn_nblkptr); + } + + lastblk = blkid + nblks - 1; + while (nblks) { + dmu_buf_impl_t *dbuf; + uint64_t ibyte, new_blkid; + int epb = 1 << epbs; + int err, i, blkoff, tochk; + blkptr_t *bp; + + ibyte = blkid << dn->dn_datablkshift; + err = dnode_next_offset(dn, + DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0); + new_blkid = ibyte >> dn->dn_datablkshift; + if (err == ESRCH) { + skipped += (lastblk >> epbs) - (blkid >> epbs) + 1; + break; + } + if (err) { + txh->txh_tx->tx_err = err; + break; + } + if (new_blkid > lastblk) { + skipped += (lastblk >> epbs) - (blkid >> epbs) + 1; + break; + } + + if (new_blkid > blkid) { + ASSERT((new_blkid >> epbs) > (blkid >> epbs)); + skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1; + nblks -= new_blkid - blkid; + blkid = new_blkid; + } + blkoff = P2PHASE(blkid, epb); + tochk = MIN(epb - blkoff, nblks); + + err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf); + if (err) { + txh->txh_tx->tx_err = err; + break; + } + + txh->txh_memory_tohold += dbuf->db.db_size; + + /* + * We don't check memory_tohold against DMU_MAX_ACCESS because + * memory_tohold is an over-estimation (especially the >L1 + * indirect blocks), so it could fail. Callers should have + * already verified that they will not be holding too much + * memory. 
+ */ + + err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL); + if (err != 0) { + txh->txh_tx->tx_err = err; + dbuf_rele(dbuf, FTAG); + break; + } + + bp = dbuf->db.db_data; + bp += blkoff; + + for (i = 0; i < tochk; i++) { + if (dsl_dataset_block_freeable(ds, &bp[i], + bp[i].blk_birth)) { + dprintf_bp(&bp[i], "can free old%s", ""); + space += bp_get_dsize(spa, &bp[i]); + } + unref += BP_GET_ASIZE(bp); + } + dbuf_rele(dbuf, FTAG); + + blkid += tochk; + nblks -= tochk; + } + rw_exit(&dn->dn_struct_rwlock); + + /* account for new level 1 indirect blocks that might show up */ + if (skipped > 0) { + txh->txh_fudge += skipped << dn->dn_indblkshift; + skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs); + txh->txh_memory_tohold += skipped << dn->dn_indblkshift; + } + txh->txh_space_tofree += space; + txh->txh_space_tounref += unref; +} + +void +dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len) +{ + dmu_tx_hold_t *txh; + dnode_t *dn; + uint64_t start, end, i; + int err, shift; + zio_t *zio; + + ASSERT(tx->tx_txg == 0); + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, + object, THT_FREE, off, len); + if (txh == NULL) + return; + dn = txh->txh_dnode; + + /* first block */ + if (off != 0) + dmu_tx_count_write(txh, off, 1); + /* last block */ + if (len != DMU_OBJECT_END) + dmu_tx_count_write(txh, off+len, 1); + + dmu_tx_count_dnode(txh); + + if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz) + return; + if (len == DMU_OBJECT_END) + len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off; + + /* + * For i/o error checking, read the first and last level-0 + * blocks, and all the level-1 blocks. The above count_write's + * have already taken care of the level-0 blocks. + */ + if (dn->dn_nlevels > 1) { + shift = dn->dn_datablkshift + dn->dn_indblkshift - + SPA_BLKPTRSHIFT; + start = off >> shift; + end = dn->dn_datablkshift ? 
((off+len) >> shift) : 0; + + zio = zio_root(tx->tx_pool->dp_spa, + NULL, NULL, ZIO_FLAG_CANFAIL); + for (i = start; i <= end; i++) { + uint64_t ibyte = i << shift; + err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0); + i = ibyte >> shift; + if (err == ESRCH) + break; + if (err) { + tx->tx_err = err; + return; + } + + err = dmu_tx_check_ioerr(zio, dn, 1, i); + if (err) { + tx->tx_err = err; + return; + } + } + err = zio_wait(zio); + if (err) { + tx->tx_err = err; + return; + } + } + + dmu_tx_count_free(txh, off, len); +} + +void +dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) +{ + dmu_tx_hold_t *txh; + dnode_t *dn; + uint64_t nblocks; + int epbs, err; + + ASSERT(tx->tx_txg == 0); + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, + object, THT_ZAP, add, (uintptr_t)name); + if (txh == NULL) + return; + dn = txh->txh_dnode; + + dmu_tx_count_dnode(txh); + + if (dn == NULL) { + /* + * We will be able to fit a new object's entries into one leaf + * block. So there will be at most 2 blocks total, + * including the header block. + */ + dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift); + return; + } + + ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap); + + if (dn->dn_maxblkid == 0 && !add) { + /* + * If there is only one block (i.e. this is a micro-zap) + * and we are not adding anything, the accounting is simple. + */ + err = dmu_tx_check_ioerr(NULL, dn, 0, 0); + if (err) { + tx->tx_err = err; + return; + } + + /* + * Use max block size here, since we don't know how much + * the size will change between now and the dbuf dirty call. 
+ */ + if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, + &dn->dn_phys->dn_blkptr[0], + dn->dn_phys->dn_blkptr[0].blk_birth)) { + txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; + } else { + txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + } + if (dn->dn_phys->dn_blkptr[0].blk_birth) + txh->txh_space_tounref += SPA_MAXBLOCKSIZE; + return; + } + + if (dn->dn_maxblkid > 0 && name) { + /* + * access the name in this fat-zap so that we'll check + * for i/o errors to the leaf blocks, etc. + */ + err = zap_lookup(dn->dn_objset, dn->dn_object, name, + 8, 0, NULL); + if (err == EIO) { + tx->tx_err = err; + return; + } + } + + err = zap_count_write(dn->dn_objset, dn->dn_object, name, add, + &txh->txh_space_towrite, &txh->txh_space_tooverwrite); + + /* + * If the modified blocks are scattered to the four winds, + * we'll have to modify an indirect twig for each. + */ + epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs) + if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj) + txh->txh_space_towrite += 3 << dn->dn_indblkshift; + else + txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift; +} + +void +dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object) +{ + dmu_tx_hold_t *txh; + + ASSERT(tx->tx_txg == 0); + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, + object, THT_BONUS, 0, 0); + if (txh) + dmu_tx_count_dnode(txh); +} + +void +dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space) +{ + dmu_tx_hold_t *txh; + ASSERT(tx->tx_txg == 0); + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, + DMU_NEW_OBJECT, THT_SPACE, space, 0); + + txh->txh_space_towrite += space; +} + +int +dmu_tx_holds(dmu_tx_t *tx, uint64_t object) +{ + dmu_tx_hold_t *txh; + int holds = 0; + + /* + * By asserting that the tx is assigned, we're counting the + * number of dn_tx_holds, which is the same as the number of + * dn_holds. Otherwise, we'd be counting dn_holds, but + * dn_tx_holds could be 0. 
+ */ + ASSERT(tx->tx_txg != 0); + + /* if (tx->tx_anyobj == TRUE) */ + /* return (0); */ + + for (txh = list_head(&tx->tx_holds); txh; + txh = list_next(&tx->tx_holds, txh)) { + if (txh->txh_dnode && txh->txh_dnode->dn_object == object) + holds++; + } + + return (holds); +} + +#ifdef ZFS_DEBUG +void +dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) +{ + dmu_tx_hold_t *txh; + int match_object = FALSE, match_offset = FALSE; + dnode_t *dn; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + ASSERT(tx->tx_txg != 0); + ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset); + ASSERT3U(dn->dn_object, ==, db->db.db_object); + + if (tx->tx_anyobj) { + DB_DNODE_EXIT(db); + return; + } + + /* XXX No checking on the meta dnode for now */ + if (db->db.db_object == DMU_META_DNODE_OBJECT) { + DB_DNODE_EXIT(db); + return; + } + + for (txh = list_head(&tx->tx_holds); txh; + txh = list_next(&tx->tx_holds, txh)) { + ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg); + if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT) + match_object = TRUE; + if (txh->txh_dnode == NULL || txh->txh_dnode == dn) { + int datablkshift = dn->dn_datablkshift ? + dn->dn_datablkshift : SPA_MAXBLOCKSHIFT; + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + int shift = datablkshift + epbs * db->db_level; + uint64_t beginblk = shift >= 64 ? 0 : + (txh->txh_arg1 >> shift); + uint64_t endblk = shift >= 64 ? 0 : + ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift); + uint64_t blkid = db->db_blkid; + + /* XXX txh_arg2 better not be zero... */ + + dprintf("found txh type %x beginblk=%llx endblk=%llx\n", + txh->txh_type, beginblk, endblk); + + switch (txh->txh_type) { + case THT_WRITE: + if (blkid >= beginblk && blkid <= endblk) + match_offset = TRUE; + /* + * We will let this hold work for the bonus + * or spill buffer so that we don't need to + * hold it when creating a new object. 
+ */ + if (blkid == DMU_BONUS_BLKID || + blkid == DMU_SPILL_BLKID) + match_offset = TRUE; + /* + * They might have to increase nlevels, + * thus dirtying the new TLIBs. Or the + * might have to change the block size, + * thus dirying the new lvl=0 blk=0. + */ + if (blkid == 0) + match_offset = TRUE; + break; + case THT_FREE: + /* + * We will dirty all the level 1 blocks in + * the free range and perhaps the first and + * last level 0 block. + */ + if (blkid >= beginblk && (blkid <= endblk || + txh->txh_arg2 == DMU_OBJECT_END)) + match_offset = TRUE; + break; + case THT_SPILL: + if (blkid == DMU_SPILL_BLKID) + match_offset = TRUE; + break; + case THT_BONUS: + if (blkid == DMU_BONUS_BLKID) + match_offset = TRUE; + break; + case THT_ZAP: + match_offset = TRUE; + break; + case THT_NEWOBJECT: + match_object = TRUE; + break; + default: + ASSERT(!"bad txh_type"); + } + } + if (match_object && match_offset) { + DB_DNODE_EXIT(db); + return; + } + } + DB_DNODE_EXIT(db); + panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n", + (u_longlong_t)db->db.db_object, db->db_level, + (u_longlong_t)db->db_blkid); +} +#endif + +static int +dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) +{ + dmu_tx_hold_t *txh; + spa_t *spa = tx->tx_pool->dp_spa; + uint64_t memory, asize, fsize, usize; + uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge; + + ASSERT3U(tx->tx_txg, ==, 0); + + if (tx->tx_err) + return (tx->tx_err); + + if (spa_suspended(spa)) { + /* + * If the user has indicated a blocking failure mode + * then return ERESTART which will block in dmu_tx_wait(). + * Otherwise, return EIO so that an error can get + * propagated back to the VOP calls. + * + * Note that we always honor the txg_how flag regardless + * of the failuremode setting. 
+ */ + if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && + txg_how != TXG_WAIT) + return (EIO); + + return (ERESTART); + } + + tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh); + tx->tx_needassign_txh = NULL; + + /* + * NB: No error returns are allowed after txg_hold_open, but + * before processing the dnode holds, due to the + * dmu_tx_unassign() logic. + */ + + towrite = tofree = tooverwrite = tounref = tohold = fudge = 0; + for (txh = list_head(&tx->tx_holds); txh; + txh = list_next(&tx->tx_holds, txh)) { + dnode_t *dn = txh->txh_dnode; + if (dn != NULL) { + mutex_enter(&dn->dn_mtx); + if (dn->dn_assigned_txg == tx->tx_txg - 1) { + mutex_exit(&dn->dn_mtx); + tx->tx_needassign_txh = txh; + return (ERESTART); + } + if (dn->dn_assigned_txg == 0) + dn->dn_assigned_txg = tx->tx_txg; + ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); + (void) refcount_add(&dn->dn_tx_holds, tx); + mutex_exit(&dn->dn_mtx); + } + towrite += txh->txh_space_towrite; + tofree += txh->txh_space_tofree; + tooverwrite += txh->txh_space_tooverwrite; + tounref += txh->txh_space_tounref; + tohold += txh->txh_memory_tohold; + fudge += txh->txh_fudge; + } + + /* + * NB: This check must be after we've held the dnodes, so that + * the dmu_tx_unassign() logic will work properly + */ + if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) + return (ERESTART); + + /* + * If a snapshot has been taken since we made our estimates, + * assume that we won't be able to free or overwrite anything. 
+ */ + if (tx->tx_objset && + dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) > + tx->tx_lastsnap_txg) { + towrite += tooverwrite; + tooverwrite = tofree = 0; + } + + /* needed allocation: worst-case estimate of write space */ + asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite); + /* freed space estimate: worst-case overwrite + free estimate */ + fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree; + /* convert unrefd space to worst-case estimate */ + usize = spa_get_asize(tx->tx_pool->dp_spa, tounref); + /* calculate memory footprint estimate */ + memory = towrite + tooverwrite + tohold; + +#ifdef ZFS_DEBUG + /* + * Add in 'tohold' to account for our dirty holds on this memory + * XXX - the "fudge" factor is to account for skipped blocks that + * we missed because dnode_next_offset() misses in-core-only blocks. + */ + tx->tx_space_towrite = asize + + spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge); + tx->tx_space_tofree = tofree; + tx->tx_space_tooverwrite = tooverwrite; + tx->tx_space_tounref = tounref; +#endif + + if (tx->tx_dir && asize != 0) { + int err = dsl_dir_tempreserve_space(tx->tx_dir, memory, + asize, fsize, usize, &tx->tx_tempreserve_cookie, tx); + if (err) + return (err); + } + + return (0); +} + +static void +dmu_tx_unassign(dmu_tx_t *tx) +{ + dmu_tx_hold_t *txh; + + if (tx->tx_txg == 0) + return; + + txg_rele_to_quiesce(&tx->tx_txgh); + + for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh; + txh = list_next(&tx->tx_holds, txh)) { + dnode_t *dn = txh->txh_dnode; + + if (dn == NULL) + continue; + mutex_enter(&dn->dn_mtx); + ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); + + if (refcount_remove(&dn->dn_tx_holds, tx) == 0) { + dn->dn_assigned_txg = 0; + cv_broadcast(&dn->dn_notxholds); + } + mutex_exit(&dn->dn_mtx); + } + + txg_rele_to_sync(&tx->tx_txgh); + + tx->tx_lasttried_txg = tx->tx_txg; + tx->tx_txg = 0; +} + +/* + * Assign tx to a transaction group. 
txg_how can be one of: + * + * (1) TXG_WAIT. If the current open txg is full, waits until there's + * a new one. This should be used when you're not holding locks. + * If will only fail if we're truly out of space (or over quota). + * + * (2) TXG_NOWAIT. If we can't assign into the current open txg without + * blocking, returns immediately with ERESTART. This should be used + * whenever you're holding locks. On an ERESTART error, the caller + * should drop locks, do a dmu_tx_wait(tx), and try again. + * + * (3) A specific txg. Use this if you need to ensure that multiple + * transactions all sync in the same txg. Like TXG_NOWAIT, it + * returns ERESTART if it can't assign you into the requested txg. + */ +int +dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) +{ + int err; + + ASSERT(tx->tx_txg == 0); + ASSERT(txg_how != 0); + ASSERT(!dsl_pool_sync_context(tx->tx_pool)); + + while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { + dmu_tx_unassign(tx); + + if (err != ERESTART || txg_how != TXG_WAIT) + return (err); + + dmu_tx_wait(tx); + } + + txg_rele_to_quiesce(&tx->tx_txgh); + + return (0); +} + +void +dmu_tx_wait(dmu_tx_t *tx) +{ + spa_t *spa = tx->tx_pool->dp_spa; + + ASSERT(tx->tx_txg == 0); + + /* + * It's possible that the pool has become active after this thread + * has tried to obtain a tx. If that's the case then his + * tx_lasttried_txg would not have been assigned. 
+ */ + if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) { + txg_wait_synced(tx->tx_pool, spa_last_synced_txg(spa) + 1); + } else if (tx->tx_needassign_txh) { + dnode_t *dn = tx->tx_needassign_txh->txh_dnode; + + mutex_enter(&dn->dn_mtx); + while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1) + cv_wait(&dn->dn_notxholds, &dn->dn_mtx); + mutex_exit(&dn->dn_mtx); + tx->tx_needassign_txh = NULL; + } else { + txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1); + } +} + +void +dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta) +{ +#ifdef ZFS_DEBUG + if (tx->tx_dir == NULL || delta == 0) + return; + + if (delta > 0) { + ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=, + tx->tx_space_towrite); + (void) refcount_add_many(&tx->tx_space_written, delta, NULL); + } else { + (void) refcount_add_many(&tx->tx_space_freed, -delta, NULL); + } +#endif +} + +void +dmu_tx_commit(dmu_tx_t *tx) +{ + dmu_tx_hold_t *txh; + + ASSERT(tx->tx_txg != 0); + + while (txh = list_head(&tx->tx_holds)) { + dnode_t *dn = txh->txh_dnode; + + list_remove(&tx->tx_holds, txh); + kmem_free(txh, sizeof (dmu_tx_hold_t)); + if (dn == NULL) + continue; + mutex_enter(&dn->dn_mtx); + ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); + + if (refcount_remove(&dn->dn_tx_holds, tx) == 0) { + dn->dn_assigned_txg = 0; + cv_broadcast(&dn->dn_notxholds); + } + mutex_exit(&dn->dn_mtx); + dnode_rele(dn, tx); + } + + if (tx->tx_tempreserve_cookie) + dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx); + + if (!list_is_empty(&tx->tx_callbacks)) + txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks); + + if (tx->tx_anyobj == FALSE) + txg_rele_to_sync(&tx->tx_txgh); + + list_destroy(&tx->tx_callbacks); + list_destroy(&tx->tx_holds); +#ifdef ZFS_DEBUG + dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n", + tx->tx_space_towrite, refcount_count(&tx->tx_space_written), + tx->tx_space_tofree, refcount_count(&tx->tx_space_freed)); + refcount_destroy_many(&tx->tx_space_written, + 
refcount_count(&tx->tx_space_written)); + refcount_destroy_many(&tx->tx_space_freed, + refcount_count(&tx->tx_space_freed)); +#endif + kmem_free(tx, sizeof (dmu_tx_t)); +} + +void +dmu_tx_abort(dmu_tx_t *tx) +{ + dmu_tx_hold_t *txh; + + ASSERT(tx->tx_txg == 0); + + while (txh = list_head(&tx->tx_holds)) { + dnode_t *dn = txh->txh_dnode; + + list_remove(&tx->tx_holds, txh); + kmem_free(txh, sizeof (dmu_tx_hold_t)); + if (dn != NULL) + dnode_rele(dn, tx); + } + + /* + * Call any registered callbacks with an error code. + */ + if (!list_is_empty(&tx->tx_callbacks)) + dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED); + + list_destroy(&tx->tx_callbacks); + list_destroy(&tx->tx_holds); +#ifdef ZFS_DEBUG + refcount_destroy_many(&tx->tx_space_written, + refcount_count(&tx->tx_space_written)); + refcount_destroy_many(&tx->tx_space_freed, + refcount_count(&tx->tx_space_freed)); +#endif + kmem_free(tx, sizeof (dmu_tx_t)); +} + +uint64_t +dmu_tx_get_txg(dmu_tx_t *tx) +{ + ASSERT(tx->tx_txg != 0); + return (tx->tx_txg); +} + +void +dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) +{ + dmu_tx_callback_t *dcb; + + dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP); + + dcb->dcb_func = func; + dcb->dcb_data = data; + + list_insert_tail(&tx->tx_callbacks, dcb); +} + +/* + * Call all the commit callbacks on a list, with a given error code. + */ +void +dmu_tx_do_callbacks(list_t *cb_list, int error) +{ + dmu_tx_callback_t *dcb; + + while (dcb = list_head(cb_list)) { + list_remove(cb_list, dcb); + dcb->dcb_func(dcb->dcb_data, error); + kmem_free(dcb, sizeof (dmu_tx_callback_t)); + } +} + +/* + * Interface to hold a bunch of attributes. + * used for creating new files. + * attrsize is the total size of all attributes + * to be added during object creation + * + * For updating/adding a single attribute dmu_tx_hold_sa() should be used. + */ + +/* + * hold necessary attribute name for attribute registration. 
+ * should be a very rare case where this is needed. If it does + * happen it would only happen on the first write to the file system. + */ +static void +dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx) +{ + int i; + + if (!sa->sa_need_attr_registration) + return; + + for (i = 0; i != sa->sa_num_attrs; i++) { + if (!sa->sa_attr_table[i].sa_registered) { + if (sa->sa_reg_attr_obj) + dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj, + B_TRUE, sa->sa_attr_table[i].sa_name); + else + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, + B_TRUE, sa->sa_attr_table[i].sa_name); + } + } +} + + +void +dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) +{ + dnode_t *dn; + dmu_tx_hold_t *txh; + blkptr_t *bp; + + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, + THT_SPILL, 0, 0); + + dn = txh->txh_dnode; + + if (dn == NULL) + return; + + /* If blkptr doesn't exist then add space to towrite */ + bp = &dn->dn_phys->dn_spill; + if (BP_IS_HOLE(bp)) { + txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + txh->txh_space_tounref = 0; + } else { + if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, + bp, bp->blk_birth)) + txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; + else + txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + if (bp->blk_birth) + txh->txh_space_tounref += SPA_MAXBLOCKSIZE; + } +} + +void +dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize) +{ + sa_os_t *sa = tx->tx_objset->os_sa; + + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + + if (tx->tx_objset->os_sa->sa_master_obj == 0) + return; + + if (tx->tx_objset->os_sa->sa_layout_attr_obj) + dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); + else { + dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); + dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); + } + + dmu_tx_sa_registration_hold(sa, tx); + + if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill) + return; + + (void) dmu_tx_hold_object_impl(tx, 
tx->tx_objset, DMU_NEW_OBJECT, + THT_SPILL, 0, 0); +} + +/* + * Hold SA attribute + * + * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size) + * + * variable_size is the total size of all variable sized attributes + * passed to this function. It is not the total size of all + * variable size attributes that *may* exist on this object. + */ +void +dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) +{ + uint64_t object; + sa_os_t *sa = tx->tx_objset->os_sa; + + ASSERT(hdl != NULL); + + object = sa_handle_object(hdl); + + dmu_tx_hold_bonus(tx, object); + + if (tx->tx_objset->os_sa->sa_master_obj == 0) + return; + + if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 || + tx->tx_objset->os_sa->sa_layout_attr_obj == 0) { + dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); + dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); + } + + dmu_tx_sa_registration_hold(sa, tx); + + if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj) + dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); + + if (sa->sa_force_spill || may_grow || hdl->sa_spill) { + ASSERT(tx->tx_txg == 0); + dmu_tx_hold_spill(tx, object); + } else { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; + dnode_t *dn; + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + if (dn->dn_have_spill) { + ASSERT(tx->tx_txg == 0); + dmu_tx_hold_spill(tx, object); + } + DB_DNODE_EXIT(db); + } +} diff --git a/uts/common/fs/zfs/dmu_zfetch.c b/uts/common/fs/zfs/dmu_zfetch.c new file mode 100644 index 000000000000..37037c30f623 --- /dev/null +++ b/uts/common/fs/zfs/dmu_zfetch.c @@ -0,0 +1,724 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> +#include <sys/dnode.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_zfetch.h> +#include <sys/dmu.h> +#include <sys/dbuf.h> +#include <sys/kstat.h> + +/* + * I'm against tune-ables, but these should probably exist as tweakable globals + * until we can get this working the way we want it to. 
+ */ + +int zfs_prefetch_disable = 0; + +/* max # of streams per zfetch */ +uint32_t zfetch_max_streams = 8; +/* min time before stream reclaim */ +uint32_t zfetch_min_sec_reap = 2; +/* max number of blocks to fetch at a time */ +uint32_t zfetch_block_cap = 256; +/* number of bytes in a array_read at which we stop prefetching (1Mb) */ +uint64_t zfetch_array_rd_sz = 1024 * 1024; + +/* forward decls for static routines */ +static int dmu_zfetch_colinear(zfetch_t *, zstream_t *); +static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *); +static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t); +static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t); +static int dmu_zfetch_find(zfetch_t *, zstream_t *, int); +static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *); +static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *); +static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *); +static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *); + +typedef struct zfetch_stats { + kstat_named_t zfetchstat_hits; + kstat_named_t zfetchstat_misses; + kstat_named_t zfetchstat_colinear_hits; + kstat_named_t zfetchstat_colinear_misses; + kstat_named_t zfetchstat_stride_hits; + kstat_named_t zfetchstat_stride_misses; + kstat_named_t zfetchstat_reclaim_successes; + kstat_named_t zfetchstat_reclaim_failures; + kstat_named_t zfetchstat_stream_resets; + kstat_named_t zfetchstat_stream_noresets; + kstat_named_t zfetchstat_bogus_streams; +} zfetch_stats_t; + +static zfetch_stats_t zfetch_stats = { + { "hits", KSTAT_DATA_UINT64 }, + { "misses", KSTAT_DATA_UINT64 }, + { "colinear_hits", KSTAT_DATA_UINT64 }, + { "colinear_misses", KSTAT_DATA_UINT64 }, + { "stride_hits", KSTAT_DATA_UINT64 }, + { "stride_misses", KSTAT_DATA_UINT64 }, + { "reclaim_successes", KSTAT_DATA_UINT64 }, + { "reclaim_failures", KSTAT_DATA_UINT64 }, + { "streams_resets", KSTAT_DATA_UINT64 }, + { "streams_noresets", KSTAT_DATA_UINT64 }, + { "bogus_streams", KSTAT_DATA_UINT64 }, +}; + 
+#define ZFETCHSTAT_INCR(stat, val) \ + atomic_add_64(&zfetch_stats.stat.value.ui64, (val)); + +#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1); + +kstat_t *zfetch_ksp; + +/* + * Given a zfetch structure and a zstream structure, determine whether the + * blocks to be read are part of a co-linear pair of existing prefetch + * streams. If a set is found, coalesce the streams, removing one, and + * configure the prefetch so it looks for a strided access pattern. + * + * In other words: if we find two sequential access streams that are + * the same length and distance N appart, and this read is N from the + * last stream, then we are probably in a strided access pattern. So + * combine the two sequential streams into a single strided stream. + * + * If no co-linear streams are found, return NULL. + */ +static int +dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh) +{ + zstream_t *z_walk; + zstream_t *z_comp; + + if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER)) + return (0); + + if (zh == NULL) { + rw_exit(&zf->zf_rwlock); + return (0); + } + + for (z_walk = list_head(&zf->zf_stream); z_walk; + z_walk = list_next(&zf->zf_stream, z_walk)) { + for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp; + z_comp = list_next(&zf->zf_stream, z_comp)) { + int64_t diff; + + if (z_walk->zst_len != z_walk->zst_stride || + z_comp->zst_len != z_comp->zst_stride) { + continue; + } + + diff = z_comp->zst_offset - z_walk->zst_offset; + if (z_comp->zst_offset + diff == zh->zst_offset) { + z_walk->zst_offset = zh->zst_offset; + z_walk->zst_direction = diff < 0 ? 
-1 : 1; + z_walk->zst_stride = + diff * z_walk->zst_direction; + z_walk->zst_ph_offset = + zh->zst_offset + z_walk->zst_stride; + dmu_zfetch_stream_remove(zf, z_comp); + mutex_destroy(&z_comp->zst_lock); + kmem_free(z_comp, sizeof (zstream_t)); + + dmu_zfetch_dofetch(zf, z_walk); + + rw_exit(&zf->zf_rwlock); + return (1); + } + + diff = z_walk->zst_offset - z_comp->zst_offset; + if (z_walk->zst_offset + diff == zh->zst_offset) { + z_walk->zst_offset = zh->zst_offset; + z_walk->zst_direction = diff < 0 ? -1 : 1; + z_walk->zst_stride = + diff * z_walk->zst_direction; + z_walk->zst_ph_offset = + zh->zst_offset + z_walk->zst_stride; + dmu_zfetch_stream_remove(zf, z_comp); + mutex_destroy(&z_comp->zst_lock); + kmem_free(z_comp, sizeof (zstream_t)); + + dmu_zfetch_dofetch(zf, z_walk); + + rw_exit(&zf->zf_rwlock); + return (1); + } + } + } + + rw_exit(&zf->zf_rwlock); + return (0); +} + +/* + * Given a zstream_t, determine the bounds of the prefetch. Then call the + * routine that actually prefetches the individual blocks. + */ +static void +dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs) +{ + uint64_t prefetch_tail; + uint64_t prefetch_limit; + uint64_t prefetch_ofst; + uint64_t prefetch_len; + uint64_t blocks_fetched; + + zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len); + zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap); + + prefetch_tail = MAX((int64_t)zs->zst_ph_offset, + (int64_t)(zs->zst_offset + zs->zst_stride)); + /* + * XXX: use a faster division method? + */ + prefetch_limit = zs->zst_offset + zs->zst_len + + (zs->zst_cap * zs->zst_stride) / zs->zst_len; + + while (prefetch_tail < prefetch_limit) { + prefetch_ofst = zs->zst_offset + zs->zst_direction * + (prefetch_tail - zs->zst_offset); + + prefetch_len = zs->zst_len; + + /* + * Don't prefetch beyond the end of the file, if working + * backwards. 
+ */ + if ((zs->zst_direction == ZFETCH_BACKWARD) && + (prefetch_ofst > prefetch_tail)) { + prefetch_len += prefetch_ofst; + prefetch_ofst = 0; + } + + /* don't prefetch more than we're supposed to */ + if (prefetch_len > zs->zst_len) + break; + + blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode, + prefetch_ofst, zs->zst_len); + + prefetch_tail += zs->zst_stride; + /* stop if we've run out of stuff to prefetch */ + if (blocks_fetched < zs->zst_len) + break; + } + zs->zst_ph_offset = prefetch_tail; + zs->zst_last = ddi_get_lbolt(); +} + +void +zfetch_init(void) +{ + + zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", + KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (zfetch_ksp != NULL) { + zfetch_ksp->ks_data = &zfetch_stats; + kstat_install(zfetch_ksp); + } +} + +void +zfetch_fini(void) +{ + if (zfetch_ksp != NULL) { + kstat_delete(zfetch_ksp); + zfetch_ksp = NULL; + } +} + +/* + * This takes a pointer to a zfetch structure and a dnode. It performs the + * necessary setup for the zfetch structure, grokking data from the + * associated dnode. + */ +void +dmu_zfetch_init(zfetch_t *zf, dnode_t *dno) +{ + if (zf == NULL) { + return; + } + + zf->zf_dnode = dno; + zf->zf_stream_cnt = 0; + zf->zf_alloc_fail = 0; + + list_create(&zf->zf_stream, sizeof (zstream_t), + offsetof(zstream_t, zst_node)); + + rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL); +} + +/* + * This function computes the actual size, in blocks, that can be prefetched, + * and fetches it. + */ +static uint64_t +dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks) +{ + uint64_t fetchsz; + uint64_t i; + + fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks); + + for (i = 0; i < fetchsz; i++) { + dbuf_prefetch(dn, blkid + i); + } + + return (fetchsz); +} + +/* + * this function returns the number of blocks that would be prefetched, based + * upon the supplied dnode, blockid, and nblks. 
 * This is used so that we can
 * update streams in place, and then prefetch with their old value after the
 * fact.  This way, we can delay the prefetch, but subsequent accesses to the
 * stream won't result in the same data being prefetched multiple times.
 *
 * Returns 0 when blkid is beyond dn_maxblkid.
 */
static uint64_t
dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
{
	uint64_t	fetchsz;

	if (blkid > dn->dn_maxblkid) {
		return (0);
	}

	/*
	 * compute fetch size
	 *
	 * NOTE(review): when blkid + nblks == dn_maxblkid the first branch
	 * is taken and yields nblks + 1, i.e. one block more than requested
	 * -- confirm whether that is intended.
	 */
	if (blkid + nblks + 1 > dn->dn_maxblkid) {
		fetchsz = (dn->dn_maxblkid - blkid) + 1;
		ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
	} else {
		fetchsz = nblks;
	}


	return (fetchsz);
}

/*
 * given a zfetch and a zstream structure, see if there is an associated zstream
 * for this block read.  If so, it starts a prefetch for the stream it
 * located and returns true, otherwise it returns false
 *
 * zh describes the current read (only zst_offset and zst_len are consulted
 * here).  'prefetched' is supplied by the caller; when it is zero and the
 * matching sequential stream is longer than one block, the stream is removed
 * ("reset") instead of being extended, and 0 is returned.
 */
static int
dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
{
	zstream_t	*zs;
	int64_t		diff;
	int		reset = !prefetched;
	int		rc = 0;

	if (zh == NULL)
		return (0);

	/*
	 * XXX: This locking strategy is a bit coarse; however, its impact has
	 * yet to be tested.  If this turns out to be an issue, it can be
	 * modified in a number of different ways.
	 */

	rw_enter(&zf->zf_rwlock, RW_READER);
top:

	/*
	 * Each matching case below first tests without the stream lock, then
	 * takes zst_lock and re-tests; if the stream changed in between, the
	 * whole scan restarts from 'top'.  On a successful match the loop is
	 * left via 'break' with zs->zst_lock still held.
	 */
	for (zs = list_head(&zf->zf_stream); zs;
	    zs = list_next(&zf->zf_stream, zs)) {

		/*
		 * XXX - should this be an assert?
		 */
		if (zs->zst_len == 0) {
			/* bogus stream */
			ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
			continue;
		}

		/*
		 * We hit this case when we are in a strided prefetch stream:
		 * we will read "len" blocks before "striding".
		 */
		if (zh->zst_offset >= zs->zst_offset &&
		    zh->zst_offset < zs->zst_offset + zs->zst_len) {
			if (prefetched) {
				/* already fetched */
				ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
				rc = 1;
				goto out;
			} else {
				ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
			}
		}

		/*
		 * This is the forward sequential read case: we increment
		 * len by one each time we hit here, so we will enter this
		 * case on every read.
		 */
		if (zh->zst_offset == zs->zst_offset + zs->zst_len) {

			reset = !prefetched && zs->zst_len > 1;

			mutex_enter(&zs->zst_lock);

			if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}
			zs->zst_len += zh->zst_len;
			/* keep the window no longer than zfetch_block_cap */
			diff = zs->zst_len - zfetch_block_cap;
			if (diff > 0) {
				zs->zst_offset += diff;
				zs->zst_len = zs->zst_len > diff ?
				    zs->zst_len - diff : 0;
			}
			zs->zst_direction = ZFETCH_FORWARD;

			break;

		/*
		 * Same as above, but reading backwards through the file.
		 */
		} else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
			/* backwards sequential access */

			reset = !prefetched && zs->zst_len > 1;

			mutex_enter(&zs->zst_lock);

			if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			/* move the window back, clamping at block 0 */
			zs->zst_offset = zs->zst_offset > zh->zst_len ?
			    zs->zst_offset - zh->zst_len : 0;
			zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
			    zs->zst_ph_offset - zh->zst_len : 0;
			zs->zst_len += zh->zst_len;

			/*
			 * NOTE(review): unlike the forward case, the fallback
			 * value here leaves zst_len unchanged rather than 0.
			 */
			diff = zs->zst_len - zfetch_block_cap;
			if (diff > 0) {
				zs->zst_ph_offset = zs->zst_ph_offset > diff ?
				    zs->zst_ph_offset - diff : 0;
				zs->zst_len = zs->zst_len > diff ?
				    zs->zst_len - diff : zs->zst_len;
			}
			zs->zst_direction = ZFETCH_BACKWARD;

			break;

		} else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
		    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
			/* strided forward access */

			mutex_enter(&zs->zst_lock);

			if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
			    zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			zs->zst_offset += zs->zst_stride;
			zs->zst_direction = ZFETCH_FORWARD;

			break;

		} else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
		    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
			/* strided reverse access */

			mutex_enter(&zs->zst_lock);

			if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
			    zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			zs->zst_offset = zs->zst_offset > zs->zst_stride ?
			    zs->zst_offset - zs->zst_stride : 0;
			zs->zst_ph_offset = (zs->zst_ph_offset >
			    (2 * zs->zst_stride)) ?
			    (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
			zs->zst_direction = ZFETCH_BACKWARD;

			break;
		}
	}

	/* zs is non-NULL only if a case above matched; zst_lock is held. */
	if (zs) {
		if (reset) {
			zstream_t *remove = zs;

			ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
			rc = 0;
			mutex_exit(&zs->zst_lock);
			/* trade the reader hold for a writer hold */
			rw_exit(&zf->zf_rwlock);
			rw_enter(&zf->zf_rwlock, RW_WRITER);
			/*
			 * Relocate the stream, in case someone removes
			 * it while we were acquiring the WRITER lock.
			 */
			for (zs = list_head(&zf->zf_stream); zs;
			    zs = list_next(&zf->zf_stream, zs)) {
				if (zs == remove) {
					dmu_zfetch_stream_remove(zf, zs);
					mutex_destroy(&zs->zst_lock);
					kmem_free(zs, sizeof (zstream_t));
					break;
				}
			}
		} else {
			ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
			rc = 1;
			dmu_zfetch_dofetch(zf, zs);
			mutex_exit(&zs->zst_lock);
		}
	}
out:
	rw_exit(&zf->zf_rwlock);
	return (rc);
}

/*
 * Clean-up state associated with a zfetch structure.
This frees allocated + * structure members, empties the zf_stream tree, and generally makes things + * nice. This doesn't free the zfetch_t itself, that's left to the caller. + */ +void +dmu_zfetch_rele(zfetch_t *zf) +{ + zstream_t *zs; + zstream_t *zs_next; + + ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock)); + + for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) { + zs_next = list_next(&zf->zf_stream, zs); + + list_remove(&zf->zf_stream, zs); + mutex_destroy(&zs->zst_lock); + kmem_free(zs, sizeof (zstream_t)); + } + list_destroy(&zf->zf_stream); + rw_destroy(&zf->zf_rwlock); + + zf->zf_dnode = NULL; +} + +/* + * Given a zfetch and zstream structure, insert the zstream structure into the + * AVL tree contained within the zfetch structure. Peform the appropriate + * book-keeping. It is possible that another thread has inserted a stream which + * matches one that we are about to insert, so we must be sure to check for this + * case. If one is found, return failure, and let the caller cleanup the + * duplicates. + */ +static int +dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs) +{ + zstream_t *zs_walk; + zstream_t *zs_next; + + ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); + + for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) { + zs_next = list_next(&zf->zf_stream, zs_walk); + + if (dmu_zfetch_streams_equal(zs_walk, zs)) { + return (0); + } + } + + list_insert_head(&zf->zf_stream, zs); + zf->zf_stream_cnt++; + return (1); +} + + +/* + * Walk the list of zstreams in the given zfetch, find an old one (by time), and + * reclaim it for use by the caller. + */ +static zstream_t * +dmu_zfetch_stream_reclaim(zfetch_t *zf) +{ + zstream_t *zs; + + if (! 
rw_tryenter(&zf->zf_rwlock, RW_WRITER)) + return (0); + + for (zs = list_head(&zf->zf_stream); zs; + zs = list_next(&zf->zf_stream, zs)) { + + if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap) + break; + } + + if (zs) { + dmu_zfetch_stream_remove(zf, zs); + mutex_destroy(&zs->zst_lock); + bzero(zs, sizeof (zstream_t)); + } else { + zf->zf_alloc_fail++; + } + rw_exit(&zf->zf_rwlock); + + return (zs); +} + +/* + * Given a zfetch and zstream structure, remove the zstream structure from its + * container in the zfetch structure. Perform the appropriate book-keeping. + */ +static void +dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs) +{ + ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); + + list_remove(&zf->zf_stream, zs); + zf->zf_stream_cnt--; +} + +static int +dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2) +{ + if (zs1->zst_offset != zs2->zst_offset) + return (0); + + if (zs1->zst_len != zs2->zst_len) + return (0); + + if (zs1->zst_stride != zs2->zst_stride) + return (0); + + if (zs1->zst_ph_offset != zs2->zst_ph_offset) + return (0); + + if (zs1->zst_cap != zs2->zst_cap) + return (0); + + if (zs1->zst_direction != zs2->zst_direction) + return (0); + + return (1); +} + +/* + * This is the prefetch entry point. It calls all of the other dmu_zfetch + * routines to create, delete, find, or operate upon prefetch streams. 
+ */ +void +dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) +{ + zstream_t zst; + zstream_t *newstream; + int fetched; + int inserted; + unsigned int blkshft; + uint64_t blksz; + + if (zfs_prefetch_disable) + return; + + /* files that aren't ln2 blocksz are only one block -- nothing to do */ + if (!zf->zf_dnode->dn_datablkshift) + return; + + /* convert offset and size, into blockid and nblocks */ + blkshft = zf->zf_dnode->dn_datablkshift; + blksz = (1 << blkshft); + + bzero(&zst, sizeof (zstream_t)); + zst.zst_offset = offset >> blkshft; + zst.zst_len = (P2ROUNDUP(offset + size, blksz) - + P2ALIGN(offset, blksz)) >> blkshft; + + fetched = dmu_zfetch_find(zf, &zst, prefetched); + if (fetched) { + ZFETCHSTAT_BUMP(zfetchstat_hits); + } else { + ZFETCHSTAT_BUMP(zfetchstat_misses); + if (fetched = dmu_zfetch_colinear(zf, &zst)) { + ZFETCHSTAT_BUMP(zfetchstat_colinear_hits); + } else { + ZFETCHSTAT_BUMP(zfetchstat_colinear_misses); + } + } + + if (!fetched) { + newstream = dmu_zfetch_stream_reclaim(zf); + + /* + * we still couldn't find a stream, drop the lock, and allocate + * one if possible. Otherwise, give up and go home. 
+ */ + if (newstream) { + ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes); + } else { + uint64_t maxblocks; + uint32_t max_streams; + uint32_t cur_streams; + + ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures); + cur_streams = zf->zf_stream_cnt; + maxblocks = zf->zf_dnode->dn_maxblkid; + + max_streams = MIN(zfetch_max_streams, + (maxblocks / zfetch_block_cap)); + if (max_streams == 0) { + max_streams++; + } + + if (cur_streams >= max_streams) { + return; + } + newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP); + } + + newstream->zst_offset = zst.zst_offset; + newstream->zst_len = zst.zst_len; + newstream->zst_stride = zst.zst_len; + newstream->zst_ph_offset = zst.zst_len + zst.zst_offset; + newstream->zst_cap = zst.zst_len; + newstream->zst_direction = ZFETCH_FORWARD; + newstream->zst_last = ddi_get_lbolt(); + + mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL); + + rw_enter(&zf->zf_rwlock, RW_WRITER); + inserted = dmu_zfetch_stream_insert(zf, newstream); + rw_exit(&zf->zf_rwlock); + + if (!inserted) { + mutex_destroy(&newstream->zst_lock); + kmem_free(newstream, sizeof (zstream_t)); + } + } +} diff --git a/uts/common/fs/zfs/dnode.c b/uts/common/fs/zfs/dnode.c new file mode 100644 index 000000000000..850dd5816bf3 --- /dev/null +++ b/uts/common/fs/zfs/dnode.c @@ -0,0 +1,1993 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/dbuf.h> +#include <sys/dnode.h> +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_tx.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_dataset.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/dmu_zfetch.h> + +static int free_range_compar(const void *node1, const void *node2); + +static kmem_cache_t *dnode_cache; +/* + * Define DNODE_STATS to turn on statistic gathering. By default, it is only + * turned on when DEBUG is also defined. + */ +#ifdef DEBUG +#define DNODE_STATS +#endif /* DEBUG */ + +#ifdef DNODE_STATS +#define DNODE_STAT_ADD(stat) ((stat)++) +#else +#define DNODE_STAT_ADD(stat) /* nothing */ +#endif /* DNODE_STATS */ + +static dnode_phys_t dnode_phys_zero; + +int zfs_default_bs = SPA_MINBLOCKSHIFT; +int zfs_default_ibs = DN_MAX_INDBLKSHIFT; + +static kmem_cbrc_t dnode_move(void *, void *, size_t, void *); + +/* ARGSUSED */ +static int +dnode_cons(void *arg, void *unused, int kmflag) +{ + dnode_t *dn = arg; + int i; + + rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL); + mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); + + refcount_create(&dn->dn_holds); + refcount_create(&dn->dn_tx_holds); + list_link_init(&dn->dn_link); + + bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr)); + bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels)); + bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift)); + bzero(&dn->dn_next_bonustype[0], sizeof (dn->dn_next_bonustype)); + 
bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); + bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); + bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); + + for (i = 0; i < TXG_SIZE; i++) { + list_link_init(&dn->dn_dirty_link[i]); + avl_create(&dn->dn_ranges[i], free_range_compar, + sizeof (free_range_t), + offsetof(struct free_range, fr_node)); + list_create(&dn->dn_dirty_records[i], + sizeof (dbuf_dirty_record_t), + offsetof(dbuf_dirty_record_t, dr_dirty_node)); + } + + dn->dn_allocated_txg = 0; + dn->dn_free_txg = 0; + dn->dn_assigned_txg = 0; + dn->dn_dirtyctx = 0; + dn->dn_dirtyctx_firstset = NULL; + dn->dn_bonus = NULL; + dn->dn_have_spill = B_FALSE; + dn->dn_zio = NULL; + dn->dn_oldused = 0; + dn->dn_oldflags = 0; + dn->dn_olduid = 0; + dn->dn_oldgid = 0; + dn->dn_newuid = 0; + dn->dn_newgid = 0; + dn->dn_id_flags = 0; + + dn->dn_dbufs_count = 0; + list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t), + offsetof(dmu_buf_impl_t, db_link)); + + dn->dn_moved = 0; + return (0); +} + +/* ARGSUSED */ +static void +dnode_dest(void *arg, void *unused) +{ + int i; + dnode_t *dn = arg; + + rw_destroy(&dn->dn_struct_rwlock); + mutex_destroy(&dn->dn_mtx); + mutex_destroy(&dn->dn_dbufs_mtx); + cv_destroy(&dn->dn_notxholds); + refcount_destroy(&dn->dn_holds); + refcount_destroy(&dn->dn_tx_holds); + ASSERT(!list_link_active(&dn->dn_link)); + + for (i = 0; i < TXG_SIZE; i++) { + ASSERT(!list_link_active(&dn->dn_dirty_link[i])); + avl_destroy(&dn->dn_ranges[i]); + list_destroy(&dn->dn_dirty_records[i]); + ASSERT3U(dn->dn_next_nblkptr[i], ==, 0); + ASSERT3U(dn->dn_next_nlevels[i], ==, 0); + ASSERT3U(dn->dn_next_indblkshift[i], ==, 0); + ASSERT3U(dn->dn_next_bonustype[i], ==, 0); + ASSERT3U(dn->dn_rm_spillblk[i], ==, 0); + ASSERT3U(dn->dn_next_bonuslen[i], ==, 0); + ASSERT3U(dn->dn_next_blksz[i], ==, 0); + } + + ASSERT3U(dn->dn_allocated_txg, ==, 0); + ASSERT3U(dn->dn_free_txg, ==, 0); + ASSERT3U(dn->dn_assigned_txg, ==, 0); + 
ASSERT3U(dn->dn_dirtyctx, ==, 0); + ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL); + ASSERT3P(dn->dn_bonus, ==, NULL); + ASSERT(!dn->dn_have_spill); + ASSERT3P(dn->dn_zio, ==, NULL); + ASSERT3U(dn->dn_oldused, ==, 0); + ASSERT3U(dn->dn_oldflags, ==, 0); + ASSERT3U(dn->dn_olduid, ==, 0); + ASSERT3U(dn->dn_oldgid, ==, 0); + ASSERT3U(dn->dn_newuid, ==, 0); + ASSERT3U(dn->dn_newgid, ==, 0); + ASSERT3U(dn->dn_id_flags, ==, 0); + + ASSERT3U(dn->dn_dbufs_count, ==, 0); + list_destroy(&dn->dn_dbufs); +} + +void +dnode_init(void) +{ + ASSERT(dnode_cache == NULL); + dnode_cache = kmem_cache_create("dnode_t", + sizeof (dnode_t), + 0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0); + kmem_cache_set_move(dnode_cache, dnode_move); +} + +void +dnode_fini(void) +{ + kmem_cache_destroy(dnode_cache); + dnode_cache = NULL; +} + + +#ifdef ZFS_DEBUG +void +dnode_verify(dnode_t *dn) +{ + int drop_struct_lock = FALSE; + + ASSERT(dn->dn_phys); + ASSERT(dn->dn_objset); + ASSERT(dn->dn_handle->dnh_dnode == dn); + + ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); + + if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY)) + return; + + if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { + rw_enter(&dn->dn_struct_rwlock, RW_READER); + drop_struct_lock = TRUE; + } + if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) { + int i; + ASSERT3U(dn->dn_indblkshift, >=, 0); + ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT); + if (dn->dn_datablkshift) { + ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT); + ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT); + ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz); + } + ASSERT3U(dn->dn_nlevels, <=, 30); + ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES); + ASSERT3U(dn->dn_nblkptr, >=, 1); + ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); + ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); + ASSERT3U(dn->dn_datablksz, ==, + dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); + ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0); + ASSERT3U((dn->dn_nblkptr 
- 1) * sizeof (blkptr_t) + + dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); + for (i = 0; i < TXG_SIZE; i++) { + ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels); + } + } + if (dn->dn_phys->dn_type != DMU_OT_NONE) + ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels); + ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL); + if (dn->dn_dbuf != NULL) { + ASSERT3P(dn->dn_phys, ==, + (dnode_phys_t *)dn->dn_dbuf->db.db_data + + (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT))); + } + if (drop_struct_lock) + rw_exit(&dn->dn_struct_rwlock); +} +#endif + +void +dnode_byteswap(dnode_phys_t *dnp) +{ + uint64_t *buf64 = (void*)&dnp->dn_blkptr; + int i; + + if (dnp->dn_type == DMU_OT_NONE) { + bzero(dnp, sizeof (dnode_phys_t)); + return; + } + + dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec); + dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen); + dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid); + dnp->dn_used = BSWAP_64(dnp->dn_used); + + /* + * dn_nblkptr is only one byte, so it's OK to read it in either + * byte order. We can't read dn_bouslen. + */ + ASSERT(dnp->dn_indblkshift <= SPA_MAXBLOCKSHIFT); + ASSERT(dnp->dn_nblkptr <= DN_MAX_NBLKPTR); + for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++) + buf64[i] = BSWAP_64(buf64[i]); + + /* + * OK to check dn_bonuslen for zero, because it won't matter if + * we have the wrong byte order. This is necessary because the + * dnode dnode is smaller than a regular dnode. + */ + if (dnp->dn_bonuslen != 0) { + /* + * Note that the bonus length calculated here may be + * longer than the actual bonus buffer. This is because + * we always put the bonus buffer after the last block + * pointer (instead of packing it against the end of the + * dnode buffer). 
+ */ + int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t); + size_t len = DN_MAX_BONUSLEN - off; + ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES); + dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len); + } + + /* Swap SPILL block if we have one */ + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) + byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t)); + +} + +void +dnode_buf_byteswap(void *vbuf, size_t size) +{ + dnode_phys_t *buf = vbuf; + int i; + + ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT)); + ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0); + + size >>= DNODE_SHIFT; + for (i = 0; i < size; i++) { + dnode_byteswap(buf); + buf++; + } +} + +static int +free_range_compar(const void *node1, const void *node2) +{ + const free_range_t *rp1 = node1; + const free_range_t *rp2 = node2; + + if (rp1->fr_blkid < rp2->fr_blkid) + return (-1); + else if (rp1->fr_blkid > rp2->fr_blkid) + return (1); + else return (0); +} + +void +dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx) +{ + ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); + + dnode_setdirty(dn, tx); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + ASSERT3U(newsize, <=, DN_MAX_BONUSLEN - + (dn->dn_nblkptr-1) * sizeof (blkptr_t)); + dn->dn_bonuslen = newsize; + if (newsize == 0) + dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN; + else + dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; + rw_exit(&dn->dn_struct_rwlock); +} + +void +dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx) +{ + ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); + dnode_setdirty(dn, tx); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dn->dn_bonustype = newtype; + dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype; + rw_exit(&dn->dn_struct_rwlock); +} + +void +dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx) +{ + ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); + ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); + dnode_setdirty(dn, tx); + 
dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK; + dn->dn_have_spill = B_FALSE; +} + +static void +dnode_setdblksz(dnode_t *dn, int size) +{ + ASSERT3U(P2PHASE(size, SPA_MINBLOCKSIZE), ==, 0); + ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); + ASSERT3U(size, >=, SPA_MINBLOCKSIZE); + ASSERT3U(size >> SPA_MINBLOCKSHIFT, <, + 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8)); + dn->dn_datablksz = size; + dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT; + dn->dn_datablkshift = ISP2(size) ? highbit(size - 1) : 0; +} + +static dnode_t * +dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, + uint64_t object, dnode_handle_t *dnh) +{ + dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP); + + ASSERT(!POINTER_IS_VALID(dn->dn_objset)); + dn->dn_moved = 0; + + /* + * Defer setting dn_objset until the dnode is ready to be a candidate + * for the dnode_move() callback. + */ + dn->dn_object = object; + dn->dn_dbuf = db; + dn->dn_handle = dnh; + dn->dn_phys = dnp; + + if (dnp->dn_datablkszsec) { + dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); + } else { + dn->dn_datablksz = 0; + dn->dn_datablkszsec = 0; + dn->dn_datablkshift = 0; + } + dn->dn_indblkshift = dnp->dn_indblkshift; + dn->dn_nlevels = dnp->dn_nlevels; + dn->dn_type = dnp->dn_type; + dn->dn_nblkptr = dnp->dn_nblkptr; + dn->dn_checksum = dnp->dn_checksum; + dn->dn_compress = dnp->dn_compress; + dn->dn_bonustype = dnp->dn_bonustype; + dn->dn_bonuslen = dnp->dn_bonuslen; + dn->dn_maxblkid = dnp->dn_maxblkid; + dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0); + dn->dn_id_flags = 0; + + dmu_zfetch_init(&dn->dn_zfetch, dn); + + ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); + + mutex_enter(&os->os_lock); + list_insert_head(&os->os_dnodes, dn); + membar_producer(); + /* + * Everything else must be valid before assigning dn_objset makes the + * dnode eligible for dnode_move(). 
+ */ + dn->dn_objset = os; + mutex_exit(&os->os_lock); + + arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER); + return (dn); +} + +/* + * Caller must be holding the dnode handle, which is released upon return. + */ +static void +dnode_destroy(dnode_t *dn) +{ + objset_t *os = dn->dn_objset; + + ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0); + + mutex_enter(&os->os_lock); + POINTER_INVALIDATE(&dn->dn_objset); + list_remove(&os->os_dnodes, dn); + mutex_exit(&os->os_lock); + + /* the dnode can no longer move, so we can release the handle */ + zrl_remove(&dn->dn_handle->dnh_zrlock); + + dn->dn_allocated_txg = 0; + dn->dn_free_txg = 0; + dn->dn_assigned_txg = 0; + + dn->dn_dirtyctx = 0; + if (dn->dn_dirtyctx_firstset != NULL) { + kmem_free(dn->dn_dirtyctx_firstset, 1); + dn->dn_dirtyctx_firstset = NULL; + } + if (dn->dn_bonus != NULL) { + mutex_enter(&dn->dn_bonus->db_mtx); + dbuf_evict(dn->dn_bonus); + dn->dn_bonus = NULL; + } + dn->dn_zio = NULL; + + dn->dn_have_spill = B_FALSE; + dn->dn_oldused = 0; + dn->dn_oldflags = 0; + dn->dn_olduid = 0; + dn->dn_oldgid = 0; + dn->dn_newuid = 0; + dn->dn_newgid = 0; + dn->dn_id_flags = 0; + + dmu_zfetch_rele(&dn->dn_zfetch); + kmem_cache_free(dnode_cache, dn); + arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER); +} + +void +dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + int i; + + if (blocksize == 0) + blocksize = 1 << zfs_default_bs; + else if (blocksize > SPA_MAXBLOCKSIZE) + blocksize = SPA_MAXBLOCKSIZE; + else + blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE); + + if (ibs == 0) + ibs = zfs_default_ibs; + + ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT); + + dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset, + dn->dn_object, tx->tx_txg, blocksize, ibs); + + ASSERT(dn->dn_type == DMU_OT_NONE); + ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0); + 
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE); + ASSERT(ot != DMU_OT_NONE); + ASSERT3U(ot, <, DMU_OT_NUMTYPES); + ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || + (bonustype == DMU_OT_SA && bonuslen == 0) || + (bonustype != DMU_OT_NONE && bonuslen != 0)); + ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); + ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); + ASSERT(dn->dn_type == DMU_OT_NONE); + ASSERT3U(dn->dn_maxblkid, ==, 0); + ASSERT3U(dn->dn_allocated_txg, ==, 0); + ASSERT3U(dn->dn_assigned_txg, ==, 0); + ASSERT(refcount_is_zero(&dn->dn_tx_holds)); + ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); + ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); + + for (i = 0; i < TXG_SIZE; i++) { + ASSERT3U(dn->dn_next_nblkptr[i], ==, 0); + ASSERT3U(dn->dn_next_nlevels[i], ==, 0); + ASSERT3U(dn->dn_next_indblkshift[i], ==, 0); + ASSERT3U(dn->dn_next_bonuslen[i], ==, 0); + ASSERT3U(dn->dn_next_bonustype[i], ==, 0); + ASSERT3U(dn->dn_rm_spillblk[i], ==, 0); + ASSERT3U(dn->dn_next_blksz[i], ==, 0); + ASSERT(!list_link_active(&dn->dn_dirty_link[i])); + ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); + ASSERT3U(avl_numnodes(&dn->dn_ranges[i]), ==, 0); + } + + dn->dn_type = ot; + dnode_setdblksz(dn, blocksize); + dn->dn_indblkshift = ibs; + dn->dn_nlevels = 1; + if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */ + dn->dn_nblkptr = 1; + else + dn->dn_nblkptr = 1 + + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); + dn->dn_bonustype = bonustype; + dn->dn_bonuslen = bonuslen; + dn->dn_checksum = ZIO_CHECKSUM_INHERIT; + dn->dn_compress = ZIO_COMPRESS_INHERIT; + dn->dn_dirtyctx = 0; + + dn->dn_free_txg = 0; + if (dn->dn_dirtyctx_firstset) { + kmem_free(dn->dn_dirtyctx_firstset, 1); + dn->dn_dirtyctx_firstset = NULL; + } + + dn->dn_allocated_txg = tx->tx_txg; + dn->dn_id_flags = 0; + + dnode_setdirty(dn, tx); + dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs; + dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; + dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = 
dn->dn_bonustype; + dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz; +} + +void +dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + int nblkptr; + + ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); + ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE); + ASSERT3U(blocksize % SPA_MINBLOCKSIZE, ==, 0); + ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); + ASSERT(tx->tx_txg != 0); + ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || + (bonustype != DMU_OT_NONE && bonuslen != 0) || + (bonustype == DMU_OT_SA && bonuslen == 0)); + ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); + ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); + + /* clean up any unreferenced dbufs */ + dnode_evict_dbufs(dn); + + dn->dn_id_flags = 0; + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dnode_setdirty(dn, tx); + if (dn->dn_datablksz != blocksize) { + /* change blocksize */ + ASSERT(dn->dn_maxblkid == 0 && + (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || + dnode_block_freed(dn, 0))); + dnode_setdblksz(dn, blocksize); + dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; + } + if (dn->dn_bonuslen != bonuslen) + dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen; + + if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */ + nblkptr = 1; + else + nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); + if (dn->dn_bonustype != bonustype) + dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype; + if (dn->dn_nblkptr != nblkptr) + dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr; + if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + dbuf_rm_spill(dn, tx); + dnode_rm_spill(dn, tx); + } + rw_exit(&dn->dn_struct_rwlock); + + /* change type */ + dn->dn_type = ot; + + /* change bonus size and type */ + mutex_enter(&dn->dn_mtx); + dn->dn_bonustype = bonustype; + dn->dn_bonuslen = bonuslen; + dn->dn_nblkptr = nblkptr; + dn->dn_checksum = ZIO_CHECKSUM_INHERIT; + dn->dn_compress = 
ZIO_COMPRESS_INHERIT; + ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); + + /* fix up the bonus db_size */ + if (dn->dn_bonus) { + dn->dn_bonus->db.db_size = + DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t); + ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size); + } + + dn->dn_allocated_txg = tx->tx_txg; + mutex_exit(&dn->dn_mtx); +} + +#ifdef DNODE_STATS +static struct { + uint64_t dms_dnode_invalid; + uint64_t dms_dnode_recheck1; + uint64_t dms_dnode_recheck2; + uint64_t dms_dnode_special; + uint64_t dms_dnode_handle; + uint64_t dms_dnode_rwlock; + uint64_t dms_dnode_active; +} dnode_move_stats; +#endif /* DNODE_STATS */ + +static void +dnode_move_impl(dnode_t *odn, dnode_t *ndn) +{ + int i; + + ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); + ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx)); + ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx)); + ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock)); + + /* Copy fields. */ + ndn->dn_objset = odn->dn_objset; + ndn->dn_object = odn->dn_object; + ndn->dn_dbuf = odn->dn_dbuf; + ndn->dn_handle = odn->dn_handle; + ndn->dn_phys = odn->dn_phys; + ndn->dn_type = odn->dn_type; + ndn->dn_bonuslen = odn->dn_bonuslen; + ndn->dn_bonustype = odn->dn_bonustype; + ndn->dn_nblkptr = odn->dn_nblkptr; + ndn->dn_checksum = odn->dn_checksum; + ndn->dn_compress = odn->dn_compress; + ndn->dn_nlevels = odn->dn_nlevels; + ndn->dn_indblkshift = odn->dn_indblkshift; + ndn->dn_datablkshift = odn->dn_datablkshift; + ndn->dn_datablkszsec = odn->dn_datablkszsec; + ndn->dn_datablksz = odn->dn_datablksz; + ndn->dn_maxblkid = odn->dn_maxblkid; + bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0], + sizeof (odn->dn_next_nblkptr)); + bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0], + sizeof (odn->dn_next_nlevels)); + bcopy(&odn->dn_next_indblkshift[0], &ndn->dn_next_indblkshift[0], + sizeof (odn->dn_next_indblkshift)); + bcopy(&odn->dn_next_bonustype[0], &ndn->dn_next_bonustype[0], + sizeof (odn->dn_next_bonustype)); + bcopy(&odn->dn_rm_spillblk[0], 
&ndn->dn_rm_spillblk[0], + sizeof (odn->dn_rm_spillblk)); + bcopy(&odn->dn_next_bonuslen[0], &ndn->dn_next_bonuslen[0], + sizeof (odn->dn_next_bonuslen)); + bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], + sizeof (odn->dn_next_blksz)); + for (i = 0; i < TXG_SIZE; i++) { + list_move_tail(&ndn->dn_dirty_records[i], + &odn->dn_dirty_records[i]); + } + bcopy(&odn->dn_ranges[0], &ndn->dn_ranges[0], sizeof (odn->dn_ranges)); + ndn->dn_allocated_txg = odn->dn_allocated_txg; + ndn->dn_free_txg = odn->dn_free_txg; + ndn->dn_assigned_txg = odn->dn_assigned_txg; + ndn->dn_dirtyctx = odn->dn_dirtyctx; + ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; + ASSERT(refcount_count(&odn->dn_tx_holds) == 0); + refcount_transfer(&ndn->dn_holds, &odn->dn_holds); + ASSERT(list_is_empty(&ndn->dn_dbufs)); + list_move_tail(&ndn->dn_dbufs, &odn->dn_dbufs); + ndn->dn_dbufs_count = odn->dn_dbufs_count; + ndn->dn_bonus = odn->dn_bonus; + ndn->dn_have_spill = odn->dn_have_spill; + ndn->dn_zio = odn->dn_zio; + ndn->dn_oldused = odn->dn_oldused; + ndn->dn_oldflags = odn->dn_oldflags; + ndn->dn_olduid = odn->dn_olduid; + ndn->dn_oldgid = odn->dn_oldgid; + ndn->dn_newuid = odn->dn_newuid; + ndn->dn_newgid = odn->dn_newgid; + ndn->dn_id_flags = odn->dn_id_flags; + dmu_zfetch_init(&ndn->dn_zfetch, NULL); + list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream); + ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode; + ndn->dn_zfetch.zf_stream_cnt = odn->dn_zfetch.zf_stream_cnt; + ndn->dn_zfetch.zf_alloc_fail = odn->dn_zfetch.zf_alloc_fail; + + /* + * Update back pointers. Updating the handle fixes the back pointer of + * every descendant dbuf as well as the bonus dbuf. + */ + ASSERT(ndn->dn_handle->dnh_dnode == odn); + ndn->dn_handle->dnh_dnode = ndn; + if (ndn->dn_zfetch.zf_dnode == odn) { + ndn->dn_zfetch.zf_dnode = ndn; + } + + /* + * Invalidate the original dnode by clearing all of its back pointers. 
+ */ + odn->dn_dbuf = NULL; + odn->dn_handle = NULL; + list_create(&odn->dn_dbufs, sizeof (dmu_buf_impl_t), + offsetof(dmu_buf_impl_t, db_link)); + odn->dn_dbufs_count = 0; + odn->dn_bonus = NULL; + odn->dn_zfetch.zf_dnode = NULL; + + /* + * Set the low bit of the objset pointer to ensure that dnode_move() + * recognizes the dnode as invalid in any subsequent callback. + */ + POINTER_INVALIDATE(&odn->dn_objset); + + /* + * Satisfy the destructor. + */ + for (i = 0; i < TXG_SIZE; i++) { + list_create(&odn->dn_dirty_records[i], + sizeof (dbuf_dirty_record_t), + offsetof(dbuf_dirty_record_t, dr_dirty_node)); + odn->dn_ranges[i].avl_root = NULL; + odn->dn_ranges[i].avl_numnodes = 0; + odn->dn_next_nlevels[i] = 0; + odn->dn_next_indblkshift[i] = 0; + odn->dn_next_bonustype[i] = 0; + odn->dn_rm_spillblk[i] = 0; + odn->dn_next_bonuslen[i] = 0; + odn->dn_next_blksz[i] = 0; + } + odn->dn_allocated_txg = 0; + odn->dn_free_txg = 0; + odn->dn_assigned_txg = 0; + odn->dn_dirtyctx = 0; + odn->dn_dirtyctx_firstset = NULL; + odn->dn_have_spill = B_FALSE; + odn->dn_zio = NULL; + odn->dn_oldused = 0; + odn->dn_oldflags = 0; + odn->dn_olduid = 0; + odn->dn_oldgid = 0; + odn->dn_newuid = 0; + odn->dn_newgid = 0; + odn->dn_id_flags = 0; + + /* + * Mark the dnode. + */ + ndn->dn_moved = 1; + odn->dn_moved = (uint8_t)-1; +} + +#ifdef _KERNEL +/*ARGSUSED*/ +static kmem_cbrc_t +dnode_move(void *buf, void *newbuf, size_t size, void *arg) +{ + dnode_t *odn = buf, *ndn = newbuf; + objset_t *os; + int64_t refcount; + uint32_t dbufs; + + /* + * The dnode is on the objset's list of known dnodes if the objset + * pointer is valid. We set the low bit of the objset pointer when + * freeing the dnode to invalidate it, and the memory patterns written + * by kmem (baddcafe and deadbeef) set at least one of the two low bits. + * A newly created dnode sets the objset pointer last of all to indicate + * that the dnode is known and in a valid state to be moved by this + * function. 
+ */ + os = odn->dn_objset; + if (!POINTER_IS_VALID(os)) { + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_invalid); + return (KMEM_CBRC_DONT_KNOW); + } + + /* + * Ensure that the objset does not go away during the move. + */ + rw_enter(&os_lock, RW_WRITER); + if (os != odn->dn_objset) { + rw_exit(&os_lock); + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck1); + return (KMEM_CBRC_DONT_KNOW); + } + + /* + * If the dnode is still valid, then so is the objset. We know that no + * valid objset can be freed while we hold os_lock, so we can safely + * ensure that the objset remains in use. + */ + mutex_enter(&os->os_lock); + + /* + * Recheck the objset pointer in case the dnode was removed just before + * acquiring the lock. + */ + if (os != odn->dn_objset) { + mutex_exit(&os->os_lock); + rw_exit(&os_lock); + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck2); + return (KMEM_CBRC_DONT_KNOW); + } + + /* + * At this point we know that as long as we hold os->os_lock, the dnode + * cannot be freed and fields within the dnode can be safely accessed. + * The objset listing this dnode cannot go away as long as this dnode is + * on its list. + */ + rw_exit(&os_lock); + if (DMU_OBJECT_IS_SPECIAL(odn->dn_object)) { + mutex_exit(&os->os_lock); + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_special); + return (KMEM_CBRC_NO); + } + ASSERT(odn->dn_dbuf != NULL); /* only "special" dnodes have no parent */ + + /* + * Lock the dnode handle to prevent the dnode from obtaining any new + * holds. This also prevents the descendant dbufs and the bonus dbuf + * from accessing the dnode, so that we can discount their holds. The + * handle is safe to access because we know that while the dnode cannot + * go away, neither can its handle. Once we hold dnh_zrlock, we can + * safely move any dnode referenced only by dbufs. 
+ */ + if (!zrl_tryenter(&odn->dn_handle->dnh_zrlock)) { + mutex_exit(&os->os_lock); + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_handle); + return (KMEM_CBRC_LATER); + } + + /* + * Ensure a consistent view of the dnode's holds and the dnode's dbufs. + * We need to guarantee that there is a hold for every dbuf in order to + * determine whether the dnode is actively referenced. Falsely matching + * a dbuf to an active hold would lead to an unsafe move. It's possible + * that a thread already having an active dnode hold is about to add a + * dbuf, and we can't compare hold and dbuf counts while the add is in + * progress. + */ + if (!rw_tryenter(&odn->dn_struct_rwlock, RW_WRITER)) { + zrl_exit(&odn->dn_handle->dnh_zrlock); + mutex_exit(&os->os_lock); + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_rwlock); + return (KMEM_CBRC_LATER); + } + + /* + * A dbuf may be removed (evicted) without an active dnode hold. In that + * case, the dbuf count is decremented under the handle lock before the + * dbuf's hold is released. This order ensures that if we count the hold + * after the dbuf is removed but before its hold is released, we will + * treat the unmatched hold as active and exit safely. If we count the + * hold before the dbuf is removed, the hold is discounted, and the + * removal is blocked until the move completes. + */ + refcount = refcount_count(&odn->dn_holds); + ASSERT(refcount >= 0); + dbufs = odn->dn_dbufs_count; + + /* We can't have more dbufs than dnode holds. */ + ASSERT3U(dbufs, <=, refcount); + DTRACE_PROBE3(dnode__move, dnode_t *, odn, int64_t, refcount, + uint32_t, dbufs); + + if (refcount > dbufs) { + rw_exit(&odn->dn_struct_rwlock); + zrl_exit(&odn->dn_handle->dnh_zrlock); + mutex_exit(&os->os_lock); + DNODE_STAT_ADD(dnode_move_stats.dms_dnode_active); + return (KMEM_CBRC_LATER); + } + + rw_exit(&odn->dn_struct_rwlock); + + /* + * At this point we know that anyone with a hold on the dnode is not + * actively referencing it. 
The dnode is known and in a valid state to + * move. We're holding the locks needed to execute the critical section. + */ + dnode_move_impl(odn, ndn); + + list_link_replace(&odn->dn_link, &ndn->dn_link); + /* If the dnode was safe to move, the refcount cannot have changed. */ + ASSERT(refcount == refcount_count(&ndn->dn_holds)); + ASSERT(dbufs == ndn->dn_dbufs_count); + zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */ + mutex_exit(&os->os_lock); + + return (KMEM_CBRC_YES); +} +#endif /* _KERNEL */ +/* + * Tear down the dnode referenced by the handle dnh: poll with delay(1) + * until dn_holds drops to zero, destroy the dnode and the handle lock, + * then clear dnh_dnode. + */ +void +dnode_special_close(dnode_handle_t *dnh) +{ + dnode_t *dn = dnh->dnh_dnode; + + /* + * Wait for final references to the dnode to clear. This can + * only happen if the arc is asynchronously evicting state that + * has a hold on this dnode while we are trying to evict this + * dnode. + */ + while (refcount_count(&dn->dn_holds) > 0) + delay(1); + zrl_add(&dnh->dnh_zrlock); + dnode_destroy(dn); /* implicit zrl_remove() */ + zrl_destroy(&dnh->dnh_zrlock); + dnh->dnh_dnode = NULL; +} +/* + * Create a dnode for the given object with dnode_create(), passing NULL + * where dnode_hold_impl() passes the containing dbuf. The new dnode is + * recorded in dnh, the handle lock is initialized, and the dnode is + * returned to the caller. + */ +dnode_t * +dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object, + dnode_handle_t *dnh) +{ + dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh); + dnh->dnh_dnode = dn; + zrl_init(&dnh->dnh_zrlock); + DNODE_VERIFY(dn); + return (dn); +} + +static void +dnode_buf_pageout(dmu_buf_t *db, void *arg) +{ + dnode_children_t *children_dnodes = arg; + int i; + int epb = db->db_size >> DNODE_SHIFT; + + ASSERT(epb == children_dnodes->dnc_count); + + for (i = 0; i < epb; i++) { + dnode_handle_t *dnh = &children_dnodes->dnc_children[i]; + dnode_t *dn; + + /* + * The dnode handle lock guards against the dnode moving to + * another valid address, so there is no need here to guard + * against changes to or from NULL.
+ */ + if (dnh->dnh_dnode == NULL) { + zrl_destroy(&dnh->dnh_zrlock); + continue; + } + + zrl_add(&dnh->dnh_zrlock); + dn = dnh->dnh_dnode; + /* + * If there are holds on this dnode, then there should + * be holds on the dnode's containing dbuf as well; thus + * it wouldn't be eligible for eviction and this function + * would not have been called. + */ + ASSERT(refcount_is_zero(&dn->dn_holds)); + ASSERT(refcount_is_zero(&dn->dn_tx_holds)); + + dnode_destroy(dn); /* implicit zrl_remove() */ + zrl_destroy(&dnh->dnh_zrlock); + dnh->dnh_dnode = NULL; + } + kmem_free(children_dnodes, sizeof (dnode_children_t) + + (epb - 1) * sizeof (dnode_handle_t)); +} + +/* + * errors: + * EINVAL - invalid object number. + * EIO - i/o error. + * succeeds even for free dnodes. + */ +int +dnode_hold_impl(objset_t *os, uint64_t object, int flag, + void *tag, dnode_t **dnp) +{ + int epb, idx, err; + int drop_struct_lock = FALSE; + int type; + uint64_t blk; + dnode_t *mdn, *dn; + dmu_buf_impl_t *db; + dnode_children_t *children_dnodes; + dnode_handle_t *dnh; + + /* + * If you are holding the spa config lock as writer, you shouldn't + * be asking the DMU to do *anything* unless it's the root pool + * which may require us to read from the root filesystem while + * holding some (not all) of the locks as writer. + */ + ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 || + (spa_is_root(os->os_spa) && + spa_config_held(os->os_spa, SCL_STATE, RW_WRITER))); + + if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) { + dn = (object == DMU_USERUSED_OBJECT) ? 
+ DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os); + if (dn == NULL) + return (ENOENT); + type = dn->dn_type; + if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) + return (ENOENT); + if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) + return (EEXIST); + DNODE_VERIFY(dn); + (void) refcount_add(&dn->dn_holds, tag); + *dnp = dn; + return (0); + } + + if (object == 0 || object >= DN_MAX_OBJECT) + return (EINVAL); + + mdn = DMU_META_DNODE(os); + ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT); + + DNODE_VERIFY(mdn); + + if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) { + rw_enter(&mdn->dn_struct_rwlock, RW_READER); + drop_struct_lock = TRUE; + } + + blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t)); + + db = dbuf_hold(mdn, blk, FTAG); + if (drop_struct_lock) + rw_exit(&mdn->dn_struct_rwlock); + if (db == NULL) + return (EIO); + err = dbuf_read(db, NULL, DB_RF_CANFAIL); + if (err) { + dbuf_rele(db, FTAG); + return (err); + } + + ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT); + epb = db->db.db_size >> DNODE_SHIFT; + + idx = object & (epb-1); + + ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE); + children_dnodes = dmu_buf_get_user(&db->db); + if (children_dnodes == NULL) { + int i; + dnode_children_t *winner; + children_dnodes = kmem_alloc(sizeof (dnode_children_t) + + (epb - 1) * sizeof (dnode_handle_t), KM_SLEEP); + children_dnodes->dnc_count = epb; + dnh = &children_dnodes->dnc_children[0]; + for (i = 0; i < epb; i++) { + zrl_init(&dnh[i].dnh_zrlock); + dnh[i].dnh_dnode = NULL; + } + if (winner = dmu_buf_set_user(&db->db, children_dnodes, NULL, + dnode_buf_pageout)) { + kmem_free(children_dnodes, sizeof (dnode_children_t) + + (epb - 1) * sizeof (dnode_handle_t)); + children_dnodes = winner; + } + } + ASSERT(children_dnodes->dnc_count == epb); + + dnh = &children_dnodes->dnc_children[idx]; + zrl_add(&dnh->dnh_zrlock); + if ((dn = dnh->dnh_dnode) == NULL) { + dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx; + dnode_t *winner; + + dn = 
dnode_create(os, phys, db, object, dnh); + winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn); + if (winner != NULL) { + zrl_add(&dnh->dnh_zrlock); + dnode_destroy(dn); /* implicit zrl_remove() */ + dn = winner; + } + } + + mutex_enter(&dn->dn_mtx); + type = dn->dn_type; + if (dn->dn_free_txg || + ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) || + ((flag & DNODE_MUST_BE_FREE) && + (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) { + mutex_exit(&dn->dn_mtx); + zrl_remove(&dnh->dnh_zrlock); + dbuf_rele(db, FTAG); + return (type == DMU_OT_NONE ? ENOENT : EEXIST); + } + mutex_exit(&dn->dn_mtx); + + if (refcount_add(&dn->dn_holds, tag) == 1) + dbuf_add_ref(db, dnh); + /* Now we can rely on the hold to prevent the dnode from moving. */ + zrl_remove(&dnh->dnh_zrlock); + + DNODE_VERIFY(dn); + ASSERT3P(dn->dn_dbuf, ==, db); + ASSERT3U(dn->dn_object, ==, object); + dbuf_rele(db, FTAG); + + *dnp = dn; + return (0); +} + +/* + * Hold the dnode of an allocated object. Returns 0 and stores the held + * dnode in *dnp on success. Otherwise returns the errno reported by + * dnode_hold_impl() with DNODE_MUST_BE_ALLOCATED (for example ENOENT + * when the object type is DMU_OT_NONE) and does not assign *dnp. + */ +int +dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp) +{ + return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp)); +} + +/* + * Add a hold for tag, but only if there is already at least one + * hold on the dnode. The check and the add are both done under + * dn_mtx. Returns TRUE if the hold was added, or FALSE if the + * dnode had no holds and nothing was changed. + */ +boolean_t +dnode_add_ref(dnode_t *dn, void *tag) +{ + mutex_enter(&dn->dn_mtx); + if (refcount_is_zero(&dn->dn_holds)) { + mutex_exit(&dn->dn_mtx); + return (FALSE); + } + VERIFY(1 < refcount_add(&dn->dn_holds, tag)); + mutex_exit(&dn->dn_mtx); + return (TRUE); +} + +void +dnode_rele(dnode_t *dn, void *tag) +{ + uint64_t refs; + /* Get while the hold prevents the dnode from moving.
*/ + dmu_buf_impl_t *db = dn->dn_dbuf; + dnode_handle_t *dnh = dn->dn_handle; + + mutex_enter(&dn->dn_mtx); + refs = refcount_remove(&dn->dn_holds, tag); + mutex_exit(&dn->dn_mtx); + + /* + * It's unsafe to release the last hold on a dnode by dnode_rele() or + * indirectly by dbuf_rele() while relying on the dnode handle to + * prevent the dnode from moving, since releasing the last hold could + * result in the dnode's parent dbuf evicting its dnode handles. For + * that reason anyone calling dnode_rele() or dbuf_rele() without some + * other direct or indirect hold on the dnode must first drop the dnode + * handle. + */ + ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread); + + /* NOTE: the DNODE_DNODE does not have a dn_dbuf */ + if (refs == 0 && db != NULL) { + /* + * Another thread could add a hold to the dnode handle in + * dnode_hold_impl() while holding the parent dbuf. Since the + * hold on the parent dbuf prevents the handle from being + * destroyed, the hold on the handle is OK. We can't yet assert + * that the handle has zero references, but that will be + * asserted anyway when the handle gets destroyed. + */ + dbuf_rele(db, dnh); + } +} + +void +dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) +{ + objset_t *os = dn->dn_objset; + uint64_t txg = tx->tx_txg; + + if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { + dsl_dataset_dirty(os->os_dsl_dataset, tx); + return; + } + + DNODE_VERIFY(dn); + +#ifdef ZFS_DEBUG + mutex_enter(&dn->dn_mtx); + ASSERT(dn->dn_phys->dn_type || dn->dn_allocated_txg); + ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= txg); + mutex_exit(&dn->dn_mtx); +#endif + + /* + * Determine old uid/gid when necessary + */ + dmu_objset_userquota_get_ids(dn, B_TRUE, tx); + + mutex_enter(&os->os_lock); + + /* + * If we are already marked dirty, we're done. 
+ */ + if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { + mutex_exit(&os->os_lock); + return; + } + + ASSERT(!refcount_is_zero(&dn->dn_holds) || list_head(&dn->dn_dbufs)); + ASSERT(dn->dn_datablksz != 0); + ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0); + ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0); + ASSERT3U(dn->dn_next_bonustype[txg&TXG_MASK], ==, 0); + + dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n", + dn->dn_object, txg); + + if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) { + list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn); + } else { + list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn); + } + + mutex_exit(&os->os_lock); + + /* + * The dnode maintains a hold on its containing dbuf as + * long as there are holds on it. Each instantiated child + * dbuf maintains a hold on the dnode. When the last child + * drops its hold, the dnode will drop its hold on the + * containing dbuf. We add a "dirty hold" here so that the + * dnode will hang around after we finish processing its + * children. + */ + VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); + + (void) dbuf_dirty(dn->dn_dbuf, tx); + + dsl_dataset_dirty(os->os_dsl_dataset, tx); +} + +void +dnode_free(dnode_t *dn, dmu_tx_t *tx) +{ + int txgoff = tx->tx_txg & TXG_MASK; + + dprintf("dn=%p txg=%llu\n", dn, tx->tx_txg); + + /* we should be the only holder... hopefully */ + /* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */ + + mutex_enter(&dn->dn_mtx); + if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) { + mutex_exit(&dn->dn_mtx); + return; + } + dn->dn_free_txg = tx->tx_txg; + mutex_exit(&dn->dn_mtx); + + /* + * If the dnode is already dirty, it needs to be moved from + * the dirty list to the free list. 
+ */ + mutex_enter(&dn->dn_objset->os_lock); + if (list_link_active(&dn->dn_dirty_link[txgoff])) { + list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn); + list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn); + mutex_exit(&dn->dn_objset->os_lock); + } else { + mutex_exit(&dn->dn_objset->os_lock); + dnode_setdirty(dn, tx); + } +} + +/* + * Try to change the block size for the indicated dnode. This can only + * succeed if there are no blocks allocated or dirty beyond first block + */ +int +dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db, *db_next; + int err; + + if (size == 0) + size = SPA_MINBLOCKSIZE; + if (size > SPA_MAXBLOCKSIZE) + size = SPA_MAXBLOCKSIZE; + else + size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); + + if (ibs == dn->dn_indblkshift) + ibs = 0; + + if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0) + return (0); + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + + /* Check for any allocated blocks beyond the first */ + if (dn->dn_phys->dn_maxblkid != 0) + goto fail; + + mutex_enter(&dn->dn_dbufs_mtx); + for (db = list_head(&dn->dn_dbufs); db; db = db_next) { + db_next = list_next(&dn->dn_dbufs, db); + + if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID && + db->db_blkid != DMU_SPILL_BLKID) { + mutex_exit(&dn->dn_dbufs_mtx); + goto fail; + } + } + mutex_exit(&dn->dn_dbufs_mtx); + + if (ibs && dn->dn_nlevels != 1) + goto fail; + + /* resize the old block */ + err = dbuf_hold_impl(dn, 0, 0, TRUE, FTAG, &db); + if (err == 0) + dbuf_new_size(db, size, tx); + else if (err != ENOENT) + goto fail; + + dnode_setdblksz(dn, size); + dnode_setdirty(dn, tx); + dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; + if (ibs) { + dn->dn_indblkshift = ibs; + dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; + } + /* rele after we have fixed the blocksize in the dnode */ + if (db) + dbuf_rele(db, FTAG); + + rw_exit(&dn->dn_struct_rwlock); + return (0); + +fail: + rw_exit(&dn->dn_struct_rwlock); + 
return (ENOTSUP); +} + +/* read-holding callers must not rely on the lock being continuously held */ +void +dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) +{ + uint64_t txgoff = tx->tx_txg & TXG_MASK; + int epbs, new_nlevels; + uint64_t sz; + + ASSERT(blkid != DMU_BONUS_BLKID); + + ASSERT(have_read ? + RW_READ_HELD(&dn->dn_struct_rwlock) : + RW_WRITE_HELD(&dn->dn_struct_rwlock)); + + /* + * if we have a read-lock, check to see if we need to do any work + * before upgrading to a write-lock. + */ + if (have_read) { + if (blkid <= dn->dn_maxblkid) + return; + + if (!rw_tryupgrade(&dn->dn_struct_rwlock)) { + rw_exit(&dn->dn_struct_rwlock); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + } + } + + if (blkid <= dn->dn_maxblkid) + goto out; + + dn->dn_maxblkid = blkid; + + /* + * Compute the number of levels necessary to support the new maxblkid. + */ + new_nlevels = 1; + epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + for (sz = dn->dn_nblkptr; + sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs) + new_nlevels++; + + if (new_nlevels > dn->dn_nlevels) { + int old_nlevels = dn->dn_nlevels; + dmu_buf_impl_t *db; + list_t *list; + dbuf_dirty_record_t *new, *dr, *dr_next; + + dn->dn_nlevels = new_nlevels; + + ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); + dn->dn_next_nlevels[txgoff] = new_nlevels; + + /* dirty the left indirects */ + db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); + ASSERT(db != NULL); + new = dbuf_dirty(db, tx); + dbuf_rele(db, FTAG); + + /* transfer the dirty records to the new indirect */ + mutex_enter(&dn->dn_mtx); + mutex_enter(&new->dt.di.dr_mtx); + list = &dn->dn_dirty_records[txgoff]; + for (dr = list_head(list); dr; dr = dr_next) { + dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); + if (dr->dr_dbuf->db_level != new_nlevels-1 && + dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID && + dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { + ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); + 
list_remove(&dn->dn_dirty_records[txgoff], dr); + list_insert_tail(&new->dt.di.dr_children, dr); + dr->dr_parent = new; + } + } + mutex_exit(&new->dt.di.dr_mtx); + mutex_exit(&dn->dn_mtx); + } + +out: + if (have_read) + rw_downgrade(&dn->dn_struct_rwlock); +} +/* + * Remove the block range [blkid, blkid + nblks) from the free-range tree + * of the txg that tx belongs to (dn_ranges[tx_txg & TXG_MASK]). Recorded + * ranges that overlap it are deleted, trimmed at the front or the back, + * or split in two, depending on how they overlap. The caller must hold + * dn_mtx. + */ +void +dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) +{ + avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; + avl_index_t where; + free_range_t *rp; + free_range_t rp_tofind; + uint64_t endblk = blkid + nblks; + + ASSERT(MUTEX_HELD(&dn->dn_mtx)); + ASSERT(nblks <= UINT64_MAX - blkid); /* no overflow */ + + dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", + blkid, nblks, tx->tx_txg); + rp_tofind.fr_blkid = blkid; + rp = avl_find(tree, &rp_tofind, &where); + if (rp == NULL) + rp = avl_nearest(tree, where, AVL_BEFORE); + if (rp == NULL) + rp = avl_nearest(tree, where, AVL_AFTER); + + while (rp && (rp->fr_blkid <= blkid + nblks)) { + uint64_t fr_endblk = rp->fr_blkid + rp->fr_nblks; + free_range_t *nrp = AVL_NEXT(tree, rp); + + if (blkid <= rp->fr_blkid && endblk >= fr_endblk) { + /* clear this entire range */ + avl_remove(tree, rp); + kmem_free(rp, sizeof (free_range_t)); + } else if (blkid <= rp->fr_blkid && + endblk > rp->fr_blkid && endblk < fr_endblk) { + /* clear the beginning of this range */ + rp->fr_blkid = endblk; + rp->fr_nblks = fr_endblk - endblk; + } else if (blkid > rp->fr_blkid && blkid < fr_endblk && + endblk >= fr_endblk) { + /* clear the end of this range */ + rp->fr_nblks = blkid - rp->fr_blkid; + } else if (blkid > rp->fr_blkid && endblk < fr_endblk) { + /* clear a chunk out of this range */ + free_range_t *new_rp = + kmem_alloc(sizeof (free_range_t), KM_SLEEP); + + new_rp->fr_blkid = endblk; + new_rp->fr_nblks = fr_endblk - endblk; + avl_insert_here(tree, new_rp, rp, AVL_AFTER); + rp->fr_nblks = blkid - rp->fr_blkid; + } + /* there may be no overlap */ + rp = nrp; + } +} + +void +dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t
*tx) +{ + dmu_buf_impl_t *db; + uint64_t blkoff, blkid, nblks; + int blksz, blkshift, head, tail; + int trunc = FALSE; + int epbs; + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + blksz = dn->dn_datablksz; + blkshift = dn->dn_datablkshift; + epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + + if (len == -1ULL) { + len = UINT64_MAX - off; + trunc = TRUE; + } + + /* + * First, block align the region to free: + */ + if (ISP2(blksz)) { + head = P2NPHASE(off, blksz); + blkoff = P2PHASE(off, blksz); + if ((off >> blkshift) > dn->dn_maxblkid) + goto out; + } else { + ASSERT(dn->dn_maxblkid == 0); + if (off == 0 && len >= blksz) { + /* Freeing the whole block; fast-track this request */ + blkid = 0; + nblks = 1; + goto done; + } else if (off >= blksz) { + /* Freeing past end-of-data */ + goto out; + } else { + /* Freeing part of the block. */ + head = blksz - off; + ASSERT3U(head, >, 0); + } + blkoff = off; + } + /* zero out any partial block data at the start of the range */ + if (head) { + ASSERT3U(blkoff + head, ==, blksz); + if (len < head) + head = len; + if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off), TRUE, + FTAG, &db) == 0) { + caddr_t data; + + /* don't dirty if it isn't on disk and isn't dirty */ + if (db->db_last_dirty || + (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { + rw_exit(&dn->dn_struct_rwlock); + dbuf_will_dirty(db, tx); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + data = db->db.db_data; + bzero(data + blkoff, head); + } + dbuf_rele(db, FTAG); + } + off += head; + len -= head; + } + + /* If the range was less than one block, we're done */ + if (len == 0) + goto out; + + /* If the remaining range is past end of file, we're done */ + if ((off >> blkshift) > dn->dn_maxblkid) + goto out; + + ASSERT(ISP2(blksz)); + if (trunc) + tail = 0; + else + tail = P2PHASE(len, blksz); + + ASSERT3U(P2PHASE(off, blksz), ==, 0); + /* zero out any partial block data at the end of the range */ + if (tail) { + if (len < tail) + tail = len; + if (dbuf_hold_impl(dn, 0, 
dbuf_whichblock(dn, off+len), + TRUE, FTAG, &db) == 0) { + /* don't dirty if not on disk and not dirty */ + if (db->db_last_dirty || + (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { + rw_exit(&dn->dn_struct_rwlock); + dbuf_will_dirty(db, tx); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + bzero(db->db.db_data, tail); + } + dbuf_rele(db, FTAG); + } + len -= tail; + } + + /* If the range did not include a full block, we are done */ + if (len == 0) + goto out; + + ASSERT(IS_P2ALIGNED(off, blksz)); + ASSERT(trunc || IS_P2ALIGNED(len, blksz)); + blkid = off >> blkshift; + nblks = len >> blkshift; + if (trunc) + nblks += 1; + + /* + * Read in and mark all the level-1 indirects dirty, + * so that they will stay in memory until syncing phase. + * Always dirty the first and last indirect to make sure + * we dirty all the partial indirects. + */ + if (dn->dn_nlevels > 1) { + uint64_t i, first, last; + int shift = epbs + dn->dn_datablkshift; + + first = blkid >> epbs; + if (db = dbuf_hold_level(dn, 1, first, FTAG)) { + dbuf_will_dirty(db, tx); + dbuf_rele(db, FTAG); + } + if (trunc) + last = dn->dn_maxblkid >> epbs; + else + last = (blkid + nblks - 1) >> epbs; + if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) { + dbuf_will_dirty(db, tx); + dbuf_rele(db, FTAG); + } + for (i = first + 1; i < last; i++) { + uint64_t ibyte = i << shift; + int err; + + err = dnode_next_offset(dn, + DNODE_FIND_HAVELOCK, &ibyte, 1, 1, 0); + i = ibyte >> shift; + if (err == ESRCH || i >= last) + break; + ASSERT(err == 0); + db = dbuf_hold_level(dn, 1, i, FTAG); + if (db) { + dbuf_will_dirty(db, tx); + dbuf_rele(db, FTAG); + } + } + } +done: + /* + * Add this range to the dnode range list. + * We will finish up this free operation in the syncing phase. 
+ */ + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, blkid, nblks, tx); + { + free_range_t *rp, *found; + avl_index_t where; + avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; + + /* Add new range to dn_ranges */ + rp = kmem_alloc(sizeof (free_range_t), KM_SLEEP); + rp->fr_blkid = blkid; + rp->fr_nblks = nblks; + found = avl_find(tree, rp, &where); + ASSERT(found == NULL); + avl_insert(tree, rp, where); + dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", + blkid, nblks, tx->tx_txg); + } + mutex_exit(&dn->dn_mtx); + + dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); + dnode_setdirty(dn, tx); +out: + if (trunc && dn->dn_maxblkid >= (off >> blkshift)) + dn->dn_maxblkid = (off >> blkshift ? (off >> blkshift) - 1 : 0); + + rw_exit(&dn->dn_struct_rwlock); +} +/* + * Return nonzero if any of the TXG_SIZE per-txg dn_rm_spillblk slots is + * set to DN_KILL_SPILLBLK, and zero otherwise. The slots are scanned + * while holding dn_mtx. + */ +static boolean_t +dnode_spill_freed(dnode_t *dn) +{ + int i; + + mutex_enter(&dn->dn_mtx); + for (i = 0; i < TXG_SIZE; i++) { + if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK) + break; + } + mutex_exit(&dn->dn_mtx); + return (i < TXG_SIZE); +} + +/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */ +uint64_t +dnode_block_freed(dnode_t *dn, uint64_t blkid) +{ + free_range_t range_tofind; + void *dp = spa_get_dsl(dn->dn_objset->os_spa); + int i; + + if (blkid == DMU_BONUS_BLKID) + return (FALSE); + + /* + * If we're in the process of opening the pool, dp will not be + * set yet, but there shouldn't be anything dirty.
+ */ + if (dp == NULL) + return (FALSE); + + if (dn->dn_free_txg) + return (TRUE); + + if (blkid == DMU_SPILL_BLKID) + return (dnode_spill_freed(dn)); + + range_tofind.fr_blkid = blkid; + mutex_enter(&dn->dn_mtx); + for (i = 0; i < TXG_SIZE; i++) { + free_range_t *range_found; + avl_index_t idx; + + range_found = avl_find(&dn->dn_ranges[i], &range_tofind, &idx); + if (range_found) { + ASSERT(range_found->fr_nblks > 0); + break; + } + range_found = avl_nearest(&dn->dn_ranges[i], idx, AVL_BEFORE); + if (range_found && + range_found->fr_blkid + range_found->fr_nblks > blkid) + break; + } + mutex_exit(&dn->dn_mtx); + return (i < TXG_SIZE); +} + +/* call from syncing context when we actually write/free space for this dnode */ +void +dnode_diduse_space(dnode_t *dn, int64_t delta) +{ + uint64_t space; + dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n", + dn, dn->dn_phys, + (u_longlong_t)dn->dn_phys->dn_used, + (longlong_t)delta); + + mutex_enter(&dn->dn_mtx); + space = DN_USED_BYTES(dn->dn_phys); + if (delta > 0) { + ASSERT3U(space + delta, >=, space); /* no overflow */ + } else { + ASSERT3U(space, >=, -delta); /* no underflow */ + } + space += delta; + if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) { + ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); + ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0); + dn->dn_phys->dn_used = space >> DEV_BSHIFT; + } else { + dn->dn_phys->dn_used = space; + dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES; + } + mutex_exit(&dn->dn_mtx); +} + +/* + * Call when we think we're going to write/free space in open context. + * Be conservative (ie. OK to write less than this or free more than + * this, but don't write more or free less). 
 *
 * A positive 'space' is first converted with spa_get_asize().  The result
 * is charged to the dataset's dsl_dir (when the objset has a dataset) and
 * to the transaction itself.
 */
void
dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
{
	objset_t *os = dn->dn_objset;
	dsl_dataset_t *ds = os->os_dsl_dataset;

	if (space > 0)
		space = spa_get_asize(os->os_spa, space);

	if (ds)
		dsl_dir_willuse_space(ds->ds_dir, space, tx);

	dmu_tx_willuse_space(tx, space);
}

/*
 * This function scans a block at the indicated "level" looking for
 * a hole or data (depending on 'flags'). If level > 0, then we are
 * scanning an indirect block looking at its pointers. If level == 0,
 * then we are looking at a block of dnodes. If we don't find what we
 * are looking for in the block, we return ESRCH. Otherwise, return
 * with *offset pointing to the beginning (if searching forwards) or
 * end (if searching backwards) of the range covered by the block
 * pointer we matched on (or dnode).
 *
 * The basic search algorithm used below by dnode_next_offset() is to
 * use this function to search up the block tree (widen the search) until
 * we find something (i.e., we don't return ESRCH) and then search back
 * down the tree (narrow the search) until we reach our original search
 * level.
 *
 * Errors other than ENOENT from dbuf_hold_impl(), and any error from
 * dbuf_read(), are returned to the caller unchanged.
 */
static int
dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
	int lvl, uint64_t blkfill, uint64_t txg)
{
	dmu_buf_impl_t *db = NULL;
	void *data = NULL;
	uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	uint64_t epb = 1ULL << epbs;
	uint64_t minfill, maxfill;
	boolean_t hole;
	int i, inc, error, span;

	dprintf("probing object %llu offset %llx level %d of %u\n",
	    dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);

	hole = ((flags & DNODE_FIND_HOLE) != 0);
	inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
	ASSERT(txg == 0 || !hole);

	if (lvl == dn->dn_phys->dn_nlevels) {
		/* top of the tree: scan the block pointers in the dnode */
		error = 0;
		epb = dn->dn_phys->dn_nblkptr;
		data = dn->dn_phys->dn_blkptr;
	} else {
		uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl);
		error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db);
		if (error) {
			if (error != ENOENT)
				return (error);
			if (hole)
				return (0);
			/*
			 * This can only happen when we are searching up
			 * the block tree for data. We don't really need to
			 * adjust the offset, as we will just end up looking
			 * at the pointer to this block in its parent, and it's
			 * going to be unallocated, so we will skip over it.
			 */
			return (ESRCH);
		}
		error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
		if (error) {
			dbuf_rele(db, FTAG);
			return (error);
		}
		data = db->db.db_data;
	}

	if (db && txg &&
	    (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) {
		/*
		 * This can only happen when we are searching up the tree
		 * and these conditions mean that we need to keep climbing.
		 */
		error = ESRCH;
	} else if (lvl == 0) {
		dnode_phys_t *dnp = data;
		span = DNODE_SHIFT;
		ASSERT(dn->dn_type == DMU_OT_DNODE);

		/*
		 * i indexes the dnodes in this block; *offset moves by one
		 * dnode (1 << DNODE_SHIFT bytes) for every slot skipped.
		 */
		for (i = (*offset >> span) & (blkfill - 1);
		    i >= 0 && i < blkfill; i += inc) {
			if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
				break;
			*offset += (1ULL << span) * inc;
		}
		if (i < 0 || i == blkfill)
			error = ESRCH;
	} else {
		blkptr_t *bp = data;
		uint64_t start = *offset;
		span = (lvl - 1) * epbs + dn->dn_datablkshift;
		minfill = 0;
		maxfill = blkfill << ((lvl - 1) * epbs);

		/*
		 * A hole search accepts any bp that is less than completely
		 * full; a data search accepts any bp with at least one item.
		 */
		if (hole)
			maxfill--;
		else
			minfill++;

		/* work in units of one block pointer's span while scanning */
		*offset = *offset >> span;
		for (i = BF64_GET(*offset, 0, epbs);
		    i >= 0 && i < epb; i += inc) {
			if (bp[i].blk_fill >= minfill &&
			    bp[i].blk_fill <= maxfill &&
			    (hole || bp[i].blk_birth > txg))
				break;
			if (inc > 0 || *offset > 0)
				*offset += inc;
		}
		*offset = *offset << span;
		if (inc < 0) {
			/* traversing backwards; position offset at the end */
			ASSERT3U(*offset, <=, start);
			*offset = MIN(*offset + (1ULL << span) - 1, start);
		} else if (*offset < start) {
			/* never report a position before where we started */
			*offset = start;
		}
		if (i < 0 || i >= epb)
			error = ESRCH;
	}

	if (db)
		dbuf_rele(db, FTAG);

	return (error);
}

/*
 * Find the next hole, data, or sparse region at or after *offset.
 * The value 'blkfill' tells us how many items we expect to find
 * in an L0 data block; this value is 1 for normal objects,
 * DNODES_PER_BLOCK for the meta dnode, and some fraction of
 * DNODES_PER_BLOCK when searching for sparse regions thereof.
 *
 * Examples:
 *
 *	dnode_next_offset(dn, flags, offset, 1, 1, 0);
 *	Finds the next/previous hole/data in a file.
 *	Used in dmu_offset_next().
 *
 *	dnode_next_offset(mdn, flags, offset, 0, DNODES_PER_BLOCK, txg);
 *	Finds the next free/allocated dnode in an objset's meta-dnode.
 *	Only finds objects that have new contents since txg (ie.
 *	bonus buffer changes and content removal are ignored).
 *	Used in dmu_object_next().
 *
 *	dnode_next_offset(mdn, DNODE_FIND_HOLE, offset, 2, DNODES_PER_BLOCK >> 2, 0);
 *	Finds the next L2 meta-dnode bp that's at most 1/4 full.
 *	Used in dmu_object_alloc().
 *
 * Returns 0 with *offset updated on success, ESRCH when nothing matching
 * was found, or another error passed up from dnode_next_offset_level().
 * Unless DNODE_FIND_HAVELOCK is set in 'flags', dn_struct_rwlock is taken
 * as reader for the duration of the search.
 */
int
dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
    int minlvl, uint64_t blkfill, uint64_t txg)
{
	uint64_t initial_offset = *offset;
	int lvl, maxlvl;
	int error = 0;

	if (!(flags & DNODE_FIND_HAVELOCK))
		rw_enter(&dn->dn_struct_rwlock, RW_READER);

	if (dn->dn_phys->dn_nlevels == 0) {
		error = ESRCH;
		goto out;
	}

	/*
	 * NOTE(review): dn_datablkshift == 0 presumably means the block size
	 * is not a power of two, i.e. the object has a single block --
	 * confirm.  Data exists only below dn_datablksz; the first hole
	 * starts at dn_datablksz.
	 */
	if (dn->dn_datablkshift == 0) {
		if (*offset < dn->dn_datablksz) {
			if (flags & DNODE_FIND_HOLE)
				*offset = dn->dn_datablksz;
		} else {
			error = ESRCH;
		}
		goto out;
	}

	maxlvl = dn->dn_phys->dn_nlevels;

	/* widen: climb the tree until some level reports something */
	for (lvl = minlvl; lvl <= maxlvl; lvl++) {
		error = dnode_next_offset_level(dn,
		    flags, offset, lvl, blkfill, txg);
		if (error != ESRCH)
			break;
	}

	/* narrow: walk back down to the level the caller asked about */
	while (error == 0 && --lvl >= minlvl) {
		error = dnode_next_offset_level(dn,
		    flags, offset, lvl, blkfill, txg);
	}

	/* a result on the wrong side of the starting offset is no result */
	if (error == 0 && (flags & DNODE_FIND_BACKWARDS ?
	    initial_offset < *offset : initial_offset > *offset))
		error = ESRCH;
out:
	if (!(flags & DNODE_FIND_HAVELOCK))
		rw_exit(&dn->dn_struct_rwlock);

	return (error);
}
diff --git a/uts/common/fs/zfs/dnode_sync.c b/uts/common/fs/zfs/dnode_sync.c
new file mode 100644
index 000000000000..2ee990a3b32c
--- /dev/null
+++ b/uts/common/fs/zfs/dnode_sync.c
@@ -0,0 +1,693 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>

/*
 * Grow the dnode's block tree to dn_next_nlevels[txgoff] levels.  The
 * block pointers currently stored in the dnode are copied into a newly
 * held indirect block, cached children of the old top level are
 * re-parented onto that block, and the dnode's own block pointers are
 * zeroed.  Runs with dn_struct_rwlock held as writer.
 */
static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db;
	int txgoff = tx->tx_txg & TXG_MASK;
	int nblkptr = dn->dn_phys->dn_nblkptr;
	int old_toplvl = dn->dn_phys->dn_nlevels - 1;
	int new_level = dn->dn_next_nlevels[txgoff];
	int i;

	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);

	/* this dnode can't be paged out because it's dirty */
	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
	ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);

	/* block 0 one level above the old top becomes the new indirect */
	db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
	ASSERT(db != NULL);

	dn->dn_phys->dn_nlevels = new_level;
	dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
	    dn->dn_object, dn->dn_phys->dn_nlevels);

	/* check for existing blkptrs in the dnode */
	for (i = 0; i < nblkptr; i++)
		if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
			break;
	if (i != nblkptr) {
		/* transfer dnode's block pointers to new indirect block */
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
		ASSERT(db->db.db_data);
		ASSERT(arc_released(db->db_buf));
		ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
		bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
		    sizeof (blkptr_t) * nblkptr);
		arc_buf_freeze(db->db_buf);
	}

	/* set dbuf's parent pointers to new indirect buf */
	for (i = 0; i < nblkptr; i++) {
		/*
		 * NOTE(review): dbuf_find() presumably returns the dbuf with
		 * db_mtx held, since every path below drops it -- confirm.
		 */
		dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i);

		if (child == NULL)
			continue;
#ifdef	DEBUG
		DB_DNODE_ENTER(child);
		ASSERT3P(DB_DNODE(child), ==, dn);
		DB_DNODE_EXIT(child);
#endif	/* DEBUG */
		/* already hangs off some other indirect: leave it alone */
		if (child->db_parent && child->db_parent != dn->dn_dbuf) {
			ASSERT(child->db_parent->db_level == db->db_level);
			ASSERT(child->db_blkptr !=
			    &dn->dn_phys->dn_blkptr[child->db_blkid]);
			mutex_exit(&child->db_mtx);
			continue;
		}
		ASSERT(child->db_parent == NULL ||
		    child->db_parent == dn->dn_dbuf);

		child->db_parent = db;
		dbuf_add_ref(db, child);
		if (db->db.db_data)
			child->db_blkptr = (blkptr_t *)db->db.db_data + i;
		else
			child->db_blkptr = NULL;
		dprintf_dbuf_bp(child, child->db_blkptr,
		    "changed db_blkptr to new indirect %s", "");

		mutex_exit(&child->db_mtx);
	}

	bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);

	dbuf_rele(db, FTAG);

	rw_exit(&dn->dn_struct_rwlock);
}

/*
 * Free up to 'num' block pointers starting at 'bp'.  Every pointer that is
 * not a hole is handed to dsl_dataset_block_kill() and then zeroed; the
 * bytes released are subtracted from the dnode's space usage.  Returns
 * the number of block pointers that were actually freed.
 */
static int
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	uint64_t bytesfreed = 0;
	int i, blocks_freed = 0;

	dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);

	for (i = 0; i < num; i++, bp++) {
		if (BP_IS_HOLE(bp))
			continue;

		bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
		ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
		bzero(bp, sizeof (blkptr_t));
		blocks_freed += 1;
	}
	dnode_diduse_space(dn, -bytesfreed);
	return (blocks_freed);
}

#ifdef ZFS_DEBUG
/*
 * Debug-only check used while freeing: for block pointers [start, end]
 * of the level-1 indirect 'db', verify that each cached level-0 child
 * holds only zeroes, both in its dirty record for this txg (if any) and
 * in its current data when it is not dirty or being filled.  Panics on
 * the first non-zero word.
 */
static void
free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
	int off, num;
	int i, err, epbs;
	uint64_t txg = tx->tx_txg;
	dnode_t *dn;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	/* index of 'start' relative to the first pointer in this block */
	off = start - (db->db_blkid * 1<<epbs);
	num = end - start + 1;

	ASSERT3U(off, >=, 0);
	ASSERT3U(num, >=, 0);
	ASSERT3U(db->db_level, >, 0);
	ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
	ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
	ASSERT(db->db_blkptr != NULL);

	for (i = off; i < off+num; i++) {
		uint64_t *buf;
		dmu_buf_impl_t *child;
		dbuf_dirty_record_t *dr;
		int j;

		ASSERT(db->db_level == 1);

		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, db->db_level-1,
		    (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
		rw_exit(&dn->dn_struct_rwlock);
		if (err == ENOENT)
			continue;
		ASSERT(err == 0);
		ASSERT(child->db_level == 0);
		/* skip dirty records that belong to later txgs */
		dr = child->db_last_dirty;
		while (dr && dr->dr_txg > txg)
			dr = dr->dr_next;
		ASSERT(dr == NULL || dr->dr_txg == txg);

		/* data_old better be zeroed */
		if (dr) {
			buf = dr->dt.dl.dr_data->b_data;
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}

		/*
		 * db_data better be zeroed unless it's dirty in a
		 * future txg.
		 */
		mutex_enter(&child->db_mtx);
		buf = child->db.db_data;
		if (buf != NULL && child->db_state != DB_FILL &&
		    child->db_last_dirty == NULL) {
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}
		mutex_exit(&child->db_mtx);

		dbuf_rele(child, FTAG);
	}
	DB_DNODE_EXIT(db);
}
#endif

/* returned by free_children() when the caller may free the whole block */
#define	ALL -1

/*
 * Free the part of the range [blkid, blkid + nblks) that lies beneath the
 * indirect block 'db', recursing through lower indirect levels.
 *
 * Returns ALL when the caller may free db's own block pointer as well:
 * the range covers this block from its first entry through its last (or
 * the range ends early but 'trunc' is set) and every child visited could
 * itself be freed entirely.  Otherwise returns the number of block
 * pointers freed.
 */
static int
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
    dmu_tx_t *tx)
{
	dnode_t *dn;
	blkptr_t *bp;
	dmu_buf_impl_t *subdb;
	uint64_t start, end, dbstart, dbend, i;
	int epbs, shift, err;
	int all = TRUE;
	int blocks_freed = 0;

	/*
	 * There is a small possibility that this block will not be cached:
	 *   1 - if level > 1 and there are no children with level <= 1
	 *   2 - if we didn't get a dirty hold (because this block had just
	 *	 finished being written -- and so had no holds), and then this
	 *	 block got evicted before we got here.
	 */
	if (db->db_state != DB_CACHED)
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);

	dbuf_release_bp(db);
	bp = (blkptr_t *)db->db.db_data;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	shift = (db->db_level - 1) * epbs;
	/* [dbstart, dbend]: child block ids covered by this indirect */
	dbstart = db->db_blkid << epbs;
	start = blkid >> shift;
	if (dbstart < start) {
		/* the range begins part-way into this block */
		bp += start - dbstart;
		all = FALSE;
	} else {
		start = dbstart;
	}
	dbend = ((db->db_blkid + 1) << epbs) - 1;
	end = (blkid + nblks - 1) >> shift;
	if (dbend <= end)
		end = dbend;
	else if (all)
		all = trunc;
	ASSERT3U(start, <=, end);

	if (db->db_level == 1) {
		/* bp entries point straight at data blocks */
		FREE_VERIFY(db, start, end, tx);
		blocks_freed = free_blocks(dn, bp, end-start+1, tx);
		arc_buf_freeze(db->db_buf);
		ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
		DB_DNODE_EXIT(db);
		return (all ? ALL : blocks_freed);
	}

	for (i = start; i <= end; i++, bp++) {
		if (BP_IS_HOLE(bp))
			continue;
		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
		ASSERT3U(err, ==, 0);
		rw_exit(&dn->dn_struct_rwlock);

		if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
			/* everything below is gone; free the child itself */
			ASSERT3P(subdb->db_blkptr, ==, bp);
			blocks_freed += free_blocks(dn, bp, 1, tx);
		} else {
			all = FALSE;
		}
		dbuf_rele(subdb, FTAG);
	}
	DB_DNODE_EXIT(db);
	arc_buf_freeze(db->db_buf);
#ifdef ZFS_DEBUG
	/* rewind bp to 'start' and check the interior pointers were freed */
	bp -= (end-start)+1;
	for (i = start; i <= end; i++, bp++) {
		if (i == start && blkid != 0)
			continue;
		else if (i == end && !trunc)
			continue;
		ASSERT3U(bp->blk_birth, ==, 0);
	}
#endif
	ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
	return (all ? ALL : blocks_freed);
}

/*
 * free_range: Traverse the indicated range of the provided file
 * and "free" all the blocks contained there.
 *
 * A range that starts beyond dn_maxblkid is ignored.  A range that
 * reaches past dn_maxblkid is treated as a truncation: it is clipped to
 * dn_maxblkid and, once the blocks are freed, dn_maxblkid is lowered to
 * the block before 'blkid' (or 0).
 */
static void
dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
{
	blkptr_t *bp = dn->dn_phys->dn_blkptr;
	dmu_buf_impl_t *db;
	int trunc, start, end, shift, i, err;
	int dnlevel = dn->dn_phys->dn_nlevels;

	if (blkid > dn->dn_phys->dn_maxblkid)
		return;

	ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
	trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
	if (trunc)
		nblks = dn->dn_phys->dn_maxblkid - blkid + 1;

	/* There are no indirect blocks in the object */
	if (dnlevel == 1) {
		if (blkid >= dn->dn_phys->dn_nblkptr) {
			/* this range was never made persistent */
			return;
		}
		ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
		(void) free_blocks(dn, bp + blkid, nblks, tx);
		if (trunc) {
			uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
			    (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
			dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
			ASSERT(off < dn->dn_phys->dn_maxblkid ||
			    dn->dn_phys->dn_maxblkid == 0 ||
			    dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
		}
		return;
	}

	/* each dnode block pointer covers (1 << shift) data blocks */
	shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
	start = blkid >> shift;
	ASSERT(start < dn->dn_phys->dn_nblkptr);
	end = (blkid + nblks - 1) >> shift;
	bp += start;
	for (i = start; i <= end; i++, bp++) {
		if (BP_IS_HOLE(bp))
			continue;
		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
		ASSERT3U(err, ==, 0);
		rw_exit(&dn->dn_struct_rwlock);

		if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
			ASSERT3P(db->db_blkptr, ==, bp);
			(void) free_blocks(dn, bp, 1, tx);
		}
		dbuf_rele(db, FTAG);
	}
	if (trunc) {
		uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
		    (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
		dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
		ASSERT(off < dn->dn_phys->dn_maxblkid ||
		    dn->dn_phys->dn_maxblkid == 0 ||
		    dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
	}
}

/*
 * Try to kick all the dnode's dbufs out of the cache...
+ */ +void +dnode_evict_dbufs(dnode_t *dn) +{ + int progress; + int pass = 0; + + do { + dmu_buf_impl_t *db, marker; + int evicting = FALSE; + + progress = FALSE; + mutex_enter(&dn->dn_dbufs_mtx); + list_insert_tail(&dn->dn_dbufs, &marker); + db = list_head(&dn->dn_dbufs); + for (; db != ▮ db = list_head(&dn->dn_dbufs)) { + list_remove(&dn->dn_dbufs, db); + list_insert_tail(&dn->dn_dbufs, db); +#ifdef DEBUG + DB_DNODE_ENTER(db); + ASSERT3P(DB_DNODE(db), ==, dn); + DB_DNODE_EXIT(db); +#endif /* DEBUG */ + + mutex_enter(&db->db_mtx); + if (db->db_state == DB_EVICTING) { + progress = TRUE; + evicting = TRUE; + mutex_exit(&db->db_mtx); + } else if (refcount_is_zero(&db->db_holds)) { + progress = TRUE; + dbuf_clear(db); /* exits db_mtx for us */ + } else { + mutex_exit(&db->db_mtx); + } + + } + list_remove(&dn->dn_dbufs, &marker); + /* + * NB: we need to drop dn_dbufs_mtx between passes so + * that any DB_EVICTING dbufs can make progress. + * Ideally, we would have some cv we could wait on, but + * since we don't, just wait a bit to give the other + * thread a chance to run. + */ + mutex_exit(&dn->dn_dbufs_mtx); + if (evicting) + delay(1); + pass++; + ASSERT(pass < 100); /* sanity check */ + } while (progress); + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { + mutex_enter(&dn->dn_bonus->db_mtx); + dbuf_evict(dn->dn_bonus); + dn->dn_bonus = NULL; + } + rw_exit(&dn->dn_struct_rwlock); +} + +static void +dnode_undirty_dbufs(list_t *list) +{ + dbuf_dirty_record_t *dr; + + while (dr = list_head(list)) { + dmu_buf_impl_t *db = dr->dr_dbuf; + uint64_t txg = dr->dr_txg; + + if (db->db_level != 0) + dnode_undirty_dbufs(&dr->dt.di.dr_children); + + mutex_enter(&db->db_mtx); + /* XXX - use dbuf_undirty()? 
*/ + list_remove(list, dr); + ASSERT(db->db_last_dirty == dr); + db->db_last_dirty = NULL; + db->db_dirtycnt -= 1; + if (db->db_level == 0) { + ASSERT(db->db_blkid == DMU_BONUS_BLKID || + dr->dt.dl.dr_data == db->db_buf); + dbuf_unoverride(dr); + } + kmem_free(dr, sizeof (dbuf_dirty_record_t)); + dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); + } +} + +static void +dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) +{ + int txgoff = tx->tx_txg & TXG_MASK; + + ASSERT(dmu_tx_is_syncing(tx)); + + /* + * Our contents should have been freed in dnode_sync() by the + * free range record inserted by the caller of dnode_free(). + */ + ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0); + ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr)); + + dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); + dnode_evict_dbufs(dn); + ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); + + /* + * XXX - It would be nice to assert this, but we may still + * have residual holds from async evictions from the arc... + * + * zfs_obj_to_path() also depends on this being + * commented out. + * + * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); + */ + + /* Undirty next bits */ + dn->dn_next_nlevels[txgoff] = 0; + dn->dn_next_indblkshift[txgoff] = 0; + dn->dn_next_blksz[txgoff] = 0; + + /* ASSERT(blkptrs are zero); */ + ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); + ASSERT(dn->dn_type != DMU_OT_NONE); + + ASSERT(dn->dn_free_txg > 0); + if (dn->dn_allocated_txg != dn->dn_free_txg) + dbuf_will_dirty(dn->dn_dbuf, tx); + bzero(dn->dn_phys, sizeof (dnode_phys_t)); + + mutex_enter(&dn->dn_mtx); + dn->dn_type = DMU_OT_NONE; + dn->dn_maxblkid = 0; + dn->dn_allocated_txg = 0; + dn->dn_free_txg = 0; + dn->dn_have_spill = B_FALSE; + mutex_exit(&dn->dn_mtx); + + ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); + + dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); + /* + * Now that we've released our hold, the dnode may + * be evicted, so we musn't access it. + */ +} + +/* + * Write out the dnode's dirty buffers. 
 *
 * Runs in syncing context.  In order, it:
 *  - applies the pending per-txg changes (type, block size, bonus length
 *    and type, indirect block shift, checksum, compression) to dn_phys;
 *  - frees the spill block if requested or if the dnode is being freed;
 *  - processes every free range queued for this txg;
 *  - hands off to dnode_sync_free() if the dnode itself is being freed;
 *  - otherwise applies a pending block pointer count or indirection
 *    change and syncs the dirty dbufs.
 */
void
dnode_sync(dnode_t *dn, dmu_tx_t *tx)
{
	free_range_t *rp;
	dnode_phys_t *dnp = dn->dn_phys;
	int txgoff = tx->tx_txg & TXG_MASK;
	list_t *list = &dn->dn_dirty_records[txgoff];
	static const dnode_phys_t zerodn = { 0 };
	boolean_t kill_spill = B_FALSE;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
	ASSERT(dnp->dn_type != DMU_OT_NONE ||
	    bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
	DNODE_VERIFY(dn);

	ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));

	if (dmu_objset_userused_enabled(dn->dn_objset) &&
	    !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
		/* remember the pre-sync usage and flags for user accounting */
		mutex_enter(&dn->dn_mtx);
		dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
		dn->dn_oldflags = dn->dn_phys->dn_flags;
		dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
		mutex_exit(&dn->dn_mtx);
		dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
	} else {
		/* Once we account for it, we should always account for it. */
		ASSERT(!(dn->dn_phys->dn_flags &
		    DNODE_FLAG_USERUSED_ACCOUNTED));
	}

	mutex_enter(&dn->dn_mtx);
	if (dn->dn_allocated_txg == tx->tx_txg) {
		/* The dnode is newly allocated or reallocated */
		if (dnp->dn_type == DMU_OT_NONE) {
			/* this is a first alloc, not a realloc */
			dnp->dn_nlevels = 1;
			dnp->dn_nblkptr = dn->dn_nblkptr;
		}

		dnp->dn_type = dn->dn_type;
		dnp->dn_bonustype = dn->dn_bonustype;
		dnp->dn_bonuslen = dn->dn_bonuslen;
	}

	ASSERT(dnp->dn_nlevels > 1 ||
	    BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
	    BP_GET_LSIZE(&dnp->dn_blkptr[0]) ==
	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);

	/* a non-zero dn_next_* slot is a change queued for this txg */
	if (dn->dn_next_blksz[txgoff]) {
		ASSERT(P2PHASE(dn->dn_next_blksz[txgoff],
		    SPA_MINBLOCKSIZE) == 0);
		ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
		    dn->dn_maxblkid == 0 || list_head(list) != NULL ||
		    avl_last(&dn->dn_ranges[txgoff]) ||
		    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
		    dnp->dn_datablkszsec);
		dnp->dn_datablkszsec =
		    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT;
		dn->dn_next_blksz[txgoff] = 0;
	}

	if (dn->dn_next_bonuslen[txgoff]) {
		/* DN_ZERO_BONUSLEN stands for "set the length to zero" */
		if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN)
			dnp->dn_bonuslen = 0;
		else
			dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff];
		ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN);
		dn->dn_next_bonuslen[txgoff] = 0;
	}

	if (dn->dn_next_bonustype[txgoff]) {
		ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
		dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
		dn->dn_next_bonustype[txgoff] = 0;
	}

	/*
	 * We will either remove a spill block when a file is being removed
	 * or we have been asked to remove it.
	 */
	if (dn->dn_rm_spillblk[txgoff] ||
	    ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
	    dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
		if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
			kill_spill = B_TRUE;
		dn->dn_rm_spillblk[txgoff] = 0;
	}

	if (dn->dn_next_indblkshift[txgoff]) {
		ASSERT(dnp->dn_nlevels == 1);
		dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
		dn->dn_next_indblkshift[txgoff] = 0;
	}

	/*
	 * Just take the live (open-context) values for checksum and compress.
	 * Strictly speaking it's a future leak, but nothing bad happens if we
	 * start using the new checksum or compress algorithm a little early.
	 */
	dnp->dn_checksum = dn->dn_checksum;
	dnp->dn_compress = dn->dn_compress;

	mutex_exit(&dn->dn_mtx);

	/* the spill block is freed with dn_mtx dropped */
	if (kill_spill) {
		(void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
		mutex_enter(&dn->dn_mtx);
		dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
		mutex_exit(&dn->dn_mtx);
	}

	/* process all the "freed" ranges in the file */
	while (rp = avl_last(&dn->dn_ranges[txgoff])) {
		dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);
		/* grab the mutex so we don't race with dnode_block_freed() */
		mutex_enter(&dn->dn_mtx);
		avl_remove(&dn->dn_ranges[txgoff], rp);
		mutex_exit(&dn->dn_mtx);
		kmem_free(rp, sizeof (free_range_t));
	}

	/* the dnode itself is being freed in this (or an earlier) txg */
	if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
		dnode_sync_free(dn, tx);
		return;
	}

	if (dn->dn_next_nblkptr[txgoff]) {
		/* this should only happen on a realloc */
		ASSERT(dn->dn_allocated_txg == tx->tx_txg);
		if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) {
			/* zero the new blkptrs we are gaining */
			bzero(dnp->dn_blkptr + dnp->dn_nblkptr,
			    sizeof (blkptr_t) *
			    (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr));
#ifdef ZFS_DEBUG
		} else {
			int i;
			ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr);
			/* the blkptrs we are losing better be unallocated */
			for (i = dn->dn_next_nblkptr[txgoff];
			    i < dnp->dn_nblkptr; i++)
				ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i]));
#endif
		}
		mutex_enter(&dn->dn_mtx);
		dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff];
		dn->dn_next_nblkptr[txgoff] = 0;
		mutex_exit(&dn->dn_mtx);
	}

	if (dn->dn_next_nlevels[txgoff]) {
		dnode_increase_indirection(dn, tx);
		dn->dn_next_nlevels[txgoff] = 0;
	}

	dbuf_sync_list(list, tx);

	/* special objects keep the hold that is tagged with this txg */
	if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
		ASSERT3P(list_head(list), ==, NULL);
		dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
	}

	/*
	 * Although we have dropped our reference to the dnode, it
	 * can't be evicted until it's written, and we haven't yet
	 * initiated the IO for the dnode's dbuf.
	 */
}
diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c
new file mode 100644
index 000000000000..59ac4a60947a
--- /dev/null
+++ b/uts/common/fs/zfs/dsl_dataset.c
@@ -0,0 +1,4030 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */ + +#include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dmu_traverse.h> +#include <sys/dmu_tx.h> +#include <sys/arc.h> +#include <sys/zio.h> +#include <sys/zap.h> +#include <sys/unique.h> +#include <sys/zfs_context.h> +#include <sys/zfs_ioctl.h> +#include <sys/spa.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_onexit.h> +#include <sys/zvol.h> +#include <sys/dsl_scan.h> +#include <sys/dsl_deadlist.h> + +static char *dsl_reaper = "the grim reaper"; + +static dsl_checkfunc_t dsl_dataset_destroy_begin_check; +static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; +static dsl_syncfunc_t dsl_dataset_set_reservation_sync; + +#define SWITCH64(x, y) \ + { \ + uint64_t __tmp = (x); \ + (x) = (y); \ + (y) = __tmp; \ + } + +#define DS_REF_MAX (1ULL << 62) + +#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE + +#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) + + +/* + * Figure out how much of this delta should be propagated to the dsl_dir + * layer. If there's a refreservation, that space has already been + * partially accounted for in our ancestors. 
 *
 * Returns the change in MAX(ds_unique_bytes, ds_reserved) that results
 * from applying 'delta' to ds_unique_bytes.  With no reservation
 * (ds_reserved == 0) the whole delta is returned.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}

/*
 * Account for a newly written block 'bp' in syncing context.  Holes are
 * ignored.  With ds == NULL the space is charged to the pool's
 * dp_mos_dir; otherwise the dataset's used, compressed, uncompressed and
 * unique byte counts grow and the dsl_dir is charged the part of the
 * space that parent_delta() reports.
 */
void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* dd_lock is taken before ds_lock and held across the dir updates */
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	/* the remainder moves from the REFRSRV bucket to the HEAD bucket */
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

/*
 * Account for block 'bp' being released by dataset 'ds' in syncing
 * context.  A block born after the most recent snapshot is freed
 * outright; an older block is put on the dataset's deadlist (via the
 * pending deadlist when 'async' is set).  With ds == NULL the block is
 * freed and its space is taken back from dp_mos_dir.
 *
 * Returns the dsize the block accounted for, or 0 if 'bp' is a hole.
 */
int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(bp->blk_birth <= tx->tx_txg);

	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(used > 0);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		dsl_free(tx->tx_pool, tx->tx_txg, bp);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		/* born after the previous snapshot: free it now */
		int64_t delta;

		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
		dsl_free(tx->tx_pool, tx->tx_txg, bp);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		if (async) {
			/*
			 * We are here as part of zio's write done callback,
			 * which means we're a zio interrupt thread. We can't
			 * call dsl_deadlist_insert() now because it may block
			 * waiting for I/O. Instead, put bp on the deferred
			 * queue and let dsl_pool_sync() finish the job.
			 */
			bplist_append(&ds->ds_pending_deadlist, bp);
		} else {
			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	/* in both cases the head dataset stops referencing the block */
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase. So this should only be used as a guess.
+ */ + if (ds->ds_trysnap_txg > + spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) + trysnap = ds->ds_trysnap_txg; + return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); +} + +boolean_t +dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, + uint64_t blk_birth) +{ + if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) + return (B_FALSE); + + ddt_prefetch(dsl_dataset_get_spa(ds), bp); + + return (B_TRUE); +} + +/* ARGSUSED */ +static void +dsl_dataset_evict(dmu_buf_t *db, void *dsv) +{ + dsl_dataset_t *ds = dsv; + + ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); + + unique_remove(ds->ds_fsid_guid); + + if (ds->ds_objset != NULL) + dmu_objset_evict(ds->ds_objset); + + if (ds->ds_prev) { + dsl_dataset_drop_ref(ds->ds_prev, ds); + ds->ds_prev = NULL; + } + + bplist_destroy(&ds->ds_pending_deadlist); + if (db != NULL) { + dsl_deadlist_close(&ds->ds_deadlist); + } else { + ASSERT(ds->ds_deadlist.dl_dbuf == NULL); + ASSERT(!ds->ds_deadlist.dl_oldfmt); + } + if (ds->ds_dir) + dsl_dir_close(ds->ds_dir, ds); + + ASSERT(!list_link_active(&ds->ds_synced_link)); + + mutex_destroy(&ds->ds_lock); + mutex_destroy(&ds->ds_recvlock); + mutex_destroy(&ds->ds_opening_lock); + rw_destroy(&ds->ds_rwlock); + cv_destroy(&ds->ds_exclusive_cv); + + kmem_free(ds, sizeof (dsl_dataset_t)); +} + +static int +dsl_dataset_get_snapname(dsl_dataset_t *ds) +{ + dsl_dataset_phys_t *headphys; + int err; + dmu_buf_t *headdbuf; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + + if (ds->ds_snapname[0]) + return (0); + if (ds->ds_phys->ds_next_snap_obj == 0) + return (0); + + err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, + FTAG, &headdbuf); + if (err) + return (err); + headphys = headdbuf->db_data; + err = zap_value_search(dp->dp_meta_objset, + headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); + dmu_buf_rele(headdbuf, FTAG); + return (err); +} + +static int +dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char 
*name, uint64_t *value) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + matchtype_t mt; + int err; + + if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + mt = MT_FIRST; + else + mt = MT_EXACT; + + err = zap_lookup_norm(mos, snapobj, name, 8, 1, + value, mt, NULL, 0, NULL); + if (err == ENOTSUP && mt == MT_FIRST) + err = zap_lookup(mos, snapobj, name, 8, 1, value); + return (err); +} + +static int +dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + matchtype_t mt; + int err; + + dsl_dir_snap_cmtime_update(ds->ds_dir); + + if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + mt = MT_FIRST; + else + mt = MT_EXACT; + + err = zap_remove_norm(mos, snapobj, name, mt, tx); + if (err == ENOTSUP && mt == MT_FIRST) + err = zap_remove(mos, snapobj, name, tx); + return (err); +} + +static int +dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, + dsl_dataset_t **dsp) +{ + objset_t *mos = dp->dp_meta_objset; + dmu_buf_t *dbuf; + dsl_dataset_t *ds; + int err; + dmu_object_info_t doi; + + ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || + dsl_pool_sync_context(dp)); + + err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); + if (err) + return (err); + + /* Make sure dsobj has the correct object type. 
*/ + dmu_object_info_from_db(dbuf, &doi); + if (doi.doi_type != DMU_OT_DSL_DATASET) + return (EINVAL); + + ds = dmu_buf_get_user(dbuf); + if (ds == NULL) { + dsl_dataset_t *winner; + + ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); + ds->ds_dbuf = dbuf; + ds->ds_object = dsobj; + ds->ds_phys = dbuf->db_data; + + mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&ds->ds_rwlock, 0, 0, 0); + cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); + + bplist_create(&ds->ds_pending_deadlist); + dsl_deadlist_open(&ds->ds_deadlist, + mos, ds->ds_phys->ds_deadlist_obj); + + if (err == 0) { + err = dsl_dir_open_obj(dp, + ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); + } + if (err) { + mutex_destroy(&ds->ds_lock); + mutex_destroy(&ds->ds_recvlock); + mutex_destroy(&ds->ds_opening_lock); + rw_destroy(&ds->ds_rwlock); + cv_destroy(&ds->ds_exclusive_cv); + bplist_destroy(&ds->ds_pending_deadlist); + dsl_deadlist_close(&ds->ds_deadlist); + kmem_free(ds, sizeof (dsl_dataset_t)); + dmu_buf_rele(dbuf, tag); + return (err); + } + + if (!dsl_dataset_is_snapshot(ds)) { + ds->ds_snapname[0] = '\0'; + if (ds->ds_phys->ds_prev_snap_obj) { + err = dsl_dataset_get_ref(dp, + ds->ds_phys->ds_prev_snap_obj, + ds, &ds->ds_prev); + } + } else { + if (zfs_flags & ZFS_DEBUG_SNAPNAMES) + err = dsl_dataset_get_snapname(ds); + if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { + err = zap_count( + ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_userrefs_obj, + &ds->ds_userrefs); + } + } + + if (err == 0 && !dsl_dataset_is_snapshot(ds)) { + /* + * In sync context, we're called with either no lock + * or with the write lock. If we're not syncing, + * we're always called with the read lock held. 
+ */ + boolean_t need_lock = + !RW_WRITE_HELD(&dp->dp_config_rwlock) && + dsl_pool_sync_context(dp); + + if (need_lock) + rw_enter(&dp->dp_config_rwlock, RW_READER); + + err = dsl_prop_get_ds(ds, + "refreservation", sizeof (uint64_t), 1, + &ds->ds_reserved, NULL); + if (err == 0) { + err = dsl_prop_get_ds(ds, + "refquota", sizeof (uint64_t), 1, + &ds->ds_quota, NULL); + } + + if (need_lock) + rw_exit(&dp->dp_config_rwlock); + } else { + ds->ds_reserved = ds->ds_quota = 0; + } + + if (err == 0) { + winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, + dsl_dataset_evict); + } + if (err || winner) { + bplist_destroy(&ds->ds_pending_deadlist); + dsl_deadlist_close(&ds->ds_deadlist); + if (ds->ds_prev) + dsl_dataset_drop_ref(ds->ds_prev, ds); + dsl_dir_close(ds->ds_dir, ds); + mutex_destroy(&ds->ds_lock); + mutex_destroy(&ds->ds_recvlock); + mutex_destroy(&ds->ds_opening_lock); + rw_destroy(&ds->ds_rwlock); + cv_destroy(&ds->ds_exclusive_cv); + kmem_free(ds, sizeof (dsl_dataset_t)); + if (err) { + dmu_buf_rele(dbuf, tag); + return (err); + } + ds = winner; + } else { + ds->ds_fsid_guid = + unique_insert(ds->ds_phys->ds_fsid_guid); + } + } + ASSERT3P(ds->ds_dbuf, ==, dbuf); + ASSERT3P(ds->ds_phys, ==, dbuf->db_data); + ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || + spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || + dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); + mutex_enter(&ds->ds_lock); + if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { + mutex_exit(&ds->ds_lock); + dmu_buf_rele(ds->ds_dbuf, tag); + return (ENOENT); + } + mutex_exit(&ds->ds_lock); + *dsp = ds; + return (0); +} + +static int +dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + /* + * In syncing context we don't want the rwlock lock: there + * may be an existing writer waiting for sync phase to + * finish. 
We don't need to worry about such writers, since + * sync phase is single-threaded, so the writer can't be + * doing anything while we are active. + */ + if (dsl_pool_sync_context(dp)) { + ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); + return (0); + } + + /* + * Normal users will hold the ds_rwlock as a READER until they + * are finished (i.e., call dsl_dataset_rele()). "Owners" will + * drop their READER lock after they set the ds_owner field. + * + * If the dataset is being destroyed, the destroy thread will + * obtain a WRITER lock for exclusive access after it's done its + * open-context work and then change the ds_owner to + * dsl_reaper once destruction is assured. So threads + * may block here temporarily, until the "destructability" of + * the dataset is determined. + */ + ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); + mutex_enter(&ds->ds_lock); + while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { + rw_exit(&dp->dp_config_rwlock); + cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); + if (DSL_DATASET_IS_DESTROYED(ds)) { + mutex_exit(&ds->ds_lock); + dsl_dataset_drop_ref(ds, tag); + rw_enter(&dp->dp_config_rwlock, RW_READER); + return (ENOENT); + } + /* + * The dp_config_rwlock lives above the ds_lock. And + * we need to check DSL_DATASET_IS_DESTROYED() while + * holding the ds_lock, so we have to drop and reacquire + * the ds_lock here. 
+ */ + mutex_exit(&ds->ds_lock); + rw_enter(&dp->dp_config_rwlock, RW_READER); + mutex_enter(&ds->ds_lock); + } + mutex_exit(&ds->ds_lock); + return (0); +} + +int +dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, + dsl_dataset_t **dsp) +{ + int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); + + if (err) + return (err); + return (dsl_dataset_hold_ref(*dsp, tag)); +} + +int +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, + void *tag, dsl_dataset_t **dsp) +{ + int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + if (err) + return (err); + if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { + dsl_dataset_rele(*dsp, tag); + *dsp = NULL; + return (EBUSY); + } + return (0); +} + +int +dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) +{ + dsl_dir_t *dd; + dsl_pool_t *dp; + const char *snapname; + uint64_t obj; + int err = 0; + + err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); + if (err) + return (err); + + dp = dd->dd_pool; + obj = dd->dd_phys->dd_head_dataset_obj; + rw_enter(&dp->dp_config_rwlock, RW_READER); + if (obj) + err = dsl_dataset_get_ref(dp, obj, tag, dsp); + else + err = ENOENT; + if (err) + goto out; + + err = dsl_dataset_hold_ref(*dsp, tag); + + /* we may be looking for a snapshot */ + if (err == 0 && snapname != NULL) { + dsl_dataset_t *ds = NULL; + + if (*snapname++ != '@') { + dsl_dataset_rele(*dsp, tag); + err = ENOENT; + goto out; + } + + dprintf("looking for snapshot '%s'\n", snapname); + err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); + if (err == 0) + err = dsl_dataset_get_ref(dp, obj, tag, &ds); + dsl_dataset_rele(*dsp, tag); + + ASSERT3U((err == 0), ==, (ds != NULL)); + + if (ds) { + mutex_enter(&ds->ds_lock); + if (ds->ds_snapname[0] == 0) + (void) strlcpy(ds->ds_snapname, snapname, + sizeof (ds->ds_snapname)); + mutex_exit(&ds->ds_lock); + err = dsl_dataset_hold_ref(ds, tag); + *dsp = err ? 
NULL : ds; + } + } +out: + rw_exit(&dp->dp_config_rwlock); + dsl_dir_close(dd, FTAG); + return (err); +} + +int +dsl_dataset_own(const char *name, boolean_t inconsistentok, + void *tag, dsl_dataset_t **dsp) +{ + int err = dsl_dataset_hold(name, tag, dsp); + if (err) + return (err); + if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { + dsl_dataset_rele(*dsp, tag); + return (EBUSY); + } + return (0); +} + +void +dsl_dataset_name(dsl_dataset_t *ds, char *name) +{ + if (ds == NULL) { + (void) strcpy(name, "mos"); + } else { + dsl_dir_name(ds->ds_dir, name); + VERIFY(0 == dsl_dataset_get_snapname(ds)); + if (ds->ds_snapname[0]) { + (void) strcat(name, "@"); + /* + * We use a "recursive" mutex so that we + * can call dprintf_ds() with ds_lock held. + */ + if (!MUTEX_HELD(&ds->ds_lock)) { + mutex_enter(&ds->ds_lock); + (void) strcat(name, ds->ds_snapname); + mutex_exit(&ds->ds_lock); + } else { + (void) strcat(name, ds->ds_snapname); + } + } + } +} + +static int +dsl_dataset_namelen(dsl_dataset_t *ds) +{ + int result; + + if (ds == NULL) { + result = 3; /* "mos" */ + } else { + result = dsl_dir_namelen(ds->ds_dir); + VERIFY(0 == dsl_dataset_get_snapname(ds)); + if (ds->ds_snapname[0]) { + ++result; /* adding one for the @-sign */ + if (!MUTEX_HELD(&ds->ds_lock)) { + mutex_enter(&ds->ds_lock); + result += strlen(ds->ds_snapname); + mutex_exit(&ds->ds_lock); + } else { + result += strlen(ds->ds_snapname); + } + } + } + + return (result); +} + +void +dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) +{ + dmu_buf_rele(ds->ds_dbuf, tag); +} + +void +dsl_dataset_rele(dsl_dataset_t *ds, void *tag) +{ + if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { + rw_exit(&ds->ds_rwlock); + } + dsl_dataset_drop_ref(ds, tag); +} + +void +dsl_dataset_disown(dsl_dataset_t *ds, void *tag) +{ + ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || + (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); + + mutex_enter(&ds->ds_lock); + ds->ds_owner = NULL; + if (RW_WRITE_HELD(&ds->ds_rwlock)) { 
+ rw_exit(&ds->ds_rwlock); + cv_broadcast(&ds->ds_exclusive_cv); + } + mutex_exit(&ds->ds_lock); + if (ds->ds_dbuf) + dsl_dataset_drop_ref(ds, tag); + else + dsl_dataset_evict(NULL, ds); +} + +boolean_t +dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) +{ + boolean_t gotit = FALSE; + + mutex_enter(&ds->ds_lock); + if (ds->ds_owner == NULL && + (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { + ds->ds_owner = tag; + if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) + rw_exit(&ds->ds_rwlock); + gotit = TRUE; + } + mutex_exit(&ds->ds_lock); + return (gotit); +} + +void +dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) +{ + ASSERT3P(owner, ==, ds->ds_owner); + if (!RW_WRITE_HELD(&ds->ds_rwlock)) + rw_enter(&ds->ds_rwlock, RW_WRITER); +} + +uint64_t +dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, + uint64_t flags, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dd->dd_pool; + dmu_buf_t *dbuf; + dsl_dataset_phys_t *dsphys; + uint64_t dsobj; + objset_t *mos = dp->dp_meta_objset; + + if (origin == NULL) + origin = dp->dp_origin_snap; + + ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); + ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); + + dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, + DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); + VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); + dmu_buf_will_dirty(dbuf, tx); + dsphys = dbuf->db_data; + bzero(dsphys, sizeof (dsl_dataset_phys_t)); + dsphys->ds_dir_obj = dd->dd_object; + dsphys->ds_flags = flags; + dsphys->ds_fsid_guid = unique_create(); + (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, + sizeof (dsphys->ds_guid)); + dsphys->ds_snapnames_zapobj = + zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, + DMU_OT_NONE, 0, tx); + dsphys->ds_creation_time = gethrestime_sec(); + dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; + + if (origin == NULL) { + dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); + } else { + dsl_dataset_t *ohds; + + dsphys->ds_prev_snap_obj = origin->ds_object; + dsphys->ds_prev_snap_txg = + origin->ds_phys->ds_creation_txg; + dsphys->ds_used_bytes = + origin->ds_phys->ds_used_bytes; + dsphys->ds_compressed_bytes = + origin->ds_phys->ds_compressed_bytes; + dsphys->ds_uncompressed_bytes = + origin->ds_phys->ds_uncompressed_bytes; + dsphys->ds_bp = origin->ds_phys->ds_bp; + dsphys->ds_flags |= origin->ds_phys->ds_flags; + + dmu_buf_will_dirty(origin->ds_dbuf, tx); + origin->ds_phys->ds_num_children++; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); + dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, + dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); + dsl_dataset_rele(ohds, FTAG); + + if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { + if (origin->ds_phys->ds_next_clones_obj == 0) { + origin->ds_phys->ds_next_clones_obj = + zap_create(mos, + DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); + } + VERIFY(0 == zap_add_int(mos, + origin->ds_phys->ds_next_clones_obj, + dsobj, tx)); + } + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dd->dd_phys->dd_origin_obj = origin->ds_object; + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + if (origin->ds_dir->dd_phys->dd_clones == 0) { + dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); + origin->ds_dir->dd_phys->dd_clones = + zap_create(mos, + DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + } + VERIFY3U(0, ==, zap_add_int(mos, + origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); + } + } + + if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) + dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; + + dmu_buf_rele(dbuf, FTAG); + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dd->dd_phys->dd_head_dataset_obj = dsobj; + + return (dsobj); +} + +uint64_t +dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, + dsl_dataset_t *origin, 
uint64_t flags, cred_t *cr, dmu_tx_t *tx) +{ + dsl_pool_t *dp = pdd->dd_pool; + uint64_t dsobj, ddobj; + dsl_dir_t *dd; + + ASSERT(lastname[0] != '@'); + + ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); + VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); + + dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); + + dsl_deleg_set_create_perms(dd, tx, cr); + + dsl_dir_close(dd, FTAG); + + /* + * If we are creating a clone, make sure we zero out any stale + * data from the origin snapshots zil header. + */ + if (origin != NULL) { + dsl_dataset_t *ds; + objset_t *os; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); + bzero(&os->os_zil_header, sizeof (os->os_zil_header)); + dsl_dataset_dirty(ds, tx); + dsl_dataset_rele(ds, FTAG); + } + + return (dsobj); +} + +struct destroyarg { + dsl_sync_task_group_t *dstg; + char *snapname; + char *failed; + boolean_t defer; +}; + +static int +dsl_snapshot_destroy_one(const char *name, void *arg) +{ + struct destroyarg *da = arg; + dsl_dataset_t *ds; + int err; + char *dsname; + + dsname = kmem_asprintf("%s@%s", name, da->snapname); + err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); + strfree(dsname); + if (err == 0) { + struct dsl_ds_destroyarg *dsda; + + dsl_dataset_make_exclusive(ds, da->dstg); + dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); + dsda->ds = ds; + dsda->defer = da->defer; + dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, dsda, da->dstg, 0); + } else if (err == ENOENT) { + err = 0; + } else { + (void) strcpy(da->failed, name); + } + return (err); +} + +/* + * Destroy 'snapname' in all descendants of 'fsname'. 
+ */ +#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy +int +dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer) +{ + int err; + struct destroyarg da; + dsl_sync_task_t *dst; + spa_t *spa; + + err = spa_open(fsname, &spa, FTAG); + if (err) + return (err); + da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + da.snapname = snapname; + da.failed = fsname; + da.defer = defer; + + err = dmu_objset_find(fsname, + dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); + + if (err == 0) + err = dsl_sync_task_group_wait(da.dstg); + + for (dst = list_head(&da.dstg->dstg_tasks); dst; + dst = list_next(&da.dstg->dstg_tasks, dst)) { + struct dsl_ds_destroyarg *dsda = dst->dst_arg1; + dsl_dataset_t *ds = dsda->ds; + + /* + * Return the file system name that triggered the error + */ + if (dst->dst_err) { + dsl_dataset_name(ds, fsname); + *strchr(fsname, '@') = '\0'; + } + ASSERT3P(dsda->rm_origin, ==, NULL); + dsl_dataset_disown(ds, da.dstg); + kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); + } + + dsl_sync_task_group_destroy(da.dstg); + spa_close(spa, FTAG); + return (err); +} + +static boolean_t +dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) +{ + boolean_t might_destroy = B_FALSE; + + mutex_enter(&ds->ds_lock); + if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && + DS_IS_DEFER_DESTROY(ds)) + might_destroy = B_TRUE; + mutex_exit(&ds->ds_lock); + + return (might_destroy); +} + +/* + * If we're removing a clone, and these three conditions are true: + * 1) the clone's origin has no other children + * 2) the clone's origin has no user references + * 3) the clone's origin has been marked for deferred destruction + * Then, prepare to remove the origin as part of this sync task group. 
+ */ +static int +dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) +{ + dsl_dataset_t *ds = dsda->ds; + dsl_dataset_t *origin = ds->ds_prev; + + if (dsl_dataset_might_destroy_origin(origin)) { + char *name; + int namelen; + int error; + + namelen = dsl_dataset_namelen(origin) + 1; + name = kmem_alloc(namelen, KM_SLEEP); + dsl_dataset_name(origin, name); +#ifdef _KERNEL + error = zfs_unmount_snap(name, NULL); + if (error) { + kmem_free(name, namelen); + return (error); + } +#endif + error = dsl_dataset_own(name, B_TRUE, tag, &origin); + kmem_free(name, namelen); + if (error) + return (error); + dsda->rm_origin = origin; + dsl_dataset_make_exclusive(origin, tag); + } + + return (0); +} + +/* + * ds must be opened as OWNER. On return (whether successful or not), + * ds will be closed and caller can no longer dereference it. + */ +int +dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) +{ + int err; + dsl_sync_task_group_t *dstg; + objset_t *os; + dsl_dir_t *dd; + uint64_t obj; + struct dsl_ds_destroyarg dsda = { 0 }; + dsl_dataset_t dummy_ds = { 0 }; + + dsda.ds = ds; + + if (dsl_dataset_is_snapshot(ds)) { + /* Destroying a snapshot is simpler */ + dsl_dataset_make_exclusive(ds, tag); + + dsda.defer = defer; + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_destroy_check, dsl_dataset_destroy_sync, + &dsda, tag, 0); + ASSERT3P(dsda.rm_origin, ==, NULL); + goto out; + } else if (defer) { + err = EINVAL; + goto out; + } + + dd = ds->ds_dir; + dummy_ds.ds_dir = dd; + dummy_ds.ds_object = ds->ds_object; + + /* + * Check for errors and mark this ds as inconsistent, in + * case we crash while freeing the objects. + */ + err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, + dsl_dataset_destroy_begin_sync, ds, NULL, 0); + if (err) + goto out; + + err = dmu_objset_from_ds(ds, &os); + if (err) + goto out; + + /* + * remove the objects in open context, so that we won't + * have too much to do in syncing context. 
+ */ + for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, + ds->ds_phys->ds_prev_snap_txg)) { + /* + * Ignore errors, if there is not enough disk space + * we will deal with it in dsl_dataset_destroy_sync(). + */ + (void) dmu_free_object(os, obj); + } + if (err != ESRCH) + goto out; + + /* + * Only the ZIL knows how to free log blocks. + */ + zil_destroy(dmu_objset_zil(os), B_FALSE); + + /* + * Sync out all in-flight IO. + */ + txg_wait_synced(dd->dd_pool, 0); + + /* + * If we managed to free all the objects in open + * context, the user space accounting should be zero. + */ + if (ds->ds_phys->ds_bp.blk_fill == 0 && + dmu_objset_userused_enabled(os)) { + uint64_t count; + + ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || + count == 0); + ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || + count == 0); + } + + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); + rw_exit(&dd->dd_pool->dp_config_rwlock); + + if (err) + goto out; + + /* + * Blow away the dsl_dir + head dataset. + */ + dsl_dataset_make_exclusive(ds, tag); + /* + * If we're removing a clone, we might also need to remove its + * origin. + */ + do { + dsda.need_prep = B_FALSE; + if (dsl_dir_is_clone(dd)) { + err = dsl_dataset_origin_rm_prep(&dsda, tag); + if (err) { + dsl_dir_close(dd, FTAG); + goto out; + } + } + + dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); + dsl_sync_task_create(dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, &dsda, tag, 0); + dsl_sync_task_create(dstg, dsl_dir_destroy_check, + dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); + err = dsl_sync_task_group_wait(dstg); + dsl_sync_task_group_destroy(dstg); + + /* + * We could be racing against 'zfs release' or 'zfs destroy -d' + * on the origin snap, in which case we can get EBUSY if we + * needed to destroy the origin snap but were not ready to + * do so. 
+ */ + if (dsda.need_prep) { + ASSERT(err == EBUSY); + ASSERT(dsl_dir_is_clone(dd)); + ASSERT(dsda.rm_origin == NULL); + } + } while (dsda.need_prep); + + if (dsda.rm_origin != NULL) + dsl_dataset_disown(dsda.rm_origin, tag); + + /* if it is successful, dsl_dir_destroy_sync will close the dd */ + if (err) + dsl_dir_close(dd, FTAG); +out: + dsl_dataset_disown(ds, tag); + return (err); +} + +blkptr_t * +dsl_dataset_get_blkptr(dsl_dataset_t *ds) +{ + return (&ds->ds_phys->ds_bp); +} + +void +dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) +{ + ASSERT(dmu_tx_is_syncing(tx)); + /* If it's the meta-objset, set dp_meta_rootbp */ + if (ds == NULL) { + tx->tx_pool->dp_meta_rootbp = *bp; + } else { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_bp = *bp; + } +} + +spa_t * +dsl_dataset_get_spa(dsl_dataset_t *ds) +{ + return (ds->ds_dir->dd_pool->dp_spa); +} + +void +dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + dsl_pool_t *dp; + + if (ds == NULL) /* this is the meta-objset */ + return; + + ASSERT(ds->ds_objset != NULL); + + if (ds->ds_phys->ds_next_snap_obj != 0) + panic("dirtying snapshot!"); + + dp = ds->ds_dir->dd_pool; + + if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { + /* up the hold count until we can be written out */ + dmu_buf_add_ref(ds->ds_dbuf, ds); + } +} + +/* + * The unique space in the head dataset can be calculated by subtracting + * the space used in the most recent snapshot, that is still being used + * in this file system, from the space currently in use. To figure out + * the space in the most recent snapshot still in use, we need to take + * the total space used in the snapshot and subtract out the space that + * has been freed up since the snapshot was taken. 
+ */ +static void +dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) +{ + uint64_t mrs_used; + uint64_t dlused, dlcomp, dluncomp; + + ASSERT(!dsl_dataset_is_snapshot(ds)); + + if (ds->ds_phys->ds_prev_snap_obj != 0) + mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; + else + mrs_used = 0; + + dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); + + ASSERT3U(dlused, <=, mrs_used); + ds->ds_phys->ds_unique_bytes = + ds->ds_phys->ds_used_bytes - (mrs_used - dlused); + + if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= + SPA_VERSION_UNIQUE_ACCURATE) + ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; +} + +struct killarg { + dsl_dataset_t *ds; + dmu_tx_t *tx; +}; + +/* ARGSUSED */ +static int +kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + struct killarg *ka = arg; + dmu_tx_t *tx = ka->tx; + + if (bp == NULL) + return (0); + + if (zb->zb_level == ZB_ZIL_LEVEL) { + ASSERT(zilog != NULL); + /* + * It's a block in the intent log. It has no + * accounting, so just free it. + */ + dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); + } else { + ASSERT(zilog == NULL); + ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); + (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); + } + + return (0); +} + +/* ARGSUSED */ +static int +dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t count; + int err; + + /* + * Can't delete a head dataset if there are snapshots of it. + * (Except if the only snapshots are from the branch we cloned + * from.) + */ + if (ds->ds_prev != NULL && + ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) + return (EBUSY); + + /* + * This is really a dsl_dir thing, but check it here so that + * we'll be less likely to leave this dataset inconsistent & + * nearly destroyed. 
+ */ + err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); + if (err) + return (err); + if (count != 0) + return (EEXIST); + + return (0); +} + +/* ARGSUSED */ +static void +dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + /* Mark it as inconsistent on-disk, in case we crash */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + + spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, + "dataset = %llu", ds->ds_object); +} + +static int +dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, + dmu_tx_t *tx) +{ + dsl_dataset_t *ds = dsda->ds; + dsl_dataset_t *ds_prev = ds->ds_prev; + + if (dsl_dataset_might_destroy_origin(ds_prev)) { + struct dsl_ds_destroyarg ndsda = {0}; + + /* + * If we're not prepared to remove the origin, don't remove + * the clone either. + */ + if (dsda->rm_origin == NULL) { + dsda->need_prep = B_TRUE; + return (EBUSY); + } + + ndsda.ds = ds_prev; + ndsda.is_origin_rm = B_TRUE; + return (dsl_dataset_destroy_check(&ndsda, tag, tx)); + } + + /* + * If we're not going to remove the origin after all, + * undo the open context setup. + */ + if (dsda->rm_origin != NULL) { + dsl_dataset_disown(dsda->rm_origin, tag); + dsda->rm_origin = NULL; + } + + return (0); +} + +/* + * If you add new checks here, you may need to add + * additional checks to the "temporary" case in + * snapshot_check() in dmu_objset.c. + */ +/* ARGSUSED */ +int +dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + struct dsl_ds_destroyarg *dsda = arg1; + dsl_dataset_t *ds = dsda->ds; + + /* we have an owner hold, so noone else can destroy us */ + ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); + + /* + * Only allow deferred destroy on pools that support it. + * NOTE: deferred destroy is only supported on snapshots. 
+ */ + if (dsda->defer) { + if (spa_version(ds->ds_dir->dd_pool->dp_spa) < + SPA_VERSION_USERREFS) + return (ENOTSUP); + ASSERT(dsl_dataset_is_snapshot(ds)); + return (0); + } + + /* + * Can't delete a head dataset if there are snapshots of it. + * (Except if the only snapshots are from the branch we cloned + * from.) + */ + if (ds->ds_prev != NULL && + ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) + return (EBUSY); + + /* + * If we made changes this txg, traverse_dsl_dataset won't find + * them. Try again. + */ + if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) + return (EAGAIN); + + if (dsl_dataset_is_snapshot(ds)) { + /* + * If this snapshot has an elevated user reference count, + * we can't destroy it yet. + */ + if (ds->ds_userrefs > 0 && !dsda->releasing) + return (EBUSY); + + mutex_enter(&ds->ds_lock); + /* + * Can't delete a branch point. However, if we're destroying + * a clone and removing its origin due to it having a user + * hold count of 0 and having been marked for deferred destroy, + * it's OK for the origin to have a single clone. + */ + if (ds->ds_phys->ds_num_children > + (dsda->is_origin_rm ? 2 : 1)) { + mutex_exit(&ds->ds_lock); + return (EEXIST); + } + mutex_exit(&ds->ds_lock); + } else if (dsl_dir_is_clone(ds->ds_dir)) { + return (dsl_dataset_origin_check(dsda, arg2, tx)); + } + + /* XXX we should do some i/o error checking... 
*/ + return (0); +} + +/* + * State shared between dsl_dataset_drain_refs() and its + * dsl_dataset_refs_gone() callback: 'gone' is set while holding 'lock' + * and announced on 'cv'. + */ +struct refsarg { + kmutex_t lock; + boolean_t gone; + kcondvar_t cv; +}; + +/* + * Callback handed to dmu_buf_update_user() by dsl_dataset_drain_refs(). + * argv is the struct refsarg; mark it gone and wake the waiter. + */ +/* ARGSUSED */ +static void +dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) +{ + struct refsarg *arg = argv; + + mutex_enter(&arg->lock); + arg->gone = TRUE; + cv_signal(&arg->cv); + mutex_exit(&arg->lock); +} + +/* + * Register dsl_dataset_refs_gone() on the dataset's dbuf via + * dmu_buf_update_user(), release our hold (tag) on ds_dbuf, and block + * until the callback has set arg.gone. On return ds_dbuf and ds_phys + * are NULL. + * NOTE(review): presumably the callback fires once the dbuf's last + * hold is dropped -- confirm against dmu_buf_update_user(). + */ +static void +dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) +{ + struct refsarg arg; + + mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); + arg.gone = FALSE; + (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, + dsl_dataset_refs_gone); + dmu_buf_rele(ds->ds_dbuf, tag); + mutex_enter(&arg.lock); + while (!arg.gone) + cv_wait(&arg.cv, &arg.lock); + ASSERT(arg.gone); + mutex_exit(&arg.lock); + ds->ds_dbuf = NULL; + ds->ds_phys = NULL; + mutex_destroy(&arg.lock); + cv_destroy(&arg.cv); +} + +static void +remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t count; + int err; + + ASSERT(ds->ds_phys->ds_num_children >= 2); + err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); + /* + * The err should not be ENOENT, but a bug in a previous version + * of the code could cause upgrade_clones_cb() to not set + * ds_next_snap_obj when it should, leading to a missing entry. + * If we knew that the pool was created after + * SPA_VERSION_NEXT_CLONES, we could assert that it isn't + * ENOENT. However, at least we can check that we don't have + * too many entries in the next_clones_obj even after failing to + * remove this one. 
+ */ + if (err != ENOENT) { + VERIFY3U(err, ==, 0); + } + ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, + &count)); + ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); +} + +static void +dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + + /* + * If it is the old version, dd_clones doesn't exist so we can't + * find the clones, but deadlist_remove_key() is a no-op so it + * doesn't matter. + */ + if (ds->ds_dir->dd_phys->dd_clones == 0) + return; + + for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *clone; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone)); + if (clone->ds_dir->dd_origin_txg > mintxg) { + dsl_deadlist_remove_key(&clone->ds_deadlist, + mintxg, tx); + dsl_dataset_remove_clones_key(clone, mintxg, tx); + } + dsl_dataset_rele(clone, FTAG); + } + zap_cursor_fini(&zc); +} + +struct process_old_arg { + dsl_dataset_t *ds; + dsl_dataset_t *ds_prev; + boolean_t after_branch_point; + zio_t *pio; + uint64_t used, comp, uncomp; +}; + +static int +process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + struct process_old_arg *poa = arg; + dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; + + if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { + dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); + if (poa->ds_prev && !poa->after_branch_point && + bp->blk_birth > + poa->ds_prev->ds_phys->ds_prev_snap_txg) { + poa->ds_prev->ds_phys->ds_unique_bytes += + bp_get_dsize_sync(dp->dp_spa, bp); + } + } else { + poa->used += bp_get_dsize_sync(dp->dp_spa, bp); + poa->comp += BP_GET_PSIZE(bp); + poa->uncomp += BP_GET_UCSIZE(bp); + dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); + } + return (0); +} + +static void +process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t 
*ds_prev, + dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) +{ + struct process_old_arg poa = { 0 }; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + + ASSERT(ds->ds_deadlist.dl_oldfmt); + ASSERT(ds_next->ds_deadlist.dl_oldfmt); + + poa.ds = ds; + poa.ds_prev = ds_prev; + poa.after_branch_point = after_branch_point; + poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, + process_old_cb, &poa, tx)); + VERIFY3U(zio_wait(poa.pio), ==, 0); + ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); + + /* change snapused */ + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -poa.used, -poa.comp, -poa.uncomp, tx); + + /* swap next's deadlist to our deadlist */ + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_close(&ds_next->ds_deadlist); + SWITCH64(ds_next->ds_phys->ds_deadlist_obj, + ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds_next->ds_deadlist, mos, + ds_next->ds_phys->ds_deadlist_obj); +} + +void +dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) +{ + struct dsl_ds_destroyarg *dsda = arg1; + dsl_dataset_t *ds = dsda->ds; + int err; + int after_branch_point = FALSE; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dataset_t *ds_prev = NULL; + boolean_t wont_destroy; + uint64_t obj; + + wont_destroy = (dsda->defer && + (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); + + ASSERT(ds->ds_owner || wont_destroy); + ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); + ASSERT(ds->ds_prev == NULL || + ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); + ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + + if (wont_destroy) { + ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + return; + } + + 
/* signal any waiters that this dataset is going away */ + mutex_enter(&ds->ds_lock); + ds->ds_owner = dsl_reaper; + cv_broadcast(&ds->ds_exclusive_cv); + mutex_exit(&ds->ds_lock); + + /* Remove our reservation */ + if (ds->ds_reserved != 0) { + dsl_prop_setarg_t psa; + uint64_t value = 0; + + dsl_prop_setarg_init_uint64(&psa, "refreservation", + (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), + &value); + psa.psa_effective_value = 0; /* predict default value */ + + dsl_dataset_set_reservation_sync(ds, &psa, tx); + ASSERT3U(ds->ds_reserved, ==, 0); + } + + ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); + + dsl_scan_ds_destroyed(ds, tx); + + obj = ds->ds_object; + + if (ds->ds_phys->ds_prev_snap_obj != 0) { + if (ds->ds_prev) { + ds_prev = ds->ds_prev; + } else { + VERIFY(0 == dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); + } + after_branch_point = + (ds_prev->ds_phys->ds_next_snap_obj != obj); + + dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); + if (after_branch_point && + ds_prev->ds_phys->ds_next_clones_obj != 0) { + remove_from_next_clones(ds_prev, obj, tx); + if (ds->ds_phys->ds_next_snap_obj != 0) { + VERIFY(0 == zap_add_int(mos, + ds_prev->ds_phys->ds_next_clones_obj, + ds->ds_phys->ds_next_snap_obj, tx)); + } + } + if (after_branch_point && + ds->ds_phys->ds_next_snap_obj == 0) { + /* This clone is toast. */ + ASSERT(ds_prev->ds_phys->ds_num_children > 1); + ds_prev->ds_phys->ds_num_children--; + + /* + * If the clone's origin has no other clones, no + * user holds, and has been marked for deferred + * deletion, then we should have done the necessary + * destroy setup for it. 
+ */ + if (ds_prev->ds_phys->ds_num_children == 1 && + ds_prev->ds_userrefs == 0 && + DS_IS_DEFER_DESTROY(ds_prev)) { + ASSERT3P(dsda->rm_origin, !=, NULL); + } else { + ASSERT3P(dsda->rm_origin, ==, NULL); + } + } else if (!after_branch_point) { + ds_prev->ds_phys->ds_next_snap_obj = + ds->ds_phys->ds_next_snap_obj; + } + } + + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_t *ds_next; + uint64_t old_unique; + uint64_t used = 0, comp = 0, uncomp = 0; + + VERIFY(0 == dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); + ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); + + old_unique = ds_next->ds_phys->ds_unique_bytes; + + dmu_buf_will_dirty(ds_next->ds_dbuf, tx); + ds_next->ds_phys->ds_prev_snap_obj = + ds->ds_phys->ds_prev_snap_obj; + ds_next->ds_phys->ds_prev_snap_txg = + ds->ds_phys->ds_prev_snap_txg; + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, + ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); + + + if (ds_next->ds_deadlist.dl_oldfmt) { + process_old_deadlist(ds, ds_prev, ds_next, + after_branch_point, tx); + } else { + /* Adjust prev's unique space. */ + if (ds_prev && !after_branch_point) { + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds_prev->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_prev_snap_txg, + &used, &comp, &uncomp); + ds_prev->ds_phys->ds_unique_bytes += used; + } + + /* Adjust snapused. */ + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &used, &comp, &uncomp); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -used, -comp, -uncomp, tx); + + /* Move blocks to be freed to pool's free list. */ + dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, + &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, + tx); + dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, + DD_USED_HEAD, used, comp, uncomp, tx); + dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); + + /* Merge our deadlist into next's and free it. 
*/ + dsl_deadlist_merge(&ds_next->ds_deadlist, + ds->ds_phys->ds_deadlist_obj, tx); + } + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + + /* Collapse range in clone heads */ + dsl_dataset_remove_clones_key(ds, + ds->ds_phys->ds_creation_txg, tx); + + if (dsl_dataset_is_snapshot(ds_next)) { + dsl_dataset_t *ds_nextnext; + + /* + * Update next's unique to include blocks which + * were previously shared by only this snapshot + * and it. Those blocks will be born after the + * prev snap and before this snap, and will have + * died after the next snap and before the one + * after that (ie. be on the snap after next's + * deadlist). + */ + VERIFY(0 == dsl_dataset_hold_obj(dp, + ds_next->ds_phys->ds_next_snap_obj, + FTAG, &ds_nextnext)); + dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_creation_txg, + &used, &comp, &uncomp); + ds_next->ds_phys->ds_unique_bytes += used; + dsl_dataset_rele(ds_nextnext, FTAG); + ASSERT3P(ds_next->ds_prev, ==, NULL); + + /* Collapse range in this head. */ + dsl_dataset_t *hds; + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, + FTAG, &hds)); + dsl_deadlist_remove_key(&hds->ds_deadlist, + ds->ds_phys->ds_creation_txg, tx); + dsl_dataset_rele(hds, FTAG); + + } else { + ASSERT3P(ds_next->ds_prev, ==, ds); + dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); + ds_next->ds_prev = NULL; + if (ds_prev) { + VERIFY(0 == dsl_dataset_get_ref(dp, + ds->ds_phys->ds_prev_snap_obj, + ds_next, &ds_next->ds_prev)); + } + + dsl_dataset_recalc_head_uniq(ds_next); + + /* + * Reduce the amount of our unconsumed refreservation + * being charged to our parent by the amount of + * new unique data we have gained. 
+ */ + if (old_unique < ds_next->ds_reserved) { + int64_t mrsdelta; + uint64_t new_unique = + ds_next->ds_phys->ds_unique_bytes; + + ASSERT(old_unique <= new_unique); + mrsdelta = MIN(new_unique - old_unique, + ds_next->ds_reserved - old_unique); + dsl_dir_diduse_space(ds->ds_dir, + DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); + } + } + dsl_dataset_rele(ds_next, FTAG); + } else { + /* + * There's no next snapshot, so this is a head dataset. + * Destroy the deadlist. Unless it's a clone, the + * deadlist should be empty. (If it's a clone, it's + * safe to ignore the deadlist contents.) + */ + struct killarg ka; + + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + ds->ds_phys->ds_deadlist_obj = 0; + + /* + * Free everything that we point to (that's born after + * the previous snapshot, if we are a clone) + * + * NB: this should be very quick, because we already + * freed all the objects in open context. + */ + ka.ds = ds; + ka.tx = tx; + err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, + TRAVERSE_POST, kill_blkptr, &ka); + ASSERT3U(err, ==, 0); + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || + ds->ds_phys->ds_unique_bytes == 0); + + if (ds->ds_prev != NULL) { + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + VERIFY3U(0, ==, zap_remove_int(mos, + ds->ds_prev->ds_dir->dd_phys->dd_clones, + ds->ds_object, tx)); + } + dsl_dataset_rele(ds->ds_prev, ds); + ds->ds_prev = ds_prev = NULL; + } + } + + /* + * This must be done after the dsl_traverse(), because it will + * re-open the objset. 
+ */ + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; + } + + if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { + /* Erase the link in the dir */ + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; + ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); + err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); + ASSERT(err == 0); + } else { + /* remove from snapshot namespace */ + dsl_dataset_t *ds_head; + ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); + VERIFY(0 == dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); + VERIFY(0 == dsl_dataset_get_snapname(ds)); +#ifdef ZFS_DEBUG + { + uint64_t val; + + err = dsl_dataset_snap_lookup(ds_head, + ds->ds_snapname, &val); + ASSERT3U(err, ==, 0); + ASSERT3U(val, ==, obj); + } +#endif + err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); + ASSERT(err == 0); + dsl_dataset_rele(ds_head, FTAG); + } + + if (ds_prev && ds->ds_prev != ds_prev) + dsl_dataset_rele(ds_prev, FTAG); + + spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); + spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, + "dataset = %llu", ds->ds_object); + + if (ds->ds_phys->ds_next_clones_obj != 0) { + uint64_t count; + ASSERT(0 == zap_count(mos, + ds->ds_phys->ds_next_clones_obj, &count) && count == 0); + VERIFY(0 == dmu_object_free(mos, + ds->ds_phys->ds_next_clones_obj, tx)); + } + if (ds->ds_phys->ds_props_obj != 0) + VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); + if (ds->ds_phys->ds_userrefs_obj != 0) + VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); + dsl_dir_close(ds->ds_dir, ds); + ds->ds_dir = NULL; + dsl_dataset_drain_refs(ds, tag); + VERIFY(0 == dmu_object_free(mos, obj, tx)); + + if (dsda->rm_origin) { + /* + * Remove the origin of the clone we just destroyed. 
+ */ + struct dsl_ds_destroyarg ndsda = {0}; + + ndsda.ds = dsda->rm_origin; + dsl_dataset_destroy_sync(&ndsda, tag, tx); + } +} + +/* + * Space check used when snapshotting ds. Outside syncing context this + * does nothing and returns 0. In syncing context it computes + * MIN(ds_unique_bytes, ds_reserved), returns ENOSPC if that exceeds the + * space available to the dataset's dsl_dir, and otherwise passes a + * non-zero amount to dsl_dir_willuse_space() before returning 0. + */ +static int +dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + uint64_t asize; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + /* + * If there's an fs-only reservation, any blocks that might become + * owned by the snapshot dataset must be accommodated by space + * outside of the reservation. + */ + ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); + asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); + if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) + return (ENOSPC); + + /* + * Propagate any reserved space for this snapshot to other + * snapshot checks in this sync group. + */ + if (asize > 0) + dsl_dir_willuse_space(ds->ds_dir, asize, tx); + + return (0); +} + +int +dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + const char *snapname = arg2; + int err; + uint64_t value; + + /* + * We don't allow multiple snapshots of the same txg. If there + * is already one, try again. + */ + if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) + return (EAGAIN); + + /* + * Check for a conflicting snapshot name. + */ + err = dsl_dataset_snap_lookup(ds, snapname, &value); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + + /* + * Check that the dataset's name is not too long. 
Name consists + * of the dataset's length + 1 for the @-sign + snapshot name's length + */ + if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) + return (ENAMETOOLONG); + + err = dsl_dataset_snapshot_reserve_space(ds, tx); + if (err) + return (err); + + ds->ds_trysnap_txg = tx->tx_txg; + return (0); +} + +void +dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + const char *snapname = arg2; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dmu_buf_t *dbuf; + dsl_dataset_phys_t *dsphys; + uint64_t dsobj, crtxg; + objset_t *mos = dp->dp_meta_objset; + int err; + + ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); + + /* + * The origin's ds_creation_txg has to be < TXG_INITIAL + */ + if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) + crtxg = 1; + else + crtxg = tx->tx_txg; + + dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, + DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); + VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); + dmu_buf_will_dirty(dbuf, tx); + dsphys = dbuf->db_data; + bzero(dsphys, sizeof (dsl_dataset_phys_t)); + dsphys->ds_dir_obj = ds->ds_dir->dd_object; + dsphys->ds_fsid_guid = unique_create(); + (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, + sizeof (dsphys->ds_guid)); + dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; + dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; + dsphys->ds_next_snap_obj = ds->ds_object; + dsphys->ds_num_children = 1; + dsphys->ds_creation_time = gethrestime_sec(); + dsphys->ds_creation_txg = crtxg; + dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; + dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; + dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; + dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; + dsphys->ds_flags = ds->ds_phys->ds_flags; + dsphys->ds_bp = ds->ds_phys->ds_bp; + dmu_buf_rele(dbuf, FTAG); + + ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); + if (ds->ds_prev) 
{ + uint64_t next_clones_obj = + ds->ds_prev->ds_phys->ds_next_clones_obj; + ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == + ds->ds_object || + ds->ds_prev->ds_phys->ds_num_children > 1); + if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, + ds->ds_prev->ds_phys->ds_creation_txg); + ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; + } else if (next_clones_obj != 0) { + remove_from_next_clones(ds->ds_prev, + dsphys->ds_next_snap_obj, tx); + VERIFY3U(0, ==, zap_add_int(mos, + next_clones_obj, dsobj, tx)); + } + } + + /* + * If we have a reference-reservation on this dataset, we will + * need to increase the amount of refreservation being charged + * since our unique space is going to zero. + */ + if (ds->ds_reserved) { + int64_t delta; + ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); + delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, + delta, 0, 0, tx); + } + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", + ds->ds_dir->dd_myname, snapname, dsobj, + ds->ds_phys->ds_prev_snap_txg); + ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, + UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_add_key(&ds->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, tx); + + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); + ds->ds_phys->ds_prev_snap_obj = dsobj; + ds->ds_phys->ds_prev_snap_txg = crtxg; + ds->ds_phys->ds_unique_bytes = 0; + if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) + ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; + + err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, + snapname, 8, 1, &dsobj, tx); + ASSERT(err == 0); + + if (ds->ds_prev) + dsl_dataset_drop_ref(ds->ds_prev, ds); + VERIFY(0 == 
dsl_dataset_get_ref(dp, + ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); + + dsl_scan_ds_snapshotted(ds, tx); + + dsl_dir_snap_cmtime_update(ds->ds_dir); + + spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, + "dataset = %llu", dsobj); +} + +void +dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) +{ + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(ds->ds_objset != NULL); + ASSERT(ds->ds_phys->ds_next_snap_obj == 0); + + /* + * in case we had to change ds_fsid_guid when we opened it, + * sync it out now. + */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; + + dsl_dir_dirty(ds->ds_dir, tx); + dmu_objset_sync(ds->ds_objset, zio, tx); +} + +void +dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) +{ + uint64_t refd, avail, uobjs, aobjs; + + dsl_dir_stats(ds->ds_dir, nv); + + dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, + ds->ds_phys->ds_creation_time); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, + ds->ds_phys->ds_creation_txg); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, + ds->ds_quota); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, + ds->ds_reserved); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, + ds->ds_phys->ds_guid); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, + ds->ds_phys->ds_unique_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, + ds->ds_object); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, + ds->ds_userrefs); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, + DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + + if (ds->ds_phys->ds_next_snap_obj) { + /* + * This is a snapshot; override the dd's space used with + * our unique space and compression ratio. 
+ */ + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + ds->ds_phys->ds_unique_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, + ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes)); + } +} + +void +dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) +{ + stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; + stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; + stat->dds_guid = ds->ds_phys->ds_guid; + if (ds->ds_phys->ds_next_snap_obj) { + stat->dds_is_snapshot = B_TRUE; + stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; + } else { + stat->dds_is_snapshot = B_FALSE; + stat->dds_num_clones = 0; + } + + /* clone origin is really a dsl_dir thing... */ + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(ds->ds_dir)) { + dsl_dataset_t *ods; + + VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dataset_name(ods, stat->dds_origin); + dsl_dataset_drop_ref(ods, FTAG); + } else { + stat->dds_origin[0] = '\0'; + } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); +} + +uint64_t +dsl_dataset_fsid_guid(dsl_dataset_t *ds) +{ + return (ds->ds_fsid_guid); +} + +void +dsl_dataset_space(dsl_dataset_t *ds, + uint64_t *refdbytesp, uint64_t *availbytesp, + uint64_t *usedobjsp, uint64_t *availobjsp) +{ + *refdbytesp = ds->ds_phys->ds_used_bytes; + *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); + if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) + *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; + if (ds->ds_quota != 0) { + /* + * Adjust available bytes according to refquota + */ + if (*refdbytesp < ds->ds_quota) + *availbytesp = MIN(*availbytesp, + ds->ds_quota - *refdbytesp); + else + *availbytesp = 0; + } + *usedobjsp = ds->ds_phys->ds_bp.blk_fill; + *availobjsp = DN_MAX_OBJECT - *usedobjsp; +} + +boolean_t 
+dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || + dsl_pool_sync_context(dp)); + if (ds->ds_prev == NULL) + return (B_FALSE); + if (ds->ds_phys->ds_bp.blk_birth > + ds->ds_prev->ds_phys->ds_creation_txg) { + objset_t *os, *os_prev; + /* + * It may be that only the ZIL differs, because it was + * reset in the head. Don't count that as being + * modified. + */ + if (dmu_objset_from_ds(ds, &os) != 0) + return (B_TRUE); + if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) + return (B_TRUE); + return (bcmp(&os->os_phys->os_meta_dnode, + &os_prev->os_phys->os_meta_dnode, + sizeof (os->os_phys->os_meta_dnode)) != 0); + } + return (B_FALSE); +} + +/* ARGSUSED */ +static int +dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + char *newsnapname = arg2; + dsl_dir_t *dd = ds->ds_dir; + dsl_dataset_t *hds; + uint64_t val; + int err; + + err = dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); + if (err) + return (err); + + /* new name better not be in use */ + err = dsl_dataset_snap_lookup(hds, newsnapname, &val); + dsl_dataset_rele(hds, FTAG); + + if (err == 0) + err = EEXIST; + else if (err == ENOENT) + err = 0; + + /* dataset name + 1 for the "@" + the new snapshot name must fit */ + if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) + err = ENAMETOOLONG; + + return (err); +} + +static void +dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + const char *newsnapname = arg2; + dsl_dir_t *dd = ds->ds_dir; + objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_dataset_t *hds; + int err; + + ASSERT(ds->ds_phys->ds_next_snap_obj != 0); + + VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); + + VERIFY(0 == dsl_dataset_get_snapname(ds)); + err = dsl_dataset_snap_remove(hds, 
ds->ds_snapname, tx); + ASSERT3U(err, ==, 0); + mutex_enter(&ds->ds_lock); + (void) strcpy(ds->ds_snapname, newsnapname); + mutex_exit(&ds->ds_lock); + err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, + ds->ds_snapname, 8, 1, &ds->ds_object, tx); + ASSERT3U(err, ==, 0); + + spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, + "dataset = %llu", ds->ds_object); + dsl_dataset_rele(hds, FTAG); +} + +struct renamesnaparg { + dsl_sync_task_group_t *dstg; + char failed[MAXPATHLEN]; + char *oldsnap; + char *newsnap; +}; + +static int +dsl_snapshot_rename_one(const char *name, void *arg) +{ + struct renamesnaparg *ra = arg; + dsl_dataset_t *ds = NULL; + char *snapname; + int err; + + snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); + (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); + + /* + * For recursive snapshot renames the parent won't be changing + * so we just pass name for both the to/from argument. + */ + err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); + if (err != 0) { + strfree(snapname); + return (err == ENOENT ? 0 : err); + } + +#ifdef _KERNEL + /* + * For all filesystems undergoing rename, we'll need to unmount it. + */ + (void) zfs_unmount_snap(snapname, NULL); +#endif + err = dsl_dataset_hold(snapname, ra->dstg, &ds); + strfree(snapname); + if (err != 0) + return (err == ENOENT ? 
0 : err); + + dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, + dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); + + return (0); +} + +static int +dsl_recursive_rename(char *oldname, const char *newname) +{ + int err; + struct renamesnaparg *ra; + dsl_sync_task_t *dst; + spa_t *spa; + char *cp, *fsname = spa_strdup(oldname); + int len = strlen(oldname) + 1; + + /* truncate the snapshot name to get the fsname */ + cp = strchr(fsname, '@'); + *cp = '\0'; + + err = spa_open(fsname, &spa, FTAG); + if (err) { + kmem_free(fsname, len); + return (err); + } + ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); + ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + + ra->oldsnap = strchr(oldname, '@') + 1; + ra->newsnap = strchr(newname, '@') + 1; + *ra->failed = '\0'; + + err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, + DS_FIND_CHILDREN); + kmem_free(fsname, len); + + if (err == 0) { + err = dsl_sync_task_group_wait(ra->dstg); + } + + for (dst = list_head(&ra->dstg->dstg_tasks); dst; + dst = list_next(&ra->dstg->dstg_tasks, dst)) { + dsl_dataset_t *ds = dst->dst_arg1; + if (dst->dst_err) { + dsl_dir_name(ds->ds_dir, ra->failed); + (void) strlcat(ra->failed, "@", sizeof (ra->failed)); + (void) strlcat(ra->failed, ra->newsnap, + sizeof (ra->failed)); + } + dsl_dataset_rele(ds, ra->dstg); + } + + if (err) + (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); + + dsl_sync_task_group_destroy(ra->dstg); + kmem_free(ra, sizeof (struct renamesnaparg)); + spa_close(spa, FTAG); + return (err); +} + +static int +dsl_valid_rename(const char *oldname, void *arg) +{ + int delta = *(int *)arg; + + if (strlen(oldname) + delta >= MAXNAMELEN) + return (ENAMETOOLONG); + + return (0); +} + +#pragma weak dmu_objset_rename = dsl_dataset_rename +int +dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) +{ + dsl_dir_t *dd; + dsl_dataset_t *ds; + const char *tail; + int err; + + err = dsl_dir_open(oldname, FTAG, &dd, 
&tail); + if (err) + return (err); + + if (tail == NULL) { + int delta = strlen(newname) - strlen(oldname); + + /* if we're growing, validate child name lengths */ + if (delta > 0) + err = dmu_objset_find(oldname, dsl_valid_rename, + &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + + if (err == 0) + err = dsl_dir_rename(dd, newname); + dsl_dir_close(dd, FTAG); + return (err); + } + + if (tail[0] != '@') { + /* the name ended in a nonexistent component */ + dsl_dir_close(dd, FTAG); + return (ENOENT); + } + + dsl_dir_close(dd, FTAG); + + /* new name must be snapshot in same filesystem */ + tail = strchr(newname, '@'); + if (tail == NULL) + return (EINVAL); + tail++; + if (strncmp(oldname, newname, tail - newname) != 0) + return (EXDEV); + + if (recursive) { + err = dsl_recursive_rename(oldname, newname); + } else { + err = dsl_dataset_hold(oldname, FTAG, &ds); + if (err) + return (err); + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_snapshot_rename_check, + dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); + + dsl_dataset_rele(ds, FTAG); + } + + return (err); +} + +struct promotenode { + list_node_t link; + dsl_dataset_t *ds; +}; + +struct promotearg { + list_t shared_snaps, origin_snaps, clone_snaps; + dsl_dataset_t *origin_origin; + uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; + char *err_ds; +}; + +static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); +static boolean_t snaplist_unstable(list_t *l); + +static int +dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *hds = arg1; + struct promotearg *pa = arg2; + struct promotenode *snap = list_head(&pa->shared_snaps); + dsl_dataset_t *origin_ds = snap->ds; + int err; + uint64_t unused; + + /* Check that it is a real clone */ + if (!dsl_dir_is_clone(hds->ds_dir)) + return (EINVAL); + + /* Since this is so expensive, don't do the preliminary check */ + if (!dmu_tx_is_syncing(tx)) + return (0); + + if (hds->ds_phys->ds_flags & 
DS_FLAG_NOPROMOTE) + return (EXDEV); + + /* compute origin's new unique space */ + snap = list_tail(&pa->clone_snaps); + ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); + dsl_deadlist_space_range(&snap->ds->ds_deadlist, + origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &pa->unique, &unused, &unused); + + /* + * Walk the snapshots that we are moving + * + * Compute space to transfer. Consider the incremental changes + * to used for each snapshot: + * (my used) = (prev's used) + (blocks born) - (blocks killed) + * So each snapshot gave birth to: + * (blocks born) = (my used) - (prev's used) + (blocks killed) + * So a sequence would look like: + * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) + * Which simplifies to: + * uN + kN + kN-1 + ... + k1 + k0 + * Note however, if we stop before we reach the ORIGIN we get: + * uN + kN + kN-1 + ... + kM - uM-1 + */ + pa->used = origin_ds->ds_phys->ds_used_bytes; + pa->comp = origin_ds->ds_phys->ds_compressed_bytes; + pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; + for (snap = list_head(&pa->shared_snaps); snap; + snap = list_next(&pa->shared_snaps, snap)) { + uint64_t val, dlused, dlcomp, dluncomp; + dsl_dataset_t *ds = snap->ds; + + /* Check that the snapshot name does not conflict */ + VERIFY(0 == dsl_dataset_get_snapname(ds)); + err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); + if (err == 0) { + err = EEXIST; + goto out; + } + if (err != ENOENT) + goto out; + + /* The very first snapshot does not have a deadlist */ + if (ds->ds_phys->ds_prev_snap_obj == 0) + continue; + + dsl_deadlist_space(&ds->ds_deadlist, + &dlused, &dlcomp, &dluncomp); + pa->used += dlused; + pa->comp += dlcomp; + pa->uncomp += dluncomp; + } + + /* + * If we are a clone of a clone then we never reached ORIGIN, + * so we need to subtract out the clone origin's used space. 
+ */ + if (pa->origin_origin) { + pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; + pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; + pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; + } + + /* Check that there is enough space here */ + err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, + pa->used); + if (err) + return (err); + + /* + * Compute the amounts of space that will be used by snapshots + * after the promotion (for both origin and clone). For each, + * it is the amount of space that will be on all of their + * deadlists (that was not born before their new origin). + */ + if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + uint64_t space; + + /* + * Note, typically this will not be a clone of a clone, + * so dd_origin_txg will be < TXG_INITIAL, so + * these snaplist_space() -> dsl_deadlist_space_range() + * calls will be fast because they do not have to + * iterate over all bps. + */ + snap = list_head(&pa->origin_snaps); + err = snaplist_space(&pa->shared_snaps, + snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); + if (err) + return (err); + + err = snaplist_space(&pa->clone_snaps, + snap->ds->ds_dir->dd_origin_txg, &space); + if (err) + return (err); + pa->cloneusedsnap += space; + } + if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + err = snaplist_space(&pa->origin_snaps, + origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); + if (err) + return (err); + } + + return (0); +out: + pa->err_ds = snap->ds->ds_snapname; + return (err); +} + +static void +dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *hds = arg1; + struct promotearg *pa = arg2; + struct promotenode *snap = list_head(&pa->shared_snaps); + dsl_dataset_t *origin_ds = snap->ds; + dsl_dataset_t *origin_head; + dsl_dir_t *dd = hds->ds_dir; + dsl_pool_t *dp = hds->ds_dir->dd_pool; + dsl_dir_t *odd = NULL; + uint64_t oldnext_obj; + int64_t delta; + + ASSERT(0 == 
(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); + + snap = list_head(&pa->origin_snaps); + origin_head = snap->ds; + + /* + * We need to explicitly open odd, since origin_ds's dd will be + * changing. + */ + VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, + NULL, FTAG, &odd)); + + /* change origin's next snap */ + dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); + oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; + snap = list_tail(&pa->clone_snaps); + ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); + origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; + + /* change the origin's next clone */ + if (origin_ds->ds_phys->ds_next_clones_obj) { + remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); + VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + origin_ds->ds_phys->ds_next_clones_obj, + oldnext_obj, tx)); + } + + /* change origin */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); + dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; + dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; + dmu_buf_will_dirty(odd->dd_dbuf, tx); + odd->dd_phys->dd_origin_obj = origin_ds->ds_object; + origin_head->ds_dir->dd_origin_txg = + origin_ds->ds_phys->ds_creation_txg; + + /* change dd_clone entries */ + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + odd->dd_phys->dd_clones, hds->ds_object, tx)); + VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + pa->origin_origin->ds_dir->dd_phys->dd_clones, + hds->ds_object, tx)); + + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + pa->origin_origin->ds_dir->dd_phys->dd_clones, + origin_head->ds_object, tx)); + if (dd->dd_phys->dd_clones == 0) { + dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, + DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + } + VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + dd->dd_phys->dd_clones, origin_head->ds_object, tx)); + + } + 
+ /* move snapshots to this dir */ + for (snap = list_head(&pa->shared_snaps); snap; + snap = list_next(&pa->shared_snaps, snap)) { + dsl_dataset_t *ds = snap->ds; + + /* unregister props as dsl_dir is changing */ + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; + } + /* move snap name entry */ + VERIFY(0 == dsl_dataset_get_snapname(ds)); + VERIFY(0 == dsl_dataset_snap_remove(origin_head, + ds->ds_snapname, tx)); + VERIFY(0 == zap_add(dp->dp_meta_objset, + hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, + 8, 1, &ds->ds_object, tx)); + + /* change containing dsl_dir */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); + ds->ds_phys->ds_dir_obj = dd->dd_object; + ASSERT3P(ds->ds_dir, ==, odd); + dsl_dir_close(ds->ds_dir, ds); + VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, + NULL, ds, &ds->ds_dir)); + + /* move any clone references */ + if (ds->ds_phys->ds_next_clones_obj && + spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + zap_cursor_t zc; + zap_attribute_t za; + + for (zap_cursor_init(&zc, dp->dp_meta_objset, + ds->ds_phys->ds_next_clones_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *cnds; + uint64_t o; + + if (za.za_first_integer == oldnext_obj) { + /* + * We've already moved the + * origin's reference. + */ + continue; + } + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + za.za_first_integer, FTAG, &cnds)); + o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; + + VERIFY3U(zap_remove_int(dp->dp_meta_objset, + odd->dd_phys->dd_clones, o, tx), ==, 0); + VERIFY3U(zap_add_int(dp->dp_meta_objset, + dd->dd_phys->dd_clones, o, tx), ==, 0); + dsl_dataset_rele(cnds, FTAG); + } + zap_cursor_fini(&zc); + } + + ASSERT3U(dsl_prop_numcb(ds), ==, 0); + } + + /* + * Change space accounting. + * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either + * both be valid, or both be 0 (resulting in delta == 0). 
This + * is true for each of {clone,origin} independently. + */ + + delta = pa->cloneusedsnap - + dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + ASSERT3S(delta, >=, 0); + ASSERT3U(pa->used, >=, delta); + dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); + dsl_dir_diduse_space(dd, DD_USED_HEAD, + pa->used - delta, pa->comp, pa->uncomp, tx); + + delta = pa->originusedsnap - + odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + ASSERT3S(delta, <=, 0); + ASSERT3U(pa->used, >=, -delta); + dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); + dsl_dir_diduse_space(odd, DD_USED_HEAD, + -pa->used - delta, -pa->comp, -pa->uncomp, tx); + + origin_ds->ds_phys->ds_unique_bytes = pa->unique; + + /* log history record */ + spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, + "dataset = %llu", hds->ds_object); + + dsl_dir_close(odd, FTAG); +} + +static char *snaplist_tag = "snaplist"; +/* + * Make a list of dsl_dataset_t's for the snapshots between first_obj + * (exclusive) and last_obj (inclusive). The list will be in reverse + * order (last_obj will be the list_head()). If first_obj == 0, do all + * snapshots back to this dataset's origin. 
+ */ +static int +snaplist_make(dsl_pool_t *dp, boolean_t own, + uint64_t first_obj, uint64_t last_obj, list_t *l) +{ + uint64_t obj = last_obj; + + ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); + + list_create(l, sizeof (struct promotenode), + offsetof(struct promotenode, link)); + + while (obj != first_obj) { + dsl_dataset_t *ds; + struct promotenode *snap; + int err; + + if (own) { + err = dsl_dataset_own_obj(dp, obj, + 0, snaplist_tag, &ds); + if (err == 0) + dsl_dataset_make_exclusive(ds, snaplist_tag); + } else { + err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); + } + if (err == ENOENT) { + /* lost race with snapshot destroy */ + struct promotenode *last = list_tail(l); + ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); + obj = last->ds->ds_phys->ds_prev_snap_obj; + continue; + } else if (err) { + return (err); + } + + if (first_obj == 0) + first_obj = ds->ds_dir->dd_phys->dd_origin_obj; + + snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); + snap->ds = ds; + list_insert_tail(l, snap); + obj = ds->ds_phys->ds_prev_snap_obj; + } + + return (0); +} + +static int +snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) +{ + struct promotenode *snap; + + *spacep = 0; + for (snap = list_head(l); snap; snap = list_next(l, snap)) { + uint64_t used, comp, uncomp; + dsl_deadlist_space_range(&snap->ds->ds_deadlist, + mintxg, UINT64_MAX, &used, &comp, &uncomp); + *spacep += used; + } + return (0); +} + +static void +snaplist_destroy(list_t *l, boolean_t own) +{ + struct promotenode *snap; + + if (!l || !list_link_active(&l->list_head)) + return; + + while ((snap = list_tail(l)) != NULL) { + list_remove(l, snap); + if (own) + dsl_dataset_disown(snap->ds, snaplist_tag); + else + dsl_dataset_rele(snap->ds, snaplist_tag); + kmem_free(snap, sizeof (struct promotenode)); + } + list_destroy(l); +} + +/* + * Promote a clone. 
Nomenclature note: + * "clone" or "cds": the original clone which is being promoted + * "origin" or "ods": the snapshot which is originally clone's origin + * "origin head" or "ohds": the dataset which is the head + * (filesystem/volume) for the origin + * "origin origin": the origin of the origin's filesystem (typically + * NULL, indicating that the clone is not a clone of a clone). + */ +int +dsl_dataset_promote(const char *name, char *conflsnap) +{ + dsl_dataset_t *ds; + dsl_dir_t *dd; + dsl_pool_t *dp; + dmu_object_info_t doi; + struct promotearg pa = { 0 }; + struct promotenode *snap; + int err; + + err = dsl_dataset_hold(name, FTAG, &ds); + if (err) + return (err); + dd = ds->ds_dir; + dp = dd->dd_pool; + + err = dmu_object_info(dp->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, &doi); + if (err) { + dsl_dataset_rele(ds, FTAG); + return (err); + } + + if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + /* + * We are going to inherit all the snapshots taken before our + * origin (i.e., our new origin will be our parent's origin). + * Take ownership of them so that we can rename them into our + * namespace. 
+ */ + rw_enter(&dp->dp_config_rwlock, RW_READER); + + err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, + &pa.shared_snaps); + if (err != 0) + goto out; + + err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); + if (err != 0) + goto out; + + snap = list_head(&pa.shared_snaps); + ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); + err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, + snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); + if (err != 0) + goto out; + + if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { + err = dsl_dataset_hold_obj(dp, + snap->ds->ds_dir->dd_phys->dd_origin_obj, + FTAG, &pa.origin_origin); + if (err != 0) + goto out; + } + +out: + rw_exit(&dp->dp_config_rwlock); + + /* + * Add in 128x the snapnames zapobj size, since we will be moving + * a bunch of snapnames to the promoted ds, and dirtying their + * bonus buffers. + */ + if (err == 0) { + err = dsl_sync_task_do(dp, dsl_dataset_promote_check, + dsl_dataset_promote_sync, ds, &pa, + 2 + 2 * doi.doi_physical_blocks_512); + if (err && pa.err_ds && conflsnap) + (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); + } + + snaplist_destroy(&pa.shared_snaps, B_TRUE); + snaplist_destroy(&pa.clone_snaps, B_FALSE); + snaplist_destroy(&pa.origin_snaps, B_FALSE); + if (pa.origin_origin) + dsl_dataset_rele(pa.origin_origin, FTAG); + dsl_dataset_rele(ds, FTAG); + return (err); +} + +struct cloneswaparg { + dsl_dataset_t *cds; /* clone dataset */ + dsl_dataset_t *ohds; /* origin's head dataset */ + boolean_t force; + int64_t unused_refres_delta; /* change in unconsumed refreservation */ +}; + +/* ARGSUSED */ +static int +dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + struct cloneswaparg *csa = arg1; + + /* they should both be heads */ + if (dsl_dataset_is_snapshot(csa->cds) || + dsl_dataset_is_snapshot(csa->ohds)) + return (EINVAL); + + /* the branch point should be just before them */ + if (csa->cds->ds_prev != 
csa->ohds->ds_prev) + return (EINVAL); + + /* cds should be the clone (unless they are unrelated) */ + if (csa->cds->ds_prev != NULL && + csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && + csa->ohds->ds_object != + csa->cds->ds_prev->ds_phys->ds_next_snap_obj) + return (EINVAL); + + /* the clone should be a child of the origin */ + if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) + return (EINVAL); + + /* ohds shouldn't be modified unless 'force' */ + if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) + return (ETXTBSY); + + /* adjust amount of any unconsumed refreservation */ + csa->unused_refres_delta = + (int64_t)MIN(csa->ohds->ds_reserved, + csa->ohds->ds_phys->ds_unique_bytes) - + (int64_t)MIN(csa->ohds->ds_reserved, + csa->cds->ds_phys->ds_unique_bytes); + + if (csa->unused_refres_delta > 0 && + csa->unused_refres_delta > + dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) + return (ENOSPC); + + if (csa->ohds->ds_quota != 0 && + csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) + return (EDQUOT); + + return (0); +} + +/* ARGSUSED */ +static void +dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + struct cloneswaparg *csa = arg1; + dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; + + ASSERT(csa->cds->ds_reserved == 0); + ASSERT(csa->ohds->ds_quota == 0 || + csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); + + dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); + dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); + + if (csa->cds->ds_objset != NULL) { + dmu_objset_evict(csa->cds->ds_objset); + csa->cds->ds_objset = NULL; + } + + if (csa->ohds->ds_objset != NULL) { + dmu_objset_evict(csa->ohds->ds_objset); + csa->ohds->ds_objset = NULL; + } + + /* + * Reset origin's unique bytes, if it exists. 
+ */ + if (csa->cds->ds_prev) { + dsl_dataset_t *origin = csa->cds->ds_prev; + uint64_t comp, uncomp; + + dmu_buf_will_dirty(origin->ds_dbuf, tx); + dsl_deadlist_space_range(&csa->cds->ds_deadlist, + origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); + } + + /* swap blkptrs */ + { + blkptr_t tmp; + tmp = csa->ohds->ds_phys->ds_bp; + csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; + csa->cds->ds_phys->ds_bp = tmp; + } + + /* set dd_*_bytes */ + { + int64_t dused, dcomp, duncomp; + uint64_t cdl_used, cdl_comp, cdl_uncomp; + uint64_t odl_used, odl_comp, odl_uncomp; + + ASSERT3U(csa->cds->ds_dir->dd_phys-> + dd_used_breakdown[DD_USED_SNAP], ==, 0); + + dsl_deadlist_space(&csa->cds->ds_deadlist, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space(&csa->ohds->ds_deadlist, + &odl_used, &odl_comp, &odl_uncomp); + + dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - + (csa->ohds->ds_phys->ds_used_bytes + odl_used); + dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - + (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); + duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + + cdl_uncomp - + (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); + + dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, + dused, dcomp, duncomp, tx); + dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, + -dused, -dcomp, -duncomp, tx); + + /* + * The difference in the space used by snapshots is the + * difference in snapshot space due to the head's + * deadlist (since that's the only thing that's + * changing that affects the snapused). 
+ */ + dsl_deadlist_space_range(&csa->cds->ds_deadlist, + csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space_range(&csa->ohds->ds_deadlist, + csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, + &odl_used, &odl_comp, &odl_uncomp); + dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, + DD_USED_HEAD, DD_USED_SNAP, tx); + } + + /* swap ds_*_bytes */ + SWITCH64(csa->ohds->ds_phys->ds_used_bytes, + csa->cds->ds_phys->ds_used_bytes); + SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, + csa->cds->ds_phys->ds_compressed_bytes); + SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, + csa->cds->ds_phys->ds_uncompressed_bytes); + SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, + csa->cds->ds_phys->ds_unique_bytes); + + /* apply any parent delta for change in unconsumed refreservation */ + dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, + csa->unused_refres_delta, 0, 0, tx); + + /* + * Swap deadlists. + */ + dsl_deadlist_close(&csa->cds->ds_deadlist); + dsl_deadlist_close(&csa->ohds->ds_deadlist); + SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, + csa->cds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, + csa->cds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, + csa->ohds->ds_phys->ds_deadlist_obj); + + dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); +} + +/* + * Swap 'clone' with its origin head datasets. Used at the end of "zfs + * recv" into an existing fs to swizzle the file system to the new + * version, and by "zfs rollback". Can also be used to swap two + * independent head datasets if neither has any snapshots. + */ +int +dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, + boolean_t force) +{ + struct cloneswaparg csa; + int error; + + ASSERT(clone->ds_owner); + ASSERT(origin_head->ds_owner); +retry: + /* + * Need exclusive access for the swap. 
If we're swapping these + * datasets back after an error, we already hold the locks. + */ + if (!RW_WRITE_HELD(&clone->ds_rwlock)) + rw_enter(&clone->ds_rwlock, RW_WRITER); + if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && + !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { + rw_exit(&clone->ds_rwlock); + rw_enter(&origin_head->ds_rwlock, RW_WRITER); + if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { + rw_exit(&origin_head->ds_rwlock); + goto retry; + } + } + csa.cds = clone; + csa.ohds = origin_head; + csa.force = force; + error = dsl_sync_task_do(clone->ds_dir->dd_pool, + dsl_dataset_clone_swap_check, + dsl_dataset_clone_swap_sync, &csa, NULL, 9); + return (error); +} + +/* + * Given a pool name and a dataset object number in that pool, + * return the name of that dataset. + */ +int +dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) +{ + spa_t *spa; + dsl_pool_t *dp; + dsl_dataset_t *ds; + int error; + + if ((error = spa_open(pname, &spa, FTAG)) != 0) + return (error); + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { + dsl_dataset_name(ds, buf); + dsl_dataset_rele(ds, FTAG); + } + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); + + return (error); +} + +int +dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, + uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) +{ + int error = 0; + + ASSERT3S(asize, >, 0); + + /* + * *ref_rsrv is the portion of asize that will come from any + * unconsumed refreservation space. + */ + *ref_rsrv = 0; + + mutex_enter(&ds->ds_lock); + /* + * Make a space adjustment for reserved bytes. 
+ */ + if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { + ASSERT3U(*used, >=, + ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + *ref_rsrv = + asize - MIN(asize, parent_delta(ds, asize + inflight)); + } + + if (!check_quota || ds->ds_quota == 0) { + mutex_exit(&ds->ds_lock); + return (0); + } + /* + * If they are requesting more space, and our current estimate + * is over quota, they get to try again unless the actual + * on-disk is over quota and there are no pending changes (which + * may free up space for us). + */ + if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { + if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) + error = ERESTART; + else + error = EDQUOT; + } + mutex_exit(&ds->ds_lock); + + return (error); +} + +/* ARGSUSED */ +static int +dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_prop_setarg_t *psa = arg2; + int err; + + if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) + return (ENOTSUP); + + if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) + return (err); + + if (psa->psa_effective_value == 0) + return (0); + + if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes || + psa->psa_effective_value < ds->ds_reserved) + return (ENOSPC); + + return (0); +} + +extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); + +void +dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_prop_setarg_t *psa = arg2; + uint64_t effective_value = psa->psa_effective_value; + + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); + + if (ds->ds_quota != effective_value) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_quota = effective_value; + + spa_history_log_internal(LOG_DS_REFQUOTA, + ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ", + (longlong_t)ds->ds_quota, ds->ds_object); + } +} + +int 
+dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota)
+{
+	dsl_dataset_t *ds;
+	dsl_prop_setarg_t psa;
+	int err;
+
+	/* set the "refquota" property of 'dsname' to 'quota' */
+	dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota);
+
+	err = dsl_dataset_hold(dsname, FTAG, &ds);
+	if (err)
+		return (err);
+
+	/*
+	 * If someone removes a file, then tries to set the quota, we
+	 * want to make sure the file freeing takes effect.
+	 */
+	txg_wait_open(ds->ds_dir->dd_pool, 0);
+
+	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+	    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
+	    ds, &psa, 0);
+
+	dsl_dataset_rele(ds, FTAG);
+	return (err);
+}
+
+/*
+ * Check function for setting the reservation; arg1 is the dataset,
+ * arg2 the dsl_prop_setarg_t.  Returns ENOTSUP on pools older than
+ * SPA_VERSION_REFRESERVATION, EINVAL for snapshots, and (in syncing
+ * context only) ENOSPC if the increase does not fit in the available
+ * space or the new value exceeds the dataset's quota.
+ */
+static int
+dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	dsl_prop_setarg_t *psa = arg2;
+	uint64_t effective_value;
+	uint64_t unique;
+	int err;
+
+	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
+	    SPA_VERSION_REFRESERVATION)
+		return (ENOTSUP);
+
+	if (dsl_dataset_is_snapshot(ds))
+		return (EINVAL);
+
+	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
+		return (err);
+
+	effective_value = psa->psa_effective_value;
+
+	/*
+	 * If we are doing the preliminary check in open context, the
+	 * space estimates may be inaccurate.
+	 */
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
+	mutex_enter(&ds->ds_lock);
+	if (!DS_UNIQUE_IS_ACCURATE(ds))
+		dsl_dataset_recalc_head_uniq(ds);
+	unique = ds->ds_phys->ds_unique_bytes;
+	mutex_exit(&ds->ds_lock);
+
+	if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) {
+		uint64_t delta = MAX(unique, effective_value) -
+		    MAX(unique, ds->ds_reserved);
+
+		if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
+			return (ENOSPC);
+		if (ds->ds_quota > 0 &&
+		    effective_value > ds->ds_quota)
+			return (ENOSPC);
+	}
+
+	return (0);
+}
+
+/*
+ * Sync function for setting the reservation: stores the property,
+ * updates ds->ds_reserved and charges the change in unconsumed
+ * reservation to the dsl_dir as DD_USED_REFRSRV.
+ */
+static void
+dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	dsl_prop_setarg_t *psa = arg2;
+	uint64_t effective_value = psa->psa_effective_value;
+	uint64_t unique;
+	int64_t delta;
+
+	dsl_prop_set_sync(ds, psa, tx);
+	DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
+
+	dmu_buf_will_dirty(ds->ds_dbuf, tx);
+
+	mutex_enter(&ds->ds_dir->dd_lock);
+	mutex_enter(&ds->ds_lock);
+	ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
+	unique = ds->ds_phys->ds_unique_bytes;
+	/* new unconsumed reservation minus old unconsumed reservation */
+	delta = MAX(0, (int64_t)(effective_value - unique)) -
+	    MAX(0, (int64_t)(ds->ds_reserved - unique));
+	ds->ds_reserved = effective_value;
+	mutex_exit(&ds->ds_lock);
+
+	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
+	mutex_exit(&ds->ds_dir->dd_lock);
+
+	spa_history_log_internal(LOG_DS_REFRESERV,
+	    ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu",
+	    (longlong_t)effective_value, ds->ds_object);
+}
+
+/*
+ * Set the "refreservation" property of 'dsname' to 'reservation'.
+ * Returns 0 or an errno value from the hold or the sync task.
+ */
+int
+dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
+    uint64_t reservation)
+{
+	dsl_dataset_t *ds;
+	dsl_prop_setarg_t psa;
+	int err;
+
+	dsl_prop_setarg_init_uint64(&psa, "refreservation", source,
+	    &reservation);
+
+	err = dsl_dataset_hold(dsname, FTAG, &ds);
+	if (err)
+		return (err);
+
+	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+	    dsl_dataset_set_reservation_check,
+	    dsl_dataset_set_reservation_sync, ds, &psa, 0);
+
+	dsl_dataset_rele(ds, FTAG);
+	return (err);
+}
+
+/* What the on-exit callback needs in order to release one hold. */
+typedef struct zfs_hold_cleanup_arg {
+	dsl_pool_t *dp;
+	uint64_t dsobj;
+	char htag[MAXNAMELEN];
+} zfs_hold_cleanup_arg_t;
+
+/*
+ * On-exit callback: release the hold described by 'arg' (errors are
+ * ignored) and free 'arg'.
+ */
+static void
+dsl_dataset_user_release_onexit(void *arg)
+{
+	zfs_hold_cleanup_arg_t *ca = arg;
+
+	(void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
+	    B_TRUE);
+	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
+}
+
+/*
+ * Register an on-exit callback for 'minor' that releases the hold
+ * 'htag' on 'ds'.  The tag is copied, truncated to MAXNAMELEN - 1
+ * characters if longer.
+ */
+void
+dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
+    minor_t minor)
+{
+	zfs_hold_cleanup_arg_t *ca;
+
+	ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
+	ca->dp = ds->ds_dir->dd_pool;
+	ca->dsobj = ds->ds_object;
+	(void) strlcpy(ca->htag, htag, sizeof (ca->htag));
+	VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
+	    dsl_dataset_user_release_onexit, ca, NULL));
+}
+
+/*
+ * If you add new checks here, you may need to add
+ * additional checks to the "temporary" case in
+ * snapshot_check() in dmu_objset.c.
+ *
+ * Check function for placing a user hold; arg1 is the snapshot, arg2
+ * the struct dsl_ds_holdarg.  Returns ENOTSUP on pools older than
+ * SPA_VERSION_USERREFS, EINVAL if the dataset is not a snapshot, EEXIST
+ * if the tag is already present, E2BIG if a temporary hold's tag is too
+ * long, or another error from zap_lookup().
+ */
+static int
+dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	struct dsl_ds_holdarg *ha = arg2;
+	char *htag = ha->htag;
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	uint64_t tmp;
+	int error = 0;
+
+	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
+		return (ENOTSUP);
+
+	if (!dsl_dataset_is_snapshot(ds))
+		return (EINVAL);
+
+	/* tags must be unique */
+	mutex_enter(&ds->ds_lock);
+	if (ds->ds_phys->ds_userrefs_obj) {
+		/*
+		 * Only the existence of the tag matters, but the value
+		 * found is stored through the last argument, so it must
+		 * point at scratch space and not at the tx.
+		 */
+		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
+		    8, 1, &tmp);
+		if (error == 0)
+			error = EEXIST;
+		else if (error == ENOENT)
+			error = 0;
+	}
+	mutex_exit(&ds->ds_lock);
+
+	if (error == 0 && ha->temphold &&
+	    strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
+		error = E2BIG;
+
+	return (error);
+}
+
+/*
+ * Sync function for placing a user hold: creates the userrefs zap
+ * object on first use, bumps ds->ds_userrefs, records the tag with the
+ * current time, and for temporary holds also records it with
+ * dsl_pool_user_hold().
+ */
+void
+dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	struct dsl_ds_holdarg *ha = arg2;
+	char *htag = ha->htag;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+	uint64_t now = gethrestime_sec();
+	uint64_t zapobj;
+
+	mutex_enter(&ds->ds_lock);
+	if (ds->ds_phys->ds_userrefs_obj == 0) {
+		/*
+		 * This is the first user hold for this dataset.  Create
+		 * the userrefs zap object.
+		 */
+		dmu_buf_will_dirty(ds->ds_dbuf, tx);
+		zapobj = ds->ds_phys->ds_userrefs_obj =
+		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
+	} else {
+		zapobj = ds->ds_phys->ds_userrefs_obj;
+	}
+	ds->ds_userrefs++;
+	mutex_exit(&ds->ds_lock);
+
+	VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
+
+	if (ha->temphold) {
+		VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object,
+		    htag, &now, tx));
+	}
+
+	spa_history_log_internal(LOG_DS_USER_HOLD,
+	    dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag,
+	    (int)ha->temphold, ds->ds_object);
+}
+
+/*
+ * Hold the snapshot dsname@ha->snapname and add a user-hold task for it
+ * to ha->dstg.  The dataset hold is tagged with ha->dstg and is dropped
+ * by dsl_dataset_user_hold().  A missing snapshot is not an error for a
+ * recursive hold; on any other failure ha->failed is set to 'dsname'.
+ */
+static int
+dsl_dataset_user_hold_one(const char *dsname, void *arg)
+{
+	struct dsl_ds_holdarg *ha = arg;
+	dsl_dataset_t *ds;
+	int error;
+	char *name;
+
+	/* alloc a buffer to hold dsname@snapname plus terminating NUL */
+	name = kmem_asprintf("%s@%s", dsname, ha->snapname);
+	error = dsl_dataset_hold(name, ha->dstg, &ds);
+	strfree(name);
+	if (error == 0) {
+		ha->gotone = B_TRUE;
+		dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
+		    dsl_dataset_user_hold_sync, ds, ha, 0);
+	} else if (error == ENOENT && ha->recursive) {
+		error = 0;
+	} else {
+		(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+	}
+	return (error);
+}
+
+/*
+ * Place the user hold 'htag' (temporary if 'temphold') on the already
+ * held dataset 'ds' with a single sync task.
+ */
+int
+dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
+    boolean_t temphold)
+{
+	struct dsl_ds_holdarg *ha;
+	int error;
+
+	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+	ha->htag = htag;
+	ha->temphold = temphold;
+	error = dsl_sync_task_do(ds->ds_dir->dd_pool,
+	    dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
+	    ds, ha, 0);
+	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+
+	return (error);
+}
+
+/*
+ * Place the user hold 'htag' on dsname@snapname, and on the same-named
+ * snapshot of every descendant if 'recursive'.  If cleanup_fd is not
+ * -1 the hold must be temporary and is registered for release when
+ * that descriptor's on-exit callbacks run.
+ *
+ * On failure the name of the dataset that failed is copied back into
+ * 'dsname', which is therefore an in/out buffer.
+ * NOTE(review): the copy-back is bounded by sizeof (ha->failed), not by
+ * the caller's buffer size -- callers presumably pass a buffer at least
+ * that large; confirm.
+ */
+int
+dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
+    boolean_t recursive, boolean_t temphold, int cleanup_fd)
+{
+	struct dsl_ds_holdarg *ha;
+	dsl_sync_task_t *dst;
+	spa_t *spa;
+	int error;
+	minor_t minor = 0;
+
+	if (cleanup_fd != -1) {
+		/* Currently we only support cleanup-on-exit of tempholds. */
+		if (!temphold)
+			return (EINVAL);
+		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (error)
+			return (error);
+	}
+
+	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+
+	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+
+	error = spa_open(dsname, &spa, FTAG);
+	if (error) {
+		kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+		if (cleanup_fd != -1)
+			zfs_onexit_fd_rele(cleanup_fd);
+		return (error);
+	}
+
+	ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+	ha->htag = htag;
+	ha->snapname = snapname;
+	ha->recursive = recursive;
+	ha->temphold = temphold;
+
+	if (recursive) {
+		error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
+		    ha, DS_FIND_CHILDREN);
+	} else {
+		error = dsl_dataset_user_hold_one(dsname, ha);
+	}
+	if (error == 0)
+		error = dsl_sync_task_group_wait(ha->dstg);
+
+	/* drop the holds taken by dsl_dataset_user_hold_one() */
+	for (dst = list_head(&ha->dstg->dstg_tasks); dst;
+	    dst = list_next(&ha->dstg->dstg_tasks, dst)) {
+		dsl_dataset_t *ds = dst->dst_arg1;
+
+		if (dst->dst_err) {
+			/* report the filesystem part of the failed name */
+			dsl_dataset_name(ds, ha->failed);
+			*strchr(ha->failed, '@') = '\0';
+		} else if (error == 0 && minor != 0 && temphold) {
+			/*
+			 * If this hold is to be released upon process exit,
+			 * register that action now.
+			 */
+			dsl_register_onexit_hold_cleanup(ds, htag, minor);
+		}
+		dsl_dataset_rele(ds, ha->dstg);
+	}
+
+	if (error == 0 && recursive && !ha->gotone)
+		error = ENOENT;
+
+	if (error)
+		(void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
+
+	dsl_sync_task_group_destroy(ha->dstg);
+
+	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+	spa_close(spa, FTAG);
+	if (cleanup_fd != -1)
+		zfs_onexit_fd_rele(cleanup_fd);
+	return (error);
+}
+
+/* Arguments of the user-release sync task. */
+struct dsl_ds_releasearg {
+	dsl_dataset_t *ds;
+	const char *htag;
+	boolean_t own;		/* do we own or just hold ds? */
+};
+
+/*
+ * Verify that the user hold 'htag' exists on 'ds' and report in
+ * *might_destroy whether releasing it could destroy the snapshot: it is
+ * the last user reference, ds_num_children is 1 and the dataset is
+ * marked for deferred destroy.  Returns ESRCH if the tag does not
+ * exist, or another error from zap_lookup().
+ */
+static int
+dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
+    boolean_t *might_destroy)
+{
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	uint64_t zapobj;
+	uint64_t tmp;
+	int error;
+
+	*might_destroy = B_FALSE;
+
+	mutex_enter(&ds->ds_lock);
+	zapobj = ds->ds_phys->ds_userrefs_obj;
+	if (zapobj == 0) {
+		/* The tag can't possibly exist */
+		mutex_exit(&ds->ds_lock);
+		return (ESRCH);
+	}
+
+	/* Make sure the tag exists */
+	error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
+	if (error) {
+		mutex_exit(&ds->ds_lock);
+		if (error == ENOENT)
+			error = ESRCH;
+		return (error);
+	}
+
+	if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
+	    DS_IS_DEFER_DESTROY(ds))
+		*might_destroy = B_TRUE;
+
+	mutex_exit(&ds->ds_lock);
+	return (0);
+}
+
+/*
+ * Check function for releasing a user hold; arg1 is the struct
+ * dsl_ds_releasearg.  When the release might destroy the snapshot, the
+ * destroy check is run as well, and in syncing context EBUSY is
+ * returned unless the dataset is owned.
+ */
+static int
+dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
+{
+	struct dsl_ds_releasearg *ra = arg1;
+	dsl_dataset_t *ds = ra->ds;
+	boolean_t might_destroy;
+	int error;
+
+	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
+		return (ENOTSUP);
+
+	error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
+	if (error)
+		return (error);
+
+	if (might_destroy) {
+		struct dsl_ds_destroyarg dsda = {0};
+
+		if (dmu_tx_is_syncing(tx)) {
+			/*
+			 * If we're not prepared to remove the snapshot,
+			 * we can't allow the release to happen right now.
+			 */
+			if (!ra->own)
+				return (EBUSY);
+		}
+		dsda.ds = ds;
+		dsda.releasing = B_TRUE;
+		return (dsl_dataset_destroy_check(&dsda, tag, tx));
+	}
+
+	return (0);
+}
+
+/*
+ * Sync function for releasing a user hold: drops the reference, removes
+ * the tag, and destroys the snapshot if that was the last reference on
+ * a deferred-destroy snapshot.  The object number and remaining
+ * reference count are captured in locals before the possible destroy so
+ * that the history record does not read them from 'ds' afterwards.
+ */
+static void
+dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
+{
+	struct dsl_ds_releasearg *ra = arg1;
+	dsl_dataset_t *ds = ra->ds;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+	uint64_t zapobj;
+	uint64_t dsobj = ds->ds_object;
+	uint64_t refs;
+	int error;
+
+	mutex_enter(&ds->ds_lock);
+	ds->ds_userrefs--;
+	refs = ds->ds_userrefs;
+	mutex_exit(&ds->ds_lock);
+	/* ENOENT is tolerated here */
+	error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx);
+	VERIFY(error == 0 || error == ENOENT);
+	zapobj = ds->ds_phys->ds_userrefs_obj;
+	VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
+	if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
+	    DS_IS_DEFER_DESTROY(ds)) {
+		struct dsl_ds_destroyarg dsda = {0};
+
+		ASSERT(ra->own);
+		dsda.ds = ds;
+		dsda.releasing = B_TRUE;
+		/* We already did the destroy_check */
+		dsl_dataset_destroy_sync(&dsda, tag, tx);
+	}
+
+	spa_history_log_internal(LOG_DS_USER_RELEASE,
+	    dp->dp_spa, tx, "<%s> %lld dataset = %llu",
+	    ra->htag, (longlong_t)refs, dsobj);
+}
+
+static int
+dsl_dataset_user_release_one(const char *dsname, void *arg)
+{
+	struct dsl_ds_holdarg *ha = arg;
+	struct dsl_ds_releasearg *ra;
+	dsl_dataset_t *ds;
+	int error;
+	void *dtag = ha->dstg;
+	char *name;
+	boolean_t own = B_FALSE;
+	boolean_t might_destroy;
+
+	/* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
+	name = kmem_asprintf("%s@%s", dsname, ha->snapname);
+	error = dsl_dataset_hold(name, dtag, &ds);
+	strfree(name);
+	if (error == ENOENT && ha->recursive)
+		return (0);
+	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+	if (error)
+		return (error);
+
+	ha->gotone = B_TRUE;
+
+	ASSERT(dsl_dataset_is_snapshot(ds));
+
+	error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
+	if (error) {
dsl_dataset_rele(ds, dtag); + return (error); + } + + if (might_destroy) { +#ifdef _KERNEL + name = kmem_asprintf("%s@%s", dsname, ha->snapname); + error = zfs_unmount_snap(name, NULL); + strfree(name); + if (error) { + dsl_dataset_rele(ds, dtag); + return (error); + } +#endif + if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { + dsl_dataset_rele(ds, dtag); + return (EBUSY); + } else { + own = B_TRUE; + dsl_dataset_make_exclusive(ds, dtag); + } + } + + ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); + ra->ds = ds; + ra->htag = ha->htag; + ra->own = own; + dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, + dsl_dataset_user_release_sync, ra, dtag, 0); + + return (0); +} + +int +dsl_dataset_user_release(char *dsname, char *snapname, char *htag, + boolean_t recursive) +{ + struct dsl_ds_holdarg *ha; + dsl_sync_task_t *dst; + spa_t *spa; + int error; + +top: + ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); + + (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); + + error = spa_open(dsname, &spa, FTAG); + if (error) { + kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + return (error); + } + + ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + ha->htag = htag; + ha->snapname = snapname; + ha->recursive = recursive; + if (recursive) { + error = dmu_objset_find(dsname, dsl_dataset_user_release_one, + ha, DS_FIND_CHILDREN); + } else { + error = dsl_dataset_user_release_one(dsname, ha); + } + if (error == 0) + error = dsl_sync_task_group_wait(ha->dstg); + + for (dst = list_head(&ha->dstg->dstg_tasks); dst; + dst = list_next(&ha->dstg->dstg_tasks, dst)) { + struct dsl_ds_releasearg *ra = dst->dst_arg1; + dsl_dataset_t *ds = ra->ds; + + if (dst->dst_err) + dsl_dataset_name(ds, ha->failed); + + if (ra->own) + dsl_dataset_disown(ds, ha->dstg); + else + dsl_dataset_rele(ds, ha->dstg); + + kmem_free(ra, sizeof (struct dsl_ds_releasearg)); + } + + if (error == 0 && recursive && !ha->gotone) + error = ENOENT; + + if (error && error != 
EBUSY) + (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); + + dsl_sync_task_group_destroy(ha->dstg); + kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + spa_close(spa, FTAG); + + /* + * We can get EBUSY if we were racing with deferred destroy and + * dsl_dataset_user_release_check() hadn't done the necessary + * open context setup. We can also get EBUSY if we're racing + * with destroy and that thread is the ds_owner. Either way + * the busy condition should be transient, and we should retry + * the release operation. + */ + if (error == EBUSY) + goto top; + + return (error); +} + +/* + * Called at spa_load time (with retry == B_FALSE) to release a stale + * temporary user hold. Also called by the onexit code (with retry == B_TRUE). + */ +int +dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, + boolean_t retry) +{ + dsl_dataset_t *ds; + char *snap; + char *name; + int namelen; + int error; + + do { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (error) + return (error); + namelen = dsl_dataset_namelen(ds)+1; + name = kmem_alloc(namelen, KM_SLEEP); + dsl_dataset_name(ds, name); + dsl_dataset_rele(ds, FTAG); + + snap = strchr(name, '@'); + *snap = '\0'; + ++snap; + error = dsl_dataset_user_release(name, snap, htag, B_FALSE); + kmem_free(name, namelen); + + /* + * The object can't have been destroyed because we have a hold, + * but it might have been renamed, resulting in ENOENT. Retry + * if we've been requested to do so. + * + * It would be nice if we could use the dsobj all the way + * through and avoid ENOENT entirely. But we might need to + * unmount the snapshot, and there's currently no way to lookup + * a vfsp using a ZFS object id. 
+ */ + } while ((error == ENOENT) && retry); + + return (error); +} + +/* + * Return in *nvp a newly allocated nvlist holding one uint64 entry for + * every name in the dataset's user-refs ZAP object; the list is empty + * when the dataset has no such object. If dsname cannot be held, the + * error from dsl_dataset_hold() is returned and *nvp is not allocated. + */ +int +dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) +{ + dsl_dataset_t *ds; + int err; + + err = dsl_dataset_hold(dsname, FTAG, &ds); + if (err) + return (err); + + VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); + if (ds->ds_phys->ds_userrefs_obj != 0) { + zap_attribute_t *za; + zap_cursor_t zc; + + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_userrefs_obj); + zap_cursor_retrieve(&zc, za) == 0; + zap_cursor_advance(&zc)) { + VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, + za->za_first_integer)); + } + zap_cursor_fini(&zc); + kmem_free(za, sizeof (zap_attribute_t)); + } + dsl_dataset_rele(ds, FTAG); + return (0); +} + +/* + * Note, this function is used as the callback for dmu_objset_find(). We + * always return 0 so that we will continue to find and process + * inconsistent datasets, even if we encounter an error trying to + * process one of them. 
+ */ +/* ARGSUSED */ +int +dsl_destroy_inconsistent(const char *dsname, void *arg) +{ + dsl_dataset_t *ds; + + if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { + if (DS_IS_INCONSISTENT(ds)) + (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); + else + dsl_dataset_disown(ds, FTAG); + } + return (0); +} diff --git a/uts/common/fs/zfs/dsl_deadlist.c b/uts/common/fs/zfs/dsl_deadlist.c new file mode 100644 index 000000000000..064f8aceb8ee --- /dev/null +++ b/uts/common/fs/zfs/dsl_deadlist.c @@ -0,0 +1,474 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dsl_dataset.h> +#include <sys/dmu.h> +#include <sys/refcount.h> +#include <sys/zap.h> +#include <sys/zfs_context.h> +#include <sys/dsl_pool.h> + +static int +dsl_deadlist_compare(const void *arg1, const void *arg2) +{ + const dsl_deadlist_entry_t *dle1 = arg1; + const dsl_deadlist_entry_t *dle2 = arg2; + + if (dle1->dle_mintxg < dle2->dle_mintxg) + return (-1); + else if (dle1->dle_mintxg > dle2->dle_mintxg) + return (+1); + else + return (0); +} + +static void +dsl_deadlist_load_tree(dsl_deadlist_t *dl) +{ + zap_cursor_t zc; + zap_attribute_t za; + + ASSERT(!dl->dl_oldfmt); + if (dl->dl_havetree) + return; + + avl_create(&dl->dl_tree, dsl_deadlist_compare, + sizeof (dsl_deadlist_entry_t), + offsetof(dsl_deadlist_entry_t, dle_node)); + for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP); + dle->dle_mintxg = strtonum(za.za_name, NULL); + VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, + za.za_first_integer)); + avl_add(&dl->dl_tree, dle); + } + zap_cursor_fini(&zc); + dl->dl_havetree = B_TRUE; +} + +void +dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object) +{ + dmu_object_info_t doi; + + mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL); + dl->dl_os = os; + dl->dl_object = object; + VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, 
&dl->dl_dbuf)); + dmu_object_info_from_db(dl->dl_dbuf, &doi); + if (doi.doi_type == DMU_OT_BPOBJ) { + dmu_buf_rele(dl->dl_dbuf, dl); + dl->dl_dbuf = NULL; + dl->dl_oldfmt = B_TRUE; + VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object)); + return; + } + + dl->dl_oldfmt = B_FALSE; + dl->dl_phys = dl->dl_dbuf->db_data; + dl->dl_havetree = B_FALSE; +} + +/* + * Undo dsl_deadlist_open(): destroy dl_lock and then either close the + * old-format bpobj, or tear down the in-core AVL tree (if it was loaded) + * and drop the hold on the deadlist's bonus buffer. + */ +void +dsl_deadlist_close(dsl_deadlist_t *dl) +{ + void *cookie = NULL; + dsl_deadlist_entry_t *dle; + + /* + * dsl_deadlist_open() initializes dl_lock before it looks at the + * object type, so the lock must be destroyed on the old-format + * path as well; do it ahead of the early return. + */ + mutex_destroy(&dl->dl_lock); + + if (dl->dl_oldfmt) { + dl->dl_oldfmt = B_FALSE; + bpobj_close(&dl->dl_bpobj); + return; + } + + if (dl->dl_havetree) { + while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie)) + != NULL) { + bpobj_close(&dle->dle_bpobj); + kmem_free(dle, sizeof (*dle)); + } + avl_destroy(&dl->dl_tree); + } + dmu_buf_rele(dl->dl_dbuf, dl); + dl->dl_dbuf = NULL; + dl->dl_phys = NULL; +} + +/* + * Allocate the object backing a new deadlist and return its object + * number: a plain bpobj on pools older than SPA_VERSION_DEADLISTS, + * otherwise a DMU_OT_DEADLIST ZAP whose DMU_OT_DEADLIST_HDR bonus is + * sized for a dsl_deadlist_phys_t. + */ +uint64_t +dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx) +{ + if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) + return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx)); + return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, + sizeof (dsl_deadlist_phys_t), tx)); +} + +/* + * Free the deadlist object dlobj. An old-format deadlist is a single + * bpobj; otherwise free every bpobj named by the ZAP and then the ZAP + * object itself. 
+ */ +void +dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) +{ + dmu_object_info_t doi; + zap_cursor_t zc; + zap_attribute_t za; + + VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi)); + if (doi.doi_type == DMU_OT_BPOBJ) { + bpobj_free(os, dlobj, tx); + return; + } + + for (zap_cursor_init(&zc, os, dlobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) + bpobj_free(os, za.za_first_integer, tx); + zap_cursor_fini(&zc); + VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx)); +} + +/* + * Add bp to the deadlist. Old-format lists enqueue it directly on + * dl_bpobj; otherwise the space totals in dl_phys are updated under + * dl_lock and bp is enqueued on the bpobj of the matching tree entry. + */ +void +dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; + avl_index_t where; + + if (dl->dl_oldfmt) { + bpobj_enqueue(&dl->dl_bpobj, bp, tx); + return; + } + + dsl_deadlist_load_tree(dl); + + dmu_buf_will_dirty(dl->dl_dbuf, tx); + mutex_enter(&dl->dl_lock); +
dl->dl_phys->dl_used += + bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp); + dl->dl_phys->dl_comp += BP_GET_PSIZE(bp); + dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp); + mutex_exit(&dl->dl_lock); + + /* + * Enqueue on the entry with the largest key strictly below the + * block's birth txg: an exact match steps back one entry, since a + * key's mintxg is not inclusive. + */ + dle_tofind.dle_mintxg = bp->blk_birth; + dle = avl_find(&dl->dl_tree, &dle_tofind, &where); + if (dle == NULL) + dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); + else + dle = AVL_PREV(&dl->dl_tree, dle); + bpobj_enqueue(&dle->dle_bpobj, bp, tx); +} + +/* + * Insert new key in deadlist, which must be > all current entries. + * mintxg is not inclusive. A new bpobj is allocated for the key and + * recorded both in the in-core AVL tree and in the on-disk ZAP. + * Old-format deadlists have no keys, so this is a no-op for them. + */ +void +dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) +{ + uint64_t obj; + dsl_deadlist_entry_t *dle; + + if (dl->dl_oldfmt) + return; + + dsl_deadlist_load_tree(dl); + + dle = kmem_alloc(sizeof (*dle), KM_SLEEP); + dle->dle_mintxg = mintxg; + obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); + VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); + avl_add(&dl->dl_tree, dle); + + VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object, + mintxg, obj, tx)); +} + +/* + * Remove this key, merging its entries into the previous key. + * Old-format deadlists have no keys, so this is a no-op for them. + */ +void +dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle, *dle_prev; + + if (dl->dl_oldfmt) + return; + + dsl_deadlist_load_tree(dl); + + /* + * NOTE(review): neither dle nor dle_prev is checked for NULL, so + * the caller is presumably required to pass an existing key that + * is not the lowest one -- confirm against callers. 
+ */ + dle_tofind.dle_mintxg = mintxg; + dle = avl_find(&dl->dl_tree, &dle_tofind, NULL); + dle_prev = AVL_PREV(&dl->dl_tree, dle); + + bpobj_enqueue_subobj(&dle_prev->dle_bpobj, + dle->dle_bpobj.bpo_object, tx); + + avl_remove(&dl->dl_tree, dle); + bpobj_close(&dle->dle_bpobj); + kmem_free(dle, sizeof (*dle)); + + VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx)); +} + +/* + * Walk ds's snapshots to regenerate the ZAP & AVL.
+ */ +static void +dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj, + uint64_t mrs_obj, dmu_tx_t *tx) +{ + dsl_deadlist_t dl; + dsl_pool_t *dp = dmu_objset_pool(os); + + dsl_deadlist_open(&dl, os, dlobj); + if (dl.dl_oldfmt) { + dsl_deadlist_close(&dl); + return; + } + + while (mrs_obj != 0) { + dsl_dataset_t *ds; + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds)); + dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx); + mrs_obj = ds->ds_phys->ds_prev_snap_obj; + dsl_dataset_rele(ds, FTAG); + } + dsl_deadlist_close(&dl); +} + +uint64_t +dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, + uint64_t mrs_obj, dmu_tx_t *tx) +{ + dsl_deadlist_entry_t *dle; + uint64_t newobj; + + newobj = dsl_deadlist_alloc(dl->dl_os, tx); + + if (dl->dl_oldfmt) { + dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx); + return (newobj); + } + + dsl_deadlist_load_tree(dl); + + for (dle = avl_first(&dl->dl_tree); dle; + dle = AVL_NEXT(&dl->dl_tree, dle)) { + uint64_t obj; + + if (dle->dle_mintxg >= maxtxg) + break; + + obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); + VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj, + dle->dle_mintxg, obj, tx)); + } + return (newobj); +} + +void +dsl_deadlist_space(dsl_deadlist_t *dl, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + if (dl->dl_oldfmt) { + VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj, + usedp, compp, uncompp)); + return; + } + + mutex_enter(&dl->dl_lock); + *usedp = dl->dl_phys->dl_used; + *compp = dl->dl_phys->dl_comp; + *uncompp = dl->dl_phys->dl_uncomp; + mutex_exit(&dl->dl_lock); +} + +/* + * return space used in the range (mintxg, maxtxg]. + * Includes maxtxg, does not include mintxg. + * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is + * UINT64_MAX). 
+ */ +void +dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; + avl_index_t where; + + if (dl->dl_oldfmt) { + VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj, + mintxg, maxtxg, usedp, compp, uncompp)); + return; + } + + dsl_deadlist_load_tree(dl); + *usedp = *compp = *uncompp = 0; + + dle_tofind.dle_mintxg = mintxg; + dle = avl_find(&dl->dl_tree, &dle_tofind, &where); + /* + * If we don't find this mintxg, there shouldn't be anything + * after it either. + */ + ASSERT(dle != NULL || + avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL); + for (; dle && dle->dle_mintxg < maxtxg; + dle = AVL_NEXT(&dl->dl_tree, dle)) { + uint64_t used, comp, uncomp; + + VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, + &used, &comp, &uncomp)); + + *usedp += used; + *compp += comp; + *uncompp += uncomp; + } +} + +static void +dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth, + dmu_tx_t *tx) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; + avl_index_t where; + uint64_t used, comp, uncomp; + bpobj_t bpo; + + VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); + VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp)); + bpobj_close(&bpo); + + dsl_deadlist_load_tree(dl); + + dmu_buf_will_dirty(dl->dl_dbuf, tx); + mutex_enter(&dl->dl_lock); + dl->dl_phys->dl_used += used; + dl->dl_phys->dl_comp += comp; + dl->dl_phys->dl_uncomp += uncomp; + mutex_exit(&dl->dl_lock); + + dle_tofind.dle_mintxg = birth; + dle = avl_find(&dl->dl_tree, &dle_tofind, &where); + if (dle == NULL) + dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); + bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx); +} + +static int +dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + dsl_deadlist_t *dl = arg; + dsl_deadlist_insert(dl, bp, tx); + return (0); +} + +/* + * Merge the deadlist pointed to by 'obj' into dl. 
obj will be left as + * an empty deadlist. + */ +void +dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) +{ + zap_cursor_t zc; + zap_attribute_t za; + dmu_buf_t *bonus; + dsl_deadlist_phys_t *dlp; + dmu_object_info_t doi; + + VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi)); + if (doi.doi_type == DMU_OT_BPOBJ) { + bpobj_t bpo; + VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); + VERIFY3U(0, ==, bpobj_iterate(&bpo, + dsl_deadlist_insert_cb, dl, tx)); + bpobj_close(&bpo); + return; + } + + for (zap_cursor_init(&zc, dl->dl_os, obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + uint64_t mintxg = strtonum(za.za_name, NULL); + dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); + VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx)); + } + zap_cursor_fini(&zc); + + VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus)); + dlp = bonus->db_data; + dmu_buf_will_dirty(bonus, tx); + bzero(dlp, sizeof (*dlp)); + dmu_buf_rele(bonus, FTAG); +} + +/* + * Remove entries on dl that are >= mintxg, and put them on the bpobj. 
+ */ +void +dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, + dmu_tx_t *tx) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; + avl_index_t where; + + ASSERT(!dl->dl_oldfmt); + dmu_buf_will_dirty(dl->dl_dbuf, tx); + dsl_deadlist_load_tree(dl); + + dle_tofind.dle_mintxg = mintxg; + dle = avl_find(&dl->dl_tree, &dle_tofind, &where); + if (dle == NULL) + dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER); + while (dle) { + uint64_t used, comp, uncomp; + dsl_deadlist_entry_t *dle_next; + + bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx); + + VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, + &used, &comp, &uncomp)); + mutex_enter(&dl->dl_lock); + ASSERT3U(dl->dl_phys->dl_used, >=, used); + ASSERT3U(dl->dl_phys->dl_comp, >=, comp); + ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp); + dl->dl_phys->dl_used -= used; + dl->dl_phys->dl_comp -= comp; + dl->dl_phys->dl_uncomp -= uncomp; + mutex_exit(&dl->dl_lock); + + VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, + dle->dle_mintxg, tx)); + + dle_next = AVL_NEXT(&dl->dl_tree, dle); + avl_remove(&dl->dl_tree, dle); + bpobj_close(&dle->dle_bpobj); + kmem_free(dle, sizeof (*dle)); + dle = dle_next; + } +} diff --git a/uts/common/fs/zfs/dsl_deleg.c b/uts/common/fs/zfs/dsl_deleg.c new file mode 100644 index 000000000000..529fb052fa75 --- /dev/null +++ b/uts/common/fs/zfs/dsl_deleg.c @@ -0,0 +1,746 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * DSL permissions are stored in a two level zap attribute + * mechanism. The first level identifies the "class" of + * entry. The class is identified by the first 2 letters of + * the attribute. The second letter "l" or "d" identifies whether + * it is a local or descendent permission. The first letter + * identifies the type of entry. + * + * ul$<id> identifies permissions granted locally for this userid. + * ud$<id> identifies permissions granted on descendent datasets for + * this userid. + * Ul$<id> identifies permission sets granted locally for this userid. + * Ud$<id> identifies permission sets granted on descendent datasets for + * this userid. + * gl$<id> identifies permissions granted locally for this groupid. + * gd$<id> identifies permissions granted on descendent datasets for + * this groupid. + * Gl$<id> identifies permission sets granted locally for this groupid. + * Gd$<id> identifies permission sets granted on descendent datasets for + * this groupid. + * el$ identifies permissions granted locally for everyone. + * ed$ identifies permissions granted on descendent datasets + * for everyone. + * El$ identifies permission sets granted locally for everyone. + * Ed$ identifies permission sets granted to descendent datasets for + * everyone. + * c-$ identifies permission to create at dataset creation time. + * C-$ identifies permission sets to grant locally at dataset creation + * time. 
+ * s-$@<name> permissions defined in specified set @<name> + * S-$@<name> Sets defined in named set @<name> + * + * Each of the above entities points to another zap attribute that contains one + * attribute for each allowed permission, such as create, destroy,... + * All of the "upper" case class types will specify permission set names + * rather than permissions. + * + * Basically it looks something like this: + * ul$12 -> ZAP OBJ -> permissions... + * + * The ZAP OBJ is referred to as the jump object. + */ + +#include <sys/dmu.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_deleg.h> +#include <sys/spa.h> +#include <sys/zap.h> +#include <sys/fs/zfs.h> +#include <sys/cred.h> +#include <sys/sunddi.h> + +#include "zfs_deleg.h" + +/* + * Validate that user is allowed to delegate specified permissions. + * + * In order to delegate "create" you must have "create" + * and "allow". + */ +int +dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) +{ + nvpair_t *whopair = NULL; + int error; + + if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) + return (error); + + while (whopair = nvlist_next_nvpair(nvp, whopair)) { + nvlist_t *perms; + nvpair_t *permpair = NULL; + + VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); + + while (permpair = nvlist_next_nvpair(perms, permpair)) { + const char *perm = nvpair_name(permpair); + + if (strcmp(perm, ZFS_DELEG_PERM_ALLOW) == 0) + return (EPERM); + + if ((error = dsl_deleg_access(ddname, perm, cr)) != 0) + return (error); + } + } + return (0); +} + +/* + * Validate that user is allowed to unallow specified permissions. They + * must have the 'allow' permission, and even then can only unallow + * perms for their uid. 
+ */ +int +dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) +{ + nvpair_t *whopair = NULL; + int error; + char idstr[32]; + + if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) + return (error); + + (void) snprintf(idstr, sizeof (idstr), "%lld", + (longlong_t)crgetuid(cr)); + + while (whopair = nvlist_next_nvpair(nvp, whopair)) { + zfs_deleg_who_type_t type = nvpair_name(whopair)[0]; + + if (type != ZFS_DELEG_USER && + type != ZFS_DELEG_USER_SETS) + return (EPERM); + + if (strcmp(idstr, &nvpair_name(whopair)[3]) != 0) + return (EPERM); + } + return (0); +} + +static void +dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + nvlist_t *nvp = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; + nvpair_t *whopair = NULL; + uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + + if (zapobj == 0) { + dmu_buf_will_dirty(dd->dd_dbuf, tx); + zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, + DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); + } + + while (whopair = nvlist_next_nvpair(nvp, whopair)) { + const char *whokey = nvpair_name(whopair); + nvlist_t *perms; + nvpair_t *permpair = NULL; + uint64_t jumpobj; + + VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); + + if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { + jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, + DMU_OT_NONE, 0, tx); + VERIFY(zap_update(mos, zapobj, + whokey, 8, 1, &jumpobj, tx) == 0); + } + + while (permpair = nvlist_next_nvpair(perms, permpair)) { + const char *perm = nvpair_name(permpair); + uint64_t n = 0; + + VERIFY(zap_update(mos, jumpobj, + perm, 8, 1, &n, tx) == 0); + spa_history_log_internal(LOG_DS_PERM_UPDATE, + dd->dd_pool->dp_spa, tx, + "%s %s dataset = %llu", whokey, perm, + dd->dd_phys->dd_head_dataset_obj); + } + } +} + +static void +dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + nvlist_t *nvp = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; + nvpair_t *whopair = NULL; + 
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + + if (zapobj == 0) + return; + + while (whopair = nvlist_next_nvpair(nvp, whopair)) { + const char *whokey = nvpair_name(whopair); + nvlist_t *perms; + nvpair_t *permpair = NULL; + uint64_t jumpobj; + + if (nvpair_value_nvlist(whopair, &perms) != 0) { + if (zap_lookup(mos, zapobj, whokey, 8, + 1, &jumpobj) == 0) { + (void) zap_remove(mos, zapobj, whokey, tx); + VERIFY(0 == zap_destroy(mos, jumpobj, tx)); + } + spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, + dd->dd_pool->dp_spa, tx, + "%s dataset = %llu", whokey, + dd->dd_phys->dd_head_dataset_obj); + continue; + } + + if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) + continue; + + while (permpair = nvlist_next_nvpair(perms, permpair)) { + const char *perm = nvpair_name(permpair); + uint64_t n = 0; + + (void) zap_remove(mos, jumpobj, perm, tx); + if (zap_count(mos, jumpobj, &n) == 0 && n == 0) { + (void) zap_remove(mos, zapobj, + whokey, tx); + VERIFY(0 == zap_destroy(mos, + jumpobj, tx)); + } + spa_history_log_internal(LOG_DS_PERM_REMOVE, + dd->dd_pool->dp_spa, tx, + "%s %s dataset = %llu", whokey, perm, + dd->dd_phys->dd_head_dataset_obj); + } + } +} + +int +dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) +{ + dsl_dir_t *dd; + int error; + nvpair_t *whopair = NULL; + int blocks_modified = 0; + + error = dsl_dir_open(ddname, FTAG, &dd, NULL); + if (error) + return (error); + + if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) < + SPA_VERSION_DELEGATED_PERMS) { + dsl_dir_close(dd, FTAG); + return (ENOTSUP); + } + + while (whopair = nvlist_next_nvpair(nvp, whopair)) + blocks_modified++; + + error = dsl_sync_task_do(dd->dd_pool, NULL, + unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, + dd, nvp, blocks_modified); + dsl_dir_close(dd, FTAG); + + return (error); +} + +/* + * Find all 'allow' permissions from a given point and then continue + * traversing up to the root. + * + * This function constructs an nvlist of nvlists. 
+ * each setpoint is an nvlist composed of an nvlist of an nvlist + * of the individual * users/groups/everyone/create + * permissions. + * + * The nvlist will look like this. + * + * { source fsname -> { whokeys { permissions,...}, ...}} + * + * The fsname nvpairs will be arranged in a bottom up order. For example, + * if we have the following structure a/b/c then the nvpairs for the fsnames + * will be ordered a/b/c, a/b, a. + */ +int +dsl_deleg_get(const char *ddname, nvlist_t **nvp) +{ + dsl_dir_t *dd, *startdd; + dsl_pool_t *dp; + int error; + objset_t *mos; + + error = dsl_dir_open(ddname, FTAG, &startdd, NULL); + if (error) + return (error); + + dp = startdd->dd_pool; + mos = dp->dp_meta_objset; + + VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + rw_enter(&dp->dp_config_rwlock, RW_READER); + for (dd = startdd; dd != NULL; dd = dd->dd_parent) { + zap_cursor_t basezc; + zap_attribute_t baseza; + nvlist_t *sp_nvp; + uint64_t n; + char source[MAXNAMELEN]; + + if (dd->dd_phys->dd_deleg_zapobj && + (zap_count(mos, dd->dd_phys->dd_deleg_zapobj, + &n) == 0) && n) { + VERIFY(nvlist_alloc(&sp_nvp, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + } else { + continue; + } + + for (zap_cursor_init(&basezc, mos, + dd->dd_phys->dd_deleg_zapobj); + zap_cursor_retrieve(&basezc, &baseza) == 0; + zap_cursor_advance(&basezc)) { + zap_cursor_t zc; + zap_attribute_t za; + nvlist_t *perms_nvp; + + ASSERT(baseza.za_integer_length == 8); + ASSERT(baseza.za_num_integers == 1); + + VERIFY(nvlist_alloc(&perms_nvp, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + for (zap_cursor_init(&zc, mos, baseza.za_first_integer); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + VERIFY(nvlist_add_boolean(perms_nvp, + za.za_name) == 0); + } + zap_cursor_fini(&zc); + VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name, + perms_nvp) == 0); + nvlist_free(perms_nvp); + } + + zap_cursor_fini(&basezc); + + dsl_dir_name(dd, source); + VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0); + 
nvlist_free(sp_nvp); + } + rw_exit(&dp->dp_config_rwlock); + + dsl_dir_close(startdd, FTAG); + return (0); +} + +/* + * Routines for dsl_deleg_access() -- access checking. + * + * A perm_set_t is one AVL node naming a permission set; p_matched + * starts out B_FALSE when the node is created by dsl_load_sets(). + */ +typedef struct perm_set { + avl_node_t p_node; + boolean_t p_matched; + char p_setname[ZFS_MAX_DELEG_NAME]; +} perm_set_t; + +/* + * Order two perm_set_t nodes by p_setname, normalizing the strcmp() + * result to -1, 0 or 1. + */ +static int +perm_set_compare(const void *arg1, const void *arg2) +{ + const perm_set_t *node1 = arg1; + const perm_set_t *node2 = arg2; + int val; + + val = strcmp(node1->p_setname, node2->p_setname); + if (val == 0) + return (0); + return (val > 0 ? 1 : -1); +} + +/* + * Determine whether a specified permission exists. + * + * First the base attribute has to be retrieved, e.g. ul$12. + * Once the base object has been retrieved the actual permission + * is looked up in the zap object the base object points to. + * + * Return 0 if permission exists, ENOENT if there is no whokey, EPERM if + * there is no perm in that jumpobj. Any other zap_lookup() error is + * returned unchanged. + */ +static int +dsl_check_access(objset_t *mos, uint64_t zapobj, + char type, char checkflag, void *valp, const char *perm) +{ + int error; + uint64_t jumpobj, zero; + char whokey[ZFS_MAX_DELEG_NAME]; + + zfs_deleg_whokey(whokey, type, checkflag, valp); + error = zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj); + if (error == 0) { + error = zap_lookup(mos, jumpobj, perm, 8, 1, &zero); + if (error == ENOENT) + error = EPERM; + } + return (error); +} + +/* + * Check a specified user/group for a requested permission. The uid, + * the primary gid, the everyone entry and then each supplemental group + * of cr are tried in that order; return 0 on the first match and EPERM + * if none of them grants perm. 
+ */ +static int +dsl_check_user_access(objset_t *mos, uint64_t zapobj, const char *perm, + int checkflag, cred_t *cr) +{ + const gid_t *gids; + int ngids; + int i; + uint64_t id; + + /* check for user */ + id = crgetuid(cr); + if (dsl_check_access(mos, zapobj, + ZFS_DELEG_USER, checkflag, &id, perm) == 0) + return (0); + + /* check for user's primary group */ + id = crgetgid(cr); + if (dsl_check_access(mos, zapobj, + ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) + return (0); + + /* check for everyone entry */ + id = -1; + if
(dsl_check_access(mos, zapobj, + ZFS_DELEG_EVERYONE, checkflag, &id, perm) == 0) + return (0); + + /* check each supplemental group user is a member of */ + ngids = crgetngroups(cr); + gids = crgetgroups(cr); + for (i = 0; i != ngids; i++) { + id = gids[i]; + if (dsl_check_access(mos, zapobj, + ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) + return (0); + } + + return (EPERM); +} + +/* + * Iterate over the sets specified in the specified zapobj + * and load them into the permsets avl tree. + */ +static int +dsl_load_sets(objset_t *mos, uint64_t zapobj, + char type, char checkflag, void *valp, avl_tree_t *avl) +{ + zap_cursor_t zc; + zap_attribute_t za; + perm_set_t *permnode; + avl_index_t idx; + uint64_t jumpobj; + int error; + char whokey[ZFS_MAX_DELEG_NAME]; + + zfs_deleg_whokey(whokey, type, checkflag, valp); + + error = zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj); + if (error != 0) + return (error); + + for (zap_cursor_init(&zc, mos, jumpobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + permnode = kmem_alloc(sizeof (perm_set_t), KM_SLEEP); + (void) strlcpy(permnode->p_setname, za.za_name, + sizeof (permnode->p_setname)); + permnode->p_matched = B_FALSE; + + if (avl_find(avl, permnode, &idx) == NULL) { + avl_insert(avl, permnode, idx); + } else { + kmem_free(permnode, sizeof (perm_set_t)); + } + } + zap_cursor_fini(&zc); + return (0); +} + +/* + * Load all permissions user based on cred belongs to. 
/*
 * Check whether the credential cr has been delegated permission perm on
 * dataset ds.
 *
 * The dataset's dsl_dir and each of its ancestors are examined in turn
 * while holding dp_config_rwlock as reader.  For every directory that has
 * a delegation zap, the permission sets that apply to the credential are
 * loaded and (transitively) expanded, then the direct user/group/everyone
 * entries are checked.
 *
 * Returns:
 *	0		the permission is granted
 *	ECANCELED	dsl_delegation_on() reports delegation disabled
 *	EPERM		the pool version predates delegated permissions, or
 *			no directory on the path grants the permission
 */
int
dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	void *cookie;
	int error;
	char checkflag;
	objset_t *mos;
	avl_tree_t permsets;
	perm_set_t *setnode;

	dp = ds->ds_dir->dd_pool;
	mos = dp->dp_meta_objset;

	if (dsl_delegation_on(mos) == B_FALSE)
		return (ECANCELED);

	if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) <
	    SPA_VERSION_DELEGATED_PERMS)
		return (EPERM);

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * Snapshots are treated as descendants only,
		 * local permissions do not apply.
		 */
		checkflag = ZFS_DELEG_DESCENDENT;
	} else {
		checkflag = ZFS_DELEG_LOCAL;
	}

	/* Tree of set names seen so far; shared across all directories */
	avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
	    offsetof(perm_set_t, p_node));

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	/*
	 * Only the first directory is checked with the initial checkflag;
	 * the loop increment switches every ancestor to DESCENDENT.
	 */
	for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
	    checkflag = ZFS_DELEG_DESCENDENT) {
		uint64_t zapobj;
		boolean_t expanded;

		/*
		 * If not in global zone then make sure
		 * the zoned property is set
		 */
		if (!INGLOBALZONE(curproc)) {
			uint64_t zoned;

			/* Stop climbing: falls out of the loop to EPERM */
			if (dsl_prop_get_dd(dd,
			    zfs_prop_to_name(ZFS_PROP_ZONED),
			    8, 1, &zoned, NULL, B_FALSE) != 0)
				break;
			if (!zoned)
				break;
		}
		zapobj = dd->dd_phys->dd_deleg_zapobj;

		/* No delegations recorded on this directory */
		if (zapobj == 0)
			continue;

		dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr);
again:
		expanded = B_FALSE;
		/*
		 * NOTE(review): p_matched is never cleared between
		 * directories, so a set marked here is skipped when the
		 * same name is met at an ancestor -- confirm intended.
		 */
		for (setnode = avl_first(&permsets); setnode;
		    setnode = AVL_NEXT(&permsets, setnode)) {
			if (setnode->p_matched == B_TRUE)
				continue;

			/* See if this set directly grants this permission */
			error = dsl_check_access(mos, zapobj,
			    ZFS_DELEG_NAMED_SET, 0, setnode->p_setname, perm);
			if (error == 0)
				goto success;
			/* EPERM: the set exists here but lacks perm */
			if (error == EPERM)
				setnode->p_matched = B_TRUE;

			/* See if this set includes other sets */
			error = dsl_load_sets(mos, zapobj,
			    ZFS_DELEG_NAMED_SET_SETS, 0,
			    setnode->p_setname, &permsets);
			if (error == 0)
				setnode->p_matched = expanded = B_TRUE;
		}
		/*
		 * If we expanded any sets, that will define more sets,
		 * which we need to check.
		 */
		if (expanded)
			goto again;

		/* Finally try the direct user/group/everyone entries */
		error = dsl_check_user_access(mos, zapobj, perm, checkflag, cr);
		if (error == 0)
			goto success;
	}
	/* Ran off the top (or stopped at a non-zoned dir) without a grant */
	error = EPERM;
success:
	rw_exit(&dp->dp_config_rwlock);

	/* Free every node that the set loading inserted into the tree */
	cookie = NULL;
	while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
		kmem_free(setnode, sizeof (perm_set_t));

	return (error);
}
ZFS_DELEG_USER_SETS : ZFS_DELEG_USER, + ZFS_DELEG_LOCAL, &uid); + if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) { + jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); + VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); + } + + for (zap_cursor_init(&zc, mos, pjumpobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + uint64_t zero = 0; + ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); + + VERIFY(zap_update(mos, jumpobj, za.za_name, + 8, 1, &zero, tx) == 0); + } + zap_cursor_fini(&zc); +} + +/* + * set all create time permission on new dataset. + */ +void +dsl_deleg_set_create_perms(dsl_dir_t *sdd, dmu_tx_t *tx, cred_t *cr) +{ + dsl_dir_t *dd; + uint64_t uid = crgetuid(cr); + + if (spa_version(dmu_objset_spa(sdd->dd_pool->dp_meta_objset)) < + SPA_VERSION_DELEGATED_PERMS) + return; + + for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) { + uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj; + + if (pzapobj == 0) + continue; + + copy_create_perms(sdd, pzapobj, B_FALSE, uid, tx); + copy_create_perms(sdd, pzapobj, B_TRUE, uid, tx); + } +} + +int +dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx) +{ + zap_cursor_t zc; + zap_attribute_t za; + + if (zapobj == 0) + return (0); + + for (zap_cursor_init(&zc, mos, zapobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); + VERIFY(0 == zap_destroy(mos, za.za_first_integer, tx)); + } + zap_cursor_fini(&zc); + VERIFY(0 == zap_destroy(mos, zapobj, tx)); + return (0); +} + +boolean_t +dsl_delegation_on(objset_t *os) +{ + return (!!spa_delegation(os->os_spa)); +} diff --git a/uts/common/fs/zfs/dsl_dir.c b/uts/common/fs/zfs/dsl_dir.c new file mode 100644 index 000000000000..1cd49c8274e8 --- /dev/null +++ b/uts/common/fs/zfs/dsl_dir.c @@ -0,0 +1,1416 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_deleg.h> +#include <sys/spa.h> +#include <sys/metaslab.h> +#include <sys/zap.h> +#include <sys/zio.h> +#include <sys/arc.h> +#include <sys/sunddi.h> +#include "zfs_namecheck.h" + +static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); +static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); + + +/* ARGSUSED */ +static void +dsl_dir_evict(dmu_buf_t *db, void *arg) +{ + dsl_dir_t *dd = arg; + dsl_pool_t *dp = dd->dd_pool; + int t; + + for (t = 0; t < TXG_SIZE; t++) { + ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); + ASSERT(dd->dd_tempreserved[t] == 0); + ASSERT(dd->dd_space_towrite[t] == 0); + } + + if (dd->dd_parent) + dsl_dir_close(dd->dd_parent, dd); + + spa_close(dd->dd_pool->dp_spa, dd); + + /* + * The props callback list should have been cleaned up by + * objset_evict(). 
+ */ + list_destroy(&dd->dd_prop_cbs); + mutex_destroy(&dd->dd_lock); + kmem_free(dd, sizeof (dsl_dir_t)); +} + +int +dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, + const char *tail, void *tag, dsl_dir_t **ddp) +{ + dmu_buf_t *dbuf; + dsl_dir_t *dd; + int err; + + ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || + dsl_pool_sync_context(dp)); + + err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); + if (err) + return (err); + dd = dmu_buf_get_user(dbuf); +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(dbuf, &doi); + ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); + ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); + } +#endif + if (dd == NULL) { + dsl_dir_t *winner; + + dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); + dd->dd_object = ddobj; + dd->dd_dbuf = dbuf; + dd->dd_pool = dp; + dd->dd_phys = dbuf->db_data; + mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); + + list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), + offsetof(dsl_prop_cb_record_t, cbr_node)); + + dsl_dir_snap_cmtime_update(dd); + + if (dd->dd_phys->dd_parent_obj) { + err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, + NULL, dd, &dd->dd_parent); + if (err) + goto errout; + if (tail) { +#ifdef ZFS_DEBUG + uint64_t foundobj; + + err = zap_lookup(dp->dp_meta_objset, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, + tail, sizeof (foundobj), 1, &foundobj); + ASSERT(err || foundobj == ddobj); +#endif + (void) strcpy(dd->dd_myname, tail); + } else { + err = zap_value_search(dp->dp_meta_objset, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, + ddobj, 0, dd->dd_myname); + } + if (err) + goto errout; + } else { + (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); + } + + if (dsl_dir_is_clone(dd)) { + dmu_buf_t *origin_bonus; + dsl_dataset_phys_t *origin_phys; + + /* + * We can't open the origin dataset, because + * that would require opening this dsl_dir. + * Just look at its phys directly instead. 
+ */ + err = dmu_bonus_hold(dp->dp_meta_objset, + dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); + if (err) + goto errout; + origin_phys = origin_bonus->db_data; + dd->dd_origin_txg = + origin_phys->ds_creation_txg; + dmu_buf_rele(origin_bonus, FTAG); + } + + winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, + dsl_dir_evict); + if (winner) { + if (dd->dd_parent) + dsl_dir_close(dd->dd_parent, dd); + mutex_destroy(&dd->dd_lock); + kmem_free(dd, sizeof (dsl_dir_t)); + dd = winner; + } else { + spa_open_ref(dp->dp_spa, dd); + } + } + + /* + * The dsl_dir_t has both open-to-close and instantiate-to-evict + * holds on the spa. We need the open-to-close holds because + * otherwise the spa_refcnt wouldn't change when we open a + * dir which the spa also has open, so we could incorrectly + * think it was OK to unload/export/destroy the pool. We need + * the instantiate-to-evict hold because the dsl_dir_t has a + * pointer to the dd_pool, which has a pointer to the spa_t. + */ + spa_open_ref(dp->dp_spa, tag); + ASSERT3P(dd->dd_pool, ==, dp); + ASSERT3U(dd->dd_object, ==, ddobj); + ASSERT3P(dd->dd_dbuf, ==, dbuf); + *ddp = dd; + return (0); + +errout: + if (dd->dd_parent) + dsl_dir_close(dd->dd_parent, dd); + mutex_destroy(&dd->dd_lock); + kmem_free(dd, sizeof (dsl_dir_t)); + dmu_buf_rele(dbuf, tag); + return (err); + +} + +void +dsl_dir_close(dsl_dir_t *dd, void *tag) +{ + dprintf_dd(dd, "%s\n", ""); + spa_close(dd->dd_pool->dp_spa, tag); + dmu_buf_rele(dd->dd_dbuf, tag); +} + +/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ +void +dsl_dir_name(dsl_dir_t *dd, char *buf) +{ + if (dd->dd_parent) { + dsl_dir_name(dd->dd_parent, buf); + (void) strcat(buf, "/"); + } else { + buf[0] = '\0'; + } + if (!MUTEX_HELD(&dd->dd_lock)) { + /* + * recursive mutex so that we can use + * dprintf_dd() with dd_lock held + */ + mutex_enter(&dd->dd_lock); + (void) strcat(buf, dd->dd_myname); + mutex_exit(&dd->dd_lock); + } else { + (void) strcat(buf, 
dd->dd_myname); + } +} + +/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */ +int +dsl_dir_namelen(dsl_dir_t *dd) +{ + int result = 0; + + if (dd->dd_parent) { + /* parent's name + 1 for the "/" */ + result = dsl_dir_namelen(dd->dd_parent) + 1; + } + + if (!MUTEX_HELD(&dd->dd_lock)) { + /* see dsl_dir_name */ + mutex_enter(&dd->dd_lock); + result += strlen(dd->dd_myname); + mutex_exit(&dd->dd_lock); + } else { + result += strlen(dd->dd_myname); + } + + return (result); +} + +static int +getcomponent(const char *path, char *component, const char **nextp) +{ + char *p; + if ((path == NULL) || (path[0] == '\0')) + return (ENOENT); + /* This would be a good place to reserve some namespace... */ + p = strpbrk(path, "/@"); + if (p && (p[1] == '/' || p[1] == '@')) { + /* two separators in a row */ + return (EINVAL); + } + if (p == NULL || p == path) { + /* + * if the first thing is an @ or /, it had better be an + * @ and it had better not have any more ats or slashes, + * and it had better have something after the @. + */ + if (p != NULL && + (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) + return (EINVAL); + if (strlen(path) >= MAXNAMELEN) + return (ENAMETOOLONG); + (void) strcpy(component, path); + p = NULL; + } else if (p[0] == '/') { + if (p-path >= MAXNAMELEN) + return (ENAMETOOLONG); + (void) strncpy(component, path, p - path); + component[p-path] = '\0'; + p++; + } else if (p[0] == '@') { + /* + * if the next separator is an @, there better not be + * any more slashes. 
+ */ + if (strchr(path, '/')) + return (EINVAL); + if (p-path >= MAXNAMELEN) + return (ENAMETOOLONG); + (void) strncpy(component, path, p - path); + component[p-path] = '\0'; + } else { + ASSERT(!"invalid p"); + } + *nextp = p; + return (0); +} + +/* + * same as dsl_open_dir, ignore the first component of name and use the + * spa instead + */ +int +dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, + dsl_dir_t **ddp, const char **tailp) +{ + char buf[MAXNAMELEN]; + const char *next, *nextnext = NULL; + int err; + dsl_dir_t *dd; + dsl_pool_t *dp; + uint64_t ddobj; + int openedspa = FALSE; + + dprintf("%s\n", name); + + err = getcomponent(name, buf, &next); + if (err) + return (err); + if (spa == NULL) { + err = spa_open(buf, &spa, FTAG); + if (err) { + dprintf("spa_open(%s) failed\n", buf); + return (err); + } + openedspa = TRUE; + + /* XXX this assertion belongs in spa_open */ + ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); + } + + dp = spa_get_dsl(spa); + + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); + if (err) { + rw_exit(&dp->dp_config_rwlock); + if (openedspa) + spa_close(spa, FTAG); + return (err); + } + + while (next != NULL) { + dsl_dir_t *child_ds; + err = getcomponent(next, buf, &nextnext); + if (err) + break; + ASSERT(next[0] != '\0'); + if (next[0] == '@') + break; + dprintf("looking up %s in obj%lld\n", + buf, dd->dd_phys->dd_child_dir_zapobj); + + err = zap_lookup(dp->dp_meta_objset, + dd->dd_phys->dd_child_dir_zapobj, + buf, sizeof (ddobj), 1, &ddobj); + if (err) { + if (err == ENOENT) + err = 0; + break; + } + + err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); + if (err) + break; + dsl_dir_close(dd, tag); + dd = child_ds; + next = nextnext; + } + rw_exit(&dp->dp_config_rwlock); + + if (err) { + dsl_dir_close(dd, tag); + if (openedspa) + spa_close(spa, FTAG); + return (err); + } + + /* + * It's an error if there's more than one component left, or + * tailp==NULL and 
there's any component left. + */ + if (next != NULL && + (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { + /* bad path name */ + dsl_dir_close(dd, tag); + dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); + err = ENOENT; + } + if (tailp) + *tailp = next; + if (openedspa) + spa_close(spa, FTAG); + *ddp = dd; + return (err); +} + +/* + * Return the dsl_dir_t, and possibly the last component which couldn't + * be found in *tail. Return NULL if the path is bogus, or if + * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' + * means that the last component is a snapshot. + */ +int +dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) +{ + return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); +} + +uint64_t +dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, + dmu_tx_t *tx) +{ + objset_t *mos = dp->dp_meta_objset; + uint64_t ddobj; + dsl_dir_phys_t *ddphys; + dmu_buf_t *dbuf; + + ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, + DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); + if (pds) { + VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, + name, sizeof (uint64_t), 1, &ddobj, tx)); + } else { + /* it's the root dir */ + VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); + } + VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); + dmu_buf_will_dirty(dbuf, tx); + ddphys = dbuf->db_data; + + ddphys->dd_creation_time = gethrestime_sec(); + if (pds) + ddphys->dd_parent_obj = pds->dd_object; + ddphys->dd_props_zapobj = zap_create(mos, + DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); + ddphys->dd_child_dir_zapobj = zap_create(mos, + DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); + if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) + ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; + dmu_buf_rele(dbuf, FTAG); + + return (ddobj); +} + +/* ARGSUSED */ +int +dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + 
dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_pool_t *dp = dd->dd_pool; + objset_t *mos = dp->dp_meta_objset; + int err; + uint64_t count; + + /* + * There should be exactly two holds, both from + * dsl_dataset_destroy: one on the dd directory, and one on its + * head ds. Otherwise, someone is trying to lookup something + * inside this dir while we want to destroy it. The + * config_rwlock ensures that nobody else opens it after we + * check. + */ + if (dmu_buf_refcount(dd->dd_dbuf) > 2) + return (EBUSY); + + err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); + if (err) + return (err); + if (count != 0) + return (EEXIST); + + return (0); +} + +void +dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_prop_setarg_t psa; + uint64_t value = 0; + uint64_t obj; + dd_used_t t; + + ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); + ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); + + /* Remove our reservation. 
*/ + dsl_prop_setarg_init_uint64(&psa, "reservation", + (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), + &value); + psa.psa_effective_value = 0; /* predict default value */ + + dsl_dir_set_reservation_sync(ds, &psa, tx); + + ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0); + ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); + for (t = 0; t < DD_USED_NUM; t++) + ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0); + + VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); + VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); + VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); + VERIFY(0 == zap_remove(mos, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); + + obj = dd->dd_object; + dsl_dir_close(dd, tag); + VERIFY(0 == dmu_object_free(mos, obj, tx)); +} + +boolean_t +dsl_dir_is_clone(dsl_dir_t *dd) +{ + return (dd->dd_phys->dd_origin_obj && + (dd->dd_pool->dp_origin_snap == NULL || + dd->dd_phys->dd_origin_obj != + dd->dd_pool->dp_origin_snap->ds_object)); +} + +void +dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) +{ + mutex_enter(&dd->dd_lock); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + dd->dd_phys->dd_used_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, + dd->dd_phys->dd_reserved); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, + dd->dd_phys->dd_compressed_bytes == 0 ? 
100 : + (dd->dd_phys->dd_uncompressed_bytes * 100 / + dd->dd_phys->dd_compressed_bytes)); + if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, + dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, + dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, + dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, + dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + + dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); + } + mutex_exit(&dd->dd_lock); + + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(dd)) { + dsl_dataset_t *ds; + char buf[MAXNAMELEN]; + + VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_origin_obj, FTAG, &ds)); + dsl_dataset_name(ds, buf); + dsl_dataset_rele(ds, FTAG); + dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); + } + rw_exit(&dd->dd_pool->dp_config_rwlock); +} + +void +dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dd->dd_pool; + + ASSERT(dd->dd_phys); + + if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { + /* up the hold count until we can be written out */ + dmu_buf_add_ref(dd->dd_dbuf, dd); + } +} + +static int64_t +parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) +{ + uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); + uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); + return (new_accounted - old_accounted); +} + +void +dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) +{ + ASSERT(dmu_tx_is_syncing(tx)); + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + + mutex_enter(&dd->dd_lock); + ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); + dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, + dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); + dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; + mutex_exit(&dd->dd_lock); + + /* release the 
hold from dsl_dir_dirty */ + dmu_buf_rele(dd->dd_dbuf, dd); +} + +static uint64_t +dsl_dir_space_towrite(dsl_dir_t *dd) +{ + uint64_t space = 0; + int i; + + ASSERT(MUTEX_HELD(&dd->dd_lock)); + + for (i = 0; i < TXG_SIZE; i++) { + space += dd->dd_space_towrite[i&TXG_MASK]; + ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); + } + return (space); +} + +/* + * How much space would dd have available if ancestor had delta applied + * to it? If ondiskonly is set, we're only interested in what's + * on-disk, not estimated pending changes. + */ +uint64_t +dsl_dir_space_available(dsl_dir_t *dd, + dsl_dir_t *ancestor, int64_t delta, int ondiskonly) +{ + uint64_t parentspace, myspace, quota, used; + + /* + * If there are no restrictions otherwise, assume we have + * unlimited space available. + */ + quota = UINT64_MAX; + parentspace = UINT64_MAX; + + if (dd->dd_parent != NULL) { + parentspace = dsl_dir_space_available(dd->dd_parent, + ancestor, delta, ondiskonly); + } + + mutex_enter(&dd->dd_lock); + if (dd->dd_phys->dd_quota != 0) + quota = dd->dd_phys->dd_quota; + used = dd->dd_phys->dd_used_bytes; + if (!ondiskonly) + used += dsl_dir_space_towrite(dd); + + if (dd->dd_parent == NULL) { + uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); + quota = MIN(quota, poolsize); + } + + if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { + /* + * We have some space reserved, in addition to what our + * parent gave us. 
/*
 * Try to temporarily reserve asize bytes against dd for tx's txg, then
 * recurse into the parent for whatever part of the request is not
 * covered by dd's own reservation.
 *
 *	netfree		the transaction is expected to free space overall;
 *			quota checks are bypassed and the pool size is
 *			computed with the netfree allowance
 *	ignorequota	skip dd's quota entirely
 *	checkrefquota	passed through to dsl_dataset_check_quota()
 *	tr_list		list that receives one struct tempreserve per
 *			directory successfully charged
 *	first		TRUE only for the directory the caller started at
 *
 * Returns 0 on success.  Otherwise returns the error from
 * dsl_dataset_check_quota(), or EDQUOT / ENOSPC when the quota / pool is
 * genuinely exhausted, or ERESTART when the caller should retry.
 * Records already appended to tr_list stay there on failure; the
 * reservation they describe is not undone here.
 */
static int
dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
    boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
    dmu_tx_t *tx, boolean_t first)
{
	uint64_t txg = tx->tx_txg;
	uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
	uint64_t deferred = 0;
	struct tempreserve *tr;
	int retval = EDQUOT;	/* becomes ENOSPC if the pool is the limit */
	int txgidx = txg & TXG_MASK;
	int i;
	uint64_t ref_rsrv = 0;

	ASSERT3U(txg, !=, 0);
	ASSERT3S(asize, >, 0);

	mutex_enter(&dd->dd_lock);

	/*
	 * Check against the dsl_dir's quota.  We don't add in the delta
	 * when checking for over-quota because they get one free hit.
	 */
	est_inflight = dsl_dir_space_towrite(dd);
	for (i = 0; i < TXG_SIZE; i++)
		est_inflight += dd->dd_tempreserved[i];
	used_on_disk = dd->dd_phys->dd_used_bytes;

	/*
	 * On the first iteration, fetch the dataset's used-on-disk and
	 * refreservation values. Also, if checkrefquota is set, test if
	 * allocating this space would exceed the dataset's refquota.
	 */
	if (first && tx->tx_objset) {
		int error;
		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;

		error = dsl_dataset_check_quota(ds, checkrefquota,
		    asize, est_inflight, &used_on_disk, &ref_rsrv);
		if (error) {
			mutex_exit(&dd->dd_lock);
			return (error);
		}
	}

	/*
	 * If this transaction will result in a net free of space,
	 * we want to let it through.
	 */
	if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
		quota = UINT64_MAX;
	else
		quota = dd->dd_phys->dd_quota;

	/*
	 * Adjust the quota against the actual pool size at the root
	 * minus any outstanding deferred frees.
	 * To ensure that it's possible to remove files from a full
	 * pool without inducing transient overcommits, we throttle
	 * netfree transactions against a quota that is slightly larger,
	 * but still within the pool's allocation slop.  In cases where
	 * we're very close to full, this will allow a steady trickle of
	 * removes to get through.
	 */
	if (dd->dd_parent == NULL) {
		spa_t *spa = dd->dd_pool->dp_spa;
		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
		deferred = metaslab_class_get_deferred(spa_normal_class(spa));
		if (poolsize - deferred < quota) {
			/* The pool, not a quota, is the binding limit */
			quota = poolsize - deferred;
			retval = ENOSPC;
		}
	}

	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
	if (used_on_disk + est_inflight >= quota) {
		if (est_inflight > 0 || used_on_disk < quota ||
		    (retval == ENOSPC && used_on_disk < quota + deferred))
			retval = ERESTART;
		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
		    "quota=%lluK tr=%lluK err=%d\n",
		    used_on_disk>>10, est_inflight>>10,
		    quota>>10, asize>>10, retval);
		mutex_exit(&dd->dd_lock);
		return (retval);
	}

	/* We need to up our estimated delta before dropping dd_lock */
	dd->dd_tempreserved[txgidx] += asize;

	/* Portion of the request that spills past dd's own reservation */
	parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
	    asize - ref_rsrv);
	mutex_exit(&dd->dd_lock);

	/* Record the charge so it can later be subtracted again */
	tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
	tr->tr_ds = dd;
	tr->tr_size = asize;
	list_insert_tail(tr_list, tr);

	/* see if it's OK with our parent */
	if (dd->dd_parent && parent_rsrv) {
		/* A dir with no head dataset makes the parent ignore quota */
		boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);

		return (dsl_dir_tempreserve_impl(dd->dd_parent,
		    parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
	} else {
		return (0);
	}
}
+ */ +int +dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, + uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx) +{ + int err; + list_t *tr_list; + + if (asize == 0) { + *tr_cookiep = NULL; + return (0); + } + + tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); + list_create(tr_list, sizeof (struct tempreserve), + offsetof(struct tempreserve, tr_node)); + ASSERT3S(asize, >, 0); + ASSERT3S(fsize, >=, 0); + + err = arc_tempreserve_space(lsize, tx->tx_txg); + if (err == 0) { + struct tempreserve *tr; + + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); + tr->tr_size = lsize; + list_insert_tail(tr_list, tr); + + err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx); + } else { + if (err == EAGAIN) { + txg_delay(dd->dd_pool, tx->tx_txg, 1); + err = ERESTART; + } + dsl_pool_memory_pressure(dd->dd_pool); + } + + if (err == 0) { + struct tempreserve *tr; + + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); + tr->tr_dp = dd->dd_pool; + tr->tr_size = asize; + list_insert_tail(tr_list, tr); + + err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, + FALSE, asize > usize, tr_list, tx, TRUE); + } + + if (err) + dsl_dir_tempreserve_clear(tr_list, tx); + else + *tr_cookiep = tr_list; + + return (err); +} + +/* + * Clear a temporary reservation that we previously made with + * dsl_dir_tempreserve_space(). 
+ */ +void +dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) +{ + int txgidx = tx->tx_txg & TXG_MASK; + list_t *tr_list = tr_cookie; + struct tempreserve *tr; + + ASSERT3U(tx->tx_txg, !=, 0); + + if (tr_cookie == NULL) + return; + + while (tr = list_head(tr_list)) { + if (tr->tr_dp) { + dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx); + } else if (tr->tr_ds) { + mutex_enter(&tr->tr_ds->dd_lock); + ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, + tr->tr_size); + tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; + mutex_exit(&tr->tr_ds->dd_lock); + } else { + arc_tempreserve_clear(tr->tr_size); + } + list_remove(tr_list, tr); + kmem_free(tr, sizeof (struct tempreserve)); + } + + kmem_free(tr_list, sizeof (list_t)); +} + +static void +dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) +{ + int64_t parent_space; + uint64_t est_used; + + mutex_enter(&dd->dd_lock); + if (space > 0) + dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; + + est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; + parent_space = parent_delta(dd, est_used, space); + mutex_exit(&dd->dd_lock); + + /* Make sure that we clean up dd_space_to* */ + dsl_dir_dirty(dd, tx); + + /* XXX this is potentially expensive and unnecessary... */ + if (parent_space && dd->dd_parent) + dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx); +} + +/* + * Call in open context when we think we're going to write/free space, + * eg. when dirtying data. Be conservative (ie. OK to write less than + * this or free more than this, but don't write more or free less). 
+ */ +void +dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) +{ + dsl_pool_willuse_space(dd->dd_pool, space, tx); + dsl_dir_willuse_space_impl(dd, space, tx); +} + +/* call from syncing context when we actually write/free space for this dd */ +void +dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, + int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) +{ + int64_t accounted_delta; + boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(type < DD_USED_NUM); + + dsl_dir_dirty(dd, tx); + + if (needlock) + mutex_enter(&dd->dd_lock); + accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); + ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); + ASSERT(compressed >= 0 || + dd->dd_phys->dd_compressed_bytes >= -compressed); + ASSERT(uncompressed >= 0 || + dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); + dd->dd_phys->dd_used_bytes += used; + dd->dd_phys->dd_uncompressed_bytes += uncompressed; + dd->dd_phys->dd_compressed_bytes += compressed; + + if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + ASSERT(used > 0 || + dd->dd_phys->dd_used_breakdown[type] >= -used); + dd->dd_phys->dd_used_breakdown[type] += used; +#ifdef DEBUG + dd_used_t t; + uint64_t u = 0; + for (t = 0; t < DD_USED_NUM; t++) + u += dd->dd_phys->dd_used_breakdown[t]; + ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); +#endif + } + if (needlock) + mutex_exit(&dd->dd_lock); + + if (dd->dd_parent != NULL) { + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, + accounted_delta, compressed, uncompressed, tx); + dsl_dir_transfer_space(dd->dd_parent, + used - accounted_delta, + DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); + } +} + +void +dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, + dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) +{ + boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(oldtype < DD_USED_NUM); + ASSERT(newtype < DD_USED_NUM); + + if (delta == 0 || 
!(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) + return; + + dsl_dir_dirty(dd, tx); + if (needlock) + mutex_enter(&dd->dd_lock); + ASSERT(delta > 0 ? + dd->dd_phys->dd_used_breakdown[oldtype] >= delta : + dd->dd_phys->dd_used_breakdown[newtype] >= -delta); + ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); + dd->dd_phys->dd_used_breakdown[oldtype] -= delta; + dd->dd_phys->dd_used_breakdown[newtype] += delta; + if (needlock) + mutex_exit(&dd->dd_lock); +} + +static int +dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + int err; + uint64_t towrite; + + if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) + return (err); + + if (psa->psa_effective_value == 0) + return (0); + + mutex_enter(&dd->dd_lock); + /* + * If we are doing the preliminary check in open context, and + * there are pending changes, then don't fail it, since the + * pending changes could under-estimate the amount of space to be + * freed up. 
+ */ + towrite = dsl_dir_space_towrite(dd); + if ((dmu_tx_is_syncing(tx) || towrite == 0) && + (psa->psa_effective_value < dd->dd_phys->dd_reserved || + psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) { + err = ENOSPC; + } + mutex_exit(&dd->dd_lock); + return (err); +} + +extern dsl_syncfunc_t dsl_prop_set_sync; + +static void +dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t effective_value = psa->psa_effective_value; + + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(dd, psa); + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + + mutex_enter(&dd->dd_lock); + dd->dd_phys->dd_quota = effective_value; + mutex_exit(&dd->dd_lock); + + spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa, + tx, "%lld dataset = %llu ", + (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj); +} + +int +dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) +{ + dsl_dir_t *dd; + dsl_dataset_t *ds; + dsl_prop_setarg_t psa; + int err; + + dsl_prop_setarg_init_uint64(&psa, "quota", source, "a); + + err = dsl_dataset_hold(ddname, FTAG, &ds); + if (err) + return (err); + + err = dsl_dir_open(ddname, FTAG, &dd, NULL); + if (err) { + dsl_dataset_rele(ds, FTAG); + return (err); + } + + ASSERT(ds->ds_dir == dd); + + /* + * If someone removes a file, then tries to set the quota, we want to + * make sure the file freeing takes effect. 
+ */ + txg_wait_open(dd->dd_pool, 0); + + err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, + dsl_dir_set_quota_sync, ds, &psa, 0); + + dsl_dir_close(dd, FTAG); + dsl_dataset_rele(ds, FTAG); + return (err); +} + +int +dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t effective_value; + uint64_t used, avail; + int err; + + if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) + return (err); + + effective_value = psa->psa_effective_value; + + /* + * If we are doing the preliminary check in open context, the + * space estimates may be inaccurate. + */ + if (!dmu_tx_is_syncing(tx)) + return (0); + + mutex_enter(&dd->dd_lock); + used = dd->dd_phys->dd_used_bytes; + mutex_exit(&dd->dd_lock); + + if (dd->dd_parent) { + avail = dsl_dir_space_available(dd->dd_parent, + NULL, 0, FALSE); + } else { + avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; + } + + if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) { + uint64_t delta = MAX(used, effective_value) - + MAX(used, dd->dd_phys->dd_reserved); + + if (delta > avail) + return (ENOSPC); + if (dd->dd_phys->dd_quota > 0 && + effective_value > dd->dd_phys->dd_quota) + return (ENOSPC); + } + + return (0); +} + +static void +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t effective_value = psa->psa_effective_value; + uint64_t used; + int64_t delta; + + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(dd, psa); + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + + mutex_enter(&dd->dd_lock); + used = dd->dd_phys->dd_used_bytes; + delta = MAX(used, effective_value) - + MAX(used, dd->dd_phys->dd_reserved); + dd->dd_phys->dd_reserved = effective_value; + + if (dd->dd_parent != NULL) { + /* Roll up this additional usage into our ancestors */ + 
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, + delta, 0, 0, tx); + } + mutex_exit(&dd->dd_lock); + + spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa, + tx, "%lld dataset = %llu", + (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj); +} + +int +dsl_dir_set_reservation(const char *ddname, zprop_source_t source, + uint64_t reservation) +{ + dsl_dir_t *dd; + dsl_dataset_t *ds; + dsl_prop_setarg_t psa; + int err; + + dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation); + + err = dsl_dataset_hold(ddname, FTAG, &ds); + if (err) + return (err); + + err = dsl_dir_open(ddname, FTAG, &dd, NULL); + if (err) { + dsl_dataset_rele(ds, FTAG); + return (err); + } + + ASSERT(ds->ds_dir == dd); + + err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, + dsl_dir_set_reservation_sync, ds, &psa, 0); + + dsl_dir_close(dd, FTAG); + dsl_dataset_rele(ds, FTAG); + return (err); +} + +static dsl_dir_t * +closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) +{ + for (; ds1; ds1 = ds1->dd_parent) { + dsl_dir_t *dd; + for (dd = ds2; dd; dd = dd->dd_parent) { + if (ds1 == dd) + return (dd); + } + } + return (NULL); +} + +/* + * If delta is applied to dd, how much of that delta would be applied to + * ancestor? Syncing context only. + */ +static int64_t +would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) +{ + if (dd == ancestor) + return (delta); + + mutex_enter(&dd->dd_lock); + delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); + mutex_exit(&dd->dd_lock); + return (would_change(dd->dd_parent, delta, ancestor)); +} + +struct renamearg { + dsl_dir_t *newparent; + const char *mynewname; +}; + +static int +dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct renamearg *ra = arg2; + dsl_pool_t *dp = dd->dd_pool; + objset_t *mos = dp->dp_meta_objset; + int err; + uint64_t val; + + /* + * There should only be one reference, from dmu_objset_rename(). 
+ * Fleeting holds are also possible (eg, from "zfs list" getting + * stats), but any that are present in open context will likely + * be gone by syncing context, so only fail from syncing + * context. + */ + if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1) + return (EBUSY); + + /* check for existing name */ + err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, + ra->mynewname, 8, 1, &val); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + + if (ra->newparent != dd->dd_parent) { + /* is there enough space? */ + uint64_t myspace = + MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); + + /* no rename into our descendant */ + if (closest_common_ancestor(dd, ra->newparent) == dd) + return (EINVAL); + + if (err = dsl_dir_transfer_possible(dd->dd_parent, + ra->newparent, myspace)) + return (err); + } + + return (0); +} + +static void +dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct renamearg *ra = arg2; + dsl_pool_t *dp = dd->dd_pool; + objset_t *mos = dp->dp_meta_objset; + int err; + + ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + + if (ra->newparent != dd->dd_parent) { + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, + -dd->dd_phys->dd_used_bytes, + -dd->dd_phys->dd_compressed_bytes, + -dd->dd_phys->dd_uncompressed_bytes, tx); + dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, + dd->dd_phys->dd_used_bytes, + dd->dd_phys->dd_compressed_bytes, + dd->dd_phys->dd_uncompressed_bytes, tx); + + if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { + uint64_t unused_rsrv = dd->dd_phys->dd_reserved - + dd->dd_phys->dd_used_bytes; + + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, + -unused_rsrv, 0, 0, tx); + dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, + unused_rsrv, 0, 0, tx); + } + } + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + + /* remove from old parent zapobj */ + err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, + 
dd->dd_myname, tx); + ASSERT3U(err, ==, 0); + + (void) strcpy(dd->dd_myname, ra->mynewname); + dsl_dir_close(dd->dd_parent, dd); + dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; + VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, + ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); + + /* add to new parent zapobj */ + err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, + dd->dd_myname, 8, 1, &dd->dd_object, tx); + ASSERT3U(err, ==, 0); + + spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, + tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); +} + +int +dsl_dir_rename(dsl_dir_t *dd, const char *newname) +{ + struct renamearg ra; + int err; + + /* new parent should exist */ + err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); + if (err) + return (err); + + /* can't rename to different pool */ + if (dd->dd_pool != ra.newparent->dd_pool) { + err = ENXIO; + goto out; + } + + /* new name should not already exist */ + if (ra.mynewname == NULL) { + err = EEXIST; + goto out; + } + + err = dsl_sync_task_do(dd->dd_pool, + dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); + +out: + dsl_dir_close(ra.newparent, FTAG); + return (err); +} + +int +dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) +{ + dsl_dir_t *ancestor; + int64_t adelta; + uint64_t avail; + + ancestor = closest_common_ancestor(sdd, tdd); + adelta = would_change(sdd, -space, ancestor); + avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); + if (avail < space) + return (ENOSPC); + + return (0); +} + +timestruc_t +dsl_dir_snap_cmtime(dsl_dir_t *dd) +{ + timestruc_t t; + + mutex_enter(&dd->dd_lock); + t = dd->dd_snap_cmtime; + mutex_exit(&dd->dd_lock); + + return (t); +} + +void +dsl_dir_snap_cmtime_update(dsl_dir_t *dd) +{ + timestruc_t t; + + gethrestime(&t); + mutex_enter(&dd->dd_lock); + dd->dd_snap_cmtime = t; + mutex_exit(&dd->dd_lock); +} diff --git a/uts/common/fs/zfs/dsl_pool.c b/uts/common/fs/zfs/dsl_pool.c new file mode 
100644 index 000000000000..700cc962865d --- /dev/null +++ b/uts/common/fs/zfs/dsl_pool.c @@ -0,0 +1,848 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/dsl_pool.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_scan.h> +#include <sys/dnode.h> +#include <sys/dmu_tx.h> +#include <sys/dmu_objset.h> +#include <sys/arc.h> +#include <sys/zap.h> +#include <sys/zio.h> +#include <sys/zfs_context.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_znode.h> +#include <sys/spa_impl.h> +#include <sys/dsl_deadlist.h> + +int zfs_no_write_throttle = 0; +int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ +int zfs_txg_synctime_ms = 1000; /* target millisecs to sync a txg */ + +uint64_t zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */ +uint64_t zfs_write_limit_max = 0; /* max data payload per txg */ +uint64_t zfs_write_limit_inflated = 0; +uint64_t zfs_write_limit_override = 0; + +kmutex_t zfs_write_limit_lock; + +static pgcnt_t old_physmem = 0; + +int +dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) +{ + uint64_t obj; + int err; + + err = zap_lookup(dp->dp_meta_objset, + dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, + name, sizeof (obj), 1, &obj); + if (err) + return (err); + + return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); +} + +static dsl_pool_t * +dsl_pool_open_impl(spa_t *spa, uint64_t txg) +{ + dsl_pool_t *dp; + blkptr_t *bp = spa_get_rootblkptr(spa); + + dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); + dp->dp_spa = spa; + dp->dp_meta_rootbp = *bp; + rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); + dp->dp_write_limit = zfs_write_limit_min; + txg_init(dp, txg); + + txg_list_create(&dp->dp_dirty_datasets, + offsetof(dsl_dataset_t, ds_dirty_link)); + txg_list_create(&dp->dp_dirty_dirs, + offsetof(dsl_dir_t, dd_dirty_link)); + txg_list_create(&dp->dp_sync_tasks, + offsetof(dsl_sync_task_group_t, dstg_node)); + list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), + offsetof(dsl_dataset_t, ds_synced_link)); + + mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, 
NULL); + + dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, + 1, 4, 0); + + return (dp); +} + +int +dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) +{ + int err; + dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); + dsl_dir_t *dd; + dsl_dataset_t *ds; + uint64_t obj; + + rw_enter(&dp->dp_config_rwlock, RW_WRITER); + err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, + &dp->dp_meta_objset); + if (err) + goto out; + + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, + &dp->dp_root_dir_obj); + if (err) + goto out; + + err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, + NULL, dp, &dp->dp_root_dir); + if (err) + goto out; + + err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); + if (err) + goto out; + + if (spa_version(spa) >= SPA_VERSION_ORIGIN) { + err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); + if (err) + goto out; + err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, + FTAG, &ds); + if (err == 0) { + err = dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, dp, + &dp->dp_origin_snap); + dsl_dataset_rele(ds, FTAG); + } + dsl_dir_close(dd, dp); + if (err) + goto out; + } + + if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { + err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, + &dp->dp_free_dir); + if (err) + goto out; + + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); + if (err) + goto out; + VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + dp->dp_meta_objset, obj)); + } + + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, + &dp->dp_tmp_userrefs_obj); + if (err == ENOENT) + err = 0; + if (err) + goto out; + + err = dsl_scan_init(dp, txg); + +out: + rw_exit(&dp->dp_config_rwlock); + if (err) + dsl_pool_close(dp); + else + *dpp = dp; + + return (err); +} + +void +dsl_pool_close(dsl_pool_t *dp) +{ + 
/* drop our references from dsl_pool_open() */ + + /* + * Since we held the origin_snap from "syncing" context (which + * includes pool-opening context), it actually only got a "ref" + * and not a hold, so just drop that here. + */ + if (dp->dp_origin_snap) + dsl_dataset_drop_ref(dp->dp_origin_snap, dp); + if (dp->dp_mos_dir) + dsl_dir_close(dp->dp_mos_dir, dp); + if (dp->dp_free_dir) + dsl_dir_close(dp->dp_free_dir, dp); + if (dp->dp_root_dir) + dsl_dir_close(dp->dp_root_dir, dp); + + bpobj_close(&dp->dp_free_bpobj); + + /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ + if (dp->dp_meta_objset) + dmu_objset_evict(dp->dp_meta_objset); + + txg_list_destroy(&dp->dp_dirty_datasets); + txg_list_destroy(&dp->dp_sync_tasks); + txg_list_destroy(&dp->dp_dirty_dirs); + list_destroy(&dp->dp_synced_datasets); + + arc_flush(dp->dp_spa); + txg_fini(dp); + dsl_scan_fini(dp); + rw_destroy(&dp->dp_config_rwlock); + mutex_destroy(&dp->dp_lock); + taskq_destroy(dp->dp_vnrele_taskq); + if (dp->dp_blkstats) + kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); + kmem_free(dp, sizeof (dsl_pool_t)); +} + +dsl_pool_t * +dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) +{ + int err; + dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); + dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); + objset_t *os; + dsl_dataset_t *ds; + uint64_t obj; + + /* create and open the MOS (meta-objset) */ + dp->dp_meta_objset = dmu_objset_create_impl(spa, + NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); + + /* create the pool directory */ + err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); + ASSERT3U(err, ==, 0); + + /* Initialize scan structures */ + VERIFY3U(0, ==, dsl_scan_init(dp, txg)); + + /* create and open the root dir */ + dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); + VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, + NULL, dp, &dp->dp_root_dir)); + + /* create and open the meta-objset 
dir */ + (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); + VERIFY(0 == dsl_pool_open_special_dir(dp, + MOS_DIR_NAME, &dp->dp_mos_dir)); + + if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { + /* create and open the free dir */ + (void) dsl_dir_create_sync(dp, dp->dp_root_dir, + FREE_DIR_NAME, tx); + VERIFY(0 == dsl_pool_open_special_dir(dp, + FREE_DIR_NAME, &dp->dp_free_dir)); + + /* create and open the free_bplist */ + obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); + VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); + VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + dp->dp_meta_objset, obj)); + } + + if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) + dsl_pool_create_origin(dp, tx); + + /* create the root dataset */ + obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); + + /* create the root objset */ + VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + os = dmu_objset_create_impl(dp->dp_spa, ds, + dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); +#ifdef _KERNEL + zfs_create_fs(os, kcred, zplprops, tx); +#endif + dsl_dataset_rele(ds, FTAG); + + dmu_tx_commit(tx); + + return (dp); +} + +static int +deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + dsl_deadlist_t *dl = arg; + dsl_deadlist_insert(dl, bp, tx); + return (0); +} + +void +dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) +{ + zio_t *zio; + dmu_tx_t *tx; + dsl_dir_t *dd; + dsl_dataset_t *ds; + dsl_sync_task_group_t *dstg; + objset_t *mos = dp->dp_meta_objset; + hrtime_t start, write_time; + uint64_t data_written; + int err; + + /* + * We need to copy dp_space_towrite() before doing + * dsl_sync_task_group_sync(), because + * dsl_dataset_snapshot_reserve_space() will increase + * dp_space_towrite but not actually write anything. 
+ */ + data_written = dp->dp_space_towrite[txg & TXG_MASK]; + + tx = dmu_tx_create_assigned(dp, txg); + + dp->dp_read_overhead = 0; + start = gethrtime(); + + /* Pass 1: sync every dirty dataset under a single root zio. */ + zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { + /* + * We must not sync any non-MOS datasets twice, because + * we may have taken a snapshot of them. However, we + * may sync newly-created datasets on pass 2. + */ + ASSERT(!list_link_active(&ds->ds_synced_link)); + list_insert_tail(&dp->dp_synced_datasets, ds); + dsl_dataset_sync(ds, zio, tx); + } + DTRACE_PROBE(pool_sync__1setup); + err = zio_wait(zio); + + write_time = gethrtime() - start; + ASSERT(err == 0); + DTRACE_PROBE(pool_sync__2rootzio); + + for (ds = list_head(&dp->dp_synced_datasets); ds; + ds = list_next(&dp->dp_synced_datasets, ds)) + dmu_objset_do_userquota_updates(ds->ds_objset, tx); + + /* + * Sync the datasets again to push out the changes due to + * userspace updates. This must be done before we process the + * sync tasks, because that could cause a snapshot of a dataset + * whose ds_bp will be rewritten when we do this 2nd sync. + */ + zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { + ASSERT(list_link_active(&ds->ds_synced_link)); + dmu_buf_rele(ds->ds_dbuf, ds); + dsl_dataset_sync(ds, zio, tx); + } + err = zio_wait(zio); + /* + * Check the second pass the same way as the first; without this + * the value stored in err above is never examined. + */ + ASSERT(err == 0); + + /* + * Move dead blocks from the pending deadlist to the on-disk + * deadlist. + */ + for (ds = list_head(&dp->dp_synced_datasets); ds; + ds = list_next(&dp->dp_synced_datasets, ds)) { + bplist_iterate(&ds->ds_pending_deadlist, + deadlist_enqueue_cb, &ds->ds_deadlist, tx); + } + + while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) { + /* + * No more sync tasks should have been added while we + * were syncing. 
+ */ + ASSERT(spa_sync_pass(dp->dp_spa) == 1); + dsl_sync_task_group_sync(dstg, tx); + } + DTRACE_PROBE(pool_sync__3task); + + start = gethrtime(); + while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) + dsl_dir_sync(dd, tx); + write_time += gethrtime() - start; + + start = gethrtime(); + if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || + list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { + zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + dmu_objset_sync(mos, zio, tx); + err = zio_wait(zio); + ASSERT(err == 0); + dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); + spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); + } + write_time += gethrtime() - start; + DTRACE_PROBE2(pool_sync__4io, hrtime_t, write_time, + hrtime_t, dp->dp_read_overhead); + write_time -= dp->dp_read_overhead; + + dmu_tx_commit(tx); + + dp->dp_space_towrite[txg & TXG_MASK] = 0; + ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0); + + /* + * If the write limit max has not been explicitly set, set it + * to a fraction of available physical memory (default 1/8th). + * Note that we must inflate the limit because the spa + * inflates write sizes to account for data replication. + * Check this each sync phase to catch changing memory size. + */ + if (physmem != old_physmem && zfs_write_limit_shift) { + mutex_enter(&zfs_write_limit_lock); + old_physmem = physmem; + zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; + zfs_write_limit_inflated = MAX(zfs_write_limit_min, + spa_get_asize(dp->dp_spa, zfs_write_limit_max)); + mutex_exit(&zfs_write_limit_lock); + } + + /* + * Attempt to keep the sync time consistent by adjusting the + * amount of write traffic allowed into each transaction group. 
+ * Weight the throughput calculation towards the current value: + * thru = 3/4 old_thru + 1/4 new_thru + * + * Note: write_time is in nanosecs, so write_time/MICROSEC + * yields millisecs + */ + ASSERT(zfs_write_limit_min > 0); + if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) { + uint64_t throughput = data_written / (write_time / MICROSEC); + + if (dp->dp_throughput) + dp->dp_throughput = throughput / 4 + + 3 * dp->dp_throughput / 4; + else + dp->dp_throughput = throughput; + dp->dp_write_limit = MIN(zfs_write_limit_inflated, + MAX(zfs_write_limit_min, + dp->dp_throughput * zfs_txg_synctime_ms)); + } +} + +void +dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) +{ + dsl_dataset_t *ds; + objset_t *os; + + while (ds = list_head(&dp->dp_synced_datasets)) { + list_remove(&dp->dp_synced_datasets, ds); + os = ds->ds_objset; + zil_clean(os->os_zil, txg); + ASSERT(!dmu_objset_is_dirty(os, txg)); + dmu_buf_rele(ds->ds_dbuf, ds); + } + ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); +} + +/* + * TRUE if the current thread is the tx_sync_thread or if we + * are being called from SPA context during pool initialization. + */ +int +dsl_pool_sync_context(dsl_pool_t *dp) +{ + return (curthread == dp->dp_tx.tx_sync_thread || + spa_get_dsl(dp->dp_spa) == NULL); +} + +uint64_t +dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) +{ + uint64_t space, resv; + + /* + * Reserve about 1.6% (1/64), or at least 32MB, for allocation + * efficiency. + * XXX The intent log is not accounted for, so it must fit + * within this slop. + * + * If we're trying to assess whether it's OK to do a free, + * cut the reservation in half to allow forward progress + * (e.g. make it possible to rm(1) files from a full pool). 
+ */ + space = spa_get_dspace(dp->dp_spa); + resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); + if (netfree) + resv >>= 1; + + return (space - resv); +} + +int +dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) +{ + uint64_t reserved = 0; + uint64_t write_limit = (zfs_write_limit_override ? + zfs_write_limit_override : dp->dp_write_limit); + + if (zfs_no_write_throttle) { + atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], + space); + return (0); + } + + /* + * Check to see if we have exceeded the maximum allowed IO for + * this transaction group. We can do this without locks since + * a little slop here is ok. Note that we do the reserved check + * with only half the requested reserve: this is because the + * reserve requests are worst-case, and we really don't want to + * throttle based off of worst-case estimates. + */ + if (write_limit > 0) { + reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK] + + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2; + + if (reserved && reserved > write_limit) + return (ERESTART); + } + + atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); + + /* + * If this transaction group is over 7/8ths capacity, delay + * the caller 1 clock tick. This will slow down the "fill" + * rate until the sync process can catch up with us. 
+ */ + if (reserved && reserved > (write_limit - (write_limit >> 3))) + txg_delay(dp, tx->tx_txg, 1); + + return (0); +} + +void +dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) +{ + ASSERT(dp->dp_tempreserved[tx->tx_txg & TXG_MASK] >= space); + atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], -space); +} + +void +dsl_pool_memory_pressure(dsl_pool_t *dp) +{ + uint64_t space_inuse = 0; + int i; + + if (dp->dp_write_limit == zfs_write_limit_min) + return; + + for (i = 0; i < TXG_SIZE; i++) { + space_inuse += dp->dp_space_towrite[i]; + space_inuse += dp->dp_tempreserved[i]; + } + dp->dp_write_limit = MAX(zfs_write_limit_min, + MIN(dp->dp_write_limit, space_inuse / 4)); +} + +void +dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) +{ + if (space > 0) { + mutex_enter(&dp->dp_lock); + dp->dp_space_towrite[tx->tx_txg & TXG_MASK] += space; + mutex_exit(&dp->dp_lock); + } +} + +/* ARGSUSED */ +static int +upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +{ + dmu_tx_t *tx = arg; + dsl_dataset_t *ds, *prev = NULL; + int err; + dsl_pool_t *dp = spa_get_dsl(spa); + + err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (err) + return (err); + + while (ds->ds_phys->ds_prev_snap_obj != 0) { + err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, + FTAG, &prev); + if (err) { + dsl_dataset_rele(ds, FTAG); + return (err); + } + + if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) + break; + dsl_dataset_rele(ds, FTAG); + ds = prev; + prev = NULL; + } + + if (prev == NULL) { + prev = dp->dp_origin_snap; + + /* + * The $ORIGIN can't have any data, or the accounting + * will be wrong. 
+ */ + ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); + + /* The origin doesn't get attached to itself */ + if (ds->ds_object == prev->ds_object) { + dsl_dataset_rele(ds, FTAG); + return (0); + } + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_prev_snap_obj = prev->ds_object; + ds->ds_phys->ds_prev_snap_txg = prev->ds_phys->ds_creation_txg; + + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + ds->ds_dir->dd_phys->dd_origin_obj = prev->ds_object; + + dmu_buf_will_dirty(prev->ds_dbuf, tx); + prev->ds_phys->ds_num_children++; + + if (ds->ds_phys->ds_next_snap_obj == 0) { + ASSERT(ds->ds_prev == NULL); + VERIFY(0 == dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); + } + } + + ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); + ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); + + if (prev->ds_phys->ds_next_clones_obj == 0) { + dmu_buf_will_dirty(prev->ds_dbuf, tx); + prev->ds_phys->ds_next_clones_obj = + zap_create(dp->dp_meta_objset, + DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); + } + VERIFY(0 == zap_add_int(dp->dp_meta_objset, + prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); + + dsl_dataset_rele(ds, FTAG); + if (prev != dp->dp_origin_snap) + dsl_dataset_rele(prev, FTAG); + return (0); +} + +void +dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) +{ + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(dp->dp_origin_snap != NULL); + + VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, + tx, DS_FIND_CHILDREN)); +} + +/* ARGSUSED */ +static int +upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +{ + dmu_tx_t *tx = arg; + dsl_dataset_t *ds; + dsl_pool_t *dp = spa_get_dsl(spa); + objset_t *mos = dp->dp_meta_objset; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + + if (ds->ds_dir->dd_phys->dd_origin_obj) { + dsl_dataset_t *origin; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin)); + + if 
(origin->ds_dir->dd_phys->dd_clones == 0) { + dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); + origin->ds_dir->dd_phys->dd_clones = zap_create(mos, + DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + } + + VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); + + dsl_dataset_rele(origin, FTAG); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + +/* + * Create and open the free dir, allocate and open the pool's free bpobj, + * register it in the pool directory, then run upgrade_dir_clones_cb over + * the pool's datasets. Syncing context only; every step is VERIFY'd. + */ +void +dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) +{ + uint64_t obj; + + /* Declarations first, as in the rest of this file. */ + ASSERT(dmu_tx_is_syncing(tx)); + + (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); + VERIFY(0 == dsl_pool_open_special_dir(dp, + FREE_DIR_NAME, &dp->dp_free_dir)); + + /* + * We can't use bpobj_alloc(), because spa_version() still + * returns the old version, and we need a new-version bpobj with + * subobj support. So call dmu_object_alloc() directly. + */ + obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, + SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); + VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); + VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + dp->dp_meta_objset, obj)); + + VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, + upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN)); +} + +/* + * Create the origin dataset and snapshot it, leaving the snapshot held + * in dp->dp_origin_snap (tagged with dp). Syncing context only; the + * config rwlock is held as writer for the duration. + */ +void +dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) +{ + uint64_t dsobj; + dsl_dataset_t *ds; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(dp->dp_origin_snap == NULL); + + /* create the origin dir, ds, & snap-ds */ + rw_enter(&dp->dp_config_rwlock, RW_WRITER); + dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, + NULL, 0, kcred, tx); + VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx); + VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, + dp, &dp->dp_origin_snap)); + dsl_dataset_rele(ds, FTAG); + rw_exit(&dp->dp_config_rwlock); +} + +taskq_t * +dsl_pool_vnrele_taskq(dsl_pool_t *dp) +{ + 
return (dp->dp_vnrele_taskq); +} + +/* + * Walk through the pool-wide zap object of temporary snapshot user holds + * and release them. + */ +void +dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) +{ + zap_attribute_t za; + zap_cursor_t zc; + objset_t *mos = dp->dp_meta_objset; + uint64_t zapobj = dp->dp_tmp_userrefs_obj; + + if (zapobj == 0) + return; + ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + + for (zap_cursor_init(&zc, mos, zapobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + char *htag; + uint64_t dsobj; + + htag = strchr(za.za_name, '-'); + *htag = '\0'; + ++htag; + dsobj = strtonum(za.za_name, NULL); + (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE); + } + zap_cursor_fini(&zc); +} + +/* + * Create the pool-wide zap object for storing temporary snapshot holds. + */ +void +dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) +{ + objset_t *mos = dp->dp_meta_objset; + + ASSERT(dp->dp_tmp_userrefs_obj == 0); + ASSERT(dmu_tx_is_syncing(tx)); + + dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, + DMU_OT_NONE, 0, tx); + + VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, + sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0); +} + +static int +dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, + const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding) +{ + objset_t *mos = dp->dp_meta_objset; + uint64_t zapobj = dp->dp_tmp_userrefs_obj; + char *name; + int error; + + ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + ASSERT(dmu_tx_is_syncing(tx)); + + /* + * If the pool was created prior to SPA_VERSION_USERREFS, the + * zap object for temporary holds might not exist yet. 
+ */ + if (zapobj == 0) { + if (holding) { + dsl_pool_user_hold_create_obj(dp, tx); + zapobj = dp->dp_tmp_userrefs_obj; + } else { + return (ENOENT); + } + } + + name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); + if (holding) + error = zap_add(mos, zapobj, name, 8, 1, now, tx); + else + error = zap_remove(mos, zapobj, name, tx); + strfree(name); + + return (error); +} + +/* + * Add a temporary hold for the given dataset object and tag. + */ +int +dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, + uint64_t *now, dmu_tx_t *tx) +{ + return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); +} + +/* + * Release a temporary hold for the given dataset object and tag. + */ +int +dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, + dmu_tx_t *tx) +{ + return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL, + tx, B_FALSE)); +} diff --git a/uts/common/fs/zfs/dsl_prop.c b/uts/common/fs/zfs/dsl_prop.c new file mode 100644 index 000000000000..aa66b32e7938 --- /dev/null +++ b/uts/common/fs/zfs/dsl_prop.c @@ -0,0 +1,1153 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/dmu.h> +#include <sys/dmu_objset.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/spa.h> +#include <sys/zap.h> +#include <sys/fs/zfs.h> + +#include "zfs_prop.h" + +#define ZPROP_INHERIT_SUFFIX "$inherit" +#define ZPROP_RECVD_SUFFIX "$recvd" + +static int +dodefault(const char *propname, int intsz, int numints, void *buf) +{ + zfs_prop_t prop; + + /* + * The setonce properties are read-only, BUT they still + * have a default value that can be used as the initial + * value. + */ + if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL || + (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop))) + return (ENOENT); + + if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + if (intsz != 1) + return (EOVERFLOW); + (void) strncpy(buf, zfs_prop_default_string(prop), + numints); + } else { + if (intsz != 8 || numints < 1) + return (EOVERFLOW); + + *(uint64_t *)buf = zfs_prop_default_numeric(prop); + } + + return (0); +} + +int +dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, + int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot) +{ + int err = ENOENT; + dsl_dir_t *target = dd; + objset_t *mos = dd->dd_pool->dp_meta_objset; + zfs_prop_t prop; + boolean_t inheritable; + boolean_t inheriting = B_FALSE; + char *inheritstr; + char *recvdstr; + + ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); + + if (setpoint) + setpoint[0] = '\0'; + + prop = zfs_name_to_prop(propname); + inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); + inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); + recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + + /* + * Note: dd may become NULL, therefore we shouldn't dereference it + * after this loop. 
+ */ + for (; dd != NULL; dd = dd->dd_parent) { + ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); + + if (dd != target || snapshot) { + if (!inheritable) + break; + inheriting = B_TRUE; + } + + /* Check for a local value. */ + err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, propname, + intsz, numints, buf); + if (err != ENOENT) { + if (setpoint != NULL && err == 0) + dsl_dir_name(dd, setpoint); + break; + } + + /* + * Skip the check for a received value if there is an explicit + * inheritance entry. + */ + err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, + inheritstr); + if (err != 0 && err != ENOENT) + break; + + if (err == ENOENT) { + /* Check for a received value. */ + err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, + recvdstr, intsz, numints, buf); + if (err != ENOENT) { + if (setpoint != NULL && err == 0) { + if (inheriting) { + dsl_dir_name(dd, setpoint); + } else { + (void) strcpy(setpoint, + ZPROP_SOURCE_VAL_RECVD); + } + } + break; + } + } + + /* + * If we found an explicit inheritance entry, err is zero even + * though we haven't yet found the value, so reinitializing err + * at the end of the loop (instead of at the beginning) ensures + * that err has a valid post-loop value. + */ + err = ENOENT; + } + + if (err == ENOENT) + err = dodefault(propname, intsz, numints, buf); + + strfree(inheritstr); + strfree(recvdstr); + + return (err); +} + +int +dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname, + int intsz, int numints, void *buf, char *setpoint) +{ + zfs_prop_t prop = zfs_name_to_prop(propname); + boolean_t inheritable; + boolean_t snapshot; + uint64_t zapobj; + + ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock)); + inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); + snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)); + zapobj = (ds->ds_phys == NULL ? 
0 : ds->ds_phys->ds_props_obj); + + if (zapobj != 0) { + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + int err; + + ASSERT(snapshot); + + /* Check for a local value. */ + err = zap_lookup(mos, zapobj, propname, intsz, numints, buf); + if (err != ENOENT) { + if (setpoint != NULL && err == 0) + dsl_dataset_name(ds, setpoint); + return (err); + } + + /* + * Skip the check for a received value if there is an explicit + * inheritance entry. + */ + if (inheritable) { + char *inheritstr = kmem_asprintf("%s%s", propname, + ZPROP_INHERIT_SUFFIX); + err = zap_contains(mos, zapobj, inheritstr); + strfree(inheritstr); + if (err != 0 && err != ENOENT) + return (err); + } + + if (err == ENOENT) { + /* Check for a received value. */ + char *recvdstr = kmem_asprintf("%s%s", propname, + ZPROP_RECVD_SUFFIX); + err = zap_lookup(mos, zapobj, recvdstr, + intsz, numints, buf); + strfree(recvdstr); + if (err != ENOENT) { + if (setpoint != NULL && err == 0) + (void) strcpy(setpoint, + ZPROP_SOURCE_VAL_RECVD); + return (err); + } + } + } + + return (dsl_prop_get_dd(ds->ds_dir, propname, + intsz, numints, buf, setpoint, snapshot)); +} + +/* + * Register interest in the named property. We'll call the callback + * once to notify it of the current property value, and again each time + * the property changes, until this callback is unregistered. + * + * Return 0 on success, errno if the prop is not an integer value. 
+ */ +int +dsl_prop_register(dsl_dataset_t *ds, const char *propname, + dsl_prop_changed_cb_t *callback, void *cbarg) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_pool_t *dp = dd->dd_pool; + uint64_t value; + dsl_prop_cb_record_t *cbr; + int err; + int need_rwlock; + + need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock); + if (need_rwlock) + rw_enter(&dp->dp_config_rwlock, RW_READER); + + err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL); + if (err != 0) { + if (need_rwlock) + rw_exit(&dp->dp_config_rwlock); + return (err); + } + + cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP); + cbr->cbr_ds = ds; + cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP); + (void) strcpy((char *)cbr->cbr_propname, propname); + cbr->cbr_func = callback; + cbr->cbr_arg = cbarg; + mutex_enter(&dd->dd_lock); + list_insert_head(&dd->dd_prop_cbs, cbr); + mutex_exit(&dd->dd_lock); + + cbr->cbr_func(cbr->cbr_arg, value); + + if (need_rwlock) + rw_exit(&dp->dp_config_rwlock); + return (0); +} + +int +dsl_prop_get(const char *dsname, const char *propname, + int intsz, int numints, void *buf, char *setpoint) +{ + dsl_dataset_t *ds; + int err; + + err = dsl_dataset_hold(dsname, FTAG, &ds); + if (err) + return (err); + + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint); + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + + dsl_dataset_rele(ds, FTAG); + return (err); +} + +/* + * Get the current property value. It may have changed by the time this + * function returns, so it is NOT safe to follow up with + * dsl_prop_register() and assume that the value has not changed in + * between. + * + * Return 0 on success, ENOENT if ddname is invalid. 
+ */ +int +dsl_prop_get_integer(const char *ddname, const char *propname, + uint64_t *valuep, char *setpoint) +{ + return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint)); +} + +void +dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, + zprop_source_t source, uint64_t *value) +{ + psa->psa_name = propname; + psa->psa_source = source; + psa->psa_intsz = 8; + psa->psa_numints = 1; + psa->psa_value = value; + + psa->psa_effective_value = -1ULL; +} + +/* + * Predict the effective value of the given special property if it were set with + * the given value and source. This is not a general purpose function. It exists + * only to handle the special requirements of the quota and reservation + * properties. The fact that these properties are non-inheritable greatly + * simplifies the prediction logic. + * + * Returns 0 on success, a positive error code on failure, or -1 if called with + * a property not handled by this function. + */ +int +dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) +{ + const char *propname = psa->psa_name; + zfs_prop_t prop = zfs_name_to_prop(propname); + zprop_source_t source = psa->psa_source; + objset_t *mos; + uint64_t zapobj; + uint64_t version; + char *recvdstr; + int err = 0; + + switch (prop) { + case ZFS_PROP_QUOTA: + case ZFS_PROP_RESERVATION: + case ZFS_PROP_REFQUOTA: + case ZFS_PROP_REFRESERVATION: + break; + default: + return (-1); + } + + mos = dd->dd_pool->dp_meta_objset; + zapobj = dd->dd_phys->dd_props_zapobj; + recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + + version = spa_version(dd->dd_pool->dp_spa); + if (version < SPA_VERSION_RECVD_PROPS) { + if (source & ZPROP_SRC_NONE) + source = ZPROP_SRC_NONE; + else if (source & ZPROP_SRC_RECEIVED) + source = ZPROP_SRC_LOCAL; + } + + switch (source) { + case ZPROP_SRC_NONE: + /* Revert to the received value, if any. 
*/ + err = zap_lookup(mos, zapobj, recvdstr, 8, 1, + &psa->psa_effective_value); + if (err == ENOENT) + psa->psa_effective_value = 0; + break; + case ZPROP_SRC_LOCAL: + psa->psa_effective_value = *(uint64_t *)psa->psa_value; + break; + case ZPROP_SRC_RECEIVED: + /* + * If there's no local setting, then the new received value will + * be the effective value. + */ + err = zap_lookup(mos, zapobj, propname, 8, 1, + &psa->psa_effective_value); + if (err == ENOENT) + psa->psa_effective_value = *(uint64_t *)psa->psa_value; + break; + case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): + /* + * We're clearing the received value, so the local setting (if + * it exists) remains the effective value. + */ + err = zap_lookup(mos, zapobj, propname, 8, 1, + &psa->psa_effective_value); + if (err == ENOENT) + psa->psa_effective_value = 0; + break; + default: + cmn_err(CE_PANIC, "unexpected property source: %d", source); + } + + strfree(recvdstr); + + if (err == ENOENT) + return (0); + + return (err); +} + +#ifdef ZFS_DEBUG +void +dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa) +{ + zfs_prop_t prop = zfs_name_to_prop(psa->psa_name); + uint64_t intval; + char setpoint[MAXNAMELEN]; + uint64_t version = spa_version(dd->dd_pool->dp_spa); + int err; + + if (version < SPA_VERSION_RECVD_PROPS) { + switch (prop) { + case ZFS_PROP_QUOTA: + case ZFS_PROP_RESERVATION: + return; + } + } + + err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval, + setpoint, B_FALSE); + if (err == 0 && intval != psa->psa_effective_value) { + cmn_err(CE_PANIC, "%s property, source: %x, " + "predicted effective value: %llu, " + "actual effective value: %llu (setpoint: %s)", + psa->psa_name, psa->psa_source, + (unsigned long long)psa->psa_effective_value, + (unsigned long long)intval, setpoint); + } +} +#endif + +/* + * Unregister this callback. Return 0 on success, ENOENT if ddname is + * invalid, ENOMSG if no matching callback registered. 
+ */ +int +dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, + dsl_prop_changed_cb_t *callback, void *cbarg) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_cb_record_t *cbr; + + mutex_enter(&dd->dd_lock); + for (cbr = list_head(&dd->dd_prop_cbs); + cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { + if (cbr->cbr_ds == ds && + cbr->cbr_func == callback && + cbr->cbr_arg == cbarg && + strcmp(cbr->cbr_propname, propname) == 0) + break; + } + + if (cbr == NULL) { + mutex_exit(&dd->dd_lock); + return (ENOMSG); + } + + list_remove(&dd->dd_prop_cbs, cbr); + mutex_exit(&dd->dd_lock); + kmem_free((void*)cbr->cbr_propname, strlen(cbr->cbr_propname)+1); + kmem_free(cbr, sizeof (dsl_prop_cb_record_t)); + + return (0); +} + +/* + * Return the number of callbacks that are registered for this dataset. + */ +int +dsl_prop_numcb(dsl_dataset_t *ds) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_cb_record_t *cbr; + int num = 0; + + mutex_enter(&dd->dd_lock); + for (cbr = list_head(&dd->dd_prop_cbs); + cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { + if (cbr->cbr_ds == ds) + num++; + } + mutex_exit(&dd->dd_lock); + + return (num); +} + +static void +dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, + const char *propname, uint64_t value, int first) +{ + dsl_dir_t *dd; + dsl_prop_cb_record_t *cbr; + objset_t *mos = dp->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t *za; + int err; + + ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); + err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd); + if (err) + return; + + if (!first) { + /* + * If the prop is set here, then this change is not + * being inherited here or below; stop the recursion. 
+ */ + err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname); + if (err == 0) { + dsl_dir_close(dd, FTAG); + return; + } + ASSERT3U(err, ==, ENOENT); + } + + mutex_enter(&dd->dd_lock); + for (cbr = list_head(&dd->dd_prop_cbs); cbr; + cbr = list_next(&dd->dd_prop_cbs, cbr)) { + uint64_t propobj = cbr->cbr_ds->ds_phys->ds_props_obj; + + if (strcmp(cbr->cbr_propname, propname) != 0) + continue; + + /* + * If the property is set on this ds, then it is not + * inherited here; don't call the callback. + */ + if (propobj && 0 == zap_contains(mos, propobj, propname)) + continue; + + cbr->cbr_func(cbr->cbr_arg, value); + } + mutex_exit(&dd->dd_lock); + + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + for (zap_cursor_init(&zc, mos, + dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, za) == 0; + zap_cursor_advance(&zc)) { + dsl_prop_changed_notify(dp, za->za_first_integer, + propname, value, FALSE); + } + kmem_free(za, sizeof (zap_attribute_t)); + zap_cursor_fini(&zc); + dsl_dir_close(dd, FTAG); +} + +void +dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_prop_setarg_t *psa = arg2; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t zapobj, intval, dummy; + int isint; + char valbuf[32]; + char *valstr = NULL; + char *inheritstr; + char *recvdstr; + char *tbuf = NULL; + int err; + uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); + const char *propname = psa->psa_name; + zprop_source_t source = psa->psa_source; + + isint = (dodefault(propname, 8, 1, &intval) == 0); + + if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) { + ASSERT(version >= SPA_VERSION_SNAP_PROPS); + if (ds->ds_phys->ds_props_obj == 0) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_props_obj = + zap_create(mos, + DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); + } + zapobj = ds->ds_phys->ds_props_obj; + } else { + zapobj = ds->ds_dir->dd_phys->dd_props_zapobj; + } + + if (version < 
SPA_VERSION_RECVD_PROPS) { + zfs_prop_t prop = zfs_name_to_prop(propname); + if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) + return; + + if (source & ZPROP_SRC_NONE) + source = ZPROP_SRC_NONE; + else if (source & ZPROP_SRC_RECEIVED) + source = ZPROP_SRC_LOCAL; + } + + inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); + recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + + switch (source) { + case ZPROP_SRC_NONE: + /* + * revert to received value, if any (inherit -S) + * - remove propname + * - remove propname$inherit + */ + err = zap_remove(mos, zapobj, propname, tx); + ASSERT(err == 0 || err == ENOENT); + err = zap_remove(mos, zapobj, inheritstr, tx); + ASSERT(err == 0 || err == ENOENT); + break; + case ZPROP_SRC_LOCAL: + /* + * remove propname$inherit + * set propname -> value + */ + err = zap_remove(mos, zapobj, inheritstr, tx); + ASSERT(err == 0 || err == ENOENT); + VERIFY(0 == zap_update(mos, zapobj, propname, + psa->psa_intsz, psa->psa_numints, psa->psa_value, tx)); + break; + case ZPROP_SRC_INHERITED: + /* + * explicitly inherit + * - remove propname + * - set propname$inherit + */ + err = zap_remove(mos, zapobj, propname, tx); + ASSERT(err == 0 || err == ENOENT); + if (version >= SPA_VERSION_RECVD_PROPS && + dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, + NULL) == 0) { + dummy = 0; + err = zap_update(mos, zapobj, inheritstr, + 8, 1, &dummy, tx); + ASSERT(err == 0); + } + break; + case ZPROP_SRC_RECEIVED: + /* + * set propname$recvd -> value + */ + err = zap_update(mos, zapobj, recvdstr, + psa->psa_intsz, psa->psa_numints, psa->psa_value, tx); + ASSERT(err == 0); + break; + case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED): + /* + * clear local and received settings + * - remove propname + * - remove propname$inherit + * - remove propname$recvd + */ + err = zap_remove(mos, zapobj, propname, tx); + ASSERT(err == 0 || err == ENOENT); + err = zap_remove(mos, zapobj, inheritstr, tx); + ASSERT(err == 0 
|| err == ENOENT); + /* FALLTHRU */ + case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): + /* + * remove propname$recvd + */ + err = zap_remove(mos, zapobj, recvdstr, tx); + ASSERT(err == 0 || err == ENOENT); + break; + default: + cmn_err(CE_PANIC, "unexpected property source: %d", source); + } + + strfree(inheritstr); + strfree(recvdstr); + + if (isint) { + VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL)); + + if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) { + dsl_prop_cb_record_t *cbr; + /* + * It's a snapshot; nothing can inherit this + * property, so just look for callbacks on this + * ds here. + */ + mutex_enter(&ds->ds_dir->dd_lock); + for (cbr = list_head(&ds->ds_dir->dd_prop_cbs); cbr; + cbr = list_next(&ds->ds_dir->dd_prop_cbs, cbr)) { + if (cbr->cbr_ds == ds && + strcmp(cbr->cbr_propname, propname) == 0) + cbr->cbr_func(cbr->cbr_arg, intval); + } + mutex_exit(&ds->ds_dir->dd_lock); + } else { + dsl_prop_changed_notify(ds->ds_dir->dd_pool, + ds->ds_dir->dd_object, propname, intval, TRUE); + } + + (void) snprintf(valbuf, sizeof (valbuf), + "%lld", (longlong_t)intval); + valstr = valbuf; + } else { + if (source == ZPROP_SRC_LOCAL) { + valstr = (char *)psa->psa_value; + } else { + tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); + if (dsl_prop_get_ds(ds, propname, 1, + ZAP_MAXVALUELEN, tbuf, NULL) == 0) + valstr = tbuf; + } + } + + spa_history_log_internal((source == ZPROP_SRC_NONE || + source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : + LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, + "%s=%s dataset = %llu", propname, + (valstr == NULL ? 
"" : valstr), ds->ds_object); + + if (tbuf != NULL) + kmem_free(tbuf, ZAP_MAXVALUELEN); +} + +void +dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_props_arg_t *pa = arg2; + nvlist_t *props = pa->pa_props; + dsl_prop_setarg_t psa; + nvpair_t *elem = NULL; + + psa.psa_source = pa->pa_source; + + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + nvpair_t *pair = elem; + + psa.psa_name = nvpair_name(pair); + + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + /* + * dsl_prop_get_all_impl() returns properties in this + * format. + */ + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &pair) == 0); + } + + if (nvpair_type(pair) == DATA_TYPE_STRING) { + VERIFY(nvpair_value_string(pair, + (char **)&psa.psa_value) == 0); + psa.psa_intsz = 1; + psa.psa_numints = strlen(psa.psa_value) + 1; + } else { + uint64_t intval; + VERIFY(nvpair_value_uint64(pair, &intval) == 0); + psa.psa_intsz = sizeof (intval); + psa.psa_numints = 1; + psa.psa_value = &intval; + } + dsl_prop_set_sync(ds, &psa, tx); + } +} + +void +dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, + dmu_tx_t *tx) +{ + objset_t *mos = dd->dd_pool->dp_meta_objset; + uint64_t zapobj = dd->dd_phys->dd_props_zapobj; + + ASSERT(dmu_tx_is_syncing(tx)); + + VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); + + dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); + + spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, + "%s=%llu dataset = %llu", name, (u_longlong_t)val, + dd->dd_phys->dd_head_dataset_obj); +} + +int +dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, + int intsz, int numints, const void *buf) +{ + dsl_dataset_t *ds; + uint64_t version; + int err; + dsl_prop_setarg_t psa; + + /* + * We must do these checks before we get to the syncfunc, since + * it can't fail. 
+ */ + if (strlen(propname) >= ZAP_MAXNAMELEN) + return (ENAMETOOLONG); + + err = dsl_dataset_hold(dsname, FTAG, &ds); + if (err) + return (err); + + version = spa_version(ds->ds_dir->dd_pool->dp_spa); + if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ? + ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { + dsl_dataset_rele(ds, FTAG); + return (E2BIG); + } + if (dsl_dataset_is_snapshot(ds) && + version < SPA_VERSION_SNAP_PROPS) { + dsl_dataset_rele(ds, FTAG); + return (ENOTSUP); + } + + psa.psa_name = propname; + psa.psa_source = source; + psa.psa_intsz = intsz; + psa.psa_numints = numints; + psa.psa_value = buf; + psa.psa_effective_value = -1ULL; + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + NULL, dsl_prop_set_sync, ds, &psa, 2); + + dsl_dataset_rele(ds, FTAG); + return (err); +} + +int +dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) +{ + dsl_dataset_t *ds; + uint64_t version; + nvpair_t *elem = NULL; + dsl_props_arg_t pa; + int err; + + if (err = dsl_dataset_hold(dsname, FTAG, &ds)) + return (err); + /* + * Do these checks before the syncfunc, since it can't fail. + */ + version = spa_version(ds->ds_dir->dd_pool->dp_spa); + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { + dsl_dataset_rele(ds, FTAG); + return (ENAMETOOLONG); + } + if (nvpair_type(elem) == DATA_TYPE_STRING) { + char *valstr; + VERIFY(nvpair_value_string(elem, &valstr) == 0); + if (strlen(valstr) >= (version < + SPA_VERSION_STMF_PROP ? 
+ ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { + dsl_dataset_rele(ds, FTAG); + return (E2BIG); + } + } + } + + if (dsl_dataset_is_snapshot(ds) && + version < SPA_VERSION_SNAP_PROPS) { + dsl_dataset_rele(ds, FTAG); + return (ENOTSUP); + } + + pa.pa_props = props; + pa.pa_source = source; + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + NULL, dsl_props_set_sync, ds, &pa, 2); + + dsl_dataset_rele(ds, FTAG); + return (err); +} + +typedef enum dsl_prop_getflags { + DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */ + DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */ + DSL_PROP_GET_LOCAL = 0x4, /* local properties */ + DSL_PROP_GET_RECEIVED = 0x8 /* received properties */ +} dsl_prop_getflags_t; + +static int +dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, + const char *setpoint, dsl_prop_getflags_t flags, nvlist_t *nv) +{ + zap_cursor_t zc; + zap_attribute_t za; + int err = 0; + + for (zap_cursor_init(&zc, mos, propobj); + (err = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + nvlist_t *propval; + zfs_prop_t prop; + char buf[ZAP_MAXNAMELEN]; + char *valstr; + const char *suffix; + const char *propname; + const char *source; + + suffix = strchr(za.za_name, '$'); + + if (suffix == NULL) { + /* + * Skip local properties if we only want received + * properties. + */ + if (flags & DSL_PROP_GET_RECEIVED) + continue; + + propname = za.za_name; + source = setpoint; + } else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) { + /* Skip explicitly inherited entries. */ + continue; + } else if (strcmp(suffix, ZPROP_RECVD_SUFFIX) == 0) { + if (flags & DSL_PROP_GET_LOCAL) + continue; + + (void) strncpy(buf, za.za_name, (suffix - za.za_name)); + buf[suffix - za.za_name] = '\0'; + propname = buf; + + if (!(flags & DSL_PROP_GET_RECEIVED)) { + /* Skip if locally overridden. */ + err = zap_contains(mos, propobj, propname); + if (err == 0) + continue; + if (err != ENOENT) + break; + + /* Skip if explicitly inherited. 
*/ + valstr = kmem_asprintf("%s%s", propname, + ZPROP_INHERIT_SUFFIX); + err = zap_contains(mos, propobj, valstr); + strfree(valstr); + if (err == 0) + continue; + if (err != ENOENT) + break; + } + + source = ((flags & DSL_PROP_GET_INHERITING) ? + setpoint : ZPROP_SOURCE_VAL_RECVD); + } else { + /* + * For backward compatibility, skip suffixes we don't + * recognize. + */ + continue; + } + + prop = zfs_name_to_prop(propname); + + /* Skip non-inheritable properties. */ + if ((flags & DSL_PROP_GET_INHERITING) && prop != ZPROP_INVAL && + !zfs_prop_inheritable(prop)) + continue; + + /* Skip properties not valid for this type. */ + if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_INVAL && + !zfs_prop_valid_for_type(prop, ZFS_TYPE_SNAPSHOT)) + continue; + + /* Skip properties already defined. */ + if (nvlist_exists(nv, propname)) + continue; + + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + if (za.za_integer_length == 1) { + /* + * String property + */ + char *tmp = kmem_alloc(za.za_num_integers, + KM_SLEEP); + err = zap_lookup(mos, propobj, + za.za_name, 1, za.za_num_integers, tmp); + if (err != 0) { + kmem_free(tmp, za.za_num_integers); + break; + } + VERIFY(nvlist_add_string(propval, ZPROP_VALUE, + tmp) == 0); + kmem_free(tmp, za.za_num_integers); + } else { + /* + * Integer property + */ + ASSERT(za.za_integer_length == 8); + (void) nvlist_add_uint64(propval, ZPROP_VALUE, + za.za_first_integer); + } + + VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0); + VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); + nvlist_free(propval); + } + zap_cursor_fini(&zc); + if (err == ENOENT) + err = 0; + return (err); +} + +/* + * Iterate over all properties for this dataset and return them in an nvlist. 
+ */ +static int +dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, + dsl_prop_getflags_t flags) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_pool_t *dp = dd->dd_pool; + objset_t *mos = dp->dp_meta_objset; + int err = 0; + char setpoint[MAXNAMELEN]; + + VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + if (dsl_dataset_is_snapshot(ds)) + flags |= DSL_PROP_GET_SNAPSHOT; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + + if (ds->ds_phys->ds_props_obj != 0) { + ASSERT(flags & DSL_PROP_GET_SNAPSHOT); + dsl_dataset_name(ds, setpoint); + err = dsl_prop_get_all_impl(mos, ds->ds_phys->ds_props_obj, + setpoint, flags, *nvp); + if (err) + goto out; + } + + for (; dd != NULL; dd = dd->dd_parent) { + if (dd != ds->ds_dir || (flags & DSL_PROP_GET_SNAPSHOT)) { + if (flags & (DSL_PROP_GET_LOCAL | + DSL_PROP_GET_RECEIVED)) + break; + flags |= DSL_PROP_GET_INHERITING; + } + dsl_dir_name(dd, setpoint); + err = dsl_prop_get_all_impl(mos, dd->dd_phys->dd_props_zapobj, + setpoint, flags, *nvp); + if (err) + break; + } +out: + rw_exit(&dp->dp_config_rwlock); + return (err); +} + +boolean_t +dsl_prop_get_hasrecvd(objset_t *os) +{ + dsl_dataset_t *ds = os->os_dsl_dataset; + int rc; + uint64_t dummy; + + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL); + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); + return (rc == 0); +} + +static void +dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source) +{ + dsl_dataset_t *ds = os->os_dsl_dataset; + uint64_t dummy = 0; + dsl_prop_setarg_t psa; + + if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS) + return; + + dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy); + + (void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL, + dsl_prop_set_sync, ds, &psa, 2); +} + +/* + * Call after successfully receiving properties to ensure that only the first + * receive on or after 
SPA_VERSION_RECVD_PROPS blows away local properties. + */ +void +dsl_prop_set_hasrecvd(objset_t *os) +{ + if (dsl_prop_get_hasrecvd(os)) { + ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); + return; + } + dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL); +} + +void +dsl_prop_unset_hasrecvd(objset_t *os) +{ + dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE); +} + +int +dsl_prop_get_all(objset_t *os, nvlist_t **nvp) +{ + return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, 0)); +} + +int +dsl_prop_get_received(objset_t *os, nvlist_t **nvp) +{ + /* + * Received properties are not distinguishable from local properties + * until the dataset has received properties on or after + * SPA_VERSION_RECVD_PROPS. + */ + dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ? + DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL); + return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags)); +} + +void +dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value) +{ + nvlist_t *propval; + const char *propname = zfs_prop_to_name(prop); + uint64_t default_value; + + if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { + VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); + return; + } + + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); + /* Indicate the default source if we can. 
*/ + if (dodefault(propname, 8, 1, &default_value) == 0 && + value == default_value) { + VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0); + } + VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); + nvlist_free(propval); +} + +void +dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value) +{ + nvlist_t *propval; + const char *propname = zfs_prop_to_name(prop); + + if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { + VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); + return; + } + + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); + VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); + nvlist_free(propval); +} diff --git a/uts/common/fs/zfs/dsl_scan.c b/uts/common/fs/zfs/dsl_scan.c new file mode 100644 index 000000000000..56d41083673e --- /dev/null +++ b/uts/common/fs/zfs/dsl_scan.c @@ -0,0 +1,1766 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/dsl_scan.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_synctask.h> +#include <sys/dnode.h> +#include <sys/dmu_tx.h> +#include <sys/dmu_objset.h> +#include <sys/arc.h> +#include <sys/zap.h> +#include <sys/zio.h> +#include <sys/zfs_context.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_znode.h> +#include <sys/spa_impl.h> +#include <sys/vdev_impl.h> +#include <sys/zil_impl.h> +#include <sys/zio_checksum.h> +#include <sys/ddt.h> +#include <sys/sa.h> +#include <sys/sa_impl.h> +#ifdef _KERNEL +#include <sys/zfs_vfsops.h> +#endif + +typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); + +static scan_cb_t dsl_scan_defrag_cb; +static scan_cb_t dsl_scan_scrub_cb; +static scan_cb_t dsl_scan_remove_cb; +static dsl_syncfunc_t dsl_scan_cancel_sync; +static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); + +int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ +int zfs_resilver_delay = 2; /* number of ticks to delay resilver */ +int zfs_scrub_delay = 4; /* number of ticks to delay scrub */ +int zfs_scan_idle = 50; /* idle window in clock ticks */ + +int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */ +int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ +int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */ +boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ +boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetching */ +enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; +int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */ + +#define DSL_SCAN_IS_SCRUB_RESILVER(scn) \ + ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ + (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) + +extern int zfs_txg_timeout; + +/* the order has to match pool_scan_type */ +static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = { + NULL, + 
dsl_scan_scrub_cb, /* POOL_SCAN_SCRUB */ + dsl_scan_scrub_cb, /* POOL_SCAN_RESILVER */ +}; + +int +dsl_scan_init(dsl_pool_t *dp, uint64_t txg) +{ + int err; + dsl_scan_t *scn; + spa_t *spa = dp->dp_spa; + uint64_t f; + + scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP); + scn->scn_dp = dp; + + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + "scrub_func", sizeof (uint64_t), 1, &f); + if (err == 0) { + /* + * There was an old-style scrub in progress. Restart a + * new-style scrub from the beginning. + */ + scn->scn_restart_txg = txg; + zfs_dbgmsg("old-style scrub was in progress; " + "restarting new-style scrub in txg %llu", + scn->scn_restart_txg); + + /* + * Load the queue obj from the old location so that it + * can be freed by dsl_scan_done(). + */ + (void) zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + "scrub_queue", sizeof (uint64_t), 1, + &scn->scn_phys.scn_queue_obj); + } else { + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, + &scn->scn_phys); + if (err == ENOENT) + return (0); + else if (err) + return (err); + + if (scn->scn_phys.scn_state == DSS_SCANNING && + spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) { + /* + * A new-type scrub was in progress on an old + * pool, and the pool was accessed by old + * software. Restart from the beginning, since + * the old software may have changed the pool in + * the meantime. 
+ */ + scn->scn_restart_txg = txg; + zfs_dbgmsg("new-style scrub was modified " + "by old software; restarting in txg %llu", + scn->scn_restart_txg); + } + } + + spa_scan_stat_init(spa); + return (0); +} + +void +dsl_scan_fini(dsl_pool_t *dp) +{ + if (dp->dp_scan) { + kmem_free(dp->dp_scan, sizeof (dsl_scan_t)); + dp->dp_scan = NULL; + } +} + +/* ARGSUSED */ +static int +dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_scan_t *scn = arg1; + + if (scn->scn_phys.scn_state == DSS_SCANNING) + return (EBUSY); + + return (0); +} + +/* ARGSUSED */ +static void +dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_scan_t *scn = arg1; + pool_scan_func_t *funcp = arg2; + dmu_object_type_t ot = 0; + dsl_pool_t *dp = scn->scn_dp; + spa_t *spa = dp->dp_spa; + + ASSERT(scn->scn_phys.scn_state != DSS_SCANNING); + ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); + bzero(&scn->scn_phys, sizeof (scn->scn_phys)); + scn->scn_phys.scn_func = *funcp; + scn->scn_phys.scn_state = DSS_SCANNING; + scn->scn_phys.scn_min_txg = 0; + scn->scn_phys.scn_max_txg = tx->tx_txg; + scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ + scn->scn_phys.scn_start_time = gethrestime_sec(); + scn->scn_phys.scn_errors = 0; + scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; + scn->scn_restart_txg = 0; + spa_scan_stat_init(spa); + + if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { + scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; + + /* rewrite all disk labels */ + vdev_config_dirty(spa->spa_root_vdev); + + if (vdev_resilver_needed(spa->spa_root_vdev, + &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { + spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); + } else { + spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START); + } + + spa->spa_scrub_started = B_TRUE; + /* + * If this is an incremental scrub, limit the DDT scrub phase + * to just the auto-ditto class (for correctness); the rest + * of the scrub should go 
faster using top-down pruning. + */ + if (scn->scn_phys.scn_min_txg > TXG_INITIAL) + scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO; + + } + + /* back to the generic stuff */ + + if (dp->dp_blkstats == NULL) { + dp->dp_blkstats = + kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP); + } + bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); + + if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) + ot = DMU_OT_ZAP_OTHER; + + scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, + ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx); + + dsl_scan_sync_state(scn, tx); + + spa_history_log_internal(LOG_POOL_SCAN, spa, tx, + "func=%u mintxg=%llu maxtxg=%llu", + *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); +} + +/* ARGSUSED */ +static void +dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) +{ + static const char *old_names[] = { + "scrub_bookmark", + "scrub_ddt_bookmark", + "scrub_ddt_class_max", + "scrub_queue", + "scrub_min_txg", + "scrub_max_txg", + "scrub_func", + "scrub_errors", + NULL + }; + + dsl_pool_t *dp = scn->scn_dp; + spa_t *spa = dp->dp_spa; + int i; + + /* Remove any remnants of an old-style scrub. */ + for (i = 0; old_names[i]; i++) { + (void) zap_remove(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, old_names[i], tx); + } + + if (scn->scn_phys.scn_queue_obj != 0) { + VERIFY(0 == dmu_object_free(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, tx)); + scn->scn_phys.scn_queue_obj = 0; + } + + /* + * If we were "restarted" from a stopped state, don't bother + * with anything else. 
+ */ + if (scn->scn_phys.scn_state != DSS_SCANNING) + return; + + if (complete) + scn->scn_phys.scn_state = DSS_FINISHED; + else + scn->scn_phys.scn_state = DSS_CANCELED; + + spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, + "complete=%u", complete); + + if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight > 0) { + cv_wait(&spa->spa_scrub_io_cv, + &spa->spa_scrub_lock); + } + mutex_exit(&spa->spa_scrub_lock); + spa->spa_scrub_started = B_FALSE; + spa->spa_scrub_active = B_FALSE; + + /* + * If the scrub/resilver completed, update all DTLs to + * reflect this. Whether it succeeded or not, vacate + * all temporary scrub DTLs. + */ + vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg, + complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE); + if (complete) { + spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ? + ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); + } + spa_errlog_rotate(spa); + + /* + * We may have finished replacing a device. + * Let the async thread assess this and handle the detach. 
+ */ + spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); + } + + scn->scn_phys.scn_end_time = gethrestime_sec(); +} + +/* ARGSUSED */ +static int +dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_scan_t *scn = arg1; + + if (scn->scn_phys.scn_state != DSS_SCANNING) + return (ENOENT); + return (0); +} + +/* ARGSUSED */ +static void +dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_scan_t *scn = arg1; + + dsl_scan_done(scn, B_FALSE, tx); + dsl_scan_sync_state(scn, tx); +} + +int +dsl_scan_cancel(dsl_pool_t *dp) +{ + boolean_t complete = B_FALSE; + int err; + + err = dsl_sync_task_do(dp, dsl_scan_cancel_check, + dsl_scan_cancel_sync, dp->dp_scan, &complete, 3); + return (err); +} + +static void dsl_scan_visitbp(blkptr_t *bp, + const zbookmark_t *zb, dnode_phys_t *dnp, arc_buf_t *pbuf, + dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, + dmu_tx_t *tx); +static void dsl_scan_visitdnode(dsl_scan_t *, dsl_dataset_t *ds, + dmu_objset_type_t ostype, + dnode_phys_t *dnp, arc_buf_t *buf, uint64_t object, dmu_tx_t *tx); + +void +dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp) +{ + zio_free(dp->dp_spa, txg, bp); +} + +void +dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp) +{ + ASSERT(dsl_pool_sync_context(dp)); + zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags)); +} + +int +dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb) +{ + return (arc_read(pio, spa, bpp, pbuf, done, private, + priority, zio_flags, arc_flags, zb)); +} + +int +dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb) +{ + return (arc_read_nolock(pio, spa, bpp, done, private, + priority, zio_flags, arc_flags, zb)); +} + +static boolean_t 
+bookmark_is_zero(const zbookmark_t *zb) +{ + return (zb->zb_objset == 0 && zb->zb_object == 0 && + zb->zb_level == 0 && zb->zb_blkid == 0); +} + +/* dnp is the dnode for zb1->zb_object */ +static boolean_t +bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1, + const zbookmark_t *zb2) +{ + uint64_t zb1nextL0, zb2thisobj; + + ASSERT(zb1->zb_objset == zb2->zb_objset); + ASSERT(zb2->zb_level == 0); + + /* + * A bookmark in the deadlist is considered to be after + * everything else. + */ + if (zb2->zb_object == DMU_DEADLIST_OBJECT) + return (B_TRUE); + + /* The objset_phys_t isn't before anything. */ + if (dnp == NULL) + return (B_FALSE); + + zb1nextL0 = (zb1->zb_blkid + 1) << + ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); + + zb2thisobj = zb2->zb_object ? zb2->zb_object : + zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); + + if (zb1->zb_object == DMU_META_DNODE_OBJECT) { + uint64_t nextobj = zb1nextL0 * + (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT; + return (nextobj <= zb2thisobj); + } + + if (zb1->zb_object < zb2thisobj) + return (B_TRUE); + if (zb1->zb_object > zb2thisobj) + return (B_FALSE); + if (zb2->zb_object == DMU_META_DNODE_OBJECT) + return (B_FALSE); + return (zb1nextL0 <= zb2->zb_blkid); +} + +static uint64_t +dsl_scan_ds_maxtxg(dsl_dataset_t *ds) +{ + uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg; + if (dsl_dataset_is_snapshot(ds)) + return (MIN(smt, ds->ds_phys->ds_creation_txg)); + return (smt); +} + +static void +dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) +{ + VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, + &scn->scn_phys, tx)); +} + +static boolean_t +dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) +{ + uint64_t elapsed_nanosecs; + int mintime; + + /* we never skip user/group accounting objects */ + if (zb && (int64_t)zb->zb_object < 0) + return (B_FALSE); + + if 
(scn->scn_pausing) + return (B_TRUE); /* we're already pausing */ + + if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark)) + return (B_FALSE); /* we're resuming */ + + /* We only know how to resume from level-0 blocks. */ + if (zb && zb->zb_level != 0) + return (B_FALSE); + + mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? + zfs_resilver_min_time_ms : zfs_scan_min_time_ms; + elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; + if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || + (elapsed_nanosecs / MICROSEC > mintime && + txg_sync_waiting(scn->scn_dp)) || + spa_shutting_down(scn->scn_dp->dp_spa)) { + if (zb) { + dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n", + (longlong_t)zb->zb_objset, + (longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (longlong_t)zb->zb_blkid); + scn->scn_phys.scn_bookmark = *zb; + } + dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n", + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); + scn->scn_pausing = B_TRUE; + return (B_TRUE); + } + return (B_FALSE); +} + +typedef struct zil_scan_arg { + dsl_pool_t *zsa_dp; + zil_header_t *zsa_zh; +} zil_scan_arg_t; + +/* ARGSUSED */ +static int +dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) +{ + zil_scan_arg_t *zsa = arg; + dsl_pool_t *dp = zsa->zsa_dp; + dsl_scan_t *scn = dp->dp_scan; + zil_header_t *zh = zsa->zsa_zh; + zbookmark_t zb; + + if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) + return (0); + + /* + * One block ("stubby") can be allocated a long time ago; we + * want to visit that one because it has been allocated + * (on-disk) even if it hasn't been claimed (even though for + * scrub there's nothing to do to it). 
+ */ + if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa)) + return (0); + + SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], + ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); + + VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); + return (0); +} + +/* ARGSUSED */ +static int +dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) +{ + if (lrc->lrc_txtype == TX_WRITE) { + zil_scan_arg_t *zsa = arg; + dsl_pool_t *dp = zsa->zsa_dp; + dsl_scan_t *scn = dp->dp_scan; + zil_header_t *zh = zsa->zsa_zh; + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + zbookmark_t zb; + + if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) + return (0); + + /* + * birth can be < claim_txg if this record's txg is + * already txg sync'ed (but this log block contains + * other records that are not synced) + */ + if (claim_txg == 0 || bp->blk_birth < claim_txg) + return (0); + + SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], + lr->lr_foid, ZB_ZIL_LEVEL, + lr->lr_offset / BP_GET_LSIZE(bp)); + + VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); + } + return (0); +} + +static void +dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) +{ + uint64_t claim_txg = zh->zh_claim_txg; + zil_scan_arg_t zsa = { dp, zh }; + zilog_t *zilog; + + /* + * We only want to visit blocks that have been claimed but not yet + * replayed (or, in read-only mode, blocks that *would* be claimed). 
+ */ + if (claim_txg == 0 && spa_writeable(dp->dp_spa)) + return; + + zilog = zil_alloc(dp->dp_meta_objset, zh); + + (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, + claim_txg); + + zil_free(zilog); +} + +/* ARGSUSED */ +static void +dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp, + uint64_t objset, uint64_t object, uint64_t blkid) +{ + zbookmark_t czb; + uint32_t flags = ARC_NOWAIT | ARC_PREFETCH; + + if (zfs_no_scrub_prefetch) + return; + + if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_min_txg || + (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)) + return; + + SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid); + + /* + * XXX need to make sure all of these arc_read() prefetches are + * done before setting xlateall (similar to dsl_read()) + */ + (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp, + buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb); +} + +static boolean_t +dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, + const zbookmark_t *zb) +{ + /* + * We never skip over user/group accounting objects (obj<0) + */ + if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) && + (int64_t)zb->zb_object >= 0) { + /* + * If we already visited this bp & everything below (in + * a prior txg sync), don't bother doing it again. + */ + if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark)) + return (B_TRUE); + + /* + * If we found the block we're trying to resume from, or + * we went past it to a different object, zero it out to + * indicate that it's OK to start checking for pausing + * again. 
+ */ + if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || + zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) { + dprintf("resuming at %llx/%llx/%llx/%llx\n", + (longlong_t)zb->zb_objset, + (longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (longlong_t)zb->zb_blkid); + bzero(&scn->scn_phys.scn_bookmark, sizeof (*zb)); + } + } + return (B_FALSE); +} + +/* + * Return nonzero on i/o error. + * Return new buf to write out in *bufp. + */ +static int +dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, + dnode_phys_t *dnp, const blkptr_t *bp, + const zbookmark_t *zb, dmu_tx_t *tx, arc_buf_t **bufp) +{ + dsl_pool_t *dp = scn->scn_dp; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; + int err; + + if (BP_GET_LEVEL(bp) > 0) { + uint32_t flags = ARC_WAIT; + int i; + blkptr_t *cbp; + int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + + err = arc_read_nolock(NULL, dp->dp_spa, bp, + arc_getbuf_func, bufp, + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); + if (err) { + scn->scn_phys.scn_errors++; + return (err); + } + for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { + dsl_scan_prefetch(scn, *bufp, cbp, zb->zb_objset, + zb->zb_object, zb->zb_blkid * epb + i); + } + for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { + zbookmark_t czb; + + SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, + zb->zb_level - 1, + zb->zb_blkid * epb + i); + dsl_scan_visitbp(cbp, &czb, dnp, + *bufp, ds, scn, ostype, tx); + } + } else if (BP_GET_TYPE(bp) == DMU_OT_USERGROUP_USED) { + uint32_t flags = ARC_WAIT; + + err = arc_read_nolock(NULL, dp->dp_spa, bp, + arc_getbuf_func, bufp, + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); + if (err) { + scn->scn_phys.scn_errors++; + return (err); + } + } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { + uint32_t flags = ARC_WAIT; + dnode_phys_t *cdnp; + int i, j; + int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; + + err = arc_read_nolock(NULL, dp->dp_spa, bp, + arc_getbuf_func, bufp, + 
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); + if (err) { + scn->scn_phys.scn_errors++; + return (err); + } + for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { + for (j = 0; j < cdnp->dn_nblkptr; j++) { + blkptr_t *cbp = &cdnp->dn_blkptr[j]; + dsl_scan_prefetch(scn, *bufp, cbp, + zb->zb_objset, zb->zb_blkid * epb + i, j); + } + } + for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { + dsl_scan_visitdnode(scn, ds, ostype, + cdnp, *bufp, zb->zb_blkid * epb + i, tx); + } + + } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { + uint32_t flags = ARC_WAIT; + objset_phys_t *osp; + + err = arc_read_nolock(NULL, dp->dp_spa, bp, + arc_getbuf_func, bufp, + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); + if (err) { + scn->scn_phys.scn_errors++; + return (err); + } + + osp = (*bufp)->b_data; + + dsl_scan_visitdnode(scn, ds, osp->os_type, + &osp->os_meta_dnode, *bufp, DMU_META_DNODE_OBJECT, tx); + + if (OBJSET_BUF_HAS_USERUSED(*bufp)) { + /* + * We also always visit user/group accounting + * objects, and never skip them, even if we are + * pausing. This is necessary so that the space + * deltas from this txg get integrated. + */ + dsl_scan_visitdnode(scn, ds, osp->os_type, + &osp->os_groupused_dnode, *bufp, + DMU_GROUPUSED_OBJECT, tx); + dsl_scan_visitdnode(scn, ds, osp->os_type, + &osp->os_userused_dnode, *bufp, + DMU_USERUSED_OBJECT, tx); + } + } + + return (0); +} + +static void +dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, + dmu_objset_type_t ostype, dnode_phys_t *dnp, arc_buf_t *buf, + uint64_t object, dmu_tx_t *tx) +{ + int j; + + for (j = 0; j < dnp->dn_nblkptr; j++) { + zbookmark_t czb; + + SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, + dnp->dn_nlevels - 1, j); + dsl_scan_visitbp(&dnp->dn_blkptr[j], + &czb, dnp, buf, ds, scn, ostype, tx); + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + zbookmark_t czb; + SET_BOOKMARK(&czb, ds ? 
ds->ds_object : 0, object, + 0, DMU_SPILL_BLKID); + dsl_scan_visitbp(&dnp->dn_spill, + &czb, dnp, buf, ds, scn, ostype, tx); + } +} + +/* + * The arguments are in this order because mdb can only print the + * first 5; we want them to be useful. + */ +static void +dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, + dnode_phys_t *dnp, arc_buf_t *pbuf, + dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, + dmu_tx_t *tx) +{ + dsl_pool_t *dp = scn->scn_dp; + arc_buf_t *buf = NULL; + blkptr_t bp_toread = *bp; + + /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ + + if (dsl_scan_check_pause(scn, zb)) + return; + + if (dsl_scan_check_resume(scn, dnp, zb)) + return; + + if (bp->blk_birth == 0) + return; + + scn->scn_visited_this_txg++; + + dprintf_bp(bp, + "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p", + ds, ds ? ds->ds_object : 0, + zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, + pbuf, bp); + + if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) + return; + + if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) { + /* + * For non-user-accounting blocks, we need to read the + * new bp (from a deleted snapshot, found in + * check_existing_xlation). If we used the old bp, + * pointers inside this block from before we resumed + * would be untranslated. + * + * For user-accounting blocks, we need to read the old + * bp, because we will apply the entire space delta to + * it (original untranslated -> translations from + * deleted snap -> now). + */ + bp_toread = *bp; + } + + if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx, + &buf) != 0) + return; + + /* + * If dsl_scan_ddt() has already visited this block, it will have + * already done any translations or scrubbing, so don't call the + * callback again. 
+ */ + if (ddt_class_contains(dp->dp_spa, + scn->scn_phys.scn_ddt_class_max, bp)) { + ASSERT(buf == NULL); + return; + } + + /* + * If this block is from the future (after cur_max_txg), then we + * are doing this on behalf of a deleted snapshot, and we will + * revisit the future block on the next pass of this dataset. + * Don't scan it now unless we need to because something + * under it was modified. + */ + if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) { + scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); + } + if (buf) + (void) arc_buf_remove_ref(buf, &buf); +} + +static void +dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp, + dmu_tx_t *tx) +{ + zbookmark_t zb; + + SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + dsl_scan_visitbp(bp, &zb, NULL, NULL, + ds, scn, DMU_OST_NONE, tx); + + dprintf_ds(ds, "finished scan%s", ""); +} + +void +dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_scan_t *scn = dp->dp_scan; + uint64_t mintxg; + + if (scn->scn_phys.scn_state != DSS_SCANNING) + return; + + if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { + if (dsl_dataset_is_snapshot(ds)) { + /* Note, scn_cur_{min,max}_txg stays the same. 
*/ + scn->scn_phys.scn_bookmark.zb_objset = + ds->ds_phys->ds_next_snap_obj; + zfs_dbgmsg("destroying ds %llu; currently traversing; " + "reset zb_objset to %llu", + (u_longlong_t)ds->ds_object, + (u_longlong_t)ds->ds_phys->ds_next_snap_obj); + scn->scn_phys.scn_flags |= DSF_VISIT_DS_AGAIN; + } else { + SET_BOOKMARK(&scn->scn_phys.scn_bookmark, + ZB_DESTROYED_OBJSET, 0, 0, 0); + zfs_dbgmsg("destroying ds %llu; currently traversing; " + "reset bookmark to -1,0,0,0", + (u_longlong_t)ds->ds_object); + } + } else if (zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { + ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); + if (dsl_dataset_is_snapshot(ds)) { + /* + * We keep the same mintxg; it could be > + * ds_creation_txg if the previous snapshot was + * deleted too. + */ + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, + ds->ds_phys->ds_next_snap_obj, mintxg, tx) == 0); + zfs_dbgmsg("destroying ds %llu; in queue; " + "replacing with %llu", + (u_longlong_t)ds->ds_object, + (u_longlong_t)ds->ds_phys->ds_next_snap_obj); + } else { + zfs_dbgmsg("destroying ds %llu; in queue; removing", + (u_longlong_t)ds->ds_object); + } + } else { + zfs_dbgmsg("destroying ds %llu; ignoring", + (u_longlong_t)ds->ds_object); + } + + /* + * dsl_scan_sync() should be called after this, and should sync + * out our changed state, but just to be safe, do it here. 
+ */ + dsl_scan_sync_state(scn, tx); +} + +void +dsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_scan_t *scn = dp->dp_scan; + uint64_t mintxg; + + if (scn->scn_phys.scn_state != DSS_SCANNING) + return; + + ASSERT(ds->ds_phys->ds_prev_snap_obj != 0); + + if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { + scn->scn_phys.scn_bookmark.zb_objset = + ds->ds_phys->ds_prev_snap_obj; + zfs_dbgmsg("snapshotting ds %llu; currently traversing; " + "reset zb_objset to %llu", + (u_longlong_t)ds->ds_object, + (u_longlong_t)ds->ds_phys->ds_prev_snap_obj); + } else if (zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, + ds->ds_phys->ds_prev_snap_obj, mintxg, tx) == 0); + zfs_dbgmsg("snapshotting ds %llu; in queue; " + "replacing with %llu", + (u_longlong_t)ds->ds_object, + (u_longlong_t)ds->ds_phys->ds_prev_snap_obj); + } + dsl_scan_sync_state(scn, tx); +} + +void +dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds1->ds_dir->dd_pool; + dsl_scan_t *scn = dp->dp_scan; + uint64_t mintxg; + + if (scn->scn_phys.scn_state != DSS_SCANNING) + return; + + if (scn->scn_phys.scn_bookmark.zb_objset == ds1->ds_object) { + scn->scn_phys.scn_bookmark.zb_objset = ds2->ds_object; + zfs_dbgmsg("clone_swap ds %llu; currently traversing; " + "reset zb_objset to %llu", + (u_longlong_t)ds1->ds_object, + (u_longlong_t)ds2->ds_object); + } else if (scn->scn_phys.scn_bookmark.zb_objset == ds2->ds_object) { + scn->scn_phys.scn_bookmark.zb_objset = ds1->ds_object; + zfs_dbgmsg("clone_swap ds %llu; currently traversing; " + "reset zb_objset to %llu", + (u_longlong_t)ds2->ds_object, + (u_longlong_t)ds1->ds_object); + } + + if 
(zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, + ds1->ds_object, &mintxg) == 0) { + int err; + + ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg); + ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg); + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); + err = zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); + VERIFY(err == 0 || err == EEXIST); + if (err == EEXIST) { + /* Both were there to begin with */ + VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, + ds1->ds_object, mintxg, tx)); + } + zfs_dbgmsg("clone_swap ds %llu; in queue; " + "replacing with %llu", + (u_longlong_t)ds1->ds_object, + (u_longlong_t)ds2->ds_object); + } else if (zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg) == 0) { + ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg); + ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg); + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); + VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); + zfs_dbgmsg("clone_swap ds %llu; in queue; " + "replacing with %llu", + (u_longlong_t)ds2->ds_object, + (u_longlong_t)ds1->ds_object); + } + + dsl_scan_sync_state(scn, tx); +} + +struct enqueue_clones_arg { + dmu_tx_t *tx; + uint64_t originobj; +}; + +/* ARGSUSED */ +static int +enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +{ + struct enqueue_clones_arg *eca = arg; + dsl_dataset_t *ds; + int err; + dsl_pool_t *dp = spa->spa_dsl_pool; + dsl_scan_t *scn = dp->dp_scan; + + err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (err) + return (err); + + if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) { + while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { + dsl_dataset_t *prev; + err = dsl_dataset_hold_obj(dp, 
+ ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + + dsl_dataset_rele(ds, FTAG); + if (err) + return (err); + ds = prev; + } + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, + ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); + } + dsl_dataset_rele(ds, FTAG); + return (0); +} + +static void +dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) +{ + dsl_pool_t *dp = scn->scn_dp; + dsl_dataset_t *ds; + objset_t *os; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + + if (dmu_objset_from_ds(ds, &os)) + goto out; + + /* + * Only the ZIL in the head (non-snapshot) is valid. Even though + * snapshots can have ZIL block pointers (which may be the same + * BP as in the head), they must be ignored. So we traverse the + * ZIL here, rather than in scan_recurse(), because the regular + * snapshot block-sharing rules don't apply to it. + */ + if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds)) + dsl_scan_zil(dp, &os->os_zil_header); + + /* + * Iterate over the bps in this ds. + */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx); + + char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP); + dsl_dataset_name(ds, dsname); + zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " + "pausing=%u", + (longlong_t)dsobj, dsname, + (longlong_t)scn->scn_phys.scn_cur_min_txg, + (longlong_t)scn->scn_phys.scn_cur_max_txg, + (int)scn->scn_pausing); + kmem_free(dsname, ZFS_MAXNAMELEN); + + if (scn->scn_pausing) + goto out; + + /* + * We've finished this pass over this dataset. + */ + + /* + * If we did not completely visit this dataset, do another pass. 
+ */ + if (scn->scn_phys.scn_flags & DSF_VISIT_DS_AGAIN) { + zfs_dbgmsg("incomplete pass; visiting again"); + scn->scn_phys.scn_flags &= ~DSF_VISIT_DS_AGAIN; + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, + scn->scn_phys.scn_cur_max_txg, tx) == 0); + goto out; + } + + /* + * Add descendent datasets to work queue. + */ + if (ds->ds_phys->ds_next_snap_obj != 0) { + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_phys->ds_next_snap_obj, + ds->ds_phys->ds_creation_txg, tx) == 0); + } + if (ds->ds_phys->ds_num_children > 1) { + boolean_t usenext = B_FALSE; + if (ds->ds_phys->ds_next_clones_obj != 0) { + uint64_t count; + /* + * A bug in a previous version of the code could + * cause upgrade_clones_cb() to not set + * ds_next_snap_obj when it should, leading to a + * missing entry. Therefore we can only use the + * next_clones_obj when its count is correct. + */ + int err = zap_count(dp->dp_meta_objset, + ds->ds_phys->ds_next_clones_obj, &count); + if (err == 0 && + count == ds->ds_phys->ds_num_children - 1) + usenext = B_TRUE; + } + + if (usenext) { + VERIFY(zap_join_key(dp->dp_meta_objset, + ds->ds_phys->ds_next_clones_obj, + scn->scn_phys.scn_queue_obj, + ds->ds_phys->ds_creation_txg, tx) == 0); + } else { + struct enqueue_clones_arg eca; + eca.tx = tx; + eca.originobj = ds->ds_object; + + (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa, + NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN); + } + } + +out: + dsl_dataset_rele(ds, FTAG); +} + +/* ARGSUSED */ +static int +enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +{ + dmu_tx_t *tx = arg; + dsl_dataset_t *ds; + int err; + dsl_pool_t *dp = spa->spa_dsl_pool; + dsl_scan_t *scn = dp->dp_scan; + + err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (err) + return (err); + + while (ds->ds_phys->ds_prev_snap_obj != 0) { + dsl_dataset_t *prev; + err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, + FTAG, 
&prev); + if (err) { + dsl_dataset_rele(ds, FTAG); + return (err); + } + + /* + * If this is a clone, we don't need to worry about it for now. + */ + if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { + dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele(prev, FTAG); + return (0); + } + dsl_dataset_rele(ds, FTAG); + ds = prev; + } + + VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, + ds->ds_object, ds->ds_phys->ds_prev_snap_txg, tx) == 0); + dsl_dataset_rele(ds, FTAG); + return (0); +} + +/* + * Scrub/dedup interaction. + * + * If there are N references to a deduped block, we don't want to scrub it + * N times -- ideally, we should scrub it exactly once. + * + * We leverage the fact that the dde's replication class (enum ddt_class) + * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest + * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order. + * + * To prevent excess scrubbing, the scrub begins by walking the DDT + * to find all blocks with refcnt > 1, and scrubs each of these once. + * Since there are two replication classes which contain blocks with + * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first. + * Finally the top-down scrub begins, only visiting blocks with refcnt == 1. + * + * There would be nothing more to say if a block's refcnt couldn't change + * during a scrub, but of course it can so we must account for changes + * in a block's replication class. + * + * Here's an example of what can occur: + * + * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1 + * when visited during the top-down scrub phase, it will be scrubbed twice. + * This negates our scrub optimization, but is otherwise harmless. + * + * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1 + * on each visit during the top-down scrub phase, it will never be scrubbed. 
+ * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's + * reference class transitions to a higher level (i.e DDT_CLASS_UNIQUE to + * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1 + * while a scrub is in progress, it scrubs the block right then. + */ +static void +dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) +{ + ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; + ddt_entry_t dde = { 0 }; + int error; + uint64_t n = 0; + + while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { + ddt_t *ddt; + + if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max) + break; + dprintf("visiting ddb=%llu/%llu/%llu/%llx\n", + (longlong_t)ddb->ddb_class, + (longlong_t)ddb->ddb_type, + (longlong_t)ddb->ddb_checksum, + (longlong_t)ddb->ddb_cursor); + + /* There should be no pending changes to the dedup table */ + ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum]; + ASSERT(avl_first(&ddt->ddt_tree) == NULL); + + dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); + n++; + + if (dsl_scan_check_pause(scn, NULL)) + break; + } + + zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u", + (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, + (int)scn->scn_pausing); + + ASSERT(error == 0 || error == ENOENT); + ASSERT(error != ENOENT || + ddb->ddb_class > scn->scn_phys.scn_ddt_class_max); +} + +/* ARGSUSED */ +void +dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, + ddt_entry_t *dde, dmu_tx_t *tx) +{ + const ddt_key_t *ddk = &dde->dde_key; + ddt_phys_t *ddp = dde->dde_phys; + blkptr_t bp; + zbookmark_t zb = { 0 }; + + if (scn->scn_phys.scn_state != DSS_SCANNING) + return; + + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + if (ddp->ddp_phys_birth == 0 || + ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg) + continue; + ddt_bp_create(checksum, ddk, ddp, &bp); + + scn->scn_visited_this_txg++; + scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb); + } +} + +static void 
+dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) +{ + dsl_pool_t *dp = scn->scn_dp; + zap_cursor_t zc; + zap_attribute_t za; + + if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= + scn->scn_phys.scn_ddt_class_max) { + scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; + scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; + dsl_scan_ddt(scn, tx); + if (scn->scn_pausing) + return; + } + + if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) { + /* First do the MOS & ORIGIN */ + + scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; + scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; + dsl_scan_visit_rootbp(scn, NULL, + &dp->dp_meta_rootbp, tx); + spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); + if (scn->scn_pausing) + return; + + if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { + VERIFY(0 == dmu_objset_find_spa(dp->dp_spa, + NULL, enqueue_cb, tx, DS_FIND_CHILDREN)); + } else { + dsl_scan_visitds(scn, + dp->dp_origin_snap->ds_object, tx); + } + ASSERT(!scn->scn_pausing); + } else if (scn->scn_phys.scn_bookmark.zb_objset != + ZB_DESTROYED_OBJSET) { + /* + * If we were paused, continue from here. Note if the + * ds we were paused on was deleted, the zb_objset may + * be -1, so we will skip this and find a new objset + * below. + */ + dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx); + if (scn->scn_pausing) + return; + } + + /* + * In case we were paused right at the end of the ds, zero the + * bookmark so we don't think that we're still trying to resume. 
+ */ + bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_t)); + + /* keep pulling things out of the zap-object-as-queue */ + while (zap_cursor_init(&zc, dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj), + zap_cursor_retrieve(&zc, &za) == 0) { + dsl_dataset_t *ds; + uint64_t dsobj; + + dsobj = strtonum(za.za_name, NULL); + VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, dsobj, tx)); + + /* Set up min/max txg */ + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + if (za.za_first_integer != 0) { + scn->scn_phys.scn_cur_min_txg = + MAX(scn->scn_phys.scn_min_txg, + za.za_first_integer); + } else { + scn->scn_phys.scn_cur_min_txg = + MAX(scn->scn_phys.scn_min_txg, + ds->ds_phys->ds_prev_snap_txg); + } + scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds); + dsl_dataset_rele(ds, FTAG); + + dsl_scan_visitds(scn, dsobj, tx); + zap_cursor_fini(&zc); + if (scn->scn_pausing) + return; + } + zap_cursor_fini(&zc); +} + +static int +dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + dsl_scan_t *scn = arg; + uint64_t elapsed_nanosecs; + + elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; + + if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || + (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms && + txg_sync_waiting(scn->scn_dp)) || + spa_shutting_down(scn->scn_dp->dp_spa)) + return (ERESTART); + + zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa, + dmu_tx_get_txg(tx), bp, 0)); + dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, + -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp), + -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx); + scn->scn_visited_this_txg++; + return (0); +} + +boolean_t +dsl_scan_active(dsl_scan_t *scn) +{ + spa_t *spa = scn->scn_dp->dp_spa; + uint64_t used = 0, comp, uncomp; + + if (spa->spa_load_state != SPA_LOAD_NONE) + return (B_FALSE); + if (spa_shutting_down(spa)) + return (B_FALSE); + + if (scn->scn_phys.scn_state == DSS_SCANNING) + return (B_TRUE); + + if 
(spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { + (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, + &used, &comp, &uncomp); + } + return (used != 0); +} + +void +dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) +{ + dsl_scan_t *scn = dp->dp_scan; + spa_t *spa = dp->dp_spa; + int err; + + /* + * Check for scn_restart_txg before checking spa_load_state, so + * that we can restart an old-style scan while the pool is being + * imported (see dsl_scan_init). + */ + if (scn->scn_restart_txg != 0 && + scn->scn_restart_txg <= tx->tx_txg) { + pool_scan_func_t func = POOL_SCAN_SCRUB; + dsl_scan_done(scn, B_FALSE, tx); + if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) + func = POOL_SCAN_RESILVER; + zfs_dbgmsg("restarting scan func=%u txg=%llu", + func, tx->tx_txg); + dsl_scan_setup_sync(scn, &func, tx); + } + + if (!dsl_scan_active(scn) || + spa_sync_pass(dp->dp_spa) > 1) + return; + + scn->scn_visited_this_txg = 0; + scn->scn_pausing = B_FALSE; + scn->scn_sync_start_time = gethrtime(); + spa->spa_scrub_active = B_TRUE; + + /* + * First process the free list. If we pause the free, don't do + * any scanning. This ensures that there is no free list when + * we are scanning, so the scan code doesn't have to worry about + * traversing it. + */ + if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { + scn->scn_zio_root = zio_root(dp->dp_spa, NULL, + NULL, ZIO_FLAG_MUSTSUCCEED); + err = bpobj_iterate(&dp->dp_free_bpobj, + dsl_scan_free_cb, scn, tx); + VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); + if (scn->scn_visited_this_txg) { + zfs_dbgmsg("freed %llu blocks in %llums from " + "free_bpobj txg %llu", + (longlong_t)scn->scn_visited_this_txg, + (longlong_t) + (gethrtime() - scn->scn_sync_start_time) / MICROSEC, + (longlong_t)tx->tx_txg); + scn->scn_visited_this_txg = 0; + /* + * Re-sync the ddt so that we can further modify + * it when doing bprewrite. 
+ */ + ddt_sync(spa, tx->tx_txg); + } + if (err == ERESTART) + return; + } + + if (scn->scn_phys.scn_state != DSS_SCANNING) + return; + + if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= + scn->scn_phys.scn_ddt_class_max) { + zfs_dbgmsg("doing scan sync txg %llu; " + "ddt bm=%llu/%llu/%llu/%llx", + (longlong_t)tx->tx_txg, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, + (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); + ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0); + } else { + zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu", + (longlong_t)tx->tx_txg, + (longlong_t)scn->scn_phys.scn_bookmark.zb_objset, + (longlong_t)scn->scn_phys.scn_bookmark.zb_object, + (longlong_t)scn->scn_phys.scn_bookmark.zb_level, + (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid); + } + + scn->scn_zio_root = zio_root(dp->dp_spa, NULL, + NULL, ZIO_FLAG_CANFAIL); + dsl_scan_visit(scn, tx); + (void) zio_wait(scn->scn_zio_root); + scn->scn_zio_root = NULL; + + zfs_dbgmsg("visited %llu blocks in %llums", + (longlong_t)scn->scn_visited_this_txg, + (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC); + + if (!scn->scn_pausing) { + /* finished with scan. */ + zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg); + dsl_scan_done(scn, B_TRUE, tx); + } + + if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight > 0) { + cv_wait(&spa->spa_scrub_io_cv, + &spa->spa_scrub_lock); + } + mutex_exit(&spa->spa_scrub_lock); + } + + dsl_scan_sync_state(scn, tx); +} + +/* + * This will start a new scan, or restart an existing one. 
+ */ +void +dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) +{ + if (txg == 0) { + dmu_tx_t *tx; + tx = dmu_tx_create_dd(dp->dp_mos_dir); + VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); + + txg = dmu_tx_get_txg(tx); + dp->dp_scan->scn_restart_txg = txg; + dmu_tx_commit(tx); + } else { + dp->dp_scan->scn_restart_txg = txg; + } + zfs_dbgmsg("restarting resilver txg=%llu", txg); +} + +boolean_t +dsl_scan_resilvering(dsl_pool_t *dp) +{ + return (dp->dp_scan->scn_phys.scn_state == DSS_SCANNING && + dp->dp_scan->scn_phys.scn_func == POOL_SCAN_RESILVER); +} + +/* + * scrub consumers + */ + +static void +count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp) +{ + int i; + + /* + * If we resume after a reboot, zab will be NULL; don't record + * incomplete stats in that case. + */ + if (zab == NULL) + return; + + for (i = 0; i < 4; i++) { + int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS; + int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL; + zfs_blkstat_t *zb = &zab->zab_type[l][t]; + int equal; + + zb->zb_count++; + zb->zb_asize += BP_GET_ASIZE(bp); + zb->zb_lsize += BP_GET_LSIZE(bp); + zb->zb_psize += BP_GET_PSIZE(bp); + zb->zb_gangs += BP_COUNT_GANG(bp); + + switch (BP_GET_NDVAS(bp)) { + case 2: + if (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[1])) + zb->zb_ditto_2_of_2_samevdev++; + break; + case 3: + equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[1])) + + (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[2])) + + (DVA_GET_VDEV(&bp->blk_dva[1]) == + DVA_GET_VDEV(&bp->blk_dva[2])); + if (equal == 1) + zb->zb_ditto_2_of_3_samevdev++; + else if (equal == 3) + zb->zb_ditto_3_of_3_samevdev++; + break; + } + } +} + +static void +dsl_scan_scrub_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + + zio_data_buf_free(zio->io_data, zio->io_size); + + mutex_enter(&spa->spa_scrub_lock); + spa->spa_scrub_inflight--; + cv_broadcast(&spa->spa_scrub_io_cv); + + if (zio->io_error && (zio->io_error != ECKSUM || + !(zio->io_flags & 
ZIO_FLAG_SPECULATIVE))) { + spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors++; + } + mutex_exit(&spa->spa_scrub_lock); +} + +static int +dsl_scan_scrub_cb(dsl_pool_t *dp, + const blkptr_t *bp, const zbookmark_t *zb) +{ + dsl_scan_t *scn = dp->dp_scan; + size_t size = BP_GET_PSIZE(bp); + spa_t *spa = dp->dp_spa; + uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp); + boolean_t needs_io; + int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; + int zio_priority; + int scan_delay = 0; + + if (phys_birth <= scn->scn_phys.scn_min_txg || + phys_birth >= scn->scn_phys.scn_max_txg) + return (0); + + count_block(dp->dp_blkstats, bp); + + ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn)); + if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) { + zio_flags |= ZIO_FLAG_SCRUB; + zio_priority = ZIO_PRIORITY_SCRUB; + needs_io = B_TRUE; + scan_delay = zfs_scrub_delay; + } else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) { + zio_flags |= ZIO_FLAG_RESILVER; + zio_priority = ZIO_PRIORITY_RESILVER; + needs_io = B_FALSE; + scan_delay = zfs_resilver_delay; + } + + /* If it's an intent log block, failure is expected. */ + if (zb->zb_level == ZB_ZIL_LEVEL) + zio_flags |= ZIO_FLAG_SPECULATIVE; + + for (int d = 0; d < BP_GET_NDVAS(bp); d++) { + vdev_t *vd = vdev_lookup_top(spa, + DVA_GET_VDEV(&bp->blk_dva[d])); + + /* + * Keep track of how much data we've examined so that + * zpool(1M) status can make useful progress reports. + */ + scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]); + spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]); + + /* if it's a resilver, this may not be in the target range */ + if (!needs_io) { + if (DVA_GET_GANG(&bp->blk_dva[d])) { + /* + * Gang members may be spread across multiple + * vdevs, so the best estimate we have is the + * scrub range, which has already been checked. + * XXX -- it would be better to change our + * allocation policy to ensure that all + * gang members reside on the same vdev. 
+ */ + needs_io = B_TRUE; + } else { + needs_io = vdev_dtl_contains(vd, DTL_PARTIAL, + phys_birth, 1); + } + } + } + + if (needs_io && !zfs_no_scrub_io) { + vdev_t *rvd = spa->spa_root_vdev; + uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight; + void *data = zio_data_buf_alloc(size); + + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight >= maxinflight) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + spa->spa_scrub_inflight++; + mutex_exit(&spa->spa_scrub_lock); + + /* + * If we're seeing recent (zfs_scan_idle) "important" I/Os + * then throttle our workload to limit the impact of a scan. + */ + if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle) + delay(scan_delay); + + zio_nowait(zio_read(NULL, spa, bp, data, size, + dsl_scan_scrub_done, NULL, zio_priority, + zio_flags, zb)); + } + + /* do not relocate this block */ + return (0); +} + +int +dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) +{ + spa_t *spa = dp->dp_spa; + + /* + * Purge all vdev caches and probe all devices. We do this here + * rather than in sync context because this requires a writer lock + * on the spa_config lock, which we can't do from sync context. The + * spa_scrub_reopen flag indicates that vdev_open() should not + * attempt to start another scrub. + */ + spa_vdev_state_enter(spa, SCL_NONE); + spa->spa_scrub_reopen = B_TRUE; + vdev_reopen(spa->spa_root_vdev); + spa->spa_scrub_reopen = B_FALSE; + (void) spa_vdev_state_exit(spa, NULL, 0); + + return (dsl_sync_task_do(dp, dsl_scan_setup_check, + dsl_scan_setup_sync, dp->dp_scan, &func, 0)); +} diff --git a/uts/common/fs/zfs/dsl_synctask.c b/uts/common/fs/zfs/dsl_synctask.c new file mode 100644 index 000000000000..b0818ce274d4 --- /dev/null +++ b/uts/common/fs/zfs/dsl_synctask.c @@ -0,0 +1,240 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_synctask.h> +#include <sys/metaslab.h> + +#define DST_AVG_BLKSHIFT 14 + +/* ARGSUSED */ +static int +dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx) +{ + return (0); +} + +dsl_sync_task_group_t * +dsl_sync_task_group_create(dsl_pool_t *dp) +{ + dsl_sync_task_group_t *dstg; + + dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP); + list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t), + offsetof(dsl_sync_task_t, dst_node)); + dstg->dstg_pool = dp; + + return (dstg); +} + +void +dsl_sync_task_create(dsl_sync_task_group_t *dstg, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified) +{ + dsl_sync_task_t *dst; + + if (checkfunc == NULL) + checkfunc = dsl_null_checkfunc; + dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP); + dst->dst_checkfunc = checkfunc; + dst->dst_syncfunc = syncfunc; + dst->dst_arg1 = arg1; + dst->dst_arg2 = arg2; + list_insert_tail(&dstg->dstg_tasks, dst); + + dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT; +} + +int +dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) +{ + dmu_tx_t *tx; + 
uint64_t txg; + dsl_sync_task_t *dst; + +top: + tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir); + VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); + + txg = dmu_tx_get_txg(tx); + + /* Do a preliminary error check. */ + dstg->dstg_err = 0; + rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { +#ifdef ZFS_DEBUG + /* + * Only check half the time, otherwise, the sync-context + * check will almost never fail. + */ + if (spa_get_random(2) == 0) + continue; +#endif + dst->dst_err = + dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); + if (dst->dst_err) + dstg->dstg_err = dst->dst_err; + } + rw_exit(&dstg->dstg_pool->dp_config_rwlock); + + if (dstg->dstg_err) { + dmu_tx_commit(tx); + return (dstg->dstg_err); + } + + /* + * We don't generally have many sync tasks, so pay the price of + * add_tail to get the tasks executed in the right order. + */ + VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, + dstg, txg)); + + dmu_tx_commit(tx); + + txg_wait_synced(dstg->dstg_pool, txg); + + if (dstg->dstg_err == EAGAIN) { + txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE); + goto top; + } + + return (dstg->dstg_err); +} + +void +dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +{ + uint64_t txg; + + dstg->dstg_nowaiter = B_TRUE; + txg = dmu_tx_get_txg(tx); + /* + * We don't generally have many sync tasks, so pay the price of + * add_tail to get the tasks executed in the right order. 
+ */ + VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, + dstg, txg)); +} + +void +dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg) +{ + dsl_sync_task_t *dst; + + while (dst = list_head(&dstg->dstg_tasks)) { + list_remove(&dstg->dstg_tasks, dst); + kmem_free(dst, sizeof (dsl_sync_task_t)); + } + kmem_free(dstg, sizeof (dsl_sync_task_group_t)); +} + +void +dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +{ + dsl_sync_task_t *dst; + dsl_pool_t *dp = dstg->dstg_pool; + uint64_t quota, used; + + ASSERT3U(dstg->dstg_err, ==, 0); + + /* + * Check for sufficient space. We just check against what's + * on-disk; we don't want any in-flight accounting to get in our + * way, because open context may have already used up various + * in-core limits (arc_tempreserve, dsl_pool_tempreserve). + */ + quota = dsl_pool_adjustedsize(dp, B_FALSE) - + metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); + used = dp->dp_root_dir->dd_phys->dd_used_bytes; + /* MOS space is triple-dittoed, so we multiply by 3. */ + if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) { + dstg->dstg_err = ENOSPC; + return; + } + + /* + * Check for errors by calling checkfuncs. + */ + rw_enter(&dp->dp_config_rwlock, RW_WRITER); + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { + dst->dst_err = + dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); + if (dst->dst_err) + dstg->dstg_err = dst->dst_err; + } + + if (dstg->dstg_err == 0) { + /* + * Execute sync tasks. 
+ */ + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { + dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx); + } + } + rw_exit(&dp->dp_config_rwlock); + + if (dstg->dstg_nowaiter) + dsl_sync_task_group_destroy(dstg); +} + +int +dsl_sync_task_do(dsl_pool_t *dp, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified) +{ + dsl_sync_task_group_t *dstg; + int err; + + ASSERT(spa_writeable(dp->dp_spa)); + + dstg = dsl_sync_task_group_create(dp); + dsl_sync_task_create(dstg, checkfunc, syncfunc, + arg1, arg2, blocks_modified); + err = dsl_sync_task_group_wait(dstg); + dsl_sync_task_group_destroy(dstg); + return (err); +} + +void +dsl_sync_task_do_nowait(dsl_pool_t *dp, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx) +{ + dsl_sync_task_group_t *dstg; + + if (!spa_writeable(dp->dp_spa)) + return; + + dstg = dsl_sync_task_group_create(dp); + dsl_sync_task_create(dstg, checkfunc, syncfunc, + arg1, arg2, blocks_modified); + dsl_sync_task_group_nowait(dstg, tx); +} diff --git a/uts/common/fs/zfs/gzip.c b/uts/common/fs/zfs/gzip.c new file mode 100644 index 000000000000..b257d4af753c --- /dev/null +++ b/uts/common/fs/zfs/gzip.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/zmod.h> + +#ifdef _KERNEL +#include <sys/systm.h> +#else +#include <strings.h> +#endif + +size_t +gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) +{ + size_t dstlen = d_len; + + ASSERT(d_len <= s_len); + + if (z_compress_level(d_start, &dstlen, s_start, s_len, n) != Z_OK) { + if (d_len != s_len) + return (s_len); + + bcopy(s_start, d_start, s_len); + return (s_len); + } + + return (dstlen); +} + +/*ARGSUSED*/ +int +gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) +{ + size_t dstlen = d_len; + + ASSERT(d_len >= s_len); + + if (z_uncompress(d_start, &dstlen, s_start, s_len) != Z_OK) + return (-1); + + return (0); +} diff --git a/uts/common/fs/zfs/lzjb.c b/uts/common/fs/zfs/lzjb.c new file mode 100644 index 000000000000..ab3de51b7259 --- /dev/null +++ b/uts/common/fs/zfs/lzjb.c @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * We keep our own copy of this algorithm for 3 main reasons: + * 1. If we didn't, anyone modifying common/os/compress.c would + * directly break our on disk format + * 2. Our version of lzjb does not have a number of checks that the + * common/os version needs and uses + * 3. We initialize the lempel to ensure deterministic results, + * so that identical blocks can always be deduplicated. + * In particular, we are adding the "feature" that compress() can + * take a destination buffer size and returns the compressed length, or the + * source length if compression would overflow the destination buffer. + */ + +#include <sys/types.h> + +#define MATCH_BITS 6 +#define MATCH_MIN 3 +#define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) +#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) +#define LEMPEL_SIZE 1024 + +/*ARGSUSED*/ +size_t +lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) +{ + uchar_t *src = s_start; + uchar_t *dst = d_start; + uchar_t *cpy, *copymap; + int copymask = 1 << (NBBY - 1); + int mlen, offset, hash; + uint16_t *hp; + uint16_t lempel[LEMPEL_SIZE] = { 0 }; + + while (src < (uchar_t *)s_start + s_len) { + if ((copymask <<= 1) == (1 << NBBY)) { + if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) + return (s_len); + copymask = 1; + copymap = dst; + *dst++ = 0; + } + if (src > (uchar_t *)s_start + s_len - MATCH_MAX) { + *dst++ = *src++; + continue; + } + hash = (src[0] << 16) + (src[1] << 8) + src[2]; + hash += hash >> 9; + hash += hash >> 5; + hp = &lempel[hash & (LEMPEL_SIZE - 1)]; + offset = (intptr_t)(src - *hp) & OFFSET_MASK; + *hp = (uint16_t)(uintptr_t)src; + cpy = src - offset; 
+ if (cpy >= (uchar_t *)s_start && cpy != src && + src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { + *copymap |= copymask; + for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++) + if (src[mlen] != cpy[mlen]) + break; + *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | + (offset >> NBBY); + *dst++ = (uchar_t)offset; + src += mlen; + } else { + *dst++ = *src++; + } + } + return (dst - (uchar_t *)d_start); +} + +/*ARGSUSED*/ +int +lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) +{ + uchar_t *src = s_start; + uchar_t *dst = d_start; + uchar_t *d_end = (uchar_t *)d_start + d_len; + uchar_t *cpy, copymap; + int copymask = 1 << (NBBY - 1); + + while (dst < d_end) { + if ((copymask <<= 1) == (1 << NBBY)) { + copymask = 1; + copymap = *src++; + } + if (copymap & copymask) { + int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; + int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; + src += 2; + if ((cpy = dst - offset) < (uchar_t *)d_start) + return (-1); + while (--mlen >= 0 && dst < d_end) + *dst++ = *cpy++; + } else { + *dst++ = *src++; + } + } + return (0); +} diff --git a/uts/common/fs/zfs/metaslab.c b/uts/common/fs/zfs/metaslab.c new file mode 100644 index 000000000000..17b4b12c4ee4 --- /dev/null +++ b/uts/common/fs/zfs/metaslab.c @@ -0,0 +1,1604 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/dmu.h> +#include <sys/dmu_tx.h> +#include <sys/space_map.h> +#include <sys/metaslab_impl.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> + +uint64_t metaslab_aliquot = 512ULL << 10; +uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ + +/* + * Metaslab debugging: when set, keeps all space maps in core to verify frees. + */ +static int metaslab_debug = 0; + +/* + * Minimum size which forces the dynamic allocator to change + * its allocation strategy. Once the space map cannot satisfy + * an allocation of this size then it switches to using a more + * aggressive strategy (i.e., search by size rather than offset). + */ +uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; + +/* + * The minimum free space, in percent, which must be available + * in a space map to continue allocations in a first-fit fashion. + * Once the space_map's free space drops below this level we dynamically + * switch to using best-fit allocations. + */ +int metaslab_df_free_pct = 4; + +/* + * A metaslab is considered "free" if it contains a contiguous + * segment which is greater than metaslab_min_alloc_size. + */ +uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS; + +/* + * Max number of space_maps to prefetch. + */ +int metaslab_prefetch_limit = SPA_DVAS_PER_BP; + +/* + * Percentage bonus multiplier for metaslabs that are in the bonus area. 
+ */ +int metaslab_smo_bonus_pct = 150; + +/* + * ========================================================================== + * Metaslab classes + * ========================================================================== + */ +metaslab_class_t * +metaslab_class_create(spa_t *spa, space_map_ops_t *ops) +{ + metaslab_class_t *mc; + + mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); + + mc->mc_spa = spa; + mc->mc_rotor = NULL; + mc->mc_ops = ops; + + return (mc); +} + +void +metaslab_class_destroy(metaslab_class_t *mc) +{ + ASSERT(mc->mc_rotor == NULL); + ASSERT(mc->mc_alloc == 0); + ASSERT(mc->mc_deferred == 0); + ASSERT(mc->mc_space == 0); + ASSERT(mc->mc_dspace == 0); + + kmem_free(mc, sizeof (metaslab_class_t)); +} + +int +metaslab_class_validate(metaslab_class_t *mc) +{ + metaslab_group_t *mg; + vdev_t *vd; + + /* + * Must hold one of the spa_config locks. + */ + ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) || + spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER)); + + if ((mg = mc->mc_rotor) == NULL) + return (0); + + do { + vd = mg->mg_vd; + ASSERT(vd->vdev_mg != NULL); + ASSERT3P(vd->vdev_top, ==, vd); + ASSERT3P(mg->mg_class, ==, mc); + ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops); + } while ((mg = mg->mg_next) != mc->mc_rotor); + + return (0); +} + +void +metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta, + int64_t defer_delta, int64_t space_delta, int64_t dspace_delta) +{ + atomic_add_64(&mc->mc_alloc, alloc_delta); + atomic_add_64(&mc->mc_deferred, defer_delta); + atomic_add_64(&mc->mc_space, space_delta); + atomic_add_64(&mc->mc_dspace, dspace_delta); +} + +uint64_t +metaslab_class_get_alloc(metaslab_class_t *mc) +{ + return (mc->mc_alloc); +} + +uint64_t +metaslab_class_get_deferred(metaslab_class_t *mc) +{ + return (mc->mc_deferred); +} + +uint64_t +metaslab_class_get_space(metaslab_class_t *mc) +{ + return (mc->mc_space); +} + +uint64_t +metaslab_class_get_dspace(metaslab_class_t *mc) +{ + return 
(spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); +} + +/* + * ========================================================================== + * Metaslab groups + * ========================================================================== + */ +static int +metaslab_compare(const void *x1, const void *x2) +{ + const metaslab_t *m1 = x1; + const metaslab_t *m2 = x2; + + if (m1->ms_weight < m2->ms_weight) + return (1); + if (m1->ms_weight > m2->ms_weight) + return (-1); + + /* + * If the weights are identical, use the offset to force uniqueness. + */ + if (m1->ms_map.sm_start < m2->ms_map.sm_start) + return (-1); + if (m1->ms_map.sm_start > m2->ms_map.sm_start) + return (1); + + ASSERT3P(m1, ==, m2); + + return (0); +} + +metaslab_group_t * +metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) +{ + metaslab_group_t *mg; + + mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP); + mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); + avl_create(&mg->mg_metaslab_tree, metaslab_compare, + sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); + mg->mg_vd = vd; + mg->mg_class = mc; + mg->mg_activation_count = 0; + + return (mg); +} + +void +metaslab_group_destroy(metaslab_group_t *mg) +{ + ASSERT(mg->mg_prev == NULL); + ASSERT(mg->mg_next == NULL); + /* + * We may have gone below zero with the activation count + * either because we never activated in the first place or + * because we're done, and possibly removing the vdev. 
+ */ + ASSERT(mg->mg_activation_count <= 0); + + avl_destroy(&mg->mg_metaslab_tree); + mutex_destroy(&mg->mg_lock); + kmem_free(mg, sizeof (metaslab_group_t)); +} + +void +metaslab_group_activate(metaslab_group_t *mg) +{ + metaslab_class_t *mc = mg->mg_class; + metaslab_group_t *mgprev, *mgnext; + + ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); + + ASSERT(mc->mc_rotor != mg); + ASSERT(mg->mg_prev == NULL); + ASSERT(mg->mg_next == NULL); + ASSERT(mg->mg_activation_count <= 0); + + if (++mg->mg_activation_count <= 0) + return; + + mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children); + + if ((mgprev = mc->mc_rotor) == NULL) { + mg->mg_prev = mg; + mg->mg_next = mg; + } else { + mgnext = mgprev->mg_next; + mg->mg_prev = mgprev; + mg->mg_next = mgnext; + mgprev->mg_next = mg; + mgnext->mg_prev = mg; + } + mc->mc_rotor = mg; +} + +void +metaslab_group_passivate(metaslab_group_t *mg) +{ + metaslab_class_t *mc = mg->mg_class; + metaslab_group_t *mgprev, *mgnext; + + ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); + + if (--mg->mg_activation_count != 0) { + ASSERT(mc->mc_rotor != mg); + ASSERT(mg->mg_prev == NULL); + ASSERT(mg->mg_next == NULL); + ASSERT(mg->mg_activation_count < 0); + return; + } + + mgprev = mg->mg_prev; + mgnext = mg->mg_next; + + if (mg == mgnext) { + mc->mc_rotor = NULL; + } else { + mc->mc_rotor = mgnext; + mgprev->mg_next = mgnext; + mgnext->mg_prev = mgprev; + } + + mg->mg_prev = NULL; + mg->mg_next = NULL; +} + +static void +metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) +{ + mutex_enter(&mg->mg_lock); + ASSERT(msp->ms_group == NULL); + msp->ms_group = mg; + msp->ms_weight = 0; + avl_add(&mg->mg_metaslab_tree, msp); + mutex_exit(&mg->mg_lock); +} + +static void +metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) +{ + mutex_enter(&mg->mg_lock); + ASSERT(msp->ms_group == mg); + avl_remove(&mg->mg_metaslab_tree, msp); + msp->ms_group = NULL; + mutex_exit(&mg->mg_lock); +} + +static void 
metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
{
	/*
	 * Although in principle the weight can be any value, in
	 * practice we do not use values in the range [1, 510].
	 */
	ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0);
	ASSERT(MUTEX_HELD(&msp->ms_lock));

	/*
	 * The weight is the tree's sort key, so the metaslab is removed,
	 * updated and re-inserted while holding mg_lock.
	 */
	mutex_enter(&mg->mg_lock);
	ASSERT(msp->ms_group == mg);
	avl_remove(&mg->mg_metaslab_tree, msp);
	msp->ms_weight = weight;
	avl_add(&mg->mg_metaslab_tree, msp);
	mutex_exit(&mg->mg_lock);
}

/*
 * ==========================================================================
 * Common allocator routines
 * ==========================================================================
 */

/*
 * AVL comparator that orders free segments by ascending size and breaks
 * ties by ascending start offset.
 */
static int
metaslab_segsize_compare(const void *x1, const void *x2)
{
	const space_seg_t *s1 = x1;
	const space_seg_t *s2 = x2;
	uint64_t ss_size1 = s1->ss_end - s1->ss_start;
	uint64_t ss_size2 = s2->ss_end - s2->ss_start;

	if (ss_size1 < ss_size2)
		return (-1);
	if (ss_size1 > ss_size2)
		return (1);

	if (s1->ss_start < s2->ss_start)
		return (-1);
	if (s1->ss_start > s2->ss_start)
		return (1);

	return (0);
}

/*
 * This is a helper function that can be used by the allocator to find
 * a suitable block to allocate. This will search the specified AVL
 * tree looking for a block that matches the specified criteria.
 *
 * The search starts at *cursor.  On success the aligned offset is
 * returned and *cursor is advanced past the allocation; if nothing fits
 * even after restarting from offset 0, -1ULL is returned.
 */
static uint64_t
metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
    uint64_t align)
{
	space_seg_t *ss, ssearch;
	avl_index_t where;

	ssearch.ss_start = *cursor;
	ssearch.ss_end = *cursor + size;

	ss = avl_find(t, &ssearch, &where);
	if (ss == NULL)
		ss = avl_nearest(t, where, AVL_AFTER);

	while (ss != NULL) {
		uint64_t offset = P2ROUNDUP(ss->ss_start, align);

		if (offset + size <= ss->ss_end) {
			*cursor = offset + size;
			return (offset);
		}
		ss = AVL_NEXT(t, ss);
	}

	/*
	 * If we know we've searched the whole map (*cursor == 0), give up.
	 * Otherwise, reset the cursor to the beginning and try again.
	 */
	if (*cursor == 0)
		return (-1ULL);

	*cursor = 0;
	return (metaslab_block_picker(t, cursor, size, align));
}

/*
 * Set up the allocator-private state of a space map: a zeroed array of
 * 64 uint64_t values in sm_ppd and a second AVL tree (sm_pp_root), sorted
 * by segment size, that indexes every segment currently in sm_root.
 */
static void
metaslab_pp_load(space_map_t *sm)
{
	space_seg_t *ss;

	ASSERT(sm->sm_ppd == NULL);
	sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);

	sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
	avl_create(sm->sm_pp_root, metaslab_segsize_compare,
	    sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));

	for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
		avl_add(sm->sm_pp_root, ss);
}

/*
 * Tear down the state created by metaslab_pp_load(): free sm_ppd, empty
 * and destroy the size-sorted tree, and free the tree header.  The
 * segments themselves are not freed here.
 */
static void
metaslab_pp_unload(space_map_t *sm)
{
	void *cookie = NULL;

	kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
	sm->sm_ppd = NULL;

	while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) {
		/* tear down the tree */
	}

	avl_destroy(sm->sm_pp_root);
	kmem_free(sm->sm_pp_root, sizeof (avl_tree_t));
	sm->sm_pp_root = NULL;
}

/* Claim callback of the ops vector; intentionally a no-op. */
/* ARGSUSED */
static void
metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
	/* No need to update cursor */
}

/* Free callback of the ops vector; intentionally a no-op. */
/* ARGSUSED */
static void
metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size)
{
	/* No need to update cursor */
}

/*
 * Return the maximum contiguous segment within the metaslab.
 * Returns 0 if the size-sorted tree does not exist or is empty.
 */
uint64_t
metaslab_pp_maxsize(space_map_t *sm)
{
	avl_tree_t *t = sm->sm_pp_root;
	space_seg_t *ss;

	/* The tree is sorted by size, so the last node is the largest. */
	if (t == NULL || (ss = avl_last(t)) == NULL)
		return (0ULL);

	return (ss->ss_end - ss->ss_start);
}

/*
 * ==========================================================================
 * The first-fit block allocator
 * ==========================================================================
 */

/*
 * First-fit allocation in the offset-sorted tree.  The alignment is the
 * lowest set bit of `size', and a separate cursor in sm_ppd is kept for
 * each alignment.
 */
static uint64_t
metaslab_ff_alloc(space_map_t *sm, uint64_t size)
{
	avl_tree_t *t = &sm->sm_root;
	uint64_t align = size & -size;
	uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;

	return (metaslab_block_picker(t, cursor, size, align));
}

/* The first-fit allocator unconditionally reports the map as fragmented. */
/* ARGSUSED */
boolean_t
metaslab_ff_fragmented(space_map_t *sm)
{
	return (B_TRUE);
}

/* Operations vector for the first-fit allocator. */
static space_map_ops_t metaslab_ff_ops = {
	metaslab_pp_load,
	metaslab_pp_unload,
	metaslab_ff_alloc,
	metaslab_pp_claim,
	metaslab_pp_free,
	metaslab_pp_maxsize,
	metaslab_ff_fragmented
};

/*
 * ==========================================================================
 * Dynamic block allocator -
 * Uses the first fit allocation scheme until space gets low and then
 * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
 * and metaslab_df_free_pct to determine when to switch the allocation scheme.
 * ==========================================================================
 */
static uint64_t
metaslab_df_alloc(space_map_t *sm, uint64_t size)
{
	avl_tree_t *t = &sm->sm_root;
	uint64_t align = size & -size;
	uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
	uint64_t max_size = metaslab_pp_maxsize(sm);
	int free_pct = sm->sm_space * 100 / sm->sm_size;

	ASSERT(MUTEX_HELD(sm->sm_lock));
	ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));

	/* Not even the largest free segment can hold this request. */
	if (max_size < size)
		return (-1ULL);

	/*
	 * If we're running low on space switch to using the size
	 * sorted AVL tree (best-fit).
	 */
	if (max_size < metaslab_df_alloc_threshold ||
	    free_pct < metaslab_df_free_pct) {
		t = sm->sm_pp_root;
		*cursor = 0;
	}

	return (metaslab_block_picker(t, cursor, size, 1ULL));
}

/*
 * Report whether the dynamic allocator would use best-fit for this map:
 * B_FALSE only while both the largest segment and the free percentage
 * are at or above their thresholds.
 */
static boolean_t
metaslab_df_fragmented(space_map_t *sm)
{
	uint64_t max_size = metaslab_pp_maxsize(sm);
	int free_pct = sm->sm_space * 100 / sm->sm_size;

	if (max_size >= metaslab_df_alloc_threshold &&
	    free_pct >= metaslab_df_free_pct)
		return (B_FALSE);

	return (B_TRUE);
}

/* Operations vector for the dynamic (first-fit, then best-fit) allocator. */
static space_map_ops_t metaslab_df_ops = {
	metaslab_pp_load,
	metaslab_pp_unload,
	metaslab_df_alloc,
	metaslab_pp_claim,
	metaslab_pp_free,
	metaslab_pp_maxsize,
	metaslab_df_fragmented
};

/*
 * ==========================================================================
 * Other experimental allocators
 * ==========================================================================
 */
static uint64_t
metaslab_cdf_alloc(space_map_t *sm, uint64_t size)
{
	avl_tree_t *t = &sm->sm_root;
	/* sm_ppd[0] is the cursor, sm_ppd[1] the end of the current extent. */
	uint64_t *cursor = (uint64_t *)sm->sm_ppd;
	uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1;
	uint64_t max_size = metaslab_pp_maxsize(sm);
	uint64_t rsize = size;
	uint64_t offset = 0;

	ASSERT(MUTEX_HELD(sm->sm_lock));
	ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));

	if (max_size < size)
		return (-1ULL);

	ASSERT3U(*extent_end, >=, *cursor);

	/*
	 * If we're running low on space switch to using the size
	 * sorted AVL tree (best-fit).
	 */
	if ((*cursor + size) > *extent_end) {

		t = sm->sm_pp_root;
		*cursor = *extent_end = 0;

		/* Reserve a larger extent when a big segment is available. */
		if (max_size > 2 * SPA_MAXBLOCKSIZE)
			rsize = MIN(metaslab_min_alloc_size, max_size);
		offset = metaslab_block_picker(t, extent_end, rsize, 1ULL);
		if (offset != -1)
			*cursor = offset + size;
	} else {
		offset = metaslab_block_picker(t, cursor, rsize, 1ULL);
	}
	ASSERT3U(*cursor, <=, *extent_end);
	return (offset);
}

/*
 * The map counts as fragmented unless its largest free segment exceeds
 * ten times metaslab_min_alloc_size.
 */
static boolean_t
metaslab_cdf_fragmented(space_map_t *sm)
{
	uint64_t max_size = metaslab_pp_maxsize(sm);

	if (max_size > (metaslab_min_alloc_size * 10))
		return (B_FALSE);
	return (B_TRUE);
}

/* Operations vector for the experimental "cdf" allocator. */
static space_map_ops_t metaslab_cdf_ops = {
	metaslab_pp_load,
	metaslab_pp_unload,
	metaslab_cdf_alloc,
	metaslab_pp_claim,
	metaslab_pp_free,
	metaslab_pp_maxsize,
	metaslab_cdf_fragmented
};

/*
 * Shift applied by the "ndf" allocator when sizing the segment it looks
 * for in the size-sorted tree and when deciding whether a map is
 * fragmented.
 */
uint64_t metaslab_ndf_clump_shift = 4;

/*
 * Allocate `size' bytes.  First look for a segment that starts exactly at
 * the per-size cursor; if there is none, or it is too small, fall back to
 * the size-sorted tree and take the first segment at least as large as
 * MIN(max_size, 1 << (highbit(size) + metaslab_ndf_clump_shift)).
 * Returns the segment's start offset, or -1ULL on failure.
 */
static uint64_t
metaslab_ndf_alloc(space_map_t *sm, uint64_t size)
{
	avl_tree_t *t = &sm->sm_root;
	avl_index_t where;
	space_seg_t *ss, ssearch;
	uint64_t hbit = highbit(size);
	uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1;
	uint64_t max_size = metaslab_pp_maxsize(sm);

	ASSERT(MUTEX_HELD(sm->sm_lock));
	ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));

	if (max_size < size)
		return (-1ULL);

	ssearch.ss_start = *cursor;
	ssearch.ss_end = *cursor + size;

	ss = avl_find(t, &ssearch, &where);
	if (ss == NULL || (ss->ss_start + size > ss->ss_end)) {
		t = sm->sm_pp_root;

		ssearch.ss_start = 0;
		ssearch.ss_end = MIN(max_size,
		    1ULL << (hbit + metaslab_ndf_clump_shift));
		ss = avl_find(t, &ssearch, &where);
		if (ss == NULL)
			ss = avl_nearest(t, where, AVL_AFTER);
		ASSERT(ss != NULL);
	}

	if (ss != NULL) {
		if (ss->ss_start + size <= ss->ss_end) {
			*cursor = ss->ss_start + size;
			return (ss->ss_start);
		}
	}
	return (-1ULL);
}

static boolean_t
metaslab_ndf_fragmented(space_map_t *sm)
{
	uint64_t max_size =
	    metaslab_pp_maxsize(sm);

	/* Fragmented unless the largest segment exceeds the clump size. */
	if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift))
		return (B_FALSE);
	return (B_TRUE);
}


/* Operations vector for the experimental "ndf" allocator. */
static space_map_ops_t metaslab_ndf_ops = {
	metaslab_pp_load,
	metaslab_pp_unload,
	metaslab_ndf_alloc,
	metaslab_pp_claim,
	metaslab_pp_free,
	metaslab_pp_maxsize,
	metaslab_ndf_fragmented
};

/* Allocator operations vector selected by default in this file. */
space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops;

/*
 * ==========================================================================
 * Metaslabs
 * ==========================================================================
 */

/*
 * Create the in-core metaslab covering [start, start + size) of the
 * group's vdev and add it to the group.  `smo' is copied into
 * ms_smo_syncing.  Returns the new metaslab.
 */
metaslab_t *
metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
	uint64_t start, uint64_t size, uint64_t txg)
{
	vdev_t *vd = mg->mg_vd;
	metaslab_t *msp;

	msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP);
	mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL);

	msp->ms_smo_syncing = *smo;

	/*
	 * We create the main space map here, but we don't create the
	 * allocmaps and freemaps until metaslab_sync_done().  This serves
	 * two purposes: it allows metaslab_sync_done() to detect the
	 * addition of new space; and for debugging, it ensures that we'd
	 * data fault on any attempt to use this metaslab before it's ready.
	 */
	space_map_create(&msp->ms_map, start, size,
	    vd->vdev_ashift, &msp->ms_lock);

	metaslab_group_add(mg, msp);

	/* In debug mode, load an existing space map right away. */
	if (metaslab_debug && smo->smo_object != 0) {
		mutex_enter(&msp->ms_lock);
		VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops,
		    SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0);
		mutex_exit(&msp->ms_lock);
	}

	/*
	 * If we're opening an existing pool (txg == 0) or creating
	 * a new one (txg == TXG_INITIAL), all space is available now.
	 * If we're adding space to an existing pool, the new space
	 * does not become available until after this txg has synced.
+ */ + if (txg <= TXG_INITIAL) + metaslab_sync_done(msp, 0); + + if (txg != 0) { + vdev_dirty(vd, 0, NULL, txg); + vdev_dirty(vd, VDD_METASLAB, msp, txg); + } + + return (msp); +} + +void +metaslab_fini(metaslab_t *msp) +{ + metaslab_group_t *mg = msp->ms_group; + + vdev_space_update(mg->mg_vd, + -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size); + + metaslab_group_remove(mg, msp); + + mutex_enter(&msp->ms_lock); + + space_map_unload(&msp->ms_map); + space_map_destroy(&msp->ms_map); + + for (int t = 0; t < TXG_SIZE; t++) { + space_map_destroy(&msp->ms_allocmap[t]); + space_map_destroy(&msp->ms_freemap[t]); + } + + for (int t = 0; t < TXG_DEFER_SIZE; t++) + space_map_destroy(&msp->ms_defermap[t]); + + ASSERT3S(msp->ms_deferspace, ==, 0); + + mutex_exit(&msp->ms_lock); + mutex_destroy(&msp->ms_lock); + + kmem_free(msp, sizeof (metaslab_t)); +} + +#define METASLAB_WEIGHT_PRIMARY (1ULL << 63) +#define METASLAB_WEIGHT_SECONDARY (1ULL << 62) +#define METASLAB_ACTIVE_MASK \ + (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY) + +static uint64_t +metaslab_weight(metaslab_t *msp) +{ + metaslab_group_t *mg = msp->ms_group; + space_map_t *sm = &msp->ms_map; + space_map_obj_t *smo = &msp->ms_smo; + vdev_t *vd = mg->mg_vd; + uint64_t weight, space; + + ASSERT(MUTEX_HELD(&msp->ms_lock)); + + /* + * The baseline weight is the metaslab's free space. + */ + space = sm->sm_size - smo->smo_alloc; + weight = space; + + /* + * Modern disks have uniform bit density and constant angular velocity. + * Therefore, the outer recording zones are faster (higher bandwidth) + * than the inner zones by the ratio of outer to inner track diameter, + * which is typically around 2:1. We account for this by assigning + * higher weight to lower metaslabs (multiplier ranging from 2x to 1x). + * In effect, this means that we'll select the metaslab with the most + * free bandwidth rather than simply the one with the most free space. 
+ */ + weight = 2 * weight - + ((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count; + ASSERT(weight >= space && weight <= 2 * space); + + /* + * For locality, assign higher weight to metaslabs which have + * a lower offset than what we've already activated. + */ + if (sm->sm_start <= mg->mg_bonus_area) + weight *= (metaslab_smo_bonus_pct / 100); + ASSERT(weight >= space && + weight <= 2 * (metaslab_smo_bonus_pct / 100) * space); + + if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) { + /* + * If this metaslab is one we're actively using, adjust its + * weight to make it preferable to any inactive metaslab so + * we'll polish it off. + */ + weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK); + } + return (weight); +} + +static void +metaslab_prefetch(metaslab_group_t *mg) +{ + spa_t *spa = mg->mg_vd->vdev_spa; + metaslab_t *msp; + avl_tree_t *t = &mg->mg_metaslab_tree; + int m; + + mutex_enter(&mg->mg_lock); + + /* + * Prefetch the next potential metaslabs + */ + for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) { + space_map_t *sm = &msp->ms_map; + space_map_obj_t *smo = &msp->ms_smo; + + /* If we have reached our prefetch limit then we're done */ + if (m >= metaslab_prefetch_limit) + break; + + if (!sm->sm_loaded && smo->smo_object != 0) { + mutex_exit(&mg->mg_lock); + dmu_prefetch(spa_meta_objset(spa), smo->smo_object, + 0ULL, smo->smo_objsize); + mutex_enter(&mg->mg_lock); + } + } + mutex_exit(&mg->mg_lock); +} + +static int +metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) +{ + metaslab_group_t *mg = msp->ms_group; + space_map_t *sm = &msp->ms_map; + space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; + + ASSERT(MUTEX_HELD(&msp->ms_lock)); + + if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { + space_map_load_wait(sm); + if (!sm->sm_loaded) { + int error = space_map_load(sm, sm_ops, SM_FREE, + &msp->ms_smo, + spa_meta_objset(msp->ms_group->mg_vd->vdev_spa)); + if (error) { + 
				/* Load failed: give the metaslab zero weight. */
				metaslab_group_sort(msp->ms_group, msp, 0);
				return (error);
			}
			/* Claim every deferred-free range in the loaded map. */
			for (int t = 0; t < TXG_DEFER_SIZE; t++)
				space_map_walk(&msp->ms_defermap[t],
				    space_map_claim, sm);

		}

		/*
		 * Track the bonus area as we activate new metaslabs.
		 */
		if (sm->sm_start > mg->mg_bonus_area) {
			mutex_enter(&mg->mg_lock);
			mg->mg_bonus_area = sm->sm_start;
			mutex_exit(&mg->mg_lock);
		}

		/*
		 * If we were able to load the map then make sure
		 * that this map is still able to satisfy our request.
		 */
		if (msp->ms_weight < size)
			return (ENOSPC);

		metaslab_group_sort(msp->ms_group, msp,
		    msp->ms_weight | activation_weight);
	}
	ASSERT(sm->sm_loaded);
	ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);

	return (0);
}

/*
 * Deactivate a metaslab by re-sorting it with a weight of
 * MIN(current weight, size).  Callers pass a size with the activation
 * bits clear, as the trailing ASSERT verifies.
 */
static void
metaslab_passivate(metaslab_t *msp, uint64_t size)
{
	/*
	 * If size < SPA_MINBLOCKSIZE, then we will not allocate from
	 * this metaslab again.  In that case, it had better be empty,
	 * or we would be leaving space on the table.
	 */
	ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0);
	metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
	ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
}

/*
 * Write a metaslab to disk in the context of the specified transaction group.
 */
void
metaslab_sync(metaslab_t *msp, uint64_t txg)
{
	vdev_t *vd = msp->ms_group->mg_vd;
	spa_t *spa = vd->vdev_spa;
	objset_t *mos = spa_meta_objset(spa);
	space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
	space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
	space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
	space_map_t *sm = &msp->ms_map;
	space_map_obj_t *smo = &msp->ms_smo_syncing;
	dmu_buf_t *db;
	dmu_tx_t *tx;

	ASSERT(!vd->vdev_ishole);

	/* Nothing was allocated or freed in this txg: nothing to write. */
	if (allocmap->sm_space == 0 && freemap->sm_space == 0)
		return;

	/*
	 * The only state that can actually be changing concurrently with
	 * metaslab_sync() is the metaslab's ms_map.
	 * No other thread can
	 * be modifying this txg's allocmap, freemap, freed_map, or smo.
	 * Therefore, we only hold ms_lock to satisfy space_map ASSERTs.
	 * We drop it whenever we call into the DMU, because the DMU
	 * can call down to us (e.g. via zio_free()) at any time.
	 */

	tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);

	/*
	 * First sync of this metaslab: allocate its space map object and
	 * record the object number in the vdev's metaslab array.
	 */
	if (smo->smo_object == 0) {
		ASSERT(smo->smo_objsize == 0);
		ASSERT(smo->smo_alloc == 0);
		smo->smo_object = dmu_object_alloc(mos,
		    DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
		    DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
		ASSERT(smo->smo_object != 0);
		dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
		    (sm->sm_start >> vd->vdev_ms_shift),
		    sizeof (uint64_t), &smo->smo_object, tx);
	}

	mutex_enter(&msp->ms_lock);

	/* Accumulate this txg's frees into freed_map. */
	space_map_walk(freemap, space_map_add, freed_map);

	if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >=
	    2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) {
		/*
		 * The in-core space map representation is twice as compact
		 * as the on-disk one, so it's time to condense the latter
		 * by generating a pure allocmap from first principles.
		 *
		 * This metaslab is 100% allocated,
		 * minus the content of the in-core map (sm),
		 * minus what's been freed this txg (freed_map),
		 * minus deferred frees (ms_defermap[]),
		 * minus allocations from txgs in the future
		 * (because they haven't been committed yet).
		 */
		space_map_vacate(allocmap, NULL, NULL);
		space_map_vacate(freemap, NULL, NULL);

		space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size);

		space_map_walk(sm, space_map_remove, allocmap);
		space_map_walk(freed_map, space_map_remove, allocmap);

		for (int t = 0; t < TXG_DEFER_SIZE; t++)
			space_map_walk(&msp->ms_defermap[t],
			    space_map_remove, allocmap);

		for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
			space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK],
			    space_map_remove, allocmap);

		/* ms_lock is dropped around the call into the DMU. */
		mutex_exit(&msp->ms_lock);
		space_map_truncate(smo, mos, tx);
		mutex_enter(&msp->ms_lock);
	}

	space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
	space_map_sync(freemap, SM_FREE, smo, mos, tx);

	mutex_exit(&msp->ms_lock);

	/* Copy the updated space map header into the object's bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	ASSERT3U(db->db_size, >=, sizeof (*smo));
	bcopy(smo, db->db_data, sizeof (*smo));
	dmu_buf_rele(db, FTAG);

	dmu_tx_commit(tx);
}

/*
 * Called after a transaction group has completely synced to mark
 * all of the metaslab's free space as usable.
 */
void
metaslab_sync_done(metaslab_t *msp, uint64_t txg)
{
	space_map_obj_t *smo = &msp->ms_smo;
	space_map_obj_t *smosync = &msp->ms_smo_syncing;
	space_map_t *sm = &msp->ms_map;
	space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
	space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
	metaslab_group_t *mg = msp->ms_group;
	vdev_t *vd = mg->mg_vd;
	int64_t alloc_delta, defer_delta;

	ASSERT(!vd->vdev_ishole);

	mutex_enter(&msp->ms_lock);

	/*
	 * If this metaslab is just becoming available, initialize its
	 * allocmaps and freemaps and add its capacity to the vdev.
	 */
	if (freed_map->sm_size == 0) {
		for (int t = 0; t < TXG_SIZE; t++) {
			space_map_create(&msp->ms_allocmap[t], sm->sm_start,
			    sm->sm_size, sm->sm_shift, sm->sm_lock);
			space_map_create(&msp->ms_freemap[t], sm->sm_start,
			    sm->sm_size, sm->sm_shift, sm->sm_lock);
		}

		for (int t = 0; t < TXG_DEFER_SIZE; t++)
			space_map_create(&msp->ms_defermap[t], sm->sm_start,
			    sm->sm_size, sm->sm_shift, sm->sm_lock);

		vdev_space_update(vd, 0, 0, sm->sm_size);
	}

	/*
	 * alloc_delta: change in allocated space recorded by the sync.
	 * defer_delta: space entering the defer map (this txg's frees)
	 * minus the space leaving it (the oldest deferred frees).
	 */
	alloc_delta = smosync->smo_alloc - smo->smo_alloc;
	defer_delta = freed_map->sm_space - defer_map->sm_space;

	vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);

	ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
	ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);

	/*
	 * If there's a space_map_load() in progress, wait for it to complete
	 * so that we have a consistent view of the in-core space map.
	 * Then, add defer_map (oldest deferred frees) to this map and
	 * transfer freed_map (this txg's frees) to defer_map.
	 */
	space_map_load_wait(sm);
	space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
	space_map_vacate(freed_map, space_map_add, defer_map);

	/* The synced header becomes the current header. */
	*smo = *smosync;

	msp->ms_deferspace += defer_delta;
	ASSERT3S(msp->ms_deferspace, >=, 0);
	ASSERT3S(msp->ms_deferspace, <=, sm->sm_size);
	if (msp->ms_deferspace != 0) {
		/*
		 * Keep syncing this metaslab until all deferred frees
		 * are back in circulation.
		 */
		vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
	}

	/*
	 * If the map is loaded but no longer active, evict it as soon as all
	 * future allocations have synced.  (If we unloaded it now and then
	 * loaded a moment later, the map wouldn't reflect those allocations.)
	 */
	if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
		int evictable = 1;

		for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
			if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space)
				evictable = 0;

		if (evictable && !metaslab_debug)
			space_map_unload(sm);
	}

	/* Re-sort the metaslab under its freshly computed weight. */
	metaslab_group_sort(mg, msp, metaslab_weight(msp));

	mutex_exit(&msp->ms_lock);
}

/*
 * Recompute the weights of the group's metaslabs that lie at or below
 * the bonus area, then prefetch the next likely allocation candidates.
 */
void
metaslab_sync_reassess(metaslab_group_t *mg)
{
	vdev_t *vd = mg->mg_vd;

	/*
	 * Re-evaluate all metaslabs which have lower offsets than the
	 * bonus area.
	 */
	for (int m = 0; m < vd->vdev_ms_count; m++) {
		metaslab_t *msp = vd->vdev_ms[m];

		if (msp->ms_map.sm_start > mg->mg_bonus_area)
			break;

		mutex_enter(&msp->ms_lock);
		metaslab_group_sort(mg, msp, metaslab_weight(msp));
		mutex_exit(&msp->ms_lock);
	}

	/*
	 * Prefetch the next potential metaslabs
	 */
	metaslab_prefetch(mg);
}

/*
 * Return the distance in bytes between the metaslab and the DVA, measured
 * in whole metaslabs.  If the DVA lives on a different vdev, return
 * 1ULL << 63.
 */
static uint64_t
metaslab_distance(metaslab_t *msp, dva_t *dva)
{
	uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift;
	uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift;
	uint64_t start = msp->ms_map.sm_start >> ms_shift;

	if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
		return (1ULL << 63);

	if (offset < start)
		return ((start - offset) << ms_shift);
	if (offset > start)
		return ((offset - start) << ms_shift);
	return (0);
}

/*
 * Allocate `size' bytes from one of the group's metaslabs on behalf of
 * `txg'.  dva[0..d-1] are the DVAs already chosen for the same block.
 * Returns the allocated offset, or -1ULL if no metaslab in the group can
 * satisfy the request.
 */
static uint64_t
metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
    uint64_t min_distance, dva_t *dva, int d)
{
	metaslab_t *msp = NULL;
	uint64_t offset = -1ULL;
	avl_tree_t *t = &mg->mg_metaslab_tree;
	uint64_t activation_weight;
	uint64_t target_distance;
	int i;

	/*
	 * Activate as secondary if an earlier DVA of this block already
	 * lives on this group's vdev; otherwise activate as primary.
	 */
	activation_weight = METASLAB_WEIGHT_PRIMARY;
	for (i = 0; i < d; i++) {
		if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
			activation_weight = METASLAB_WEIGHT_SECONDARY;
			break;
		}
	}

	for (;;) {
		boolean_t was_active;

		mutex_enter(&mg->mg_lock);
		for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
			if
			    (msp->ms_weight < size) {
				/*
				 * The tree is sorted by descending weight,
				 * so no later metaslab can fit either.
				 */
				mutex_exit(&mg->mg_lock);
				return (-1ULL);
			}

			was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
			if (activation_weight == METASLAB_WEIGHT_PRIMARY)
				break;

			/* Empty metaslabs must be half again as far away. */
			target_distance = min_distance +
			    (msp->ms_smo.smo_alloc ? 0 : min_distance >> 1);

			for (i = 0; i < d; i++)
				if (metaslab_distance(msp, &dva[i]) <
				    target_distance)
					break;
			if (i == d)
				break;
		}
		mutex_exit(&mg->mg_lock);
		if (msp == NULL)
			return (-1ULL);

		mutex_enter(&msp->ms_lock);

		/*
		 * Ensure that the metaslab we have selected is still
		 * capable of handling our request. It's possible that
		 * another thread may have changed the weight while we
		 * were blocked on the metaslab lock.
		 */
		if (msp->ms_weight < size || (was_active &&
		    !(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
		    activation_weight == METASLAB_WEIGHT_PRIMARY)) {
			mutex_exit(&msp->ms_lock);
			continue;
		}

		/*
		 * A primary request landed on a metaslab that is active as
		 * secondary: passivate it and pick again.
		 */
		if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) &&
		    activation_weight == METASLAB_WEIGHT_PRIMARY) {
			metaslab_passivate(msp,
			    msp->ms_weight & ~METASLAB_ACTIVE_MASK);
			mutex_exit(&msp->ms_lock);
			continue;
		}

		if (metaslab_activate(msp, activation_weight, size) != 0) {
			mutex_exit(&msp->ms_lock);
			continue;
		}

		/* On success, leave the loop with ms_lock still held. */
		if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
			break;

		metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));

		mutex_exit(&msp->ms_lock);
	}

	/* First allocation from this metaslab in this txg: mark it dirty. */
	if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
		vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);

	space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);

	mutex_exit(&msp->ms_lock);

	return (offset);
}

/*
 * Allocate a block for the specified i/o.
 *
 * Fills in dva[d] and returns 0 on success.  Returns ENOSPC if no
 * metaslab group in the class can satisfy the request.
 */
static int
metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
    dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags)
{
	metaslab_group_t *mg, *rotor;
	vdev_t *vd;
	int dshift = 3;
	int all_zero;
	int zio_lock = B_FALSE;
	boolean_t allocatable;
	uint64_t offset = -1ULL;
	uint64_t asize;
	uint64_t distance;

	ASSERT(!DVA_IS_VALID(&dva[d]));

	/*
	 * For testing, make some blocks above a certain size be gang blocks.
	 */
	if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
		return (ENOSPC);

	/*
	 * Start at the rotor and loop through all mgs until we find something.
	 * Note that there's no locking on mc_rotor or mc_aliquot because
	 * nothing actually breaks if we miss a few updates -- we just won't
	 * allocate quite as evenly.  It all balances out over time.
	 *
	 * If we are doing ditto or log blocks, try to spread them across
	 * consecutive vdevs.  If we're forced to reuse a vdev before we've
	 * allocated all of our ditto blocks, then try and spread them out on
	 * that vdev as much as possible.  If it turns out to not be possible,
	 * gradually lower our standards until anything becomes acceptable.
	 * Also, allocating on consecutive vdevs (as opposed to random vdevs)
	 * gives us hope of containing our fault domains to something we're
	 * able to reason about.  Otherwise, any two top-level vdev failures
	 * will guarantee the loss of data.  With consecutive allocation,
	 * only two adjacent top-level vdev failures will result in data loss.
	 *
	 * If we are doing gang blocks (hintdva is non-NULL), try to keep
	 * ourselves on the same vdev as our gang block header.  That
	 * way, we can hope for locality in vdev_cache, plus it makes our
	 * fault domains something tractable.
	 */
	if (hintdva) {
		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d]));

		/*
		 * It's possible the vdev we're using as the hint no
		 * longer exists (i.e. removed).  Consult the rotor when
		 * all else fails.
+ */ + if (vd != NULL) { + mg = vd->vdev_mg; + + if (flags & METASLAB_HINTBP_AVOID && + mg->mg_next != NULL) + mg = mg->mg_next; + } else { + mg = mc->mc_rotor; + } + } else if (d != 0) { + vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); + mg = vd->vdev_mg->mg_next; + } else { + mg = mc->mc_rotor; + } + + /* + * If the hint put us into the wrong metaslab class, or into a + * metaslab group that has been passivated, just follow the rotor. + */ + if (mg->mg_class != mc || mg->mg_activation_count <= 0) + mg = mc->mc_rotor; + + rotor = mg; +top: + all_zero = B_TRUE; + do { + ASSERT(mg->mg_activation_count == 1); + + vd = mg->mg_vd; + + /* + * Don't allocate from faulted devices. + */ + if (zio_lock) { + spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER); + allocatable = vdev_allocatable(vd); + spa_config_exit(spa, SCL_ZIO, FTAG); + } else { + allocatable = vdev_allocatable(vd); + } + if (!allocatable) + goto next; + + /* + * Avoid writing single-copy data to a failing vdev + */ + if ((vd->vdev_stat.vs_write_errors > 0 || + vd->vdev_state < VDEV_STATE_HEALTHY) && + d == 0 && dshift == 3) { + all_zero = B_FALSE; + goto next; + } + + ASSERT(mg->mg_class == mc); + + distance = vd->vdev_asize >> dshift; + if (distance <= (1ULL << vd->vdev_ms_shift)) + distance = 0; + else + all_zero = B_FALSE; + + asize = vdev_psize_to_asize(vd, psize); + ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); + + offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); + if (offset != -1ULL) { + /* + * If we've just selected this metaslab group, + * figure out whether the corresponding vdev is + * over- or under-used relative to the pool, + * and set an allocation bias to even it out. + */ + if (mc->mc_aliquot == 0) { + vdev_stat_t *vs = &vd->vdev_stat; + int64_t vu, cu; + + /* + * Determine percent used in units of 0..1024. + * (This is just to avoid floating point.) 
+ */ + vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); + cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); + + /* + * Bias by at most +/- 25% of the aliquot. + */ + mg->mg_bias = ((cu - vu) * + (int64_t)mg->mg_aliquot) / (1024 * 4); + } + + if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= + mg->mg_aliquot + mg->mg_bias) { + mc->mc_rotor = mg->mg_next; + mc->mc_aliquot = 0; + } + + DVA_SET_VDEV(&dva[d], vd->vdev_id); + DVA_SET_OFFSET(&dva[d], offset); + DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER)); + DVA_SET_ASIZE(&dva[d], asize); + + return (0); + } +next: + mc->mc_rotor = mg->mg_next; + mc->mc_aliquot = 0; + } while ((mg = mg->mg_next) != rotor); + + if (!all_zero) { + dshift++; + ASSERT(dshift < 64); + goto top; + } + + if (!allocatable && !zio_lock) { + dshift = 3; + zio_lock = B_TRUE; + goto top; + } + + bzero(&dva[d], sizeof (dva_t)); + + return (ENOSPC); +} + +/* + * Free the block represented by DVA in the context of the specified + * transaction group. + */ +static void +metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now) +{ + uint64_t vdev = DVA_GET_VDEV(dva); + uint64_t offset = DVA_GET_OFFSET(dva); + uint64_t size = DVA_GET_ASIZE(dva); + vdev_t *vd; + metaslab_t *msp; + + ASSERT(DVA_IS_VALID(dva)); + + if (txg > spa_freeze_txg(spa)) + return; + + if ((vd = vdev_lookup_top(spa, vdev)) == NULL || + (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) { + cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu", + (u_longlong_t)vdev, (u_longlong_t)offset); + ASSERT(0); + return; + } + + msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + + if (DVA_GET_GANG(dva)) + size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); + + mutex_enter(&msp->ms_lock); + + if (now) { + space_map_remove(&msp->ms_allocmap[txg & TXG_MASK], + offset, size); + space_map_free(&msp->ms_map, offset, size); + } else { + if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0) + vdev_dirty(vd, VDD_METASLAB, msp, txg); + space_map_add(&msp->ms_freemap[txg & 
TXG_MASK], offset, size); + } + + mutex_exit(&msp->ms_lock); +} + +/* + * Intent log support: upon opening the pool after a crash, notify the SPA + * of blocks that the intent log has allocated for immediate write, but + * which are still considered free by the SPA because the last transaction + * group didn't commit yet. + */ +static int +metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) +{ + uint64_t vdev = DVA_GET_VDEV(dva); + uint64_t offset = DVA_GET_OFFSET(dva); + uint64_t size = DVA_GET_ASIZE(dva); + vdev_t *vd; + metaslab_t *msp; + int error = 0; + + ASSERT(DVA_IS_VALID(dva)); + + if ((vd = vdev_lookup_top(spa, vdev)) == NULL || + (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) + return (ENXIO); + + msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + + if (DVA_GET_GANG(dva)) + size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); + + mutex_enter(&msp->ms_lock); + + if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) + error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); + + if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) + error = ENOENT; + + if (error || txg == 0) { /* txg == 0 indicates dry run */ + mutex_exit(&msp->ms_lock); + return (error); + } + + space_map_claim(&msp->ms_map, offset, size); + + if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */ + if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) + vdev_dirty(vd, VDD_METASLAB, msp, txg); + space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); + } + + mutex_exit(&msp->ms_lock); + + return (0); +} + +int +metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, + int ndvas, uint64_t txg, blkptr_t *hintbp, int flags) +{ + dva_t *dva = bp->blk_dva; + dva_t *hintdva = hintbp->blk_dva; + int error = 0; + + ASSERT(bp->blk_birth == 0); + ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); + + spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); + + if (mc->mc_rotor == NULL) { /* no vdevs in this class */ + spa_config_exit(spa, 
SCL_ALLOC, FTAG); + return (ENOSPC); + } + + ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); + ASSERT(BP_GET_NDVAS(bp) == 0); + ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); + + for (int d = 0; d < ndvas; d++) { + error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, + txg, flags); + if (error) { + for (d--; d >= 0; d--) { + metaslab_free_dva(spa, &dva[d], txg, B_TRUE); + bzero(&dva[d], sizeof (dva_t)); + } + spa_config_exit(spa, SCL_ALLOC, FTAG); + return (error); + } + } + ASSERT(error == 0); + ASSERT(BP_GET_NDVAS(bp) == ndvas); + + spa_config_exit(spa, SCL_ALLOC, FTAG); + + BP_SET_BIRTH(bp, txg, txg); + + return (0); +} + +void +metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) +{ + const dva_t *dva = bp->blk_dva; + int ndvas = BP_GET_NDVAS(bp); + + ASSERT(!BP_IS_HOLE(bp)); + ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); + + spa_config_enter(spa, SCL_FREE, FTAG, RW_READER); + + for (int d = 0; d < ndvas; d++) + metaslab_free_dva(spa, &dva[d], txg, now); + + spa_config_exit(spa, SCL_FREE, FTAG); +} + +int +metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) +{ + const dva_t *dva = bp->blk_dva; + int ndvas = BP_GET_NDVAS(bp); + int error = 0; + + ASSERT(!BP_IS_HOLE(bp)); + + if (txg != 0) { + /* + * First do a dry run to make sure all DVAs are claimable, + * so we don't have to unwind from partial failures below. 
+ */ + if ((error = metaslab_claim(spa, bp, 0)) != 0) + return (error); + } + + spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); + + for (int d = 0; d < ndvas; d++) + if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0) + break; + + spa_config_exit(spa, SCL_ALLOC, FTAG); + + ASSERT(error == 0 || txg == 0); + + return (error); +} diff --git a/uts/common/fs/zfs/refcount.c b/uts/common/fs/zfs/refcount.c new file mode 100644 index 000000000000..600132f080e7 --- /dev/null +++ b/uts/common/fs/zfs/refcount.c @@ -0,0 +1,223 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/refcount.h> + +#ifdef ZFS_DEBUG + +#ifdef _KERNEL +int reference_tracking_enable = FALSE; /* runs out of memory too easily */ +#else +int reference_tracking_enable = TRUE; +#endif +int reference_history = 4; /* tunable */ + +static kmem_cache_t *reference_cache; +static kmem_cache_t *reference_history_cache; + +void +refcount_init(void) +{ + reference_cache = kmem_cache_create("reference_cache", + sizeof (reference_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + + reference_history_cache = kmem_cache_create("reference_history_cache", + sizeof (uint64_t), 0, NULL, NULL, NULL, NULL, NULL, 0); +} + +void +refcount_fini(void) +{ + kmem_cache_destroy(reference_cache); + kmem_cache_destroy(reference_history_cache); +} + +void +refcount_create(refcount_t *rc) +{ + mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL); + list_create(&rc->rc_list, sizeof (reference_t), + offsetof(reference_t, ref_link)); + list_create(&rc->rc_removed, sizeof (reference_t), + offsetof(reference_t, ref_link)); + rc->rc_count = 0; + rc->rc_removed_count = 0; +} + +void +refcount_destroy_many(refcount_t *rc, uint64_t number) +{ + reference_t *ref; + + ASSERT(rc->rc_count == number); + while (ref = list_head(&rc->rc_list)) { + list_remove(&rc->rc_list, ref); + kmem_cache_free(reference_cache, ref); + } + list_destroy(&rc->rc_list); + + while (ref = list_head(&rc->rc_removed)) { + list_remove(&rc->rc_removed, ref); + kmem_cache_free(reference_history_cache, ref->ref_removed); + kmem_cache_free(reference_cache, ref); + } + list_destroy(&rc->rc_removed); + mutex_destroy(&rc->rc_mtx); +} + +void +refcount_destroy(refcount_t *rc) +{ + refcount_destroy_many(rc, 0); +} + +int +refcount_is_zero(refcount_t *rc) +{ + ASSERT(rc->rc_count >= 0); + return (rc->rc_count == 0); +} + +int64_t +refcount_count(refcount_t *rc) +{ + ASSERT(rc->rc_count >= 0); + return (rc->rc_count); +} + +int64_t +refcount_add_many(refcount_t *rc, uint64_t number, void *holder) +{ + 
reference_t *ref; + int64_t count; + + if (reference_tracking_enable) { + ref = kmem_cache_alloc(reference_cache, KM_SLEEP); + ref->ref_holder = holder; + ref->ref_number = number; + } + mutex_enter(&rc->rc_mtx); + ASSERT(rc->rc_count >= 0); + if (reference_tracking_enable) + list_insert_head(&rc->rc_list, ref); + rc->rc_count += number; + count = rc->rc_count; + mutex_exit(&rc->rc_mtx); + + return (count); +} + +int64_t +refcount_add(refcount_t *rc, void *holder) +{ + return (refcount_add_many(rc, 1, holder)); +} + +int64_t +refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) +{ + reference_t *ref; + int64_t count; + + mutex_enter(&rc->rc_mtx); + ASSERT(rc->rc_count >= number); + + if (!reference_tracking_enable) { + rc->rc_count -= number; + count = rc->rc_count; + mutex_exit(&rc->rc_mtx); + return (count); + } + + for (ref = list_head(&rc->rc_list); ref; + ref = list_next(&rc->rc_list, ref)) { + if (ref->ref_holder == holder && ref->ref_number == number) { + list_remove(&rc->rc_list, ref); + if (reference_history > 0) { + ref->ref_removed = + kmem_cache_alloc(reference_history_cache, + KM_SLEEP); + list_insert_head(&rc->rc_removed, ref); + rc->rc_removed_count++; + if (rc->rc_removed_count >= reference_history) { + ref = list_tail(&rc->rc_removed); + list_remove(&rc->rc_removed, ref); + kmem_cache_free(reference_history_cache, + ref->ref_removed); + kmem_cache_free(reference_cache, ref); + rc->rc_removed_count--; + } + } else { + kmem_cache_free(reference_cache, ref); + } + rc->rc_count -= number; + count = rc->rc_count; + mutex_exit(&rc->rc_mtx); + return (count); + } + } + panic("No such hold %p on refcount %llx", holder, + (u_longlong_t)(uintptr_t)rc); + return (-1); +} + +int64_t +refcount_remove(refcount_t *rc, void *holder) +{ + return (refcount_remove_many(rc, 1, holder)); +} + +void +refcount_transfer(refcount_t *dst, refcount_t *src) +{ + int64_t count, removed_count; + list_t list, removed; + + list_create(&list, sizeof (reference_t), + 
offsetof(reference_t, ref_link)); + list_create(&removed, sizeof (reference_t), + offsetof(reference_t, ref_link)); + + mutex_enter(&src->rc_mtx); + count = src->rc_count; + removed_count = src->rc_removed_count; + src->rc_count = 0; + src->rc_removed_count = 0; + list_move_tail(&list, &src->rc_list); + list_move_tail(&removed, &src->rc_removed); + mutex_exit(&src->rc_mtx); + + mutex_enter(&dst->rc_mtx); + dst->rc_count += count; + dst->rc_removed_count += removed_count; + list_move_tail(&dst->rc_list, &list); + list_move_tail(&dst->rc_removed, &removed); + mutex_exit(&dst->rc_mtx); + + list_destroy(&list); + list_destroy(&removed); +} + +#endif /* ZFS_DEBUG */ diff --git a/uts/common/fs/zfs/rrwlock.c b/uts/common/fs/zfs/rrwlock.c new file mode 100644 index 000000000000..4cef53f95132 --- /dev/null +++ b/uts/common/fs/zfs/rrwlock.c @@ -0,0 +1,264 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/refcount.h> +#include <sys/rrwlock.h> + +/* + * This file contains the implementation of a re-entrant read + * reader/writer lock (aka "rrwlock"). 
+ * + * This is a normal reader/writer lock with the additional feature + * of allowing threads who have already obtained a read lock to + * re-enter another read lock (re-entrant read) - even if there are + * waiting writers. + * + * Callers who have not obtained a read lock give waiting writers priority. + * + * The rrwlock_t lock does not allow re-entrant writers, nor does it + * allow a re-entrant mix of reads and writes (that is, it does not + * allow a caller who has already obtained a read lock to be able to + * then grab a write lock without first dropping all read locks, and + * vice versa). + * + * The rrwlock_t uses tsd (thread specific data) to keep a list of + * nodes (rrw_node_t), where each node keeps track of which specific + * lock (rrw_node_t::rn_rrl) the thread has grabbed. Since re-entering + * should be rare, a thread that grabs multiple reads on the same rrwlock_t + * will store multiple rrw_node_ts of the same 'rn_rrl'. Nodes on the + * tsd list can represent a different rrwlock_t. This allows a thread + * to enter multiple and unique rrwlock_ts for read locks at the same time. + * + * Since using tsd incurs some overhead, the rrwlock_t only needs to + * keep tsd data when writers are waiting. If no writers are waiting, then + * a reader just bumps the anonymous read count (rr_anon_rcount) - no tsd + * is needed. Once a writer attempts to grab the lock, readers then + * keep tsd data and bump the linked readers count (rr_linked_rcount). + * + * If there are waiting writers and there are anonymous readers, then a + * reader doesn't know if it is a re-entrant lock. But since it may be one, + * we allow the read to proceed (otherwise it could deadlock). Since once + * waiting writers are active, readers no longer bump the anonymous count, + * the anonymous readers will eventually flush themselves out. At this point, + * readers will be able to tell if they are a re-entrant lock (have a + * rrw_node_t entry for the lock) or not. 
If they are a re-entrant lock, then + * we must let them proceed. If they are not, then the reader blocks for the + * waiting writers. Hence, we do not starve writers. + */ + +/* global key for TSD */ +uint_t rrw_tsd_key; + +typedef struct rrw_node { + struct rrw_node *rn_next; + rrwlock_t *rn_rrl; +} rrw_node_t; + +static rrw_node_t * +rrn_find(rrwlock_t *rrl) +{ + rrw_node_t *rn; + + if (refcount_count(&rrl->rr_linked_rcount) == 0) + return (NULL); + + for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { + if (rn->rn_rrl == rrl) + return (rn); + } + return (NULL); +} + +/* + * Add a node to the head of the singly linked list. + */ +static void +rrn_add(rrwlock_t *rrl) +{ + rrw_node_t *rn; + + rn = kmem_alloc(sizeof (*rn), KM_SLEEP); + rn->rn_rrl = rrl; + rn->rn_next = tsd_get(rrw_tsd_key); + VERIFY(tsd_set(rrw_tsd_key, rn) == 0); +} + +/* + * If a node is found for 'rrl', then remove the node from this + * thread's list and return TRUE; otherwise return FALSE. + */ +static boolean_t +rrn_find_and_remove(rrwlock_t *rrl) +{ + rrw_node_t *rn; + rrw_node_t *prev = NULL; + + if (refcount_count(&rrl->rr_linked_rcount) == 0) + return (B_FALSE); + + for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { + if (rn->rn_rrl == rrl) { + if (prev) + prev->rn_next = rn->rn_next; + else + VERIFY(tsd_set(rrw_tsd_key, rn->rn_next) == 0); + kmem_free(rn, sizeof (*rn)); + return (B_TRUE); + } + prev = rn; + } + return (B_FALSE); +} + +void +rrw_init(rrwlock_t *rrl) +{ + mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL); + rrl->rr_writer = NULL; + refcount_create(&rrl->rr_anon_rcount); + refcount_create(&rrl->rr_linked_rcount); + rrl->rr_writer_wanted = B_FALSE; +} + +void +rrw_destroy(rrwlock_t *rrl) +{ + mutex_destroy(&rrl->rr_lock); + cv_destroy(&rrl->rr_cv); + ASSERT(rrl->rr_writer == NULL); + refcount_destroy(&rrl->rr_anon_rcount); + refcount_destroy(&rrl->rr_linked_rcount); +} + +static void 
+rrw_enter_read(rrwlock_t *rrl, void *tag) +{ + mutex_enter(&rrl->rr_lock); +#if !defined(DEBUG) && defined(_KERNEL) + if (!rrl->rr_writer && !rrl->rr_writer_wanted) { + rrl->rr_anon_rcount.rc_count++; + mutex_exit(&rrl->rr_lock); + return; + } + DTRACE_PROBE(zfs__rrwfastpath__rdmiss); +#endif + ASSERT(rrl->rr_writer != curthread); + ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0); + + while (rrl->rr_writer || (rrl->rr_writer_wanted && + refcount_is_zero(&rrl->rr_anon_rcount) && + rrn_find(rrl) == NULL)) + cv_wait(&rrl->rr_cv, &rrl->rr_lock); + + if (rrl->rr_writer_wanted) { + /* may or may not be a re-entrant enter */ + rrn_add(rrl); + (void) refcount_add(&rrl->rr_linked_rcount, tag); + } else { + (void) refcount_add(&rrl->rr_anon_rcount, tag); + } + ASSERT(rrl->rr_writer == NULL); + mutex_exit(&rrl->rr_lock); +} + +static void +rrw_enter_write(rrwlock_t *rrl) +{ + mutex_enter(&rrl->rr_lock); + ASSERT(rrl->rr_writer != curthread); + + while (refcount_count(&rrl->rr_anon_rcount) > 0 || + refcount_count(&rrl->rr_linked_rcount) > 0 || + rrl->rr_writer != NULL) { + rrl->rr_writer_wanted = B_TRUE; + cv_wait(&rrl->rr_cv, &rrl->rr_lock); + } + rrl->rr_writer_wanted = B_FALSE; + rrl->rr_writer = curthread; + mutex_exit(&rrl->rr_lock); +} + +void +rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag) +{ + if (rw == RW_READER) + rrw_enter_read(rrl, tag); + else + rrw_enter_write(rrl); +} + +void +rrw_exit(rrwlock_t *rrl, void *tag) +{ + mutex_enter(&rrl->rr_lock); +#if !defined(DEBUG) && defined(_KERNEL) + if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) { + rrl->rr_anon_rcount.rc_count--; + if (rrl->rr_anon_rcount.rc_count == 0) + cv_broadcast(&rrl->rr_cv); + mutex_exit(&rrl->rr_lock); + return; + } + DTRACE_PROBE(zfs__rrwfastpath__exitmiss); +#endif + ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) || + !refcount_is_zero(&rrl->rr_linked_rcount) || + rrl->rr_writer != NULL); + + if (rrl->rr_writer == NULL) { + int64_t count; + if (rrn_find_and_remove(rrl)) + count = 
refcount_remove(&rrl->rr_linked_rcount, tag); + else + count = refcount_remove(&rrl->rr_anon_rcount, tag); + if (count == 0) + cv_broadcast(&rrl->rr_cv); + } else { + ASSERT(rrl->rr_writer == curthread); + ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) && + refcount_is_zero(&rrl->rr_linked_rcount)); + rrl->rr_writer = NULL; + cv_broadcast(&rrl->rr_cv); + } + mutex_exit(&rrl->rr_lock); +} + +boolean_t +rrw_held(rrwlock_t *rrl, krw_t rw) +{ + boolean_t held; + + mutex_enter(&rrl->rr_lock); + if (rw == RW_WRITER) { + held = (rrl->rr_writer == curthread); + } else { + held = (!refcount_is_zero(&rrl->rr_anon_rcount) || + !refcount_is_zero(&rrl->rr_linked_rcount)); + } + mutex_exit(&rrl->rr_lock); + + return (held); +} diff --git a/uts/common/fs/zfs/sa.c b/uts/common/fs/zfs/sa.c new file mode 100644 index 000000000000..4cb4546b2511 --- /dev/null +++ b/uts/common/fs/zfs/sa.c @@ -0,0 +1,1970 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/dmu.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_objset.h> +#include <sys/dbuf.h> +#include <sys/dnode.h> +#include <sys/zap.h> +#include <sys/sa.h> +#include <sys/sunddi.h> +#include <sys/sa_impl.h> +#include <sys/dnode.h> +#include <sys/errno.h> +#include <sys/zfs_context.h> + +/* + * ZFS System attributes: + * + * A generic mechanism to allow for arbitrary attributes + * to be stored in a dnode. The data will be stored in the bonus buffer of + * the dnode and if necessary a special "spill" block will be used to handle + * overflow situations. The spill block will be sized to fit the data + * from 512 - 128K. When a spill block is used the BP (blkptr_t) for the + * spill block is stored at the end of the current bonus buffer. Any + * attributes that would be in the way of the blkptr_t will be relocated + * into the spill block. + * + * Attribute registration: + * + * Stored persistently on a per dataset basis + * a mapping between attribute "string" names and their actual attribute + * numeric values, length, and byteswap function. The names are only used + * during registration. All attributes are known by their unique attribute + * id value. If an attribute can have a variable size then the value + * 0 will be used to indicate this. + * + * Attribute Layout: + * + * Attribute layouts are a way to compactly store multiple attributes, but + * without taking the overhead associated with managing each attribute + * individually. Since you will typically have the same set of attributes + * stored in the same order a single table will be used to represent that + * layout. 
The ZPL, for example, will usually have only about 10 different + * layouts (regular files, device files, symlinks, + * regular files + scanstamp, files/dir with extended attributes, and then + * you have the possibility of all of those minus ACL, because it would + * be kicked out into the spill block). + * + * Layouts are simply an array of the attributes and their + * ordering, i.e. [0, 1, 4, 5, 2] + * + * Each distinct layout is given a unique layout number and that is what is + * stored in the header at the beginning of the SA data buffer. + * + * A layout only covers a single dbuf (bonus or spill). If a set of + * attributes is split up between the bonus buffer and a spill buffer then + * two different layouts will be used. This allows us to byteswap the + * spill without looking at the bonus buffer and keeps the on disk format of + * the bonus and spill buffer the same. + * + * Adding a single attribute will cause the entire set of attributes to + * be rewritten and could result in a new layout number being constructed + * as part of the rewrite if no such layout exists for the new set of + * attributes. The new attribute will be appended to the end of the already + * existing attributes. + * + * Both the attribute registration and attribute layout information are + * stored in normal ZAP attributes. There should be a small number of + * known layouts and the set of attributes is assumed to typically be quite + * small. + * + * The registered attributes and layout "table" information is maintained + * in core and a special "sa_os_t" is attached to the objset_t. + * + * A special interface is provided to allow for quickly applying + * a large set of attributes at once. sa_replace_all_by_template() is + * used to set an array of attributes. This is used by the ZPL when + * creating a brand new file. 
The template that is passed into the function + * specifies the attribute, size for variable length attributes, location of + * data and special "data locator" function if the data isn't in a contiguous + * location. + * + * Byteswap implications: + * Since the SA attributes are not entirely self describing we can't do + * the normal byteswap processing. The special ZAP layout attribute and + * attribute registration attributes define the byteswap function and the + * size of the attributes, unless it is variable sized. + * The normal ZFS byteswapping infrastructure assumes you don't need + * to read any objects in order to do the necessary byteswapping. Whereas + * SA attributes can only be properly byteswapped if the dataset is opened + * and the layout/attribute ZAP attributes are available. Because of this + * the SA attributes will be byteswapped when they are first accessed by + * the SA code that will read the SA data. + */ + +typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, + uint16_t length, int length_idx, boolean_t, void *userp); + +static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); +static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); +static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, + void *data); +static void sa_idx_tab_rele(objset_t *os, void *arg); +static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, + int buflen); +static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, + sa_data_op_t action, sa_data_locator_t *locator, void *datastart, + uint16_t buflen, dmu_tx_t *tx); + +arc_byteswap_func_t *sa_bswap_table[] = { + byteswap_uint64_array, + byteswap_uint32_array, + byteswap_uint16_array, + byteswap_uint8_array, + zfs_acl_byteswap, +}; + +#define SA_COPY_DATA(f, s, t, l) \ + { \ + if (f == NULL) { \ + if (l == 8) { \ + *(uint64_t *)t = *(uint64_t *)s; \ + } else if (l == 16) { \ + *(uint64_t *)t = *(uint64_t *)s; \ + *(uint64_t 
*)((uintptr_t)t + 8) = \ + *(uint64_t *)((uintptr_t)s + 8); \ + } else { \ + bcopy(s, t, l); \ + } \ + } else \ + sa_copy_data(f, s, t, l); \ + } + +/* + * This table is fixed and cannot be changed. Its purpose is to + * allow the SA code to work with both old/new ZPL file systems. + * It contains the list of legacy attributes. These attributes aren't + * stored in the "attribute" registry zap objects, since older ZPL file systems + * won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will + * use this static table. + */ +sa_attr_reg_t sa_legacy_attrs[] = { + {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, + {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, + {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, + {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, + {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, + {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, + {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, + {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, + {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, + {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, + {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, + {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, + {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, + {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, + {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, + {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, +}; + +/* + * ZPL legacy layout + * This is only used for objects of type DMU_OT_ZNODE + */ +sa_attr_type_t sa_legacy_zpl_layout[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +}; + +/* + * Special dummy layout used for buffers with no attributes. 
+ */ + +sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; + +static int sa_legacy_attr_count = 16; +static kmem_cache_t *sa_cache = NULL; + /* kmem cache constructor for sa_handle_t: clears the handle's table, objset, user and dbuf pointers and initializes sa_lock; always returns 0. */ +/*ARGSUSED*/ +static int +sa_cache_constructor(void *buf, void *unused, int kmflag) +{ + sa_handle_t *hdl = buf; + + hdl->sa_bonus_tab = NULL; + hdl->sa_spill_tab = NULL; + hdl->sa_os = NULL; + hdl->sa_userp = NULL; + hdl->sa_bonus = NULL; + hdl->sa_spill = NULL; + mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); + return (0); +} + /* kmem cache destructor for sa_handle_t: destroys the mutex set up by the constructor. */ +/*ARGSUSED*/ +static void +sa_cache_destructor(void *buf, void *unused) +{ + sa_handle_t *hdl = buf; + mutex_destroy(&hdl->sa_lock); +} + /* Create the global "sa_cache" kmem cache of sa_handle_t objects. */ +void +sa_cache_init(void) +{ + sa_cache = kmem_cache_create("sa_cache", + sizeof (sa_handle_t), 0, sa_cache_constructor, + sa_cache_destructor, NULL, NULL, NULL, 0); +} + /* Destroy sa_cache if it was created; a no-op when sa_cache is NULL. */ +void +sa_cache_fini(void) +{ + if (sa_cache) + kmem_cache_destroy(sa_cache); +} + /* Three-way comparison (-1/0/1) of two sa_lot_t nodes by lot_num; presumably the comparator for sa_layout_num_tree - TODO confirm at the avl_create() call. */ +static int +layout_num_compare(const void *arg1, const void *arg2) +{ + const sa_lot_t *node1 = arg1; + const sa_lot_t *node2 = arg2; + + if (node1->lot_num > node2->lot_num) + return (1); + else if (node1->lot_num < node2->lot_num) + return (-1); + return (0); +} + /* Three-way comparison (-1/0/1) of two sa_lot_t nodes by lot_hash, with lot_instance as the tie-breaker for layouts that share a hash. */ +static int +layout_hash_compare(const void *arg1, const void *arg2) +{ + const sa_lot_t *node1 = arg1; + const sa_lot_t *node2 = arg2; + + if (node1->lot_hash > node2->lot_hash) + return (1); + if (node1->lot_hash < node2->lot_hash) + return (-1); + if (node1->lot_instance > node2->lot_instance) + return (1); + if (node1->lot_instance < node2->lot_instance) + return (-1); + return (0); +} + /* NOTE: despite the name and the boolean_t return type, this returns 0 when tbf holds exactly the count attributes in attrs, in the same order, and 1 otherwise; sa_find_layout() tests the result with == 0. */ +boolean_t +sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) +{ + int i; + + if (count != tbf->lot_attr_count) + return (1); + + for (i = 0; i != count; i++) { + if (attrs[i] != tbf->lot_attrs[i]) + return (1); + } + return (0); +} + /* Per-attribute hash: the zfs_crc64_table entry selected by the low 8 bits of the complemented attribute number. */ +#define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF]) + /* Hash a layout by XOR-ing SA_ATTR_HASH() of each attribute into a seed of -1ULL. XOR is commutative, so the result does not depend on attribute order. */ +static uint64_t +sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count) +{ + int i; + uint64_t crc = -1ULL; + + for (i = 0; i != attr_count; 
i++) + crc ^= SA_ATTR_HASH(attrs[i]); + + return (crc); +} + +static int +sa_get_spill(sa_handle_t *hdl) +{ + int rc; + if (hdl->sa_spill == NULL) { + if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, + &hdl->sa_spill)) == 0) + VERIFY(0 == sa_build_index(hdl, SA_SPILL)); + } else { + rc = 0; + } + + return (rc); +} + +/* + * Main attribute lookup/update function + * returns 0 for success or non zero for failures + * + * Operates on bulk array, first failure will abort further processing + */ +int +sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, + sa_data_op_t data_op, dmu_tx_t *tx) +{ + sa_os_t *sa = hdl->sa_os->os_sa; + int i; + int error = 0; + sa_buf_type_t buftypes; + + buftypes = 0; + + ASSERT(count > 0); + for (i = 0; i != count; i++) { + ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); + + bulk[i].sa_addr = NULL; + /* First check the bonus buffer */ + + if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( + hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { + SA_ATTR_INFO(sa, hdl->sa_bonus_tab, + SA_GET_HDR(hdl, SA_BONUS), + bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); + if (tx && !(buftypes & SA_BONUS)) { + dmu_buf_will_dirty(hdl->sa_bonus, tx); + buftypes |= SA_BONUS; + } + } + if (bulk[i].sa_addr == NULL && + ((error = sa_get_spill(hdl)) == 0)) { + if (TOC_ATTR_PRESENT( + hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { + SA_ATTR_INFO(sa, hdl->sa_spill_tab, + SA_GET_HDR(hdl, SA_SPILL), + bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); + if (tx && !(buftypes & SA_SPILL) && + bulk[i].sa_size == bulk[i].sa_length) { + dmu_buf_will_dirty(hdl->sa_spill, tx); + buftypes |= SA_SPILL; + } + } + } + if (error && error != ENOENT) { + return ((error == ECKSUM) ? 
EIO : error); + } + + switch (data_op) { + case SA_LOOKUP: + if (bulk[i].sa_addr == NULL) + return (ENOENT); + if (bulk[i].sa_data) { + SA_COPY_DATA(bulk[i].sa_data_func, + bulk[i].sa_addr, bulk[i].sa_data, + bulk[i].sa_size); + } + continue; + + case SA_UPDATE: + /* existing rewrite of attr */ + if (bulk[i].sa_addr && + bulk[i].sa_size == bulk[i].sa_length) { + SA_COPY_DATA(bulk[i].sa_data_func, + bulk[i].sa_data, bulk[i].sa_addr, + bulk[i].sa_length); + continue; + } else if (bulk[i].sa_addr) { /* attr size change */ + error = sa_modify_attrs(hdl, bulk[i].sa_attr, + SA_REPLACE, bulk[i].sa_data_func, + bulk[i].sa_data, bulk[i].sa_length, tx); + } else { /* adding new attribute */ + error = sa_modify_attrs(hdl, bulk[i].sa_attr, + SA_ADD, bulk[i].sa_data_func, + bulk[i].sa_data, bulk[i].sa_length, tx); + } + if (error) + return (error); + break; + } + } + return (error); +} + +static sa_lot_t * +sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, + uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) +{ + sa_os_t *sa = os->os_sa; + sa_lot_t *tb, *findtb; + int i; + avl_index_t loc; + + ASSERT(MUTEX_HELD(&sa->sa_lock)); + tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); + tb->lot_attr_count = attr_count; + tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, + KM_SLEEP); + bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); + tb->lot_num = lot_num; + tb->lot_hash = hash; + tb->lot_instance = 0; + + if (zapadd) { + char attr_name[8]; + + if (sa->sa_layout_attr_obj == 0) { + sa->sa_layout_attr_obj = zap_create(os, + DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx); + VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1, + &sa->sa_layout_attr_obj, tx) == 0); + } + + (void) snprintf(attr_name, sizeof (attr_name), + "%d", (int)lot_num); + VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj, + attr_name, 2, attr_count, attrs, tx)); + } + + list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), + 
offsetof(sa_idx_tab_t, sa_next)); + + for (i = 0; i != attr_count; i++) { + if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) + tb->lot_var_sizes++; + } + + avl_add(&sa->sa_layout_num_tree, tb); + + /* verify we don't have a hash collision */ + if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { + for (; findtb && findtb->lot_hash == hash; + findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { + if (findtb->lot_instance != tb->lot_instance) + break; + tb->lot_instance++; + } + } + avl_add(&sa->sa_layout_hash_tree, tb); + return (tb); +} + +static void +sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, + int count, dmu_tx_t *tx, sa_lot_t **lot) +{ + sa_lot_t *tb, tbsearch; + avl_index_t loc; + sa_os_t *sa = os->os_sa; + boolean_t found = B_FALSE; + + mutex_enter(&sa->sa_lock); + tbsearch.lot_hash = hash; + tbsearch.lot_instance = 0; + tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); + if (tb) { + for (; tb && tb->lot_hash == hash; + tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { + if (sa_layout_equal(tb, attrs, count) == 0) { + found = B_TRUE; + break; + } + } + } + if (!found) { + tb = sa_add_layout_entry(os, attrs, count, + avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); + } + mutex_exit(&sa->sa_lock); + *lot = tb; +} + +static int +sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) +{ + int error; + uint32_t blocksize; + + if (size == 0) { + blocksize = SPA_MINBLOCKSIZE; + } else if (size > SPA_MAXBLOCKSIZE) { + ASSERT(0); + return (EFBIG); + } else { + blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); + } + + error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); + ASSERT(error == 0); + return (error); +} + +static void +sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) +{ + if (func == NULL) { + bcopy(datastart, target, buflen); + } else { + boolean_t start; + int bytes; + void *dataptr; + void *saptr = target; + uint32_t length; + + start 
= B_TRUE; + bytes = 0; + while (bytes < buflen) { + func(&dataptr, &length, buflen, start, datastart); + bcopy(dataptr, saptr, length); + saptr = (void *)((caddr_t)saptr + length); + bytes += length; + start = B_FALSE; + } + } +} + +/* + * Determine several different sizes + * first the sa header size + * the number of bytes to be stored + * if spill would occur the index in the attribute array is returned + * + * the boolean will_spill will be set when spilling is necessary. It + * is only set when the buftype is SA_BONUS + */ +static int +sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, + dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, + boolean_t *will_spill) +{ + int var_size = 0; + int i; + int full_space; + int hdrsize; + boolean_t done = B_FALSE; + + if (buftype == SA_BONUS && sa->sa_force_spill) { + *total = 0; + *index = 0; + *will_spill = B_TRUE; + return (0); + } + + *index = -1; + *total = 0; + + if (buftype == SA_BONUS) + *will_spill = B_FALSE; + + hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : + sizeof (sa_hdr_phys_t); + + full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size; + + for (i = 0; i != attr_count; i++) { + boolean_t is_var_sz; + + *total += attr_desc[i].sa_length; + if (done) + goto next; + + is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); + if (is_var_sz) { + var_size++; + } + + if (is_var_sz && var_size > 1) { + if (P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + + *total < full_space) { + hdrsize += sizeof (uint16_t); + } else { + done = B_TRUE; + *index = i; + if (buftype == SA_BONUS) + *will_spill = B_TRUE; + continue; + } + } + + /* + * find index of where spill *could* occur. + * Then continue to count of remainder attribute + * space. The sum is used later for sizing bonus + * and spill buffer. 
+ */ + if (buftype == SA_BONUS && *index == -1 && + P2ROUNDUP(*total + hdrsize, 8) > + (full_space - sizeof (blkptr_t))) { + *index = i; + done = B_TRUE; + } + +next: + if (P2ROUNDUP(*total + hdrsize, 8) > full_space && + buftype == SA_BONUS) + *will_spill = B_TRUE; + } + + hdrsize = P2ROUNDUP(hdrsize, 8); + return (hdrsize); +} + +#define BUF_SPACE_NEEDED(total, header) (total + header) + +/* + * Find layout that corresponds to ordering of attributes + * If not found a new layout number is created and added to + * persistent layout tables. + */ +static int +sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, + dmu_tx_t *tx) +{ + sa_os_t *sa = hdl->sa_os->os_sa; + uint64_t hash; + sa_buf_type_t buftype; + sa_hdr_phys_t *sahdr; + void *data_start; + int buf_space; + sa_attr_type_t *attrs, *attrs_start; + int i, lot_count; + int hdrsize, spillhdrsize; + int used; + dmu_object_type_t bonustype; + sa_lot_t *lot; + int len_idx; + int spill_used; + boolean_t spilling; + + dmu_buf_will_dirty(hdl->sa_bonus, tx); + bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); + + /* first determine bonus header size and sum of all attributes */ + hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, + SA_BONUS, &i, &used, &spilling); + + if (used > SPA_MAXBLOCKSIZE) + return (EFBIG); + + VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 
+ MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : + used + hdrsize, tx)); + + ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || + bonustype == DMU_OT_SA); + + /* setup and size spill buffer when needed */ + if (spilling) { + boolean_t dummy; + + if (hdl->sa_spill == NULL) { + VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, + &hdl->sa_spill) == 0); + } + dmu_buf_will_dirty(hdl->sa_spill, tx); + + spillhdrsize = sa_find_sizes(sa, &attr_desc[i], + attr_count - i, hdl->sa_spill, SA_SPILL, &i, + &spill_used, &dummy); + + if (spill_used > SPA_MAXBLOCKSIZE) + return (EFBIG); + + buf_space = hdl->sa_spill->db_size - spillhdrsize; + if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > + hdl->sa_spill->db_size) + VERIFY(0 == sa_resize_spill(hdl, + BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); + } + + /* setup starting pointers to lay down data */ + data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); + sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; + buftype = SA_BONUS; + + if (spilling) + buf_space = (sa->sa_force_spill) ? 
+ 0 : SA_BLKPTR_SPACE - hdrsize; + else + buf_space = hdl->sa_bonus->db_size - hdrsize; + + attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, + KM_SLEEP); + lot_count = 0; + + for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { + uint16_t length; + + attrs[i] = attr_desc[i].sa_attr; + length = SA_REGISTERED_LEN(sa, attrs[i]); + if (length == 0) + length = attr_desc[i].sa_length; + + if (buf_space < length) { /* switch to spill buffer */ + VERIFY(bonustype == DMU_OT_SA); + if (buftype == SA_BONUS && !sa->sa_force_spill) { + sa_find_layout(hdl->sa_os, hash, attrs_start, + lot_count, tx, &lot); + SA_SET_HDR(sahdr, lot->lot_num, hdrsize); + } + + buftype = SA_SPILL; + hash = -1ULL; + len_idx = 0; + + sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; + sahdr->sa_magic = SA_MAGIC; + data_start = (void *)((uintptr_t)sahdr + + spillhdrsize); + attrs_start = &attrs[i]; + buf_space = hdl->sa_spill->db_size - spillhdrsize; + lot_count = 0; + } + hash ^= SA_ATTR_HASH(attrs[i]); + attr_desc[i].sa_addr = data_start; + attr_desc[i].sa_size = length; + SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, + data_start, length); + if (sa->sa_attr_table[attrs[i]].sa_length == 0) { + sahdr->sa_lengths[len_idx++] = length; + } + data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + + length), 8); + buf_space -= P2ROUNDUP(length, 8); + lot_count++; + } + + sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); + + /* + * Verify that old znodes always have layout number 0. + * Must be DMU_OT_SA for arbitrary layouts + */ + VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || + (bonustype == DMU_OT_SA && lot->lot_num > 1)); + + if (bonustype == DMU_OT_SA) { + SA_SET_HDR(sahdr, lot->lot_num, + buftype == SA_BONUS ? 
hdrsize : spillhdrsize); + } + + kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); + if (hdl->sa_bonus_tab) { + sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); + hdl->sa_bonus_tab = NULL; + } + if (!sa->sa_force_spill) + VERIFY(0 == sa_build_index(hdl, SA_BONUS)); + if (hdl->sa_spill) { + sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); + if (!spilling) { + /* + * remove spill block that is no longer needed. + */ + dmu_buf_rele(hdl->sa_spill, NULL); + hdl->sa_spill = NULL; + hdl->sa_spill_tab = NULL; + VERIFY(0 == dmu_rm_spill(hdl->sa_os, + sa_handle_object(hdl), tx)); + } else { + VERIFY(0 == sa_build_index(hdl, SA_SPILL)); + } + } + + return (0); +} + +static void +sa_free_attr_table(sa_os_t *sa) +{ + int i; + + if (sa->sa_attr_table == NULL) + return; + + for (i = 0; i != sa->sa_num_attrs; i++) { + if (sa->sa_attr_table[i].sa_name) + kmem_free(sa->sa_attr_table[i].sa_name, + strlen(sa->sa_attr_table[i].sa_name) + 1); + } + + kmem_free(sa->sa_attr_table, + sizeof (sa_attr_table_t) * sa->sa_num_attrs); + + sa->sa_attr_table = NULL; +} + +static int +sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) +{ + sa_os_t *sa = os->os_sa; + uint64_t sa_attr_count = 0; + uint64_t sa_reg_count; + int error = 0; + uint64_t attr_value; + sa_attr_table_t *tb; + zap_cursor_t zc; + zap_attribute_t za; + int registered_count = 0; + int i; + dmu_objset_type_t ostype = dmu_objset_type(os); + + sa->sa_user_table = + kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); + sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); + + if (sa->sa_reg_attr_obj != 0) { + error = zap_count(os, sa->sa_reg_attr_obj, + &sa_attr_count); + + /* + * Make sure we retrieved a count and that it isn't zero + */ + if (error || (error == 0 && sa_attr_count == 0)) { + if (error == 0) + error = EINVAL; + goto bail; + } + sa_reg_count = sa_attr_count; + } + + if (ostype == DMU_OST_ZFS && sa_attr_count == 0) + sa_attr_count += sa_legacy_attr_count; + + /* Allocate attribute numbers 
for attributes that aren't registered */ + for (i = 0; i != count; i++) { + boolean_t found = B_FALSE; + int j; + + if (ostype == DMU_OST_ZFS) { + for (j = 0; j != sa_legacy_attr_count; j++) { + if (strcmp(reg_attrs[i].sa_name, + sa_legacy_attrs[j].sa_name) == 0) { + sa->sa_user_table[i] = + sa_legacy_attrs[j].sa_attr; + found = B_TRUE; + } + } + } + if (found) + continue; + + if (sa->sa_reg_attr_obj) + error = zap_lookup(os, sa->sa_reg_attr_obj, + reg_attrs[i].sa_name, 8, 1, &attr_value); + else + error = ENOENT; + switch (error) { + case ENOENT: + sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; + sa_attr_count++; + break; + case 0: + sa->sa_user_table[i] = ATTR_NUM(attr_value); + break; + default: + goto bail; + } + } + + sa->sa_num_attrs = sa_attr_count; + tb = sa->sa_attr_table = + kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); + + /* + * Attribute table is constructed from requested attribute list, + * previously foreign registered attributes, and also the legacy + * ZPL set of attributes. 
+ */ + + if (sa->sa_reg_attr_obj) { + for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); + (error = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + uint64_t value; + value = za.za_first_integer; + + registered_count++; + tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); + tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); + tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); + tb[ATTR_NUM(value)].sa_registered = B_TRUE; + + if (tb[ATTR_NUM(value)].sa_name) { + continue; + } + tb[ATTR_NUM(value)].sa_name = + kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); + (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, + strlen(za.za_name) +1); + } + zap_cursor_fini(&zc); + /* + * Make sure we processed the correct number of registered + * attributes + */ + if (registered_count != sa_reg_count) { + ASSERT(error != 0); + goto bail; + } + + } + + if (ostype == DMU_OST_ZFS) { + for (i = 0; i != sa_legacy_attr_count; i++) { + if (tb[i].sa_name) + continue; + tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; + tb[i].sa_length = sa_legacy_attrs[i].sa_length; + tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; + tb[i].sa_registered = B_FALSE; + tb[i].sa_name = + kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, + KM_SLEEP); + (void) strlcpy(tb[i].sa_name, + sa_legacy_attrs[i].sa_name, + strlen(sa_legacy_attrs[i].sa_name) + 1); + } + } + + for (i = 0; i != count; i++) { + sa_attr_type_t attr_id; + + attr_id = sa->sa_user_table[i]; + if (tb[attr_id].sa_name) + continue; + + tb[attr_id].sa_length = reg_attrs[i].sa_length; + tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; + tb[attr_id].sa_attr = attr_id; + tb[attr_id].sa_name = + kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); + (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, + strlen(reg_attrs[i].sa_name) + 1); + } + + sa->sa_need_attr_registration = + (sa_attr_count != registered_count); + + return (0); +bail: + kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); + sa->sa_user_table 
= NULL; + sa_free_attr_table(sa); + return ((error != 0) ? error : EINVAL); +} + +int +sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, + sa_attr_type_t **user_table) +{ + zap_cursor_t zc; + zap_attribute_t za; + sa_os_t *sa; + dmu_objset_type_t ostype = dmu_objset_type(os); + sa_attr_type_t *tb; + int error; + + mutex_enter(&os->os_lock); + if (os->os_sa) { + mutex_enter(&os->os_sa->sa_lock); + mutex_exit(&os->os_lock); + tb = os->os_sa->sa_user_table; + mutex_exit(&os->os_sa->sa_lock); + *user_table = tb; + return (0); + } + + sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); + mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); + sa->sa_master_obj = sa_obj; + + os->os_sa = sa; + mutex_enter(&sa->sa_lock); + mutex_exit(&os->os_lock); + avl_create(&sa->sa_layout_num_tree, layout_num_compare, + sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); + avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, + sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); + + if (sa_obj) { + error = zap_lookup(os, sa_obj, SA_LAYOUTS, + 8, 1, &sa->sa_layout_attr_obj); + if (error != 0 && error != ENOENT) + goto fail; + error = zap_lookup(os, sa_obj, SA_REGISTRY, + 8, 1, &sa->sa_reg_attr_obj); + if (error != 0 && error != ENOENT) + goto fail; + } + + if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) + goto fail; + + if (sa->sa_layout_attr_obj != 0) { + uint64_t layout_count; + + error = zap_count(os, sa->sa_layout_attr_obj, + &layout_count); + + /* + * Layout number count should be > 0 + */ + if (error || (error == 0 && layout_count == 0)) { + if (error == 0) + error = EINVAL; + goto fail; + } + + for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); + (error = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + sa_attr_type_t *lot_attrs; + uint64_t lot_num; + + lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * + za.za_num_integers, KM_SLEEP); + + if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, + za.za_name, 2, 
za.za_num_integers, + lot_attrs))) != 0) { + kmem_free(lot_attrs, sizeof (sa_attr_type_t) * + za.za_num_integers); + break; + } + VERIFY(ddi_strtoull(za.za_name, NULL, 10, + (unsigned long long *)&lot_num) == 0); + + (void) sa_add_layout_entry(os, lot_attrs, + za.za_num_integers, lot_num, + sa_layout_info_hash(lot_attrs, + za.za_num_integers), B_FALSE, NULL); + kmem_free(lot_attrs, sizeof (sa_attr_type_t) * + za.za_num_integers); + } + zap_cursor_fini(&zc); + + /* + * Make sure layout count matches number of entries added + * to AVL tree + */ + if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { + ASSERT(error != 0); + goto fail; + } + } + + /* Add special layout number for old ZNODES */ + if (ostype == DMU_OST_ZFS) { + (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, + sa_legacy_attr_count, 0, + sa_layout_info_hash(sa_legacy_zpl_layout, + sa_legacy_attr_count), B_FALSE, NULL); + + (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, + 0, B_FALSE, NULL); + } + *user_table = os->os_sa->sa_user_table; + mutex_exit(&sa->sa_lock); + return (0); +fail: + os->os_sa = NULL; + sa_free_attr_table(sa); + if (sa->sa_user_table) + kmem_free(sa->sa_user_table, sa->sa_user_table_sz); + mutex_exit(&sa->sa_lock); + kmem_free(sa, sizeof (sa_os_t)); + return ((error == ECKSUM) ? 
EIO : error); +} + +void +sa_tear_down(objset_t *os) +{ + sa_os_t *sa = os->os_sa; + sa_lot_t *layout; + void *cookie; + + kmem_free(sa->sa_user_table, sa->sa_user_table_sz); + + /* Free up attr table */ + + sa_free_attr_table(sa); + + cookie = NULL; + while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) { + sa_idx_tab_t *tab; + while (tab = list_head(&layout->lot_idx_tab)) { + ASSERT(refcount_count(&tab->sa_refcount)); + sa_idx_tab_rele(os, tab); + } + } + + cookie = NULL; + while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) { + kmem_free(layout->lot_attrs, + sizeof (sa_attr_type_t) * layout->lot_attr_count); + kmem_free(layout, sizeof (sa_lot_t)); + } + + avl_destroy(&sa->sa_layout_hash_tree); + avl_destroy(&sa->sa_layout_num_tree); + + kmem_free(sa, sizeof (sa_os_t)); + os->os_sa = NULL; +} + +void +sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, + uint16_t length, int length_idx, boolean_t var_length, void *userp) +{ + sa_idx_tab_t *idx_tab = userp; + + if (var_length) { + ASSERT(idx_tab->sa_variable_lengths); + idx_tab->sa_variable_lengths[length_idx] = length; + } + TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, + (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); +} + +static void +sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, + sa_iterfunc_t func, sa_lot_t *tab, void *userp) +{ + void *data_start; + sa_lot_t *tb = tab; + sa_lot_t search; + avl_index_t loc; + sa_os_t *sa = os->os_sa; + int i; + uint16_t *length_start = NULL; + uint8_t length_idx = 0; + + if (tab == NULL) { + search.lot_num = SA_LAYOUT_NUM(hdr, type); + tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); + ASSERT(tb); + } + + if (IS_SA_BONUSTYPE(type)) { + data_start = (void *)P2ROUNDUP(((uintptr_t)hdr + + offsetof(sa_hdr_phys_t, sa_lengths) + + (sizeof (uint16_t) * tb->lot_var_sizes)), 8); + length_start = hdr->sa_lengths; + } else { + data_start = hdr; + } + + for (i = 0; i != tb->lot_attr_count; i++) 
{ + int attr_length, reg_length; + uint8_t idx_len; + + reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length; + if (reg_length) { + attr_length = reg_length; + idx_len = 0; + } else { + attr_length = length_start[length_idx]; + idx_len = length_idx++; + } + + func(hdr, data_start, tb->lot_attrs[i], attr_length, + idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp); + + data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + + attr_length), 8); + } +} + +/*ARGSUSED*/ +void +sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr, + uint16_t length, int length_idx, boolean_t variable_length, void *userp) +{ + sa_handle_t *hdl = userp; + sa_os_t *sa = hdl->sa_os->os_sa; + + sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length); +} + +void +sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype) +{ + sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype); + dmu_buf_impl_t *db; + sa_os_t *sa = hdl->sa_os->os_sa; + int num_lengths = 1; + int i; + + ASSERT(MUTEX_HELD(&sa->sa_lock)); + if (sa_hdr_phys->sa_magic == SA_MAGIC) + return; + + db = SA_GET_DB(hdl, buftype); + + if (buftype == SA_SPILL) { + arc_release(db->db_buf, NULL); + arc_buf_thaw(db->db_buf); + } + + sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic); + sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info); + + /* + * Determine number of variable lenghts in header + * The standard 8 byte header has one for free and a + * 16 byte header would have 4 + 1; + */ + if (SA_HDR_SIZE(sa_hdr_phys) > 8) + num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1; + for (i = 0; i != num_lengths; i++) + sa_hdr_phys->sa_lengths[i] = + BSWAP_16(sa_hdr_phys->sa_lengths[i]); + + sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA, + sa_byteswap_cb, NULL, hdl); + + if (buftype == SA_SPILL) + arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf); +} + +static int +sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype) +{ + sa_hdr_phys_t *sa_hdr_phys; + dmu_buf_impl_t *db = SA_GET_DB(hdl, 
buftype); + dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db); + sa_os_t *sa = hdl->sa_os->os_sa; + sa_idx_tab_t *idx_tab; + + sa_hdr_phys = SA_GET_HDR(hdl, buftype); + + mutex_enter(&sa->sa_lock); + + /* Do we need to byteswap? */ + + /* only check if not old znode */ + if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC && + sa_hdr_phys->sa_magic != 0) { + VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC); + sa_byteswap(hdl, buftype); + } + + idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys); + + if (buftype == SA_BONUS) + hdl->sa_bonus_tab = idx_tab; + else + hdl->sa_spill_tab = idx_tab; + + mutex_exit(&sa->sa_lock); + return (0); +} + +/*ARGSUSED*/ +void +sa_evict(dmu_buf_t *db, void *sap) +{ + panic("evicting sa dbuf %p\n", (void *)db); +} + +static void +sa_idx_tab_rele(objset_t *os, void *arg) +{ + sa_os_t *sa = os->os_sa; + sa_idx_tab_t *idx_tab = arg; + + if (idx_tab == NULL) + return; + + mutex_enter(&sa->sa_lock); + if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) { + list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab); + if (idx_tab->sa_variable_lengths) + kmem_free(idx_tab->sa_variable_lengths, + sizeof (uint16_t) * + idx_tab->sa_layout->lot_var_sizes); + refcount_destroy(&idx_tab->sa_refcount); + kmem_free(idx_tab->sa_idx_tab, + sizeof (uint32_t) * sa->sa_num_attrs); + kmem_free(idx_tab, sizeof (sa_idx_tab_t)); + } + mutex_exit(&sa->sa_lock); +} + +static void +sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab) +{ + sa_os_t *sa = os->os_sa; + + ASSERT(MUTEX_HELD(&sa->sa_lock)); + (void) refcount_add(&idx_tab->sa_refcount, NULL); +} + +void +sa_handle_destroy(sa_handle_t *hdl) +{ + mutex_enter(&hdl->sa_lock); + (void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl, + NULL, NULL, NULL); + + if (hdl->sa_bonus_tab) { + sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); + hdl->sa_bonus_tab = NULL; + } + if (hdl->sa_spill_tab) { + sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); + hdl->sa_spill_tab = NULL; + } 
+ + dmu_buf_rele(hdl->sa_bonus, NULL); + + if (hdl->sa_spill) + dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + mutex_exit(&hdl->sa_lock); + + kmem_cache_free(sa_cache, hdl); +} + +int +sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp, + sa_handle_type_t hdl_type, sa_handle_t **handlepp) +{ + int error = 0; + dmu_object_info_t doi; + sa_handle_t *handle; + +#ifdef ZFS_DEBUG + dmu_object_info_from_db(db, &doi); + ASSERT(doi.doi_bonus_type == DMU_OT_SA || + doi.doi_bonus_type == DMU_OT_ZNODE); +#endif + /* find handle, if it exists */ + /* if one doesn't exist then create a new one, and initialize it */ + + handle = (hdl_type == SA_HDL_SHARED) ? dmu_buf_get_user(db) : NULL; + if (handle == NULL) { + sa_handle_t *newhandle; + handle = kmem_cache_alloc(sa_cache, KM_SLEEP); + handle->sa_userp = userp; + handle->sa_bonus = db; + handle->sa_os = os; + handle->sa_spill = NULL; + + error = sa_build_index(handle, SA_BONUS); + newhandle = (hdl_type == SA_HDL_SHARED) ? + dmu_buf_set_user_ie(db, handle, + NULL, sa_evict) : NULL; + + if (newhandle != NULL) { + kmem_cache_free(sa_cache, handle); + handle = newhandle; + } + } + *handlepp = handle; + + return (error); +} + +int +sa_handle_get(objset_t *objset, uint64_t objid, void *userp, + sa_handle_type_t hdl_type, sa_handle_t **handlepp) +{ + dmu_buf_t *db; + int error; + + if (error = dmu_bonus_hold(objset, objid, NULL, &db)) + return (error); + + return (sa_handle_get_from_db(objset, db, userp, hdl_type, + handlepp)); +} + +int +sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db) +{ + return (dmu_bonus_hold(objset, obj_num, tag, db)); +} + +void +sa_buf_rele(dmu_buf_t *db, void *tag) +{ + dmu_buf_rele(db, tag); +} + +int +sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count) +{ + ASSERT(hdl); + ASSERT(MUTEX_HELD(&hdl->sa_lock)); + return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL)); +} + +int +sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) +{ + 
int error; + sa_bulk_attr_t bulk; + + bulk.sa_attr = attr; + bulk.sa_data = buf; + bulk.sa_length = buflen; + bulk.sa_data_func = NULL; + + ASSERT(hdl); + mutex_enter(&hdl->sa_lock); + error = sa_lookup_impl(hdl, &bulk, 1); + mutex_exit(&hdl->sa_lock); + return (error); +} + +#ifdef _KERNEL +int +sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) +{ + int error; + sa_bulk_attr_t bulk; + + bulk.sa_data = NULL; + bulk.sa_attr = attr; + bulk.sa_data_func = NULL; + + ASSERT(hdl); + + mutex_enter(&hdl->sa_lock); + if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { + error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size, + uio->uio_resid), UIO_READ, uio); + } + mutex_exit(&hdl->sa_lock); + return (error); + +} +#endif + +void * +sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) +{ + sa_idx_tab_t *idx_tab; + sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; + sa_os_t *sa = os->os_sa; + sa_lot_t *tb, search; + avl_index_t loc; + + /* + * Deterimine layout number. If SA node and header == 0 then + * force the index table to the dummy "1" empty layout. + * + * The layout number would only be zero for a newly created file + * that has not added any attributes yet, or with crypto enabled which + * doesn't write any attributes to the bonus buffer. + */ + + search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); + + tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); + + /* Verify header size is consistent with layout information */ + ASSERT(tb); + ASSERT(IS_SA_BONUSTYPE(bonustype) && + SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) || + (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); + + /* + * See if any of the already existing TOC entries can be reused? 
+ */ + + for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; + idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { + boolean_t valid_idx = B_TRUE; + int i; + + if (tb->lot_var_sizes != 0 && + idx_tab->sa_variable_lengths != NULL) { + for (i = 0; i != tb->lot_var_sizes; i++) { + if (hdr->sa_lengths[i] != + idx_tab->sa_variable_lengths[i]) { + valid_idx = B_FALSE; + break; + } + } + } + if (valid_idx) { + sa_idx_tab_hold(os, idx_tab); + return (idx_tab); + } + } + + /* No such luck, create a new entry */ + idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); + idx_tab->sa_idx_tab = + kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); + idx_tab->sa_layout = tb; + refcount_create(&idx_tab->sa_refcount); + if (tb->lot_var_sizes) + idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * + tb->lot_var_sizes, KM_SLEEP); + + sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, + tb, idx_tab); + sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ + sa_idx_tab_hold(os, idx_tab); /* one for layout */ + list_insert_tail(&tb->lot_idx_tab, idx_tab); + return (idx_tab); +} + +void +sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, + boolean_t start, void *userdata) +{ + ASSERT(start); + + *dataptr = userdata; + *len = total_len; +} + +static void +sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) +{ + uint64_t attr_value = 0; + sa_os_t *sa = hdl->sa_os->os_sa; + sa_attr_table_t *tb = sa->sa_attr_table; + int i; + + mutex_enter(&sa->sa_lock); + + if (!sa->sa_need_attr_registration || sa->sa_master_obj == NULL) { + mutex_exit(&sa->sa_lock); + return; + } + + if (sa->sa_reg_attr_obj == NULL) { + sa->sa_reg_attr_obj = zap_create(hdl->sa_os, + DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx); + VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj, + SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0); + } + for (i = 0; i != sa->sa_num_attrs; i++) { + if (sa->sa_attr_table[i].sa_registered) + continue; + ATTR_ENCODE(attr_value, tb[i].sa_attr, 
tb[i].sa_length, + tb[i].sa_byteswap); + VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, + tb[i].sa_name, 8, 1, &attr_value, tx)); + tb[i].sa_registered = B_TRUE; + } + sa->sa_need_attr_registration = B_FALSE; + mutex_exit(&sa->sa_lock); +} + +/* + * Replace all attributes with attributes specified in template. + * If dnode had a spill buffer then those attributes will be + * also be replaced, possibly with just an empty spill block + * + * This interface is intended to only be used for bulk adding of + * attributes for a new file. It will also be used by the ZPL + * when converting and old formatted znode to native SA support. + */ +int +sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, + int attr_count, dmu_tx_t *tx) +{ + sa_os_t *sa = hdl->sa_os->os_sa; + + if (sa->sa_need_attr_registration) + sa_attr_register_sync(hdl, tx); + return (sa_build_layouts(hdl, attr_desc, attr_count, tx)); +} + +int +sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, + int attr_count, dmu_tx_t *tx) +{ + int error; + + mutex_enter(&hdl->sa_lock); + error = sa_replace_all_by_template_locked(hdl, attr_desc, + attr_count, tx); + mutex_exit(&hdl->sa_lock); + return (error); +} + +/* + * add/remove/replace a single attribute and then rewrite the entire set + * of attributes. 
+ */ +static int +sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, + sa_data_op_t action, sa_data_locator_t *locator, void *datastart, + uint16_t buflen, dmu_tx_t *tx) +{ + sa_os_t *sa = hdl->sa_os->os_sa; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; + dnode_t *dn; + sa_bulk_attr_t *attr_desc; + void *old_data[2]; + int bonus_attr_count = 0; + int bonus_data_size, spill_data_size; + int spill_attr_count = 0; + int error; + uint16_t length; + int i, j, k, length_idx; + sa_hdr_phys_t *hdr; + sa_idx_tab_t *idx_tab; + int attr_count; + int count; + + ASSERT(MUTEX_HELD(&hdl->sa_lock)); + + /* First make of copy of the old data */ + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + if (dn->dn_bonuslen != 0) { + bonus_data_size = hdl->sa_bonus->db_size; + old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); + bcopy(hdl->sa_bonus->db_data, old_data[0], + hdl->sa_bonus->db_size); + bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; + } else { + old_data[0] = NULL; + } + DB_DNODE_EXIT(db); + + /* Bring spill buffer online if it isn't currently */ + + if ((error = sa_get_spill(hdl)) == 0) { + spill_data_size = hdl->sa_spill->db_size; + old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); + bcopy(hdl->sa_spill->db_data, old_data[1], + hdl->sa_spill->db_size); + spill_attr_count = + hdl->sa_spill_tab->sa_layout->lot_attr_count; + } else if (error && error != ENOENT) { + if (old_data[0]) + kmem_free(old_data[0], bonus_data_size); + return (error); + } else { + old_data[1] = NULL; + } + + /* build descriptor of all attributes */ + + attr_count = bonus_attr_count + spill_attr_count; + if (action == SA_ADD) + attr_count++; + else if (action == SA_REMOVE) + attr_count--; + + attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP); + + /* + * loop through bonus and spill buffer if it exists, and + * build up new attr_descriptor to reset the attributes + */ + k = j = 0; + count = bonus_attr_count; + hdr = SA_GET_HDR(hdl, SA_BONUS); + 
idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); + for (; k != 2; k++) { + /* iterate over each attribute in layout */ + for (i = 0, length_idx = 0; i != count; i++) { + sa_attr_type_t attr; + + attr = idx_tab->sa_layout->lot_attrs[i]; + if (attr == newattr) { + if (action == SA_REMOVE) { + j++; + continue; + } + ASSERT(SA_REGISTERED_LEN(sa, attr) == 0); + ASSERT(action == SA_REPLACE); + SA_ADD_BULK_ATTR(attr_desc, j, attr, + locator, datastart, buflen); + } else { + length = SA_REGISTERED_LEN(sa, attr); + if (length == 0) { + length = hdr->sa_lengths[length_idx++]; + } + + SA_ADD_BULK_ATTR(attr_desc, j, attr, + NULL, (void *) + (TOC_OFF(idx_tab->sa_idx_tab[attr]) + + (uintptr_t)old_data[k]), length); + } + } + if (k == 0 && hdl->sa_spill) { + hdr = SA_GET_HDR(hdl, SA_SPILL); + idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); + count = spill_attr_count; + } else { + break; + } + } + if (action == SA_ADD) { + length = SA_REGISTERED_LEN(sa, newattr); + if (length == 0) { + length = buflen; + } + SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, + datastart, buflen); + } + + error = sa_build_layouts(hdl, attr_desc, attr_count, tx); + + if (old_data[0]) + kmem_free(old_data[0], bonus_data_size); + if (old_data[1]) + kmem_free(old_data[1], spill_data_size); + kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); + + return (error); +} + +static int +sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, + dmu_tx_t *tx) +{ + int error; + sa_os_t *sa = hdl->sa_os->os_sa; + dmu_object_type_t bonustype; + + bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS)); + + ASSERT(hdl); + ASSERT(MUTEX_HELD(&hdl->sa_lock)); + + /* sync out registration table if necessary */ + if (sa->sa_need_attr_registration) + sa_attr_register_sync(hdl, tx); + + error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx); + if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb) + sa->sa_update_cb(hdl, tx); + + return (error); +} + +/* + * update or add new attribute + */ +int 
+sa_update(sa_handle_t *hdl, sa_attr_type_t type, + void *buf, uint32_t buflen, dmu_tx_t *tx) +{ + int error; + sa_bulk_attr_t bulk; + + bulk.sa_attr = type; + bulk.sa_data_func = NULL; + bulk.sa_length = buflen; + bulk.sa_data = buf; + + mutex_enter(&hdl->sa_lock); + error = sa_bulk_update_impl(hdl, &bulk, 1, tx); + mutex_exit(&hdl->sa_lock); + return (error); +} + +int +sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr, + uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx) +{ + int error; + sa_bulk_attr_t bulk; + + bulk.sa_attr = attr; + bulk.sa_data = userdata; + bulk.sa_data_func = locator; + bulk.sa_length = buflen; + + mutex_enter(&hdl->sa_lock); + error = sa_bulk_update_impl(hdl, &bulk, 1, tx); + mutex_exit(&hdl->sa_lock); + return (error); +} + +/* + * Return size of an attribute + */ + +int +sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) +{ + sa_bulk_attr_t bulk; + int error; + + bulk.sa_data = NULL; + bulk.sa_attr = attr; + bulk.sa_data_func = NULL; + + ASSERT(hdl); + mutex_enter(&hdl->sa_lock); + if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { + mutex_exit(&hdl->sa_lock); + return (error); + } + *size = bulk.sa_size; + + mutex_exit(&hdl->sa_lock); + return (0); +} + +int +sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) +{ + ASSERT(hdl); + ASSERT(MUTEX_HELD(&hdl->sa_lock)); + return (sa_lookup_impl(hdl, attrs, count)); +} + +int +sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) +{ + int error; + + ASSERT(hdl); + mutex_enter(&hdl->sa_lock); + error = sa_bulk_lookup_locked(hdl, attrs, count); + mutex_exit(&hdl->sa_lock); + return (error); +} + +int +sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx) +{ + int error; + + ASSERT(hdl); + mutex_enter(&hdl->sa_lock); + error = sa_bulk_update_impl(hdl, attrs, count, tx); + mutex_exit(&hdl->sa_lock); + return (error); +} + +int +sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t 
*tx) +{ + int error; + + mutex_enter(&hdl->sa_lock); + error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL, + NULL, 0, tx); + mutex_exit(&hdl->sa_lock); + return (error); +} + +void +sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) +{ + dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); +} + +void +sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) +{ + dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, + blksize, nblocks); +} + +void +sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl) +{ + (void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus, + oldhdl, newhdl, NULL, sa_evict); + oldhdl->sa_bonus = NULL; +} + +void +sa_set_userp(sa_handle_t *hdl, void *ptr) +{ + hdl->sa_userp = ptr; +} + +dmu_buf_t * +sa_get_db(sa_handle_t *hdl) +{ + return ((dmu_buf_t *)hdl->sa_bonus); +} + +void * +sa_get_userdata(sa_handle_t *hdl) +{ + return (hdl->sa_userp); +} + +void +sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func) +{ + ASSERT(MUTEX_HELD(&os->os_sa->sa_lock)); + os->os_sa->sa_update_cb = func; +} + +void +sa_register_update_callback(objset_t *os, sa_update_cb_t *func) +{ + + mutex_enter(&os->os_sa->sa_lock); + sa_register_update_callback_locked(os, func); + mutex_exit(&os->os_sa->sa_lock); +} + +uint64_t +sa_handle_object(sa_handle_t *hdl) +{ + return (hdl->sa_bonus->db_object); +} + +boolean_t +sa_enabled(objset_t *os) +{ + return (os->os_sa == NULL); +} + +int +sa_set_sa_object(objset_t *os, uint64_t sa_object) +{ + sa_os_t *sa = os->os_sa; + + if (sa->sa_master_obj) + return (1); + + sa->sa_master_obj = sa_object; + + return (0); +} + +int +sa_hdrsize(void *arg) +{ + sa_hdr_phys_t *hdr = arg; + + return (SA_HDR_SIZE(hdr)); +} + +void +sa_handle_lock(sa_handle_t *hdl) +{ + ASSERT(hdl); + mutex_enter(&hdl->sa_lock); +} + +void +sa_handle_unlock(sa_handle_t *hdl) +{ + ASSERT(hdl); + mutex_exit(&hdl->sa_lock); +} diff --git a/uts/common/fs/zfs/sha256.c b/uts/common/fs/zfs/sha256.c new file mode 100644 
index 000000000000..f515be6bb304 --- /dev/null +++ b/uts/common/fs/zfs/sha256.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#include <sys/zfs_context.h> +#include <sys/zio.h> +#include <sys/sha2.h> + +void +zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + SHA2_CTX ctx; + zio_cksum_t tmp; + + SHA2Init(SHA256, &ctx); + SHA2Update(&ctx, buf, size); + SHA2Final(&tmp, &ctx); + + /* + * A prior implementation of this function had a + * private SHA256 implementation always wrote things out in + * Big Endian and there wasn't a byteswap variant of it. + * To preseve on disk compatibility we need to force that + * behaviour. 
+ */ + zcp->zc_word[0] = BE_64(tmp.zc_word[0]); + zcp->zc_word[1] = BE_64(tmp.zc_word[1]); + zcp->zc_word[2] = BE_64(tmp.zc_word[2]); + zcp->zc_word[3] = BE_64(tmp.zc_word[3]); +} diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c new file mode 100644 index 000000000000..b6190e4cfafe --- /dev/null +++ b/uts/common/fs/zfs/spa.c @@ -0,0 +1,5882 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * This file contains all the routines used when modifying on-disk SPA state. + * This includes opening, importing, destroying, exporting a pool, and syncing a + * pool. 
/*
 * How the thread count of a zio taskq is chosen.  Each mode is paired
 * with a value in zio_taskq_info_t; the per-mode comments say how that
 * value is interpreted.
 */
typedef enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_batch,			/* cpu-intensive; value is ignored */
	zti_mode_null,			/* don't create a taskq */
	zti_nmodes			/* number of modes listed above */
} zti_modes_t;
ZTI_NULL, ZTI_ONE, ZTI_NULL }, + { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, + { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, + { ZTI_FIX(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, + { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, + { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, +}; + +static dsl_syncfunc_t spa_sync_props; +static boolean_t spa_has_active_shared_spare(spa_t *spa); +static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, + spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, + char **ereport); +static void spa_vdev_resilver_done(spa_t *spa); + +uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ +id_t zio_taskq_psrset_bind = PS_NONE; +boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ +uint_t zio_taskq_basedc = 80; /* base duty cycle */ + +boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ + +/* + * This (illegal) pool name is used when temporarily importing a spa_t in order + * to get the vdev stats associated with the imported devices. + */ +#define TRYIMPORT_NAME "$import" + +/* + * ========================================================================== + * SPA properties routines + * ========================================================================== + */ + +/* + * Add a (source=src, propname=propval) list to an nvlist. + */ +static void +spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, + uint64_t intval, zprop_source_t src) +{ + const char *propname = zpool_prop_to_name(prop); + nvlist_t *propval; + + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); + + if (strval != NULL) + VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); + else + VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); + + VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); + nvlist_free(propval); +} + +/* + * Get property values from the spa configuration. 
+ */ +static void +spa_prop_get_config(spa_t *spa, nvlist_t **nvp) +{ + uint64_t size; + uint64_t alloc; + uint64_t cap, version; + zprop_source_t src = ZPROP_SRC_NONE; + spa_config_dirent_t *dp; + + ASSERT(MUTEX_HELD(&spa->spa_props_lock)); + + if (spa->spa_root_vdev != NULL) { + alloc = metaslab_class_get_alloc(spa_normal_class(spa)); + size = metaslab_class_get_space(spa_normal_class(spa)); + spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, + size - alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, + (spa_mode(spa) == FREAD), src); + + cap = (size == 0) ? 0 : (alloc * 100 / size); + spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, + ddt_get_pool_dedup_ratio(spa), src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, + spa->spa_root_vdev->vdev_state, src); + + version = spa_version(spa); + if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) + src = ZPROP_SRC_DEFAULT; + else + src = ZPROP_SRC_LOCAL; + spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); + } + + spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); + + if (spa->spa_root != NULL) + spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, + 0, ZPROP_SRC_LOCAL); + + if ((dp = list_head(&spa->spa_config_list)) != NULL) { + if (dp->scd_path == NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, + "none", 0, ZPROP_SRC_LOCAL); + } else if (strcmp(dp->scd_path, spa_config_path) != 0) { + spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, + dp->scd_path, 0, ZPROP_SRC_LOCAL); + } + } +} + +/* + * Get zpool property values. 
/*
 * Get zpool property values.
 *
 * Allocates a new nvlist in *nvp and fills it with the properties from
 * the spa configuration (spa_prop_get_config()) followed by every
 * recognised entry of the MOS pool property object, if there is one.
 *
 * Returns 0 on success.  If the ZAP walk ends with an error other than
 * ENOENT, the nvlist is freed, *nvp is set to NULL and that error is
 * returned.
 */
int
spa_prop_get(spa_t *spa, nvlist_t **nvp)
{
	objset_t *mos = spa->spa_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;
	int err;

	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/* spa_props_lock is held across both the config and the ZAP pass. */
	mutex_enter(&spa->spa_props_lock);

	/*
	 * Get properties from the spa config.
	 */
	spa_prop_get_config(spa, nvp);

	/* If no pool property object, no more prop to get. */
	if (mos == NULL || spa->spa_pool_props_object == 0) {
		mutex_exit(&spa->spa_props_lock);
		return (0);
	}

	/*
	 * Get properties from the MOS pool property object.
	 *
	 * Note that every "break" in the switch below leaves only the
	 * switch, not this loop: the walk continues with the next entry
	 * and 'err' is overwritten by the next zap_cursor_retrieve().
	 * A failure on a single property therefore just omits it.
	 */
	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
	    zap_cursor_advance(&zc)) {
		uint64_t intval = 0;
		char *strval = NULL;
		zprop_source_t src = ZPROP_SRC_DEFAULT;
		zpool_prop_t prop;

		/* Skip ZAP entries that are not known pool properties. */
		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
			continue;

		switch (za.za_integer_length) {
		case 8:
			/* integer property */
			if (za.za_first_integer !=
			    zpool_prop_default_numeric(prop))
				src = ZPROP_SRC_LOCAL;

			if (prop == ZPOOL_PROP_BOOTFS) {
				dsl_pool_t *dp;
				dsl_dataset_t *ds = NULL;

				/*
				 * bootfs is stored as a dataset object
				 * number but reported as the dataset name.
				 */
				dp = spa_get_dsl(spa);
				rw_enter(&dp->dp_config_rwlock, RW_READER);
				if (err = dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &ds)) {
					rw_exit(&dp->dp_config_rwlock);
					break;
				}

				strval = kmem_alloc(
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
				    KM_SLEEP);
				dsl_dataset_name(ds, strval);
				dsl_dataset_rele(ds, FTAG);
				rw_exit(&dp->dp_config_rwlock);
			} else {
				strval = NULL;
				intval = za.za_first_integer;
			}

			spa_prop_add_list(*nvp, prop, strval, intval, src);

			/* Free size must match the allocation size above. */
			if (strval != NULL)
				kmem_free(strval,
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);

			break;

		case 1:
			/* string property */
			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
			err = zap_lookup(mos, spa->spa_pool_props_object,
			    za.za_name, 1, za.za_num_integers, strval);
			if (err) {
				kmem_free(strval, za.za_num_integers);
				break;
			}
			spa_prop_add_list(*nvp, prop, strval, 0, src);
			kmem_free(strval, za.za_num_integers);
			break;

		default:
			/* Other integer widths are ignored. */
			break;
		}
	}
	zap_cursor_fini(&zc);
	mutex_exit(&spa->spa_props_lock);
/* NOTE(review): no goto in this function targets the label below. */
out:
	/* ENOENT from the cursor is not treated as a failure. */
	if (err && err != ENOENT) {
		nvlist_free(*nvp);
		*nvp = NULL;
		return (err);
	}

	return (0);
}
*/ + + if (dmu_objset_type(os) != DMU_OST_ZFS) { + error = ENOTSUP; + } else if ((error = dsl_prop_get_integer(strval, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), + &compress, NULL)) == 0 && + !BOOTFS_COMPRESS_VALID(compress)) { + error = ENOTSUP; + } else { + objnum = dmu_objset_id(os); + } + dmu_objset_rele(os, FTAG); + } + break; + + case ZPOOL_PROP_FAILUREMODE: + error = nvpair_value_uint64(elem, &intval); + if (!error && (intval < ZIO_FAILURE_MODE_WAIT || + intval > ZIO_FAILURE_MODE_PANIC)) + error = EINVAL; + + /* + * This is a special case which only occurs when + * the pool has completely failed. This allows + * the user to change the in-core failmode property + * without syncing it out to disk (I/Os might + * currently be blocked). We do this by returning + * EIO to the caller (spa_prop_set) to trick it + * into thinking we encountered a property validation + * error. + */ + if (!error && spa_suspended(spa)) { + spa->spa_failmode = intval; + error = EIO; + } + break; + + case ZPOOL_PROP_CACHEFILE: + if ((error = nvpair_value_string(elem, &strval)) != 0) + break; + + if (strval[0] == '\0') + break; + + if (strcmp(strval, "none") == 0) + break; + + if (strval[0] != '/') { + error = EINVAL; + break; + } + + slash = strrchr(strval, '/'); + ASSERT(slash != NULL); + + if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || + strcmp(slash, "/..") == 0) + error = EINVAL; + break; + + case ZPOOL_PROP_DEDUPDITTO: + if (spa_version(spa) < SPA_VERSION_DEDUP) + error = ENOTSUP; + else + error = nvpair_value_uint64(elem, &intval); + if (error == 0 && + intval != 0 && intval < ZIO_DEDUPDITTO_MIN) + error = EINVAL; + break; + } + + if (error) + break; + } + + if (!error && reset_bootfs) { + error = nvlist_remove(props, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); + + if (!error) { + error = nvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); + } + } + + return (error); +} + +void +spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t 
need_sync) +{ + char *cachefile; + spa_config_dirent_t *dp; + + if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), + &cachefile) != 0) + return; + + dp = kmem_alloc(sizeof (spa_config_dirent_t), + KM_SLEEP); + + if (cachefile[0] == '\0') + dp->scd_path = spa_strdup(spa_config_path); + else if (strcmp(cachefile, "none") == 0) + dp->scd_path = NULL; + else + dp->scd_path = spa_strdup(cachefile); + + list_insert_head(&spa->spa_config_list, dp); + if (need_sync) + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); +} + +int +spa_prop_set(spa_t *spa, nvlist_t *nvp) +{ + int error; + nvpair_t *elem; + boolean_t need_sync = B_FALSE; + zpool_prop_t prop; + + if ((error = spa_prop_validate(spa, nvp)) != 0) + return (error); + + elem = NULL; + while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { + if ((prop = zpool_name_to_prop( + nvpair_name(elem))) == ZPROP_INVAL) + return (EINVAL); + + if (prop == ZPOOL_PROP_CACHEFILE || + prop == ZPOOL_PROP_ALTROOT || + prop == ZPOOL_PROP_READONLY) + continue; + + need_sync = B_TRUE; + break; + } + + if (need_sync) + return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, + spa, nvp, 3)); + else + return (0); +} + +/* + * If the bootfs property value is dsobj, clear it. 
+ */ +void +spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) +{ + if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { + VERIFY(zap_remove(spa->spa_meta_objset, + spa->spa_pool_props_object, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); + spa->spa_bootfs = 0; + } +} + +/* + * ========================================================================== + * SPA state manipulation (open/create/destroy/import/export) + * ========================================================================== + */ + +static int +spa_error_entry_compare(const void *a, const void *b) +{ + spa_error_entry_t *sa = (spa_error_entry_t *)a; + spa_error_entry_t *sb = (spa_error_entry_t *)b; + int ret; + + ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, + sizeof (zbookmark_t)); + + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +/* + * Utility function which retrieves copies of the current logs and + * re-initializes them in the process. + */ +void +spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) +{ + ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); + + bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); + bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); + + avl_create(&spa->spa_errlist_scrub, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); + avl_create(&spa->spa_errlist_last, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); +} + +static taskq_t * +spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, + uint_t value) +{ + uint_t flags = TASKQ_PREPOPULATE; + boolean_t batch = B_FALSE; + + switch (mode) { + case zti_mode_null: + return (NULL); /* no taskq needed */ + + case zti_mode_fixed: + ASSERT3U(value, >=, 1); + value = MAX(value, 1); + break; + + case zti_mode_batch: + batch = B_TRUE; + flags |= TASKQ_THREADS_CPU_PCT; + value = zio_taskq_batch_pct; + break; + + case 
zti_mode_online_percent: + flags |= TASKQ_THREADS_CPU_PCT; + break; + + default: + panic("unrecognized mode for %s taskq (%u:%u) in " + "spa_activate()", + name, mode, value); + break; + } + + if (zio_taskq_sysdc && spa->spa_proc != &p0) { + if (batch) + flags |= TASKQ_DC_BATCH; + + return (taskq_create_sysdc(name, value, 50, INT_MAX, + spa->spa_proc, zio_taskq_basedc, flags)); + } + return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, + spa->spa_proc, flags)); +} + +static void +spa_create_zio_taskqs(spa_t *spa) +{ + for (int t = 0; t < ZIO_TYPES; t++) { + for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { + const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; + enum zti_modes mode = ztip->zti_mode; + uint_t value = ztip->zti_value; + char name[32]; + + (void) snprintf(name, sizeof (name), + "%s_%s", zio_type_name[t], zio_taskq_types[q]); + + spa->spa_zio_taskq[t][q] = + spa_taskq_create(spa, name, mode, value); + } + } +} + +#ifdef _KERNEL +static void +spa_thread(void *arg) +{ + callb_cpr_t cprinfo; + + spa_t *spa = arg; + user_t *pu = PTOU(curproc); + + CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, + spa->spa_name); + + ASSERT(curproc != &p0); + (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), + "zpool-%s", spa->spa_name); + (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); + + /* bind this thread to the requested psrset */ + if (zio_taskq_psrset_bind != PS_NONE) { + pool_lock(); + mutex_enter(&cpu_lock); + mutex_enter(&pidlock); + mutex_enter(&curproc->p_lock); + + if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, + 0, NULL, NULL) == 0) { + curthread->t_bind_pset = zio_taskq_psrset_bind; + } else { + cmn_err(CE_WARN, + "Couldn't bind process for zfs pool \"%s\" to " + "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); + } + + mutex_exit(&curproc->p_lock); + mutex_exit(&pidlock); + mutex_exit(&cpu_lock); + pool_unlock(); + } + + if (zio_taskq_sysdc) { + sysdc_thread_enter(curthread, 100, 0); + } + + spa->spa_proc = 
/*
 * Activate an uninitialized pool.
 *
 * Moves the spa from POOL_STATE_UNINITIALIZED to POOL_STATE_ACTIVE,
 * records the open mode, creates the normal and log metaslab classes,
 * optionally starts a covering process (spa_thread), makes sure the zio
 * taskqs exist, and initializes the dirty lists, the vdev txg list and
 * the two error-log AVL trees.
 */
static void
spa_activate(spa_t *spa, int mode)
{
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_mode = mode;

	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);

	/* Try to create a covering process */
	mutex_enter(&spa->spa_proc_lock);
	ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
	ASSERT(spa->spa_proc == &p0);
	spa->spa_did = 0;

	/* Only create a process if we're going to be around a while. */
	if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
		if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
		    NULL, 0) == 0) {
			/*
			 * Wait for spa_thread() to finish its setup: it
			 * sets spa_proc and spa_did, creates the zio
			 * taskqs, moves the state to SPA_PROC_ACTIVE and
			 * broadcasts on spa_proc_cv.
			 */
			spa->spa_proc_state = SPA_PROC_CREATED;
			while (spa->spa_proc_state == SPA_PROC_CREATED) {
				cv_wait(&spa->spa_proc_cv,
				    &spa->spa_proc_lock);
			}
			ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
			ASSERT(spa->spa_proc != &p0);
			ASSERT(spa->spa_did != 0);
		} else {
#ifdef _KERNEL
			cmn_err(CE_WARN,
			    "Couldn't create process for zfs pool \"%s\"\n",
			    spa->spa_name);
#endif
		}
	}
	mutex_exit(&spa->spa_proc_lock);

	/* If we didn't create a process, we need to create our taskqs. */
	if (spa->spa_proc == &p0) {
		spa_create_zio_taskqs(spa);
	}

	/* Lists of vdevs with dirty config / dirty state. */
	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_config_dirty_node));
	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_state_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	/* Error logs, both ordered by spa_error_entry_compare(). */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
+ */ + spa_errlog_drain(spa); + + avl_destroy(&spa->spa_errlist_scrub); + avl_destroy(&spa->spa_errlist_last); + + spa->spa_state = POOL_STATE_UNINITIALIZED; + + mutex_enter(&spa->spa_proc_lock); + if (spa->spa_proc_state != SPA_PROC_NONE) { + ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); + spa->spa_proc_state = SPA_PROC_DEACTIVATE; + cv_broadcast(&spa->spa_proc_cv); + while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { + ASSERT(spa->spa_proc != &p0); + cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); + } + ASSERT(spa->spa_proc_state == SPA_PROC_GONE); + spa->spa_proc_state = SPA_PROC_NONE; + } + ASSERT(spa->spa_proc == &p0); + mutex_exit(&spa->spa_proc_lock); + + /* + * We want to make sure spa_thread() has actually exited the ZFS + * module, so that the module can't be unloaded out from underneath + * it. + */ + if (spa->spa_did != 0) { + thread_join(spa->spa_did); + spa->spa_did = 0; + } +} + +/* + * Verify a pool configuration, and construct the vdev tree appropriately. This + * will create all the necessary vdevs in the appropriate layout, with each vdev + * in the CLOSED state. This will prep the pool before open/creation/import. + * All vdev validation is done by the vdev_alloc() routine. 
+ */ +static int +spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, + uint_t id, int atype) +{ + nvlist_t **child; + uint_t children; + int error; + + if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) + return (error); + + if ((*vdp)->vdev_ops->vdev_op_leaf) + return (0); + + error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); + + if (error == ENOENT) + return (0); + + if (error) { + vdev_free(*vdp); + *vdp = NULL; + return (EINVAL); + } + + for (int c = 0; c < children; c++) { + vdev_t *vd; + if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, + atype)) != 0) { + vdev_free(*vdp); + *vdp = NULL; + return (error); + } + } + + ASSERT(*vdp != NULL); + + return (0); +} + +/* + * Opposite of spa_load(). + */ +static void +spa_unload(spa_t *spa) +{ + int i; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + /* + * Stop async tasks. + */ + spa_async_suspend(spa); + + /* + * Stop syncing. + */ + if (spa->spa_sync_on) { + txg_sync_stop(spa->spa_dsl_pool); + spa->spa_sync_on = B_FALSE; + } + + /* + * Wait for any outstanding async I/O to complete. + */ + if (spa->spa_async_zio_root != NULL) { + (void) zio_wait(spa->spa_async_zio_root); + spa->spa_async_zio_root = NULL; + } + + bpobj_close(&spa->spa_deferred_bpobj); + + /* + * Close the dsl pool. + */ + if (spa->spa_dsl_pool) { + dsl_pool_close(spa->spa_dsl_pool); + spa->spa_dsl_pool = NULL; + spa->spa_meta_objset = NULL; + } + + ddt_unload(spa); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + /* + * Drop and purge level 2 cache + */ + spa_l2cache_drop(spa); + + /* + * Close all vdevs. 
+ */ + if (spa->spa_root_vdev) + vdev_free(spa->spa_root_vdev); + ASSERT(spa->spa_root_vdev == NULL); + + for (i = 0; i < spa->spa_spares.sav_count; i++) + vdev_free(spa->spa_spares.sav_vdevs[i]); + if (spa->spa_spares.sav_vdevs) { + kmem_free(spa->spa_spares.sav_vdevs, + spa->spa_spares.sav_count * sizeof (void *)); + spa->spa_spares.sav_vdevs = NULL; + } + if (spa->spa_spares.sav_config) { + nvlist_free(spa->spa_spares.sav_config); + spa->spa_spares.sav_config = NULL; + } + spa->spa_spares.sav_count = 0; + + for (i = 0; i < spa->spa_l2cache.sav_count; i++) + vdev_free(spa->spa_l2cache.sav_vdevs[i]); + if (spa->spa_l2cache.sav_vdevs) { + kmem_free(spa->spa_l2cache.sav_vdevs, + spa->spa_l2cache.sav_count * sizeof (void *)); + spa->spa_l2cache.sav_vdevs = NULL; + } + if (spa->spa_l2cache.sav_config) { + nvlist_free(spa->spa_l2cache.sav_config); + spa->spa_l2cache.sav_config = NULL; + } + spa->spa_l2cache.sav_count = 0; + + spa->spa_async_suspended = 0; + + spa_config_exit(spa, SCL_ALL, FTAG); +} + +/* + * Load (or re-load) the current list of vdevs describing the active spares for + * this pool. When this is called, we have some form of basic information in + * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and + * then re-generate a more complete list including status information. + */ +static void +spa_load_spares(spa_t *spa) +{ + nvlist_t **spares; + uint_t nspares; + int i; + vdev_t *vd, *tvd; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + /* + * First, close and free any existing spare vdevs. 
+ */ + for (i = 0; i < spa->spa_spares.sav_count; i++) { + vd = spa->spa_spares.sav_vdevs[i]; + + /* Undo the call to spa_activate() below */ + if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, + B_FALSE)) != NULL && tvd->vdev_isspare) + spa_spare_remove(tvd); + vdev_close(vd); + vdev_free(vd); + } + + if (spa->spa_spares.sav_vdevs) + kmem_free(spa->spa_spares.sav_vdevs, + spa->spa_spares.sav_count * sizeof (void *)); + + if (spa->spa_spares.sav_config == NULL) + nspares = 0; + else + VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + + spa->spa_spares.sav_count = (int)nspares; + spa->spa_spares.sav_vdevs = NULL; + + if (nspares == 0) + return; + + /* + * Construct the array of vdevs, opening them to get status in the + * process. For each spare, there is potentially two different vdev_t + * structures associated with it: one in the list of spares (used only + * for basic validation purposes) and one in the active vdev + * configuration (if it's spared in). During this phase we open and + * validate each vdev on the spare list. If the vdev also exists in the + * active configuration, then we also mark this vdev as an active spare. + */ + spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), + KM_SLEEP); + for (i = 0; i < spa->spa_spares.sav_count; i++) { + VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, + VDEV_ALLOC_SPARE) == 0); + ASSERT(vd != NULL); + + spa->spa_spares.sav_vdevs[i] = vd; + + if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, + B_FALSE)) != NULL) { + if (!tvd->vdev_isspare) + spa_spare_add(tvd); + + /* + * We only mark the spare active if we were successfully + * able to load the vdev. Otherwise, importing a pool + * with a bad active spare would result in strange + * behavior, because multiple pool would think the spare + * is actively in use. 
+ * + * There is a vulnerability here to an equally bizarre + * circumstance, where a dead active spare is later + * brought back to life (onlined or otherwise). Given + * the rarity of this scenario, and the extra complexity + * it adds, we ignore the possibility. + */ + if (!vdev_is_dead(tvd)) + spa_spare_activate(tvd); + } + + vd->vdev_top = vd; + vd->vdev_aux = &spa->spa_spares; + + if (vdev_open(vd) != 0) + continue; + + if (vdev_validate_aux(vd) == 0) + spa_spare_add(vd); + } + + /* + * Recompute the stashed list of spares, with status information + * this time. + */ + VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, + DATA_TYPE_NVLIST_ARRAY) == 0); + + spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), + KM_SLEEP); + for (i = 0; i < spa->spa_spares.sav_count; i++) + spares[i] = vdev_config_generate(spa, + spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); + for (i = 0; i < spa->spa_spares.sav_count; i++) + nvlist_free(spares[i]); + kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); +} + +/* + * Load (or re-load) the current list of vdevs describing the active l2cache for + * this pool. When this is called, we have some form of basic information in + * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and + * then re-generate a more complete list including status information. + * Devices which are already active have their details maintained, and are + * not re-opened. 
+ */ +static void +spa_load_l2cache(spa_t *spa) +{ + nvlist_t **l2cache; + uint_t nl2cache; + int i, j, oldnvdevs; + uint64_t guid; + vdev_t *vd, **oldvdevs, **newvdevs; + spa_aux_vdev_t *sav = &spa->spa_l2cache; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + if (sav->sav_config != NULL) { + VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); + newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); + } else { + nl2cache = 0; + } + + oldvdevs = sav->sav_vdevs; + oldnvdevs = sav->sav_count; + sav->sav_vdevs = NULL; + sav->sav_count = 0; + + /* + * Process new nvlist of vdevs. + */ + for (i = 0; i < nl2cache; i++) { + VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, + &guid) == 0); + + newvdevs[i] = NULL; + for (j = 0; j < oldnvdevs; j++) { + vd = oldvdevs[j]; + if (vd != NULL && guid == vd->vdev_guid) { + /* + * Retain previous vdev for add/remove ops. + */ + newvdevs[i] = vd; + oldvdevs[j] = NULL; + break; + } + } + + if (newvdevs[i] == NULL) { + /* + * Create new vdev + */ + VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, + VDEV_ALLOC_L2CACHE) == 0); + ASSERT(vd != NULL); + newvdevs[i] = vd; + + /* + * Commit this vdev as an l2cache device, + * even if it fails to open. 
+ */ + spa_l2cache_add(vd); + + vd->vdev_top = vd; + vd->vdev_aux = sav; + + spa_l2cache_activate(vd); + + if (vdev_open(vd) != 0) + continue; + + (void) vdev_validate_aux(vd); + + if (!vdev_is_dead(vd)) + l2arc_add_vdev(spa, vd); + } + } + + /* + * Purge vdevs that were dropped + */ + for (i = 0; i < oldnvdevs; i++) { + uint64_t pool; + + vd = oldvdevs[i]; + if (vd != NULL) { + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); + (void) vdev_close(vd); + spa_l2cache_remove(vd); + } + } + + if (oldvdevs) + kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); + + if (sav->sav_config == NULL) + goto out; + + sav->sav_vdevs = newvdevs; + sav->sav_count = (int)nl2cache; + + /* + * Recompute the stashed list of l2cache devices, with status + * information this time. + */ + VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, + DATA_TYPE_NVLIST_ARRAY) == 0); + + l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); + for (i = 0; i < sav->sav_count; i++) + l2cache[i] = vdev_config_generate(spa, + sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); + VERIFY(nvlist_add_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); +out: + for (i = 0; i < sav->sav_count; i++) + nvlist_free(l2cache[i]); + if (sav->sav_count) + kmem_free(l2cache, sav->sav_count * sizeof (void *)); +} + +static int +load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) +{ + dmu_buf_t *db; + char *packed = NULL; + size_t nvsize = 0; + int error; + *value = NULL; + + VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); + nvsize = *(uint64_t *)db->db_data; + dmu_buf_rele(db, FTAG); + + packed = kmem_alloc(nvsize, KM_SLEEP); + error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, + DMU_READ_PREFETCH); + if (error == 0) + error = nvlist_unpack(packed, nvsize, value, 0); + kmem_free(packed, nvsize); + + return (error); +} + +/* + * Checks to see if the given vdev could not be 
opened, in which case we post a + * sysevent to notify the autoreplace code that the device has been removed. + */ +static void +spa_check_removed(vdev_t *vd) +{ + for (int c = 0; c < vd->vdev_children; c++) + spa_check_removed(vd->vdev_child[c]); + + if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { + zfs_post_autoreplace(vd->vdev_spa, vd); + spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); + } +} + +/* + * Validate the current config against the MOS config + */ +static boolean_t +spa_config_valid(spa_t *spa, nvlist_t *config) +{ + vdev_t *mrvd, *rvd = spa->spa_root_vdev; + nvlist_t *nv; + + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); + + ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children); + + /* + * If we're doing a normal import, then build up any additional + * diagnostic information about missing devices in this config. + * We'll pass this up to the user for further processing. 
+ */ + if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { + nvlist_t **child, *nv; + uint64_t idx = 0; + + child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), + KM_SLEEP); + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + vdev_t *mtvd = mrvd->vdev_child[c]; + + if (tvd->vdev_ops == &vdev_missing_ops && + mtvd->vdev_ops != &vdev_missing_ops && + mtvd->vdev_islog) + child[idx++] = vdev_config_generate(spa, mtvd, + B_FALSE, 0); + } + + if (idx) { + VERIFY(nvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, idx) == 0); + VERIFY(nvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0); + + for (int i = 0; i < idx; i++) + nvlist_free(child[i]); + } + nvlist_free(nv); + kmem_free(child, rvd->vdev_children * sizeof (char **)); + } + + /* + * Compare the root vdev tree with the information we have + * from the MOS config (mrvd). Check each top-level vdev + * with the corresponding MOS config top-level (mtvd). + */ + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + vdev_t *mtvd = mrvd->vdev_child[c]; + + /* + * Resolve any "missing" vdevs in the current configuration. + * If we find that the MOS config has more accurate information + * about the top-level vdev then use that vdev instead. + */ + if (tvd->vdev_ops == &vdev_missing_ops && + mtvd->vdev_ops != &vdev_missing_ops) { + + if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) + continue; + + /* + * Device specific actions. + */ + if (mtvd->vdev_islog) { + spa_set_log_state(spa, SPA_LOG_CLEAR); + } else { + /* + * XXX - once we have 'readonly' pool + * support we should be able to handle + * missing data devices by transitioning + * the pool to readonly. + */ + continue; + } + + /* + * Swap the missing vdev with the data we were + * able to obtain from the MOS config. 
+ */ + vdev_remove_child(rvd, tvd); + vdev_remove_child(mrvd, mtvd); + + vdev_add_child(rvd, mtvd); + vdev_add_child(mrvd, tvd); + + spa_config_exit(spa, SCL_ALL, FTAG); + vdev_load(mtvd); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + vdev_reopen(rvd); + } else if (mtvd->vdev_islog) { + /* + * Load the slog device's state from the MOS config + * since it's possible that the label does not + * contain the most up-to-date information. + */ + vdev_load_log_state(tvd, mtvd); + vdev_reopen(tvd); + } + } + vdev_free(mrvd); + spa_config_exit(spa, SCL_ALL, FTAG); + + /* + * Ensure we were able to validate the config. + */ + return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum); +} + +/* + * Check for missing log devices + */ +static int +spa_check_logs(spa_t *spa) +{ + switch (spa->spa_log_state) { + case SPA_LOG_MISSING: + /* need to recheck in case slog has been restored */ + case SPA_LOG_UNKNOWN: + if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, + DS_FIND_CHILDREN)) { + spa_set_log_state(spa, SPA_LOG_MISSING); + return (1); + } + break; + } + return (0); +} + +static boolean_t +spa_passivate_log(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + boolean_t slog_found = B_FALSE; + + ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); + + if (!spa_has_slogs(spa)) + return (B_FALSE); + + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (tvd->vdev_islog) { + metaslab_group_passivate(mg); + slog_found = B_TRUE; + } + } + + return (slog_found); +} + +static void +spa_activate_log(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + + ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); + + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (tvd->vdev_islog) + metaslab_group_activate(mg); + } +} + +int +spa_offline_log(spa_t *spa) +{ + int error = 0; + + if ((error = 
dmu_objset_find(spa_name(spa), zil_vdev_offline, + NULL, DS_FIND_CHILDREN)) == 0) { + + /* + * We successfully offlined the log device, sync out the + * current txg so that the "stubby" block can be removed + * by zil_sync(). + */ + txg_wait_synced(spa->spa_dsl_pool, 0); + } + return (error); +} + +static void +spa_aux_check_removed(spa_aux_vdev_t *sav) +{ + for (int i = 0; i < sav->sav_count; i++) + spa_check_removed(sav->sav_vdevs[i]); +} + +void +spa_claim_notify(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + + if (zio->io_error) + return; + + mutex_enter(&spa->spa_props_lock); /* any mutex will do */ + if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) + spa->spa_claim_max_txg = zio->io_bp->blk_birth; + mutex_exit(&spa->spa_props_lock); +} + +typedef struct spa_load_error { + uint64_t sle_meta_count; + uint64_t sle_data_count; +} spa_load_error_t; + +static void +spa_load_verify_done(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + spa_load_error_t *sle = zio->io_private; + dmu_object_type_t type = BP_GET_TYPE(bp); + int error = zio->io_error; + + if (error) { + if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && + type != DMU_OT_INTENT_LOG) + atomic_add_64(&sle->sle_meta_count, 1); + else + atomic_add_64(&sle->sle_data_count, 1); + } + zio_data_buf_free(zio->io_data, zio->io_size); +} + +/*ARGSUSED*/ +static int +spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, + arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + if (bp != NULL) { + zio_t *rio = arg; + size_t size = BP_GET_PSIZE(bp); + void *data = zio_data_buf_alloc(size); + + zio_nowait(zio_read(rio, spa, bp, data, size, + spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); + } + return (0); +} + +static int +spa_load_verify(spa_t *spa) +{ + zio_t *rio; + spa_load_error_t sle = { 0 }; + zpool_rewind_policy_t policy; + boolean_t verify_ok = B_FALSE; + int error; + + 
zpool_get_rewind_policy(spa->spa_config, &policy); + + if (policy.zrp_request & ZPOOL_NEVER_REWIND) + return (0); + + rio = zio_root(spa, NULL, &sle, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); + + error = traverse_pool(spa, spa->spa_verify_min_txg, + TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); + + (void) zio_wait(rio); + + spa->spa_load_meta_errors = sle.sle_meta_count; + spa->spa_load_data_errors = sle.sle_data_count; + + if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && + sle.sle_data_count <= policy.zrp_maxdata) { + int64_t loss = 0; + + verify_ok = B_TRUE; + spa->spa_load_txg = spa->spa_uberblock.ub_txg; + spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; + + loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; + VERIFY(nvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); + VERIFY(nvlist_add_int64(spa->spa_load_info, + ZPOOL_CONFIG_REWIND_TIME, loss) == 0); + VERIFY(nvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); + } else { + spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; + } + + if (error) { + if (error != ENXIO && error != EIO) + error = EIO; + return (error); + } + + return (verify_ok ? 0 : EIO); +} + +/* + * Find a value in the pool props object. + */ +static void +spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) +{ + (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, + zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); +} + +/* + * Find a value in the pool directory object. + */ +static int +spa_dir_prop(spa_t *spa, const char *name, uint64_t *val) +{ + return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + name, sizeof (uint64_t), 1, val)); +} + +static int +spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) +{ + vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); + return (err); +} + +/* + * Fix up config after a partly-completed split. 
This is done with the + * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off + * pool have that entry in their config, but only the splitting one contains + * a list of all the guids of the vdevs that are being split off. + * + * This function determines what to do with that list: either rejoin + * all the disks to the pool, or complete the splitting process. To attempt + * the rejoin, each disk that is offlined is marked online again, and + * we do a reopen() call. If the vdev label for every disk that was + * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) + * then we call vdev_split() on each disk, and complete the split. + * + * Otherwise we leave the config alone, with all the vdevs in place in + * the original pool. + */ +static void +spa_try_repair(spa_t *spa, nvlist_t *config) +{ + uint_t extracted; + uint64_t *glist; + uint_t i, gcount; + nvlist_t *nvl; + vdev_t **vd; + boolean_t attempt_reopen; + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) + return; + + /* check that the config is complete */ + if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, + &glist, &gcount) != 0) + return; + + vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); + + /* attempt to online all the vdevs & validate */ + attempt_reopen = B_TRUE; + for (i = 0; i < gcount; i++) { + if (glist[i] == 0) /* vdev is hole */ + continue; + + vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); + if (vd[i] == NULL) { + /* + * Don't bother attempting to reopen the disks; + * just do the split. 
+ */ + attempt_reopen = B_FALSE; + } else { + /* attempt to re-online it */ + vd[i]->vdev_offline = B_FALSE; + } + } + + if (attempt_reopen) { + vdev_reopen(spa->spa_root_vdev); + + /* check each device to see what state it's in */ + for (extracted = 0, i = 0; i < gcount; i++) { + if (vd[i] != NULL && + vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) + break; + ++extracted; + } + } + + /* + * If every disk has been moved to the new pool, or if we never + * even attempted to look at them, then we split them off for + * good. + */ + if (!attempt_reopen || gcount == extracted) { + for (i = 0; i < gcount; i++) + if (vd[i] != NULL) + vdev_split(vd[i]); + vdev_reopen(spa->spa_root_vdev); + } + + kmem_free(vd, gcount * sizeof (vdev_t *)); +} + +static int +spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, + boolean_t mosconfig) +{ + nvlist_t *config = spa->spa_config; + char *ereport = FM_EREPORT_ZFS_POOL; + int error; + uint64_t pool_guid; + nvlist_t *nvl; + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) + return (EINVAL); + + /* + * Versioning wasn't explicitly added to the label until later, so if + * it's not present treat it as the initial version. 
+ */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &spa->spa_ubsync.ub_version) != 0) + spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; + + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, + &spa->spa_config_txg); + + if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0)) { + error = EEXIST; + } else { + spa->spa_load_guid = pool_guid; + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, + &nvl) == 0) { + VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, + KM_SLEEP) == 0); + } + + gethrestime(&spa->spa_loaded_ts); + error = spa_load_impl(spa, pool_guid, config, state, type, + mosconfig, &ereport); + } + + spa->spa_minref = refcount_count(&spa->spa_refcount); + if (error) { + if (error != EEXIST) { + spa->spa_loaded_ts.tv_sec = 0; + spa->spa_loaded_ts.tv_nsec = 0; + } + if (error != EBADF) { + zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); + } + } + spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; + spa->spa_ena = 0; + + return (error); +} + +/* + * Load an existing storage pool, using the pool's builtin spa_config as a + * source of configuration information. + */ +static int +spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, + spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, + char **ereport) +{ + int error = 0; + nvlist_t *nvroot = NULL; + vdev_t *rvd; + uberblock_t *ub = &spa->spa_uberblock; + uint64_t children, config_cache_txg = spa->spa_config_txg; + int orig_mode = spa->spa_mode; + int parse; + uint64_t obj; + + /* + * If this is an untrusted config, access the pool in read-only mode. + * This prevents things like resilvering recently removed devices. + */ + if (!mosconfig) + spa->spa_mode = FREAD; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + spa->spa_load_state = state; + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) + return (EINVAL); + + parse = (type == SPA_IMPORT_EXISTING ? 
+ VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); + + /* + * Create "The Godfather" zio to hold all async IOs + */ + spa->spa_async_zio_root = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); + + /* + * Parse the configuration into a vdev tree. We explicitly set the + * value that will be returned by spa_version() since parsing the + * configuration requires knowing the version number. + */ + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); + spa_config_exit(spa, SCL_ALL, FTAG); + + if (error != 0) + return (error); + + ASSERT(spa->spa_root_vdev == rvd); + + if (type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_guid(spa) == pool_guid); + } + + /* + * Try to open all vdevs, loading each label in the process. + */ + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + error = vdev_open(rvd); + spa_config_exit(spa, SCL_ALL, FTAG); + if (error != 0) + return (error); + + /* + * We need to validate the vdev labels against the configuration that + * we have in hand, which is dependent on the setting of mosconfig. If + * mosconfig is true then we're validating the vdev labels based on + * that config. Otherwise, we're validating against the cached config + * (zpool.cache) that was read when we loaded the zfs module, and then + * later we will recursively call spa_load() and validate against + * the vdev config. + * + * If we're assembling a new pool that's been split off from an + * existing pool, the labels haven't yet been updated so we skip + * validation for now. + */ + if (type != SPA_IMPORT_ASSEMBLE) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + error = vdev_validate(rvd); + spa_config_exit(spa, SCL_ALL, FTAG); + + if (error != 0) + return (error); + + if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) + return (ENXIO); + } + + /* + * Find the best uberblock. 
+ */ + vdev_uberblock_load(NULL, rvd, ub); + + /* + * If we weren't able to find a single valid uberblock, return failure. + */ + if (ub->ub_txg == 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); + + /* + * If the pool is newer than the code, we can't open it. + */ + if (ub->ub_version > SPA_VERSION) + return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); + + /* + * If the vdev guid sum doesn't match the uberblock, we have an + * incomplete configuration. We first check to see if the pool + * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). + * If it is, defer the vdev_guid_sum check till later so we + * can handle missing vdevs. + */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, + &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && + rvd->vdev_guid_sum != ub->ub_guid_sum) + return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); + + if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_try_repair(spa, config); + spa_config_exit(spa, SCL_ALL, FTAG); + nvlist_free(spa->spa_config_splitting); + spa->spa_config_splitting = NULL; + } + + /* + * Initialize internal SPA structures. + */ + spa->spa_state = POOL_STATE_ACTIVE; + spa->spa_ubsync = spa->spa_uberblock; + spa->spa_verify_min_txg = spa->spa_extreme_rewind ? + TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; + spa->spa_first_txg = spa->spa_last_ubsync_txg ? 
+ spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; + spa->spa_claim_max_txg = spa->spa_first_txg; + spa->spa_prev_software_version = ub->ub_software_version; + + error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); + if (error) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; + + if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + if (!mosconfig) { + uint64_t hostid; + nvlist_t *policy = NULL, *nvconfig; + + if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, + ZPOOL_CONFIG_HOSTID, &hostid) == 0) { + char *hostname; + unsigned long myhostid = 0; + + VERIFY(nvlist_lookup_string(nvconfig, + ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); + +#ifdef _KERNEL + myhostid = zone_get_hostid(NULL); +#else /* _KERNEL */ + /* + * We're emulating the system's hostid in userland, so + * we can't use zone_get_hostid(). + */ + (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); +#endif /* _KERNEL */ + if (hostid != 0 && myhostid != 0 && + hostid != myhostid) { + nvlist_free(nvconfig); + cmn_err(CE_WARN, "pool '%s' could not be " + "loaded as it was last accessed by " + "another system (host: %s hostid: 0x%lx). 
" + "See: http://www.sun.com/msg/ZFS-8000-EY", + spa_name(spa), hostname, + (unsigned long)hostid); + return (EBADF); + } + } + if (nvlist_lookup_nvlist(spa->spa_config, + ZPOOL_REWIND_POLICY, &policy) == 0) + VERIFY(nvlist_add_nvlist(nvconfig, + ZPOOL_REWIND_POLICY, policy) == 0); + + spa_config_set(spa, nvconfig); + spa_unload(spa); + spa_deactivate(spa); + spa_activate(spa, orig_mode); + + return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); + } + + if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); + if (error != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + /* + * Load the bit that tells us to use the new accounting function + * (raid-z deflation). If we have an older pool, this will not + * be present. + */ + error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, + &spa->spa_creation_version); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + /* + * Load the persistent error log. If we have an older pool, this will + * not be present. + */ + error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, + &spa->spa_errlog_scrub); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + /* + * Load the history object. If we have an older pool, this + * will not be present. 
+ */ + error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + /* + * If we're assembling the pool from the split-off vdevs of + * an existing pool, we don't want to attach the spares & cache + * devices. + */ + + /* + * Load any hot spares for this pool. + */ + error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); + if (load_nvlist(spa, spa->spa_spares.sav_object, + &spa->spa_spares.sav_config) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + } else if (error == 0) { + spa->spa_spares.sav_sync = B_TRUE; + } + + /* + * Load any level 2 ARC devices for this pool. 
+ */ + error = spa_dir_prop(spa, DMU_POOL_L2CACHE, + &spa->spa_l2cache.sav_object); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); + if (load_nvlist(spa, spa->spa_l2cache.sav_object, + &spa->spa_l2cache.sav_config) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + } else if (error == 0) { + spa->spa_l2cache.sav_sync = B_TRUE; + } + + spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); + + error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); + if (error && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + if (error == 0) { + uint64_t autoreplace; + + spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); + spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); + spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); + spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); + spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, + &spa->spa_dedup_ditto); + + spa->spa_autoreplace = (autoreplace != 0); + } + + /* + * If the 'autoreplace' property is set, then post a resource notifying + * the ZFS DE that it should not issue any faults for unopenable + * devices. We also iterate over the vdevs, and post a sysevent for any + * unopenable vdevs so that the normal autoreplace handler can take + * over. + */ + if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { + spa_check_removed(spa->spa_root_vdev); + /* + * For the import case, this is done in spa_import(), because + * at this point we're using the spare definitions from + * the MOS config, not necessarily from the userland config. 
+ */ + if (state != SPA_LOAD_IMPORT) { + spa_aux_check_removed(&spa->spa_spares); + spa_aux_check_removed(&spa->spa_l2cache); + } + } + + /* + * Load the vdev state for all toplevel vdevs. + */ + vdev_load(rvd); + + /* + * Propagate the leaf DTLs we just loaded all the way up the tree. + */ + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + vdev_dtl_reassess(rvd, 0, 0, B_FALSE); + spa_config_exit(spa, SCL_ALL, FTAG); + + /* + * Load the DDTs (dedup tables). + */ + error = ddt_load(spa); + if (error != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + spa_update_dspace(spa); + + /* + * Validate the config, using the MOS config to fill in any + * information which might be missing. If we fail to validate + * the config then declare the pool unfit for use. If we're + * assembling a pool from a split, the log is not transferred + * over. + */ + if (type != SPA_IMPORT_ASSEMBLE) { + nvlist_t *nvconfig; + + if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + if (!spa_config_valid(spa, nvconfig)) { + nvlist_free(nvconfig); + return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, + ENXIO)); + } + nvlist_free(nvconfig); + + /* + * Now that we've validate the config, check the state of the + * root vdev. If it can't be opened, it indicates one or + * more toplevel vdevs are faulted. + */ + if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) + return (ENXIO); + + if (spa_check_logs(spa)) { + *ereport = FM_EREPORT_ZFS_LOG_REPLAY; + return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); + } + } + + /* + * We've successfully opened the pool, verify that we're ready + * to start pushing transactions. 
+ */ + if (state != SPA_LOAD_TRYIMPORT) { + if (error = spa_load_verify(spa)) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + error)); + } + + if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || + spa->spa_load_max_txg == UINT64_MAX)) { + dmu_tx_t *tx; + int need_update = B_FALSE; + + ASSERT(state != SPA_LOAD_TRYIMPORT); + + /* + * Claim log blocks that haven't been committed yet. + * This must all happen in a single txg. + * Note: spa_claim_max_txg is updated by spa_claim_notify(), + * invoked from zil_claim_log_block()'s i/o done callback. + * Price of rollback is that we abandon the log. + */ + spa->spa_claiming = B_TRUE; + + tx = dmu_tx_create_assigned(spa_get_dsl(spa), + spa_first_txg(spa)); + (void) dmu_objset_find(spa_name(spa), + zil_claim, tx, DS_FIND_CHILDREN); + dmu_tx_commit(tx); + + spa->spa_claiming = B_FALSE; + + spa_set_log_state(spa, SPA_LOG_GOOD); + spa->spa_sync_on = B_TRUE; + txg_sync_start(spa->spa_dsl_pool); + + /* + * Wait for all claims to sync. We sync up to the highest + * claimed log block birth time so that claimed log blocks + * don't appear to be from the future. spa_claim_max_txg + * will have been set for us by either zil_check_log_chain() + * (invoked from spa_check_logs()) or zil_claim() above. + */ + txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); + + /* + * If the config cache is stale, or we have uninitialized + * metaslabs (see spa_vdev_add()), then update the config. + * + * If this is a verbatim import, trust the current + * in-core spa_config and update the disk labels. + */ + if (config_cache_txg != spa->spa_config_txg || + state == SPA_LOAD_IMPORT || + state == SPA_LOAD_RECOVER || + (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) + need_update = B_TRUE; + + for (int c = 0; c < rvd->vdev_children; c++) + if (rvd->vdev_child[c]->vdev_ms_array == 0) + need_update = B_TRUE; + + /* + * Update the config cache asychronously in case we're the + * root pool, in which case the config cache isn't writable yet. 
+ */ + if (need_update) + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); + + /* + * Check all DTLs to see if anything needs resilvering. + */ + if (!dsl_scan_resilvering(spa->spa_dsl_pool) && + vdev_resilver_needed(rvd, NULL, NULL)) + spa_async_request(spa, SPA_ASYNC_RESILVER); + + /* + * Delete any inconsistent datasets. + */ + (void) dmu_objset_find(spa_name(spa), + dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); + + /* + * Clean up any stale temporary dataset userrefs. + */ + dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); + } + + return (0); +} + +static int +spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) +{ + int mode = spa->spa_mode; + + spa_unload(spa); + spa_deactivate(spa); + + spa->spa_load_max_txg--; + + spa_activate(spa, mode); + spa_async_suspend(spa); + + return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig)); +} + +static int +spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, + uint64_t max_request, int rewind_flags) +{ + nvlist_t *config = NULL; + int load_error, rewind_error; + uint64_t safe_rewind_txg; + uint64_t min_txg; + + if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { + spa->spa_load_max_txg = spa->spa_load_txg; + spa_set_log_state(spa, SPA_LOG_CLEAR); + } else { + spa->spa_load_max_txg = max_request; + } + + load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, + mosconfig); + if (load_error == 0) + return (0); + + if (spa->spa_root_vdev != NULL) + config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); + + spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; + spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; + + if (rewind_flags & ZPOOL_NEVER_REWIND) { + nvlist_free(config); + return (load_error); + } + + /* Price of rolling back is discarding txgs, including log */ + if (state == SPA_LOAD_RECOVER) + spa_set_log_state(spa, SPA_LOG_CLEAR); + + spa->spa_load_max_txg = spa->spa_last_ubsync_txg; + safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; + 
min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? + TXG_INITIAL : safe_rewind_txg; + + /* + * Continue as long as we're finding errors, we're still within + * the acceptable rewind range, and we're still finding uberblocks + */ + while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && + spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { + if (spa->spa_load_max_txg < safe_rewind_txg) + spa->spa_extreme_rewind = B_TRUE; + rewind_error = spa_load_retry(spa, state, mosconfig); + } + + spa->spa_extreme_rewind = B_FALSE; + spa->spa_load_max_txg = UINT64_MAX; + + if (config && (rewind_error || state != SPA_LOAD_RECOVER)) + spa_config_set(spa, config); + + return (state == SPA_LOAD_RECOVER ? rewind_error : load_error); +} + +/* + * Pool Open/Import + * + * The import case is identical to an open except that the configuration is sent + * down from userland, instead of grabbed from the configuration cache. For the + * case of an open, the pool configuration will exist in the + * POOL_STATE_UNINITIALIZED state. + * + * The stats information (gen/count/ustats) is used to gather vdev statistics at + * the same time open the pool, without having to keep around the spa_t in some + * ambiguous state. + */ +static int +spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, + nvlist_t **config) +{ + spa_t *spa; + spa_load_state_t state = SPA_LOAD_OPEN; + int error; + int locked = B_FALSE; + + *spapp = NULL; + + /* + * As disgusting as this is, we need to support recursive calls to this + * function because dsl_dir_open() is called during spa_load(), and ends + * up calling spa_open() again. The real fix is to figure out how to + * avoid dsl_dir_open() calling this in the first place. 
+ */ + if (mutex_owner(&spa_namespace_lock) != curthread) { + mutex_enter(&spa_namespace_lock); + locked = B_TRUE; + } + + if ((spa = spa_lookup(pool)) == NULL) { + if (locked) + mutex_exit(&spa_namespace_lock); + return (ENOENT); + } + + if (spa->spa_state == POOL_STATE_UNINITIALIZED) { + zpool_rewind_policy_t policy; + + zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config, + &policy); + if (policy.zrp_request & ZPOOL_DO_REWIND) + state = SPA_LOAD_RECOVER; + + spa_activate(spa, spa_mode_global); + + if (state != SPA_LOAD_RECOVER) + spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; + + error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, + policy.zrp_request); + + if (error == EBADF) { + /* + * If vdev_validate() returns failure (indicated by + * EBADF), it indicates that one of the vdevs indicates + * that the pool has been exported or destroyed. If + * this is the case, the config cache is out of sync and + * we should remove the pool from the namespace. + */ + spa_unload(spa); + spa_deactivate(spa); + spa_config_sync(spa, B_TRUE, B_TRUE); + spa_remove(spa); + if (locked) + mutex_exit(&spa_namespace_lock); + return (ENOENT); + } + + if (error) { + /* + * We can't open the pool, but we still have useful + * information: the state of each vdev after the + * attempted vdev_open(). Return this to the user. + */ + if (config != NULL && spa->spa_config) { + VERIFY(nvlist_dup(spa->spa_config, config, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist(*config, + ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); + } + spa_unload(spa); + spa_deactivate(spa); + spa->spa_last_open_failed = error; + if (locked) + mutex_exit(&spa_namespace_lock); + *spapp = NULL; + return (error); + } + } + + spa_open_ref(spa, tag); + + if (config != NULL) + *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); + + /* + * If we've recovered the pool, pass back any information we + * gathered while doing the load. 
+ */ + if (state == SPA_LOAD_RECOVER) { + VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); + } + + if (locked) { + spa->spa_last_open_failed = 0; + spa->spa_last_ubsync_txg = 0; + spa->spa_load_txg = 0; + mutex_exit(&spa_namespace_lock); + } + + *spapp = spa; + + return (0); +} + +int +spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, + nvlist_t **config) +{ + return (spa_open_common(name, spapp, tag, policy, config)); +} + +int +spa_open(const char *name, spa_t **spapp, void *tag) +{ + return (spa_open_common(name, spapp, tag, NULL, NULL)); +} + +/* + * Lookup the given spa_t, incrementing the inject count in the process, + * preventing it from being exported or destroyed. + */ +spa_t * +spa_inject_addref(char *name) +{ + spa_t *spa; + + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(name)) == NULL) { + mutex_exit(&spa_namespace_lock); + return (NULL); + } + spa->spa_inject_ref++; + mutex_exit(&spa_namespace_lock); + + return (spa); +} + +void +spa_inject_delref(spa_t *spa) +{ + mutex_enter(&spa_namespace_lock); + spa->spa_inject_ref--; + mutex_exit(&spa_namespace_lock); +} + +/* + * Add spares device information to the nvlist. 
+ */ +static void +spa_add_spares(spa_t *spa, nvlist_t *config) +{ + nvlist_t **spares; + uint_t i, nspares; + nvlist_t *nvroot; + uint64_t guid; + vdev_stat_t *vs; + uint_t vsc; + uint64_t pool; + + ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + + if (spa->spa_spares.sav_count == 0) + return; + + VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + if (nspares != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + + /* + * Go through and find any spares which have since been + * repurposed as an active spare. If this is the case, update + * their status appropriately. + */ + for (i = 0; i < nspares; i++) { + VERIFY(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + if (spa_spare_exists(guid, &pool, NULL) && + pool != 0ULL) { + VERIFY(nvlist_lookup_uint64_array( + spares[i], ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + vs->vs_state = VDEV_STATE_CANT_OPEN; + vs->vs_aux = VDEV_AUX_SPARED; + } + } + } +} + +/* + * Add l2cache device information to the nvlist, including vdev stats. 
+ */ +static void +spa_add_l2cache(spa_t *spa, nvlist_t *config) +{ + nvlist_t **l2cache; + uint_t i, j, nl2cache; + nvlist_t *nvroot; + uint64_t guid; + vdev_t *vd; + vdev_stat_t *vs; + uint_t vsc; + + ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + + if (spa->spa_l2cache.sav_count == 0) + return; + + VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); + if (nl2cache != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); + + /* + * Update level 2 cache device stats. + */ + + for (i = 0; i < nl2cache; i++) { + VERIFY(nvlist_lookup_uint64(l2cache[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + + vd = NULL; + for (j = 0; j < spa->spa_l2cache.sav_count; j++) { + if (guid == + spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { + vd = spa->spa_l2cache.sav_vdevs[j]; + break; + } + } + ASSERT(vd != NULL); + + VERIFY(nvlist_lookup_uint64_array(l2cache[i], + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) + == 0); + vdev_get_stats(vd, vs); + } + } +} + +int +spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) +{ + int error; + spa_t *spa; + + *config = NULL; + error = spa_open_common(name, &spa, FTAG, NULL, config); + + if (spa != NULL) { + /* + * This still leaves a window of inconsistency where the spares + * or l2cache devices could change and the config would be + * self-inconsistent. 
+ */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + + if (*config != NULL) { + uint64_t loadtimes[2]; + + loadtimes[0] = spa->spa_loaded_ts.tv_sec; + loadtimes[1] = spa->spa_loaded_ts.tv_nsec; + VERIFY(nvlist_add_uint64_array(*config, + ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); + + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_ERRCOUNT, + spa_get_errlog_size(spa)) == 0); + + if (spa_suspended(spa)) + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED, + spa->spa_failmode) == 0); + + spa_add_spares(spa, *config); + spa_add_l2cache(spa, *config); + } + } + + /* + * We want to get the alternate root even for faulted pools, so we cheat + * and call spa_lookup() directly. + */ + if (altroot) { + if (spa == NULL) { + mutex_enter(&spa_namespace_lock); + spa = spa_lookup(name); + if (spa) + spa_altroot(spa, altroot, buflen); + else + altroot[0] = '\0'; + spa = NULL; + mutex_exit(&spa_namespace_lock); + } else { + spa_altroot(spa, altroot, buflen); + } + } + + if (spa != NULL) { + spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_close(spa, FTAG); + } + + return (error); +} + +/* + * Validate that the auxiliary device array is well formed. We must have an + * array of nvlists, each which describes a valid leaf vdev. If this is an + * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be + * specified, as long as they are well-formed. + */ +static int +spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, + spa_aux_vdev_t *sav, const char *config, uint64_t version, + vdev_labeltype_t label) +{ + nvlist_t **dev; + uint_t i, ndev; + vdev_t *vd; + int error; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + /* + * It's acceptable to have no devs specified. + */ + if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) + return (0); + + if (ndev == 0) + return (EINVAL); + + /* + * Make sure the pool is formatted with a version that supports this + * device type. 
+ */ + if (spa_version(spa) < version) + return (ENOTSUP); + + /* + * Set the pending device list so we correctly handle device in-use + * checking. + */ + sav->sav_pending = dev; + sav->sav_npending = ndev; + + for (i = 0; i < ndev; i++) { + if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, + mode)) != 0) + goto out; + + if (!vd->vdev_ops->vdev_op_leaf) { + vdev_free(vd); + error = EINVAL; + goto out; + } + + /* + * The L2ARC currently only supports disk devices in + * kernel context. For user-level testing, we allow it. + */ +#ifdef _KERNEL + if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && + strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { + error = ENOTBLK; + goto out; + } +#endif + vd->vdev_top = vd; + + if ((error = vdev_open(vd)) == 0 && + (error = vdev_label_init(vd, crtxg, label)) == 0) { + VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + } + + vdev_free(vd); + + if (error && + (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) + goto out; + else + error = 0; + } + +out: + sav->sav_pending = NULL; + sav->sav_npending = 0; + return (error); +} + +static int +spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) +{ + int error; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, + &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, + VDEV_LABEL_SPARE)) != 0) { + return (error); + } + + return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, + &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, + VDEV_LABEL_L2CACHE)); +} + +static void +spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, + const char *config) +{ + int i; + + if (sav->sav_config != NULL) { + nvlist_t **olddevs; + uint_t oldndevs; + nvlist_t **newdevs; + + /* + * Generate new dev list by concatentating with the + * current dev list. 
+ */ + VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, + &olddevs, &oldndevs) == 0); + + newdevs = kmem_alloc(sizeof (void *) * + (ndevs + oldndevs), KM_SLEEP); + for (i = 0; i < oldndevs; i++) + VERIFY(nvlist_dup(olddevs[i], &newdevs[i], + KM_SLEEP) == 0); + for (i = 0; i < ndevs; i++) + VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], + KM_SLEEP) == 0); + + VERIFY(nvlist_remove(sav->sav_config, config, + DATA_TYPE_NVLIST_ARRAY) == 0); + + VERIFY(nvlist_add_nvlist_array(sav->sav_config, + config, newdevs, ndevs + oldndevs) == 0); + for (i = 0; i < oldndevs + ndevs; i++) + nvlist_free(newdevs[i]); + kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); + } else { + /* + * Generate a new dev list. + */ + VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, + devs, ndevs) == 0); + } +} + +/* + * Stop and drop level 2 ARC devices + */ +void +spa_l2cache_drop(spa_t *spa) +{ + vdev_t *vd; + int i; + spa_aux_vdev_t *sav = &spa->spa_l2cache; + + for (i = 0; i < sav->sav_count; i++) { + uint64_t pool; + + vd = sav->sav_vdevs[i]; + ASSERT(vd != NULL); + + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); + if (vd->vdev_isl2cache) + spa_l2cache_remove(vd); + vdev_clear_stats(vd); + (void) vdev_close(vd); + } +} + +/* + * Pool Creation + */ +int +spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, + const char *history_str, nvlist_t *zplprops) +{ + spa_t *spa; + char *altroot = NULL; + vdev_t *rvd; + dsl_pool_t *dp; + dmu_tx_t *tx; + int error = 0; + uint64_t txg = TXG_INITIAL; + nvlist_t **spares, **l2cache; + uint_t nspares, nl2cache; + uint64_t version, obj; + + /* + * If this pool already exists, return failure. + */ + mutex_enter(&spa_namespace_lock); + if (spa_lookup(pool) != NULL) { + mutex_exit(&spa_namespace_lock); + return (EEXIST); + } + + /* + * Allocate a new spa_t structure. 
+ */ + (void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); + spa = spa_add(pool, NULL, altroot); + spa_activate(spa, spa_mode_global); + + if (props && (error = spa_prop_validate(spa, props))) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } + + if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), + &version) != 0) + version = SPA_VERSION; + ASSERT(version <= SPA_VERSION); + + spa->spa_first_txg = txg; + spa->spa_uberblock.ub_txg = txg - 1; + spa->spa_uberblock.ub_version = version; + spa->spa_ubsync = spa->spa_uberblock; + + /* + * Create "The Godfather" zio to hold all async IOs + */ + spa->spa_async_zio_root = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); + + /* + * Create the root vdev. + */ + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); + + ASSERT(error != 0 || rvd != NULL); + ASSERT(error != 0 || spa->spa_root_vdev == rvd); + + if (error == 0 && !zfs_allocatable_devs(nvroot)) + error = EINVAL; + + if (error == 0 && + (error = vdev_create(rvd, txg, B_FALSE)) == 0 && + (error = spa_validate_aux(spa, nvroot, txg, + VDEV_ALLOC_ADD)) == 0) { + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_metaslab_set_size(rvd->vdev_child[c]); + vdev_expand(rvd->vdev_child[c], txg); + } + } + + spa_config_exit(spa, SCL_ALL, FTAG); + + if (error != 0) { + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } + + /* + * Get the list of spares, if specified. 
+ */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_spares.sav_sync = B_TRUE; + } + + /* + * Get the list of level 2 cache devices, if specified. + */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_l2cache.sav_sync = B_TRUE; + } + + spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); + spa->spa_meta_objset = dp->dp_meta_objset; + + /* + * Create DDTs (dedup tables). + */ + ddt_create(spa); + + spa_update_dspace(spa); + + tx = dmu_tx_create_assigned(dp, txg); + + /* + * Create the pool config object. + */ + spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, + DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, + DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); + + if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, + sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { + cmn_err(CE_PANIC, "failed to add pool config"); + } + + if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, + sizeof (uint64_t), 1, &version, tx) != 0) { + cmn_err(CE_PANIC, "failed to add pool version"); + } + + /* Newly created pools with the right version are always deflated. 
*/ + if (version >= SPA_VERSION_RAIDZ_DEFLATE) { + spa->spa_deflate = TRUE; + if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { + cmn_err(CE_PANIC, "failed to add deflate"); + } + } + + /* + * Create the deferred-free bpobj. Turn off compression + * because sync-to-convergence takes longer if the blocksize + * keeps changing. + */ + obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); + dmu_object_set_compress(spa->spa_meta_objset, obj, + ZIO_COMPRESS_OFF, tx); + if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, + sizeof (uint64_t), 1, &obj, tx) != 0) { + cmn_err(CE_PANIC, "failed to add bpobj"); + } + VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, + spa->spa_meta_objset, obj)); + + /* + * Create the pool's history object. + */ + if (version >= SPA_VERSION_ZPOOL_HISTORY) + spa_history_create_obj(spa, tx); + + /* + * Set pool properties. + */ + spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); + spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); + spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); + spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); + + if (props != NULL) { + spa_configfile_set(spa, props, B_FALSE); + spa_sync_props(spa, props, tx); + } + + dmu_tx_commit(tx); + + spa->spa_sync_on = B_TRUE; + txg_sync_start(spa->spa_dsl_pool); + + /* + * We explicitly wait for the first transaction to complete so that our + * bean counters are appropriately updated. 
+ */ + txg_wait_synced(spa->spa_dsl_pool, txg); + + spa_config_sync(spa, B_FALSE, B_TRUE); + + if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) + (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); + spa_history_log_version(spa, LOG_POOL_CREATE); + + spa->spa_minref = refcount_count(&spa->spa_refcount); + + mutex_exit(&spa_namespace_lock); + + return (0); +} + +#ifdef _KERNEL +/* + * Get the root pool information from the root disk, then import the root pool + * during the system boot up time. + */ +extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); + +static nvlist_t * +spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) +{ + nvlist_t *config; + nvlist_t *nvtop, *nvroot; + uint64_t pgid; + + if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) + return (NULL); + + /* + * Add this top-level vdev to the child array. + */ + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvtop) == 0); + VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &pgid) == 0); + VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); + + /* + * Put this pool's top-level vdevs into a root vdev. + */ + VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) == 0); + VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); + VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); + VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &nvtop, 1) == 0); + + /* + * Replace the existing vdev_tree with the new root vdev in + * this pool's configuration (remove the old, add the new). + */ + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); + nvlist_free(nvroot); + return (config); +} + +/* + * Walk the vdev tree and see if we can find a device with "better" + * configuration. A configuration is "better" if the label on that + * device has a more recent txg. 
+ */ +static void +spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) +{ + for (int c = 0; c < vd->vdev_children; c++) + spa_alt_rootvdev(vd->vdev_child[c], avd, txg); + + if (vd->vdev_ops->vdev_op_leaf) { + nvlist_t *label; + uint64_t label_txg; + + if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, + &label) != 0) + return; + + VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, + &label_txg) == 0); + + /* + * Do we have a better boot device? + */ + if (label_txg > *txg) { + *txg = label_txg; + *avd = vd; + } + nvlist_free(label); + } +} + +/* + * Import a root pool. + * + * For x86. devpath_list will consist of devid and/or physpath name of + * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). + * The GRUB "findroot" command will return the vdev we should boot. + * + * For Sparc, devpath_list consists the physpath name of the booting device + * no matter the rootpool is a single device pool or a mirrored pool. + * e.g. + * "/pci@1f,0/ide@d/disk@0,0:a" + */ +int +spa_import_rootpool(char *devpath, char *devid) +{ + spa_t *spa; + vdev_t *rvd, *bvd, *avd = NULL; + nvlist_t *config, *nvtop; + uint64_t guid, txg; + char *pname; + int error; + + /* + * Read the label from the boot device and generate a configuration. 
+ */ + config = spa_generate_rootconf(devpath, devid, &guid); +#if defined(_OBP) && defined(_KERNEL) + if (config == NULL) { + if (strstr(devpath, "/iscsi/ssd") != NULL) { + /* iscsi boot */ + get_iscsi_bootpath_phy(devpath); + config = spa_generate_rootconf(devpath, devid, &guid); + } + } +#endif + if (config == NULL) { + cmn_err(CE_NOTE, "Can not read the pool label from '%s'", + devpath); + return (EIO); + } + + VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &pname) == 0); + VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); + + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(pname)) != NULL) { + /* + * Remove the existing root pool from the namespace so that we + * can replace it with the correct config we just read in. + */ + spa_remove(spa); + } + + spa = spa_add(pname, config, NULL); + spa->spa_is_root = B_TRUE; + spa->spa_import_flags = ZFS_IMPORT_VERBATIM; + + /* + * Build up a vdev tree based on the boot device's label config. + */ + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvtop) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, + VDEV_ALLOC_ROOTPOOL); + spa_config_exit(spa, SCL_ALL, FTAG); + if (error) { + mutex_exit(&spa_namespace_lock); + nvlist_free(config); + cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", + pname); + return (error); + } + + /* + * Get the boot vdev. + */ + if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { + cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", + (u_longlong_t)guid); + error = ENOENT; + goto out; + } + + /* + * Determine if there is a better boot device. + */ + avd = bvd; + spa_alt_rootvdev(rvd, &avd, &txg); + if (avd != bvd) { + cmn_err(CE_NOTE, "The boot device is 'degraded'. 
Please " + "try booting from '%s'", avd->vdev_path); + error = EINVAL; + goto out; + } + + /* + * If the boot device is part of a spare vdev then ensure that + * we're booting off the active spare. + */ + if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && + !bvd->vdev_isspare) { + cmn_err(CE_NOTE, "The boot device is currently spared. Please " + "try booting from '%s'", + bvd->vdev_parent-> + vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); + error = EINVAL; + goto out; + } + + error = 0; + spa_history_log_version(spa, LOG_POOL_IMPORT); +out: + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + vdev_free(rvd); + spa_config_exit(spa, SCL_ALL, FTAG); + mutex_exit(&spa_namespace_lock); + + nvlist_free(config); + return (error); +} + +#endif + +/* + * Import a non-root pool into the system. + */ +int +spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) +{ + spa_t *spa; + char *altroot = NULL; + spa_load_state_t state = SPA_LOAD_IMPORT; + zpool_rewind_policy_t policy; + uint64_t mode = spa_mode_global; + uint64_t readonly = B_FALSE; + int error; + nvlist_t *nvroot; + nvlist_t **spares, **l2cache; + uint_t nspares, nl2cache; + + /* + * If a pool with this name exists, return failure. + */ + mutex_enter(&spa_namespace_lock); + if (spa_lookup(pool) != NULL) { + mutex_exit(&spa_namespace_lock); + return (EEXIST); + } + + /* + * Create and initialize the spa structure. + */ + (void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); + (void) nvlist_lookup_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); + if (readonly) + mode = FREAD; + spa = spa_add(pool, config, altroot); + spa->spa_import_flags = flags; + + /* + * Verbatim import - Take a pool and insert it into the namespace + * as if it had been loaded at boot. 
+ */ + if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { + if (props != NULL) + spa_configfile_set(spa, props, B_FALSE); + + spa_config_sync(spa, B_FALSE, B_TRUE); + + mutex_exit(&spa_namespace_lock); + spa_history_log_version(spa, LOG_POOL_IMPORT); + + return (0); + } + + spa_activate(spa, mode); + + /* + * Don't start async tasks until we know everything is healthy. + */ + spa_async_suspend(spa); + + zpool_get_rewind_policy(config, &policy); + if (policy.zrp_request & ZPOOL_DO_REWIND) + state = SPA_LOAD_RECOVER; + + /* + * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig + * because the user-supplied config is actually the one to trust when + * doing an import. + */ + if (state != SPA_LOAD_RECOVER) + spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; + + error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg, + policy.zrp_request); + + /* + * Propagate anything learned while loading the pool and pass it + * back to caller (i.e. rewind info, missing devices, etc). + */ + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + /* + * Toss any existing sparelist, as it doesn't have any validity + * anymore, and conflicts with spa_has_spare(). 
+ */ + if (spa->spa_spares.sav_config) { + nvlist_free(spa->spa_spares.sav_config); + spa->spa_spares.sav_config = NULL; + spa_load_spares(spa); + } + if (spa->spa_l2cache.sav_config) { + nvlist_free(spa->spa_l2cache.sav_config); + spa->spa_l2cache.sav_config = NULL; + spa_load_l2cache(spa); + } + + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (error == 0) + error = spa_validate_aux(spa, nvroot, -1ULL, + VDEV_ALLOC_SPARE); + if (error == 0) + error = spa_validate_aux(spa, nvroot, -1ULL, + VDEV_ALLOC_L2CACHE); + spa_config_exit(spa, SCL_ALL, FTAG); + + if (props != NULL) + spa_configfile_set(spa, props, B_FALSE); + + if (error != 0 || (props && spa_writeable(spa) && + (error = spa_prop_set(spa, props)))) { + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } + + spa_async_resume(spa); + + /* + * Override any spares and level 2 cache devices as specified by + * the user, as these may have correct device names/devids, etc. 
+ */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + if (spa->spa_spares.sav_config) + VERIFY(nvlist_remove(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_spares.sav_sync = B_TRUE; + } + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + if (spa->spa_l2cache.sav_config) + VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_l2cache.sav_sync = B_TRUE; + } + + /* + * Check for any removed devices. + */ + if (spa->spa_autoreplace) { + spa_aux_check_removed(&spa->spa_spares); + spa_aux_check_removed(&spa->spa_l2cache); + } + + if (spa_writeable(spa)) { + /* + * Update the config cache to include the newly-imported pool. + */ + spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); + } + + /* + * It's possible that the pool was expanded while it was exported. + * We kick off an async task to handle this for us. 
+ */ + spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); + + mutex_exit(&spa_namespace_lock); + spa_history_log_version(spa, LOG_POOL_IMPORT); + + return (0); +} + +nvlist_t * +spa_tryimport(nvlist_t *tryconfig) +{ + nvlist_t *config = NULL; + char *poolname; + spa_t *spa; + uint64_t state; + int error; + + if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) + return (NULL); + + if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) + return (NULL); + + /* + * Create and initialize the spa structure. + */ + mutex_enter(&spa_namespace_lock); + spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); + spa_activate(spa, FREAD); + + /* + * Pass off the heavy lifting to spa_load(). + * Pass TRUE for mosconfig because the user-supplied config + * is actually the one to trust when doing an import. + */ + error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE); + + /* + * If 'tryconfig' was at least parsable, return the current config. + */ + if (spa->spa_root_vdev != NULL) { + config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); + VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, + poolname) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, + state) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, + spa->spa_uberblock.ub_timestamp) == 0); + + /* + * If the bootfs property exists on this pool then we + * copy it out so that external consumers can tell which + * pools are bootable. + */ + if ((!error || error == EEXIST) && spa->spa_bootfs) { + char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * We have to play games with the name since the + * pool was opened as TRYIMPORT_NAME. 
+ */ + if (dsl_dsobj_to_dsname(spa_name(spa), + spa->spa_bootfs, tmpname) == 0) { + char *cp; + char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + cp = strchr(tmpname, '/'); + if (cp == NULL) { + (void) strlcpy(dsname, tmpname, + MAXPATHLEN); + } else { + (void) snprintf(dsname, MAXPATHLEN, + "%s/%s", poolname, ++cp); + } + VERIFY(nvlist_add_string(config, + ZPOOL_CONFIG_BOOTFS, dsname) == 0); + kmem_free(dsname, MAXPATHLEN); + } + kmem_free(tmpname, MAXPATHLEN); + } + + /* + * Add the list of hot spares and level 2 cache devices. + */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_add_spares(spa, config); + spa_add_l2cache(spa, config); + spa_config_exit(spa, SCL_CONFIG, FTAG); + } + + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + + return (config); +} + +/* + * Pool export/destroy + * + * The act of destroying or exporting a pool is very simple. We make sure there + * is no more pending I/O and any references to the pool are gone. Then, we + * update the pool state and sync all the labels to disk, removing the + * configuration from the cache afterwards. If the 'hardforce' flag is set, then + * we don't sync the labels or remove the configuration cache. + */ +static int +spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, + boolean_t force, boolean_t hardforce) +{ + spa_t *spa; + + if (oldconfig) + *oldconfig = NULL; + + if (!(spa_mode_global & FWRITE)) + return (EROFS); + + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(pool)) == NULL) { + mutex_exit(&spa_namespace_lock); + return (ENOENT); + } + + /* + * Put a hold on the pool, drop the namespace lock, stop async tasks, + * reacquire the namespace lock, and see if we can export. 
+ */ + spa_open_ref(spa, FTAG); + mutex_exit(&spa_namespace_lock); + spa_async_suspend(spa); + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + + /* + * The pool will be in core if it's openable, + * in which case we can modify its state. + */ + if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { + /* + * Objsets may be open only because they're dirty, so we + * have to force it to sync before checking spa_refcnt. + */ + txg_wait_synced(spa->spa_dsl_pool, 0); + + /* + * A pool cannot be exported or destroyed if there are active + * references. If we are resetting a pool, allow references by + * fault injection handlers. + */ + if (!spa_refcount_zero(spa) || + (spa->spa_inject_ref != 0 && + new_state != POOL_STATE_UNINITIALIZED)) { + spa_async_resume(spa); + mutex_exit(&spa_namespace_lock); + return (EBUSY); + } + + /* + * A pool cannot be exported if it has an active shared spare. + * This is to prevent other pools stealing the active spare + * from an exported pool. At user's own will, such pool can + * be forcedly exported. + */ + if (!force && new_state == POOL_STATE_EXPORTED && + spa_has_active_shared_spare(spa)) { + spa_async_resume(spa); + mutex_exit(&spa_namespace_lock); + return (EXDEV); + } + + /* + * We want this to be reflected on every label, + * so mark them all dirty. spa_unload() will do the + * final sync that pushes these changes out. 
+ */ + if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa->spa_state = new_state; + spa->spa_final_txg = spa_last_synced_txg(spa) + + TXG_DEFER_SIZE + 1; + vdev_config_dirty(spa->spa_root_vdev); + spa_config_exit(spa, SCL_ALL, FTAG); + } + } + + spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); + + if (spa->spa_state != POOL_STATE_UNINITIALIZED) { + spa_unload(spa); + spa_deactivate(spa); + } + + if (oldconfig && spa->spa_config) + VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); + + if (new_state != POOL_STATE_UNINITIALIZED) { + if (!hardforce) + spa_config_sync(spa, B_TRUE, B_TRUE); + spa_remove(spa); + } + mutex_exit(&spa_namespace_lock); + + return (0); +} + +/* + * Destroy a storage pool. + */ +int +spa_destroy(char *pool) +{ + return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, + B_FALSE, B_FALSE)); +} + +/* + * Export a storage pool. + */ +int +spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, + boolean_t hardforce) +{ + return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, + force, hardforce)); +} + +/* + * Similar to spa_export(), this unloads the spa_t without actually removing it + * from the namespace in any way. + */ +int +spa_reset(char *pool) +{ + return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, + B_FALSE, B_FALSE)); +} + +/* + * ========================================================================== + * Device manipulation + * ========================================================================== + */ + +/* + * Add a device to a storage pool. 
+ */ +int +spa_vdev_add(spa_t *spa, nvlist_t *nvroot) +{ + uint64_t txg, id; + int error; + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd, *tvd; + nvlist_t **spares, **l2cache; + uint_t nspares, nl2cache; + + ASSERT(spa_writeable(spa)); + + txg = spa_vdev_enter(spa); + + if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, + VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, NULL, txg, error)); + + spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, + &nspares) != 0) + nspares = 0; + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, + &nl2cache) != 0) + nl2cache = 0; + + if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) + return (spa_vdev_exit(spa, vd, txg, EINVAL)); + + if (vd->vdev_children != 0 && + (error = vdev_create(vd, txg, B_FALSE)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); + + /* + * We must validate the spares and l2cache devices after checking the + * children. Otherwise, vdev_inuse() will blindly overwrite the spare. + */ + if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); + + /* + * Transfer each new top-level vdev from vd to rvd. + */ + for (int c = 0; c < vd->vdev_children; c++) { + + /* + * Set the vdev id to the first hole, if one exists. 
+ */ + for (id = 0; id < rvd->vdev_children; id++) { + if (rvd->vdev_child[id]->vdev_ishole) { + vdev_free(rvd->vdev_child[id]); + break; + } + } + tvd = vd->vdev_child[c]; + vdev_remove_child(vd, tvd); + tvd->vdev_id = id; + vdev_add_child(rvd, tvd); + vdev_config_dirty(tvd); + } + + if (nspares != 0) { + spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, + ZPOOL_CONFIG_SPARES); + spa_load_spares(spa); + spa->spa_spares.sav_sync = B_TRUE; + } + + if (nl2cache != 0) { + spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, + ZPOOL_CONFIG_L2CACHE); + spa_load_l2cache(spa); + spa->spa_l2cache.sav_sync = B_TRUE; + } + + /* + * We have to be careful when adding new vdevs to an existing pool. + * If other threads start allocating from these vdevs before we + * sync the config cache, and we lose power, then upon reboot we may + * fail to open the pool because there are DVAs that the config cache + * can't translate. Therefore, we first add the vdevs without + * initializing metaslabs; sync the config cache (via spa_vdev_exit()); + * and then let spa_config_update() initialize the new metaslabs. + * + * spa_load() checks for added-but-not-initialized vdevs, so that + * if we lose power at any point in this sequence, the remaining + * steps will be completed the next time we load the pool. + */ + (void) spa_vdev_exit(spa, vd, txg, 0); + + mutex_enter(&spa_namespace_lock); + spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); + mutex_exit(&spa_namespace_lock); + + return (0); +} + +/* + * Attach a device to a mirror. The arguments are the path to any device + * in the mirror, and the nvroot for the new device. If the path specifies + * a device that is not mirrored, we automatically insert the mirror vdev. 
+ * + * If 'replacing' is specified, the new device is intended to replace the + * existing device; in this case the two devices are made into their own + * mirror using the 'replacing' vdev, which is functionally identical to + * the mirror vdev (it actually reuses all the same ops) but has a few + * extra rules: you can't attach to it after it's been created, and upon + * completion of resilvering, the first disk (the one being replaced) + * is automatically detached. + */ +int +spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) +{ + uint64_t txg, dtl_max_txg; + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; + vdev_ops_t *pvops; + char *oldvdpath, *newvdpath; + int newvd_isspare; + int error; + + ASSERT(spa_writeable(spa)); + + txg = spa_vdev_enter(spa); + + oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); + + if (oldvd == NULL) + return (spa_vdev_exit(spa, NULL, txg, ENODEV)); + + if (!oldvd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + + pvd = oldvd->vdev_parent; + + if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, + VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, NULL, txg, EINVAL)); + + if (newrootvd->vdev_children != 1) + return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); + + newvd = newrootvd->vdev_child[0]; + + if (!newvd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); + + if ((error = vdev_create(newrootvd, txg, replacing)) != 0) + return (spa_vdev_exit(spa, newrootvd, txg, error)); + + /* + * Spares can't replace logs + */ + if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + + if (!replacing) { + /* + * For attach, the only allowable parent is a mirror or the root + * vdev. 
+ */ + if (pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_root_ops) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + + pvops = &vdev_mirror_ops; + } else { + /* + * Active hot spares can only be replaced by inactive hot + * spares. + */ + if (pvd->vdev_ops == &vdev_spare_ops && + oldvd->vdev_isspare && + !spa_has_spare(spa, newvd->vdev_guid)) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + + /* + * If the source is a hot spare, and the parent isn't already a + * spare, then we want to create a new hot spare. Otherwise, we + * want to create a replacing vdev. The user is not allowed to + * attach to a spared vdev child unless the 'isspare' state is + * the same (spare replaces spare, non-spare replaces + * non-spare). + */ + if (pvd->vdev_ops == &vdev_replacing_ops && + spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + } else if (pvd->vdev_ops == &vdev_spare_ops && + newvd->vdev_isspare != oldvd->vdev_isspare) { + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + } + + if (newvd->vdev_isspare) + pvops = &vdev_spare_ops; + else + pvops = &vdev_replacing_ops; + } + + /* + * Make sure the new device is big enough. + */ + if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) + return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); + + /* + * The new device cannot have a higher alignment requirement + * than the top-level vdev. + */ + if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) + return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); + + /* + * If this is an in-place replacement, update oldvd's path and devid + * to make it distinguishable from newvd, and unopenable from now on. 
+ */ + if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { + spa_strfree(oldvd->vdev_path); + oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, + KM_SLEEP); + (void) sprintf(oldvd->vdev_path, "%s/%s", + newvd->vdev_path, "old"); + if (oldvd->vdev_devid != NULL) { + spa_strfree(oldvd->vdev_devid); + oldvd->vdev_devid = NULL; + } + } + + /* mark the device being resilvered */ + newvd->vdev_resilvering = B_TRUE; + + /* + * If the parent is not a mirror, or if we're replacing, insert the new + * mirror/replacing/spare vdev above oldvd. + */ + if (pvd->vdev_ops != pvops) + pvd = vdev_add_parent(oldvd, pvops); + + ASSERT(pvd->vdev_top->vdev_parent == rvd); + ASSERT(pvd->vdev_ops == pvops); + ASSERT(oldvd->vdev_parent == pvd); + + /* + * Extract the new device from its root and add it to pvd. + */ + vdev_remove_child(newrootvd, newvd); + newvd->vdev_id = pvd->vdev_children; + newvd->vdev_crtxg = oldvd->vdev_crtxg; + vdev_add_child(pvd, newvd); + + tvd = newvd->vdev_top; + ASSERT(pvd->vdev_top == tvd); + ASSERT(tvd->vdev_parent == rvd); + + vdev_config_dirty(tvd); + + /* + * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account + * for any dmu_sync-ed blocks. It will propagate upward when + * spa_vdev_exit() calls vdev_dtl_reassess(). + */ + dtl_max_txg = txg + TXG_CONCURRENT_STATES; + + vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, + dtl_max_txg - TXG_INITIAL); + + if (newvd->vdev_isspare) { + spa_spare_activate(newvd); + spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); + } + + oldvdpath = spa_strdup(oldvd->vdev_path); + newvdpath = spa_strdup(newvd->vdev_path); + newvd_isspare = newvd->vdev_isspare; + + /* + * Mark newvd's DTL dirty in this txg. 
+ */ + vdev_dirty(tvd, VDD_DTL, newvd, txg); + + /* + * Restart the resilver + */ + dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); + + /* + * Commit the config + */ + (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); + + spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, + "%s vdev=%s %s vdev=%s", + replacing && newvd_isspare ? "spare in" : + replacing ? "replace" : "attach", newvdpath, + replacing ? "for" : "to", oldvdpath); + + spa_strfree(oldvdpath); + spa_strfree(newvdpath); + + if (spa->spa_bootfs) + spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); + + return (0); +} + +/* + * Detach a device from a mirror or replacing vdev. + * If 'replace_done' is specified, only detach if the parent + * is a replacing vdev. + */ +int +spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) +{ + uint64_t txg; + int error; + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd, *pvd, *cvd, *tvd; + boolean_t unspare = B_FALSE; + uint64_t unspare_guid; + char *vdpath; + + ASSERT(spa_writeable(spa)); + + txg = spa_vdev_enter(spa); + + vd = spa_lookup_by_guid(spa, guid, B_FALSE); + + if (vd == NULL) + return (spa_vdev_exit(spa, NULL, txg, ENODEV)); + + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + + pvd = vd->vdev_parent; + + /* + * If the parent/child relationship is not as expected, don't do it. + * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing + * vdev that's replacing B with C. The user's intent in replacing + * is to go from M(A,B) to M(A,C). If the user decides to cancel + * the replace by detaching C, the expected behavior is to end up + * M(A,B). But suppose that right after deciding to detach C, + * the replacement of B completes. We would have M(A,C), and then + * ask to detach C, which would leave us with just A -- not what + * the user wanted. 
To prevent this, we make sure that the + * parent/child relationship hasn't changed -- in this example, + * that C's parent is still the replacing vdev R. + */ + if (pvd->vdev_guid != pguid && pguid != 0) + return (spa_vdev_exit(spa, NULL, txg, EBUSY)); + + /* + * Only 'replacing' or 'spare' vdevs can be replaced. + */ + if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && + pvd->vdev_ops != &vdev_spare_ops) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + + ASSERT(pvd->vdev_ops != &vdev_spare_ops || + spa_version(spa) >= SPA_VERSION_SPARES); + + /* + * Only mirror, replacing, and spare vdevs support detach. + */ + if (pvd->vdev_ops != &vdev_replacing_ops && + pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_spare_ops) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + + /* + * If this device has the only valid copy of some data, + * we cannot safely detach it. + */ + if (vdev_dtl_required(vd)) + return (spa_vdev_exit(spa, NULL, txg, EBUSY)); + + ASSERT(pvd->vdev_children >= 2); + + /* + * If we are detaching the second disk from a replacing vdev, then + * check to see if we changed the original vdev's path to have "/old" + * at the end in spa_vdev_attach(). If so, undo that change now. + */ + if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && + vd->vdev_path != NULL) { + size_t len = strlen(vd->vdev_path); + + for (int c = 0; c < pvd->vdev_children; c++) { + cvd = pvd->vdev_child[c]; + + if (cvd == vd || cvd->vdev_path == NULL) + continue; + + if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && + strcmp(cvd->vdev_path + len, "/old") == 0) { + spa_strfree(cvd->vdev_path); + cvd->vdev_path = spa_strdup(vd->vdev_path); + break; + } + } + } + + /* + * If we are detaching the original disk from a spare, then it implies + * that the spare should become a real disk, and be removed from the + * active spare list for the pool. 
+ */ + if (pvd->vdev_ops == &vdev_spare_ops && + vd->vdev_id == 0 && + pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) + unspare = B_TRUE; + + /* + * Erase the disk labels so the disk can be used for other things. + * This must be done after all other error cases are handled, + * but before we disembowel vd (so we can still do I/O to it). + * But if we can't do it, don't treat the error as fatal -- + * it may be that the unwritability of the disk is the reason + * it's being detached! + */ + error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); + + /* + * Remove vd from its parent and compact the parent's children. + */ + vdev_remove_child(pvd, vd); + vdev_compact_children(pvd); + + /* + * Remember one of the remaining children so we can get tvd below. + */ + cvd = pvd->vdev_child[pvd->vdev_children - 1]; + + /* + * If we need to remove the remaining child from the list of hot spares, + * do it now, marking the vdev as no longer a spare in the process. + * We must do this before vdev_remove_parent(), because that can + * change the GUID if it creates a new toplevel GUID. For a similar + * reason, we must remove the spare now, in the same txg as the detach; + * otherwise someone could attach a new sibling, change the GUID, and + * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. + */ + if (unspare) { + ASSERT(cvd->vdev_isspare); + spa_spare_remove(cvd); + unspare_guid = cvd->vdev_guid; + (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); + cvd->vdev_unspare = B_TRUE; + } + + /* + * If the parent mirror/replacing vdev only has one child, + * the parent is no longer needed. Remove it from the tree. + */ + if (pvd->vdev_children == 1) { + if (pvd->vdev_ops == &vdev_spare_ops) + cvd->vdev_unspare = B_FALSE; + vdev_remove_parent(cvd); + cvd->vdev_resilvering = B_FALSE; + } + + + /* + * We don't set tvd until now because the parent we just removed + * may have been the previous top-level vdev. 
+ */ + tvd = cvd->vdev_top; + ASSERT(tvd->vdev_parent == rvd); + + /* + * Reevaluate the parent vdev state. + */ + vdev_propagate_state(cvd); + + /* + * If the 'autoexpand' property is set on the pool then automatically + * try to expand the size of the pool. For example if the device we + * just detached was smaller than the others, it may be possible to + * add metaslabs (i.e. grow the pool). We need to reopen the vdev + * first so that we can obtain the updated sizes of the leaf vdevs. + */ + if (spa->spa_autoexpand) { + vdev_reopen(tvd); + vdev_expand(tvd, txg); + } + + vdev_config_dirty(tvd); + + /* + * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that + * vd->vdev_detached is set and free vd's DTL object in syncing context. + * But first make sure we're not on any *other* txg's DTL list, to + * prevent vd from being accessed after it's freed. + */ + vdpath = spa_strdup(vd->vdev_path); + for (int t = 0; t < TXG_SIZE; t++) + (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); + vd->vdev_detached = B_TRUE; + vdev_dirty(tvd, VDD_DTL, vd, txg); + + spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); + + /* hang on to the spa before we release the lock */ + spa_open_ref(spa, FTAG); + + error = spa_vdev_exit(spa, vd, txg, 0); + + spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, + "vdev=%s", vdpath); + spa_strfree(vdpath); + + /* + * If this was the removal of the original device in a hot spare vdev, + * then we want to go through and remove the device from the hot spare + * list of every other pool. 
+ */ + if (unspare) { + spa_t *altspa = NULL; + + mutex_enter(&spa_namespace_lock); + while ((altspa = spa_next(altspa)) != NULL) { + if (altspa->spa_state != POOL_STATE_ACTIVE || + altspa == spa) + continue; + + spa_open_ref(altspa, FTAG); + mutex_exit(&spa_namespace_lock); + (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); + mutex_enter(&spa_namespace_lock); + spa_close(altspa, FTAG); + } + mutex_exit(&spa_namespace_lock); + + /* search the rest of the vdevs for spares to remove */ + spa_vdev_resilver_done(spa); + } + + /* all done with the spa; OK to release */ + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + mutex_exit(&spa_namespace_lock); + + return (error); +} + +/* + * Split a set of devices from their mirrors, and create a new pool from them. + */ +int +spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, + nvlist_t *props, boolean_t exp) +{ + int error = 0; + uint64_t txg, *glist; + spa_t *newspa; + uint_t c, children, lastlog; + nvlist_t **child, *nvl, *tmp; + dmu_tx_t *tx; + char *altroot = NULL; + vdev_t *rvd, **vml = NULL; /* vdev modify list */ + boolean_t activate_slog; + + ASSERT(spa_writeable(spa)); + + txg = spa_vdev_enter(spa); + + /* clear the log and flush everything up to now */ + activate_slog = spa_passivate_log(spa); + (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); + error = spa_offline_log(spa); + txg = spa_vdev_config_enter(spa); + + if (activate_slog) + spa_activate_log(spa); + + if (error != 0) + return (spa_vdev_exit(spa, NULL, txg, error)); + + /* check new spa name before going any further */ + if (spa_lookup(newname) != NULL) + return (spa_vdev_exit(spa, NULL, txg, EEXIST)); + + /* + * scan through all the children to ensure they're all mirrors + */ + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || + nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, + &children) != 0) + return (spa_vdev_exit(spa, NULL, txg, EINVAL)); + + /* first, check to ensure we've got 
the right child count */ + rvd = spa->spa_root_vdev; + lastlog = 0; + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *vd = rvd->vdev_child[c]; + + /* don't count the holes & logs as children */ + if (vd->vdev_islog || vd->vdev_ishole) { + if (lastlog == 0) + lastlog = c; + continue; + } + + lastlog = 0; + } + if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) + return (spa_vdev_exit(spa, NULL, txg, EINVAL)); + + /* next, ensure no spare or cache devices are part of the split */ + if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || + nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) + return (spa_vdev_exit(spa, NULL, txg, EINVAL)); + + vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); + glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); + + /* then, loop over each vdev and validate it */ + for (c = 0; c < children; c++) { + uint64_t is_hole = 0; + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, + &is_hole); + + if (is_hole != 0) { + if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || + spa->spa_root_vdev->vdev_child[c]->vdev_islog) { + continue; + } else { + error = EINVAL; + break; + } + } + + /* which disk is going to be split? 
*/ + if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, + &glist[c]) != 0) { + error = EINVAL; + break; + } + + /* look it up in the spa */ + vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); + if (vml[c] == NULL) { + error = ENODEV; + break; + } + + /* make sure there's nothing stopping the split */ + if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || + vml[c]->vdev_islog || + vml[c]->vdev_ishole || + vml[c]->vdev_isspare || + vml[c]->vdev_isl2cache || + !vdev_writeable(vml[c]) || + vml[c]->vdev_children != 0 || + vml[c]->vdev_state != VDEV_STATE_HEALTHY || + c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { + error = EINVAL; + break; + } + + if (vdev_dtl_required(vml[c])) { + error = EBUSY; + break; + } + + /* we need certain info from the top level */ + VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, + vml[c]->vdev_top->vdev_ms_array) == 0); + VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, + vml[c]->vdev_top->vdev_ms_shift) == 0); + VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, + vml[c]->vdev_top->vdev_asize) == 0); + VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, + vml[c]->vdev_top->vdev_ashift) == 0); + } + + if (error != 0) { + kmem_free(vml, children * sizeof (vdev_t *)); + kmem_free(glist, children * sizeof (uint64_t)); + return (spa_vdev_exit(spa, NULL, txg, error)); + } + + /* stop writers from using the disks */ + for (c = 0; c < children; c++) { + if (vml[c] != NULL) + vml[c]->vdev_offline = B_TRUE; + } + vdev_reopen(spa->spa_root_vdev); + + /* + * Temporarily record the splitting vdevs in the spa config. This + * will disappear once the config is regenerated. 
+ */ + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, + glist, children) == 0); + kmem_free(glist, children * sizeof (uint64_t)); + + mutex_enter(&spa->spa_props_lock); + VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, + nvl) == 0); + mutex_exit(&spa->spa_props_lock); + spa->spa_config_splitting = nvl; + vdev_config_dirty(spa->spa_root_vdev); + + /* configure and create the new pool */ + VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, + exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, + spa_version(spa)) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, + spa->spa_config_txg) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, + spa_generate_guid(NULL)) == 0); + (void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); + + /* add the new pool to the namespace */ + newspa = spa_add(newname, config, altroot); + newspa->spa_config_txg = spa->spa_config_txg; + spa_set_log_state(newspa, SPA_LOG_CLEAR); + + /* release the spa config lock, retaining the namespace lock */ + spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); + + if (zio_injection_enabled) + zio_handle_panic_injection(spa, FTAG, 1); + + spa_activate(newspa, spa_mode_global); + spa_async_suspend(newspa); + + /* create the new pool from the disks of the original pool */ + error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); + if (error) + goto out; + + /* if that worked, generate a real config for the new pool */ + if (newspa->spa_root_vdev != NULL) { + VERIFY(nvlist_alloc(&newspa->spa_config_splitting, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, + ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); + spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, 
+ B_TRUE)); + } + + /* set the props */ + if (props != NULL) { + spa_configfile_set(newspa, props, B_FALSE); + error = spa_prop_set(newspa, props); + if (error) + goto out; + } + + /* flush everything */ + txg = spa_vdev_config_enter(newspa); + vdev_config_dirty(newspa->spa_root_vdev); + (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); + + if (zio_injection_enabled) + zio_handle_panic_injection(spa, FTAG, 2); + + spa_async_resume(newspa); + + /* finally, update the original pool's config */ + txg = spa_vdev_config_enter(spa); + tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error != 0) + dmu_tx_abort(tx); + for (c = 0; c < children; c++) { + if (vml[c] != NULL) { + vdev_split(vml[c]); + if (error == 0) + spa_history_log_internal(LOG_POOL_VDEV_DETACH, + spa, tx, "vdev=%s", + vml[c]->vdev_path); + vdev_free(vml[c]); + } + } + vdev_config_dirty(spa->spa_root_vdev); + spa->spa_config_splitting = NULL; + nvlist_free(nvl); + if (error == 0) + dmu_tx_commit(tx); + (void) spa_vdev_exit(spa, NULL, txg, 0); + + if (zio_injection_enabled) + zio_handle_panic_injection(spa, FTAG, 3); + + /* split is complete; log a history record */ + spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, + "split new pool %s from pool %s", newname, spa_name(spa)); + + kmem_free(vml, children * sizeof (vdev_t *)); + + /* if we're not going to mount the filesystems in userland, export */ + if (exp) + error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, + B_FALSE, B_FALSE); + + return (error); + +out: + spa_unload(newspa); + spa_deactivate(newspa); + spa_remove(newspa); + + txg = spa_vdev_config_enter(spa); + + /* re-online all offlined disks */ + for (c = 0; c < children; c++) { + if (vml[c] != NULL) + vml[c]->vdev_offline = B_FALSE; + } + vdev_reopen(spa->spa_root_vdev); + + nvlist_free(spa->spa_config_splitting); + spa->spa_config_splitting = NULL; + (void) spa_vdev_exit(spa, NULL, txg, error); + + kmem_free(vml, children * 
sizeof (vdev_t *)); + return (error); +} + +static nvlist_t * +spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) +{ + for (int i = 0; i < count; i++) { + uint64_t guid; + + VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, + &guid) == 0); + + if (guid == target_guid) + return (nvpp[i]); + } + + return (NULL); +} + +static void +spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, + nvlist_t *dev_to_remove) +{ + nvlist_t **newdev = NULL; + + if (count > 1) + newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); + + for (int i = 0, j = 0; i < count; i++) { + if (dev[i] == dev_to_remove) + continue; + VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); + } + + VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); + VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); + + for (int i = 0; i < count - 1; i++) + nvlist_free(newdev[i]); + + if (count > 1) + kmem_free(newdev, (count - 1) * sizeof (void *)); +} + +/* + * Evacuate the device. + */ +static int +spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) +{ + uint64_t txg; + int error = 0; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + ASSERT(vd == vd->vdev_top); + + /* + * Evacuate the device. We don't hold the config lock as writer + * since we need to do I/O but we do keep the + * spa_namespace_lock held. Once this completes the device + * should no longer have any blocks allocated on it. + */ + if (vd->vdev_islog) { + if (vd->vdev_stat.vs_alloc != 0) + error = spa_offline_log(spa); + } else { + error = ENOTSUP; + } + + if (error) + return (error); + + /* + * The evacuation succeeded. Remove any remaining MOS metadata + * associated with this vdev, and wait for these changes to sync. 
+ */ + ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); + txg = spa_vdev_config_enter(spa); + vd->vdev_removing = B_TRUE; + vdev_dirty(vd, 0, NULL, txg); + vdev_config_dirty(vd); + spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); + + return (0); +} + +/* + * Complete the removal by cleaning up the namespace. + */ +static void +spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) +{ + vdev_t *rvd = spa->spa_root_vdev; + uint64_t id = vd->vdev_id; + boolean_t last_vdev = (id == (rvd->vdev_children - 1)); + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + ASSERT(vd == vd->vdev_top); + + /* + * Only remove any devices which are empty. + */ + if (vd->vdev_stat.vs_alloc != 0) + return; + + (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); + + if (list_link_active(&vd->vdev_state_dirty_node)) + vdev_state_clean(vd); + if (list_link_active(&vd->vdev_config_dirty_node)) + vdev_config_clean(vd); + + vdev_free(vd); + + if (last_vdev) { + vdev_compact_children(rvd); + } else { + vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); + vdev_add_child(rvd, vd); + } + vdev_config_dirty(rvd); + + /* + * Reassess the health of our root vdev. + */ + vdev_reopen(rvd); +} + +/* + * Remove a device from the pool - + * + * Removing a device from the vdev namespace requires several steps + * and can take a significant amount of time. As a result we use + * the spa_vdev_config_[enter/exit] functions which allow us to + * grab and release the spa_config_lock while still holding the namespace + * lock. During each step the configuration is synced out. + */ + +/* + * Remove a device from the pool. Currently, this supports removing only hot + * spares, slogs, and level 2 ARC devices. 
+ */ +int +spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) +{ + vdev_t *vd; + metaslab_group_t *mg; + nvlist_t **spares, **l2cache, *nv; + uint64_t txg = 0; + uint_t nspares, nl2cache; + int error = 0; + boolean_t locked = MUTEX_HELD(&spa_namespace_lock); + + ASSERT(spa_writeable(spa)); + + if (!locked) + txg = spa_vdev_enter(spa); + + vd = spa_lookup_by_guid(spa, guid, B_FALSE); + + if (spa->spa_spares.sav_vdevs != NULL && + nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && + (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { + /* + * Only remove the hot spare if it's not currently in use + * in this pool. + */ + if (vd == NULL || unspare) { + spa_vdev_remove_aux(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares, nv); + spa_load_spares(spa); + spa->spa_spares.sav_sync = B_TRUE; + } else { + error = EBUSY; + } + } else if (spa->spa_l2cache.sav_vdevs != NULL && + nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && + (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { + /* + * Cache devices can always be removed. + */ + spa_vdev_remove_aux(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); + spa_load_l2cache(spa); + spa->spa_l2cache.sav_sync = B_TRUE; + } else if (vd != NULL && vd->vdev_islog) { + ASSERT(!locked); + ASSERT(vd == vd->vdev_top); + + /* + * XXX - Once we have bp-rewrite this should + * become the common case. + */ + + mg = vd->vdev_mg; + + /* + * Stop allocating from this vdev. + */ + metaslab_group_passivate(mg); + + /* + * Wait for the youngest allocations and frees to sync, + * and then wait for the deferral of those frees to finish. + */ + spa_vdev_config_exit(spa, NULL, + txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); + + /* + * Attempt to evacuate the vdev. 
+ */ + error = spa_vdev_remove_evacuate(spa, vd); + + txg = spa_vdev_config_enter(spa); + + /* + * If we couldn't evacuate the vdev, unwind. + */ + if (error) { + metaslab_group_activate(mg); + return (spa_vdev_exit(spa, NULL, txg, error)); + } + + /* + * Clean up the vdev namespace. + */ + spa_vdev_remove_from_namespace(spa, vd); + + } else if (vd != NULL) { + /* + * Normal vdevs cannot be removed (yet). + */ + error = ENOTSUP; + } else { + /* + * There is no vdev of any kind with the specified guid. + */ + error = ENOENT; + } + + if (!locked) + return (spa_vdev_exit(spa, NULL, txg, error)); + + return (error); +} + +/* + * Find any device that's done replacing, or a vdev marked 'unspare' that's + * current spared, so we can detach it. + */ +static vdev_t * +spa_vdev_resilver_done_hunt(vdev_t *vd) +{ + vdev_t *newvd, *oldvd; + + for (int c = 0; c < vd->vdev_children; c++) { + oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); + if (oldvd != NULL) + return (oldvd); + } + + /* + * Check for a completed replacement. We always consider the first + * vdev in the list to be the oldest vdev, and the last one to be + * the newest (see spa_vdev_attach() for how that works). In + * the case where the newest vdev is faulted, we will not automatically + * remove it after a resilver completes. This is OK as it will require + * user intervention to determine which disk the admin wishes to keep. + */ + if (vd->vdev_ops == &vdev_replacing_ops) { + ASSERT(vd->vdev_children > 1); + + newvd = vd->vdev_child[vd->vdev_children - 1]; + oldvd = vd->vdev_child[0]; + + if (vdev_dtl_empty(newvd, DTL_MISSING) && + vdev_dtl_empty(newvd, DTL_OUTAGE) && + !vdev_dtl_required(oldvd)) + return (oldvd); + } + + /* + * Check for a completed resilver with the 'unspare' flag set. 
+ */ + if (vd->vdev_ops == &vdev_spare_ops) { + vdev_t *first = vd->vdev_child[0]; + vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; + + if (last->vdev_unspare) { + oldvd = first; + newvd = last; + } else if (first->vdev_unspare) { + oldvd = last; + newvd = first; + } else { + oldvd = NULL; + } + + if (oldvd != NULL && + vdev_dtl_empty(newvd, DTL_MISSING) && + vdev_dtl_empty(newvd, DTL_OUTAGE) && + !vdev_dtl_required(oldvd)) + return (oldvd); + + /* + * If there are more than two spares attached to a disk, + * and those spares are not required, then we want to + * attempt to free them up now so that they can be used + * by other pools. Once we're back down to a single + * disk+spare, we stop removing them. + */ + if (vd->vdev_children > 2) { + newvd = vd->vdev_child[1]; + + if (newvd->vdev_isspare && last->vdev_isspare && + vdev_dtl_empty(last, DTL_MISSING) && + vdev_dtl_empty(last, DTL_OUTAGE) && + !vdev_dtl_required(newvd)) + return (newvd); + } + } + + return (NULL); +} + +static void +spa_vdev_resilver_done(spa_t *spa) +{ + vdev_t *vd, *pvd, *ppvd; + uint64_t guid, sguid, pguid, ppguid; + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { + pvd = vd->vdev_parent; + ppvd = pvd->vdev_parent; + guid = vd->vdev_guid; + pguid = pvd->vdev_guid; + ppguid = ppvd->vdev_guid; + sguid = 0; + /* + * If we have just finished replacing a hot spared device, then + * we need to detach the parent's first child (the original hot + * spare) as well. 
+ */ + if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && + ppvd->vdev_children == 2) { + ASSERT(pvd->vdev_ops == &vdev_replacing_ops); + sguid = ppvd->vdev_child[1]->vdev_guid; + } + spa_config_exit(spa, SCL_ALL, FTAG); + if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) + return; + if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) + return; + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + } + + spa_config_exit(spa, SCL_ALL, FTAG); +} + +/* + * Update the stored path or FRU for this vdev. + */ +int +spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, + boolean_t ispath) +{ + vdev_t *vd; + boolean_t sync = B_FALSE; + + ASSERT(spa_writeable(spa)); + + spa_vdev_state_enter(spa, SCL_ALL); + + if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, ENOENT)); + + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); + + if (ispath) { + if (strcmp(value, vd->vdev_path) != 0) { + spa_strfree(vd->vdev_path); + vd->vdev_path = spa_strdup(value); + sync = B_TRUE; + } + } else { + if (vd->vdev_fru == NULL) { + vd->vdev_fru = spa_strdup(value); + sync = B_TRUE; + } else if (strcmp(value, vd->vdev_fru) != 0) { + spa_strfree(vd->vdev_fru); + vd->vdev_fru = spa_strdup(value); + sync = B_TRUE; + } + } + + return (spa_vdev_state_exit(spa, sync ? 
vd : NULL, 0)); +} + +int +spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) +{ + return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); +} + +int +spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) +{ + return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); +} + +/* + * ========================================================================== + * SPA Scanning + * ========================================================================== + */ + +int +spa_scan_stop(spa_t *spa) +{ + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + if (dsl_scan_resilvering(spa->spa_dsl_pool)) + return (EBUSY); + return (dsl_scan_cancel(spa->spa_dsl_pool)); +} + +int +spa_scan(spa_t *spa, pool_scan_func_t func) +{ + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + + if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) + return (ENOTSUP); + + /* + * If a resilver was requested, but there is no DTL on a + * writeable leaf device, we have nothing to do. + */ + if (func == POOL_SCAN_RESILVER && + !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { + spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); + return (0); + } + + return (dsl_scan(spa->spa_dsl_pool, func)); +} + +/* + * ========================================================================== + * SPA async task processing + * ========================================================================== + */ + +static void +spa_async_remove(spa_t *spa, vdev_t *vd) +{ + if (vd->vdev_remove_wanted) { + vd->vdev_remove_wanted = B_FALSE; + vd->vdev_delayed_close = B_FALSE; + vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); + + /* + * We want to clear the stats, but we don't want to do a full + * vdev_clear() as that will cause us to throw away + * degraded/faulted state as well as attempt to reopen the + * device, all of which is a waste. 
+ */ + vd->vdev_stat.vs_read_errors = 0; + vd->vdev_stat.vs_write_errors = 0; + vd->vdev_stat.vs_checksum_errors = 0; + + vdev_state_dirty(vd->vdev_top); + } + + for (int c = 0; c < vd->vdev_children; c++) + spa_async_remove(spa, vd->vdev_child[c]); +} + +static void +spa_async_probe(spa_t *spa, vdev_t *vd) +{ + if (vd->vdev_probe_wanted) { + vd->vdev_probe_wanted = B_FALSE; + vdev_reopen(vd); /* vdev_open() does the actual probe */ + } + + for (int c = 0; c < vd->vdev_children; c++) + spa_async_probe(spa, vd->vdev_child[c]); +} + +static void +spa_async_autoexpand(spa_t *spa, vdev_t *vd) +{ + sysevent_id_t eid; + nvlist_t *attr; + char *physpath; + + if (!spa->spa_autoexpand) + return; + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + spa_async_autoexpand(spa, cvd); + } + + if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) + return; + + physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); + + VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); + + (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, + ESC_DEV_DLE, attr, &eid, DDI_SLEEP); + + nvlist_free(attr); + kmem_free(physpath, MAXPATHLEN); +} + +static void +spa_async_thread(spa_t *spa) +{ + int tasks; + + ASSERT(spa->spa_sync_on); + + mutex_enter(&spa->spa_async_lock); + tasks = spa->spa_async_tasks; + spa->spa_async_tasks = 0; + mutex_exit(&spa->spa_async_lock); + + /* + * See if the config needs to be updated. 
+ */ + if (tasks & SPA_ASYNC_CONFIG_UPDATE) { + uint64_t old_space, new_space; + + mutex_enter(&spa_namespace_lock); + old_space = metaslab_class_get_space(spa_normal_class(spa)); + spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); + new_space = metaslab_class_get_space(spa_normal_class(spa)); + mutex_exit(&spa_namespace_lock); + + /* + * If the pool grew as a result of the config update, + * then log an internal history event. + */ + if (new_space != old_space) { + spa_history_log_internal(LOG_POOL_VDEV_ONLINE, + spa, NULL, + "pool '%s' size: %llu(+%llu)", + spa_name(spa), new_space, new_space - old_space); + } + } + + /* + * See if any devices need to be marked REMOVED. + */ + if (tasks & SPA_ASYNC_REMOVE) { + spa_vdev_state_enter(spa, SCL_NONE); + spa_async_remove(spa, spa->spa_root_vdev); + for (int i = 0; i < spa->spa_l2cache.sav_count; i++) + spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); + for (int i = 0; i < spa->spa_spares.sav_count; i++) + spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); + (void) spa_vdev_state_exit(spa, NULL, 0); + } + + if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_async_autoexpand(spa, spa->spa_root_vdev); + spa_config_exit(spa, SCL_CONFIG, FTAG); + } + + /* + * See if any devices need to be probed. + */ + if (tasks & SPA_ASYNC_PROBE) { + spa_vdev_state_enter(spa, SCL_NONE); + spa_async_probe(spa, spa->spa_root_vdev); + (void) spa_vdev_state_exit(spa, NULL, 0); + } + + /* + * If any devices are done replacing, detach them. + */ + if (tasks & SPA_ASYNC_RESILVER_DONE) + spa_vdev_resilver_done(spa); + + /* + * Kick off a resilver. + */ + if (tasks & SPA_ASYNC_RESILVER) + dsl_resilver_restart(spa->spa_dsl_pool, 0); + + /* + * Let the world know that we're done. 
+ */ + mutex_enter(&spa->spa_async_lock); + spa->spa_async_thread = NULL; + cv_broadcast(&spa->spa_async_cv); + mutex_exit(&spa->spa_async_lock); + thread_exit(); +} + +void +spa_async_suspend(spa_t *spa) +{ + mutex_enter(&spa->spa_async_lock); + spa->spa_async_suspended++; + while (spa->spa_async_thread != NULL) + cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); + mutex_exit(&spa->spa_async_lock); +} + +void +spa_async_resume(spa_t *spa) +{ + mutex_enter(&spa->spa_async_lock); + ASSERT(spa->spa_async_suspended != 0); + spa->spa_async_suspended--; + mutex_exit(&spa->spa_async_lock); +} + +static void +spa_async_dispatch(spa_t *spa) +{ + mutex_enter(&spa->spa_async_lock); + if (spa->spa_async_tasks && !spa->spa_async_suspended && + spa->spa_async_thread == NULL && + rootdir != NULL && !vn_is_readonly(rootdir)) + spa->spa_async_thread = thread_create(NULL, 0, + spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); + mutex_exit(&spa->spa_async_lock); +} + +void +spa_async_request(spa_t *spa, int task) +{ + zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task); + mutex_enter(&spa->spa_async_lock); + spa->spa_async_tasks |= task; + mutex_exit(&spa->spa_async_lock); +} + +/* + * ========================================================================== + * SPA syncing routines + * ========================================================================== + */ + +static int +bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + bpobj_t *bpo = arg; + bpobj_enqueue(bpo, bp, tx); + return (0); +} + +static int +spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + zio_t *zio = arg; + + zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, + zio->io_flags)); + return (0); +} + +static void +spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) +{ + char *packed = NULL; + size_t bufsize; + size_t nvsize = 0; + dmu_buf_t *db; + + VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); + + /* + * Write full 
(SPA_CONFIG_BLOCKSIZE) blocks of configuration + * information. This avoids the dbuf_will_dirty() path and + * saves us a pre-read to get data we don't actually care about. + */ + bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); + packed = kmem_alloc(bufsize, KM_SLEEP); + + VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, + KM_SLEEP) == 0); + bzero(packed + nvsize, bufsize - nvsize); + + dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); + + kmem_free(packed, bufsize); + + VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); + dmu_buf_will_dirty(db, tx); + *(uint64_t *)db->db_data = nvsize; + dmu_buf_rele(db, FTAG); +} + +static void +spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, + const char *config, const char *entry) +{ + nvlist_t *nvroot; + nvlist_t **list; + int i; + + if (!sav->sav_sync) + return; + + /* + * Update the MOS nvlist describing the list of available devices. + * spa_validate_aux() will have already made sure this nvlist is + * valid and the vdevs are labeled appropriately. 
+ */ + if (sav->sav_object == 0) { + sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, + DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, + sizeof (uint64_t), tx); + VERIFY(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, + &sav->sav_object, tx) == 0); + } + + VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); + if (sav->sav_count == 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); + } else { + list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); + for (i = 0; i < sav->sav_count; i++) + list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], + B_FALSE, VDEV_CONFIG_L2CACHE); + VERIFY(nvlist_add_nvlist_array(nvroot, config, list, + sav->sav_count) == 0); + for (i = 0; i < sav->sav_count; i++) + nvlist_free(list[i]); + kmem_free(list, sav->sav_count * sizeof (void *)); + } + + spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); + nvlist_free(nvroot); + + sav->sav_sync = B_FALSE; +} + +static void +spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) +{ + nvlist_t *config; + + if (list_is_empty(&spa->spa_config_dirty_list)) + return; + + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + + config = spa_config_generate(spa, spa->spa_root_vdev, + dmu_tx_get_txg(tx), B_FALSE); + + spa_config_exit(spa, SCL_STATE, FTAG); + + if (spa->spa_config_syncing) + nvlist_free(spa->spa_config_syncing); + spa->spa_config_syncing = config; + + spa_sync_nvlist(spa, spa->spa_config_object, config, tx); +} + +/* + * Set zpool properties. 
+ */ +static void +spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) +{ + spa_t *spa = arg1; + objset_t *mos = spa->spa_meta_objset; + nvlist_t *nvp = arg2; + nvpair_t *elem; + uint64_t intval; + char *strval; + zpool_prop_t prop; + const char *propname; + zprop_type_t proptype; + + mutex_enter(&spa->spa_props_lock); + + elem = NULL; + while ((elem = nvlist_next_nvpair(nvp, elem))) { + switch (prop = zpool_name_to_prop(nvpair_name(elem))) { + case ZPOOL_PROP_VERSION: + /* + * Only set version for non-zpool-creation cases + * (set/import). spa_create() needs special care + * for version setting. + */ + if (tx->tx_txg != TXG_INITIAL) { + VERIFY(nvpair_value_uint64(elem, + &intval) == 0); + ASSERT(intval <= SPA_VERSION); + ASSERT(intval >= spa_version(spa)); + spa->spa_uberblock.ub_version = intval; + vdev_config_dirty(spa->spa_root_vdev); + } + break; + + case ZPOOL_PROP_ALTROOT: + /* + * 'altroot' is a non-persistent property. It should + * have been set temporarily at creation or import time. + */ + ASSERT(spa->spa_root != NULL); + break; + + case ZPOOL_PROP_READONLY: + case ZPOOL_PROP_CACHEFILE: + /* + * 'readonly' and 'cachefile' are also non-persisitent + * properties. + */ + break; + default: + /* + * Set pool property values in the poolprops mos object. 
+ */ + if (spa->spa_pool_props_object == 0) { + VERIFY((spa->spa_pool_props_object = + zap_create(mos, DMU_OT_POOL_PROPS, + DMU_OT_NONE, 0, tx)) > 0); + + VERIFY(zap_update(mos, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, + 8, 1, &spa->spa_pool_props_object, tx) + == 0); + } + + /* normalize the property name */ + propname = zpool_prop_to_name(prop); + proptype = zpool_prop_get_type(prop); + + if (nvpair_type(elem) == DATA_TYPE_STRING) { + ASSERT(proptype == PROP_TYPE_STRING); + VERIFY(nvpair_value_string(elem, &strval) == 0); + VERIFY(zap_update(mos, + spa->spa_pool_props_object, propname, + 1, strlen(strval) + 1, strval, tx) == 0); + + } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { + VERIFY(nvpair_value_uint64(elem, &intval) == 0); + + if (proptype == PROP_TYPE_INDEX) { + const char *unused; + VERIFY(zpool_prop_index_to_string( + prop, intval, &unused) == 0); + } + VERIFY(zap_update(mos, + spa->spa_pool_props_object, propname, + 8, 1, &intval, tx) == 0); + } else { + ASSERT(0); /* not allowed */ + } + + switch (prop) { + case ZPOOL_PROP_DELEGATION: + spa->spa_delegation = intval; + break; + case ZPOOL_PROP_BOOTFS: + spa->spa_bootfs = intval; + break; + case ZPOOL_PROP_FAILUREMODE: + spa->spa_failmode = intval; + break; + case ZPOOL_PROP_AUTOEXPAND: + spa->spa_autoexpand = intval; + if (tx->tx_txg != TXG_INITIAL) + spa_async_request(spa, + SPA_ASYNC_AUTOEXPAND); + break; + case ZPOOL_PROP_DEDUPDITTO: + spa->spa_dedup_ditto = intval; + break; + default: + break; + } + } + + /* log internal history if this is not a zpool create */ + if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && + tx->tx_txg != TXG_INITIAL) { + spa_history_log_internal(LOG_POOL_PROPSET, + spa, tx, "%s %lld %s", + nvpair_name(elem), intval, spa_name(spa)); + } + } + + mutex_exit(&spa->spa_props_lock); +} + +/* + * Perform one-time upgrade on-disk changes. 
spa_version() does not + * reflect the new version this txg, so there must be no changes this + * txg to anything that the upgrade code depends on after it executes. + * Therefore this must be called after dsl_pool_sync() does the sync + * tasks. + */ +static void +spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) +{ + dsl_pool_t *dp = spa->spa_dsl_pool; + + ASSERT(spa->spa_sync_pass == 1); + + if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && + spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { + dsl_pool_create_origin(dp, tx); + + /* Keeping the origin open increases spa_minref */ + spa->spa_minref += 3; + } + + if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && + spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { + dsl_pool_upgrade_clones(dp, tx); + } + + if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && + spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { + dsl_pool_upgrade_dir_clones(dp, tx); + + /* Keeping the freedir open increases spa_minref */ + spa->spa_minref += 3; + } +} + +/* + * Sync the specified transaction group. New blocks may be dirtied as + * part of the process, so we iterate until it converges. + */ +void +spa_sync(spa_t *spa, uint64_t txg) +{ + dsl_pool_t *dp = spa->spa_dsl_pool; + objset_t *mos = spa->spa_meta_objset; + bpobj_t *defer_bpo = &spa->spa_deferred_bpobj; + bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd; + dmu_tx_t *tx; + int error; + + VERIFY(spa_writeable(spa)); + + /* + * Lock out configuration changes. + */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + + spa->spa_syncing_txg = txg; + spa->spa_sync_pass = 0; + + /* + * If there are any pending vdev state changes, convert them + * into config changes that go out with this transaction group. 
+ */ + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + while (list_head(&spa->spa_state_dirty_list) != NULL) { + /* + * We need the write lock here because, for aux vdevs, + * calling vdev_config_dirty() modifies sav_config. + * This is ugly and will become unnecessary when we + * eliminate the aux vdev wart by integrating all vdevs + * into the root vdev tree. + */ + spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); + spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); + while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { + vdev_state_clean(vd); + vdev_config_dirty(vd); + } + spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); + spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); + } + spa_config_exit(spa, SCL_STATE, FTAG); + + tx = dmu_tx_create_assigned(dp, txg); + + /* + * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, + * set spa_deflate if we have no raid-z vdevs. + */ + if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && + spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { + int i; + + for (i = 0; i < rvd->vdev_children; i++) { + vd = rvd->vdev_child[i]; + if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) + break; + } + if (i == rvd->vdev_children) { + spa->spa_deflate = TRUE; + VERIFY(0 == zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate, tx)); + } + } + + /* + * If anything has changed in this txg, or if someone is waiting + * for this txg to sync (eg, spa_vdev_remove()), push the + * deferred frees from the previous txg. If not, leave them + * alone so that we don't generate work on an otherwise idle + * system. 
+ */ + if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || + !txg_list_empty(&dp->dp_dirty_dirs, txg) || + !txg_list_empty(&dp->dp_sync_tasks, txg) || + ((dsl_scan_active(dp->dp_scan) || + txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { + zio_t *zio = zio_root(spa, NULL, NULL, 0); + VERIFY3U(bpobj_iterate(defer_bpo, + spa_free_sync_cb, zio, tx), ==, 0); + VERIFY3U(zio_wait(zio), ==, 0); + } + + /* + * Iterate to convergence. + */ + do { + int pass = ++spa->spa_sync_pass; + + spa_sync_config_object(spa, tx); + spa_sync_aux_dev(spa, &spa->spa_spares, tx, + ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); + spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, + ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); + spa_errlog_sync(spa, txg); + dsl_pool_sync(dp, txg); + + if (pass <= SYNC_PASS_DEFERRED_FREE) { + zio_t *zio = zio_root(spa, NULL, NULL, 0); + bplist_iterate(free_bpl, spa_free_sync_cb, + zio, tx); + VERIFY(zio_wait(zio) == 0); + } else { + bplist_iterate(free_bpl, bpobj_enqueue_cb, + defer_bpo, tx); + } + + ddt_sync(spa, txg); + dsl_scan_sync(dp, tx); + + while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) + vdev_sync(vd, txg); + + if (pass == 1) + spa_sync_upgrades(spa, tx); + + } while (dmu_objset_is_dirty(mos, txg)); + + /* + * Rewrite the vdev configuration (which includes the uberblock) + * to commit the transaction group. + * + * If there are no dirty vdevs, we sync the uberblock to a few + * random top-level vdevs that are known to be visible in the + * config cache (see spa_vdev_add() for a complete description). + * If there *are* dirty vdevs, sync the uberblock to all vdevs. + */ + for (;;) { + /* + * We hold SCL_STATE to prevent vdev open/close/etc. + * while we're attempting to write the vdev labels. 
+ */ + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + + if (list_is_empty(&spa->spa_config_dirty_list)) { + vdev_t *svd[SPA_DVAS_PER_BP]; + int svdcount = 0; + int children = rvd->vdev_children; + int c0 = spa_get_random(children); + + for (int c = 0; c < children; c++) { + vd = rvd->vdev_child[(c0 + c) % children]; + if (vd->vdev_ms_array == 0 || vd->vdev_islog) + continue; + svd[svdcount++] = vd; + if (svdcount == SPA_DVAS_PER_BP) + break; + } + error = vdev_config_sync(svd, svdcount, txg, B_FALSE); + if (error != 0) + error = vdev_config_sync(svd, svdcount, txg, + B_TRUE); + } else { + error = vdev_config_sync(rvd->vdev_child, + rvd->vdev_children, txg, B_FALSE); + if (error != 0) + error = vdev_config_sync(rvd->vdev_child, + rvd->vdev_children, txg, B_TRUE); + } + + spa_config_exit(spa, SCL_STATE, FTAG); + + if (error == 0) + break; + zio_suspend(spa, NULL); + zio_resume_wait(spa); + } + dmu_tx_commit(tx); + + /* + * Clear the dirty config list. + */ + while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) + vdev_config_clean(vd); + + /* + * Now that the new config has synced transactionally, + * let it become visible to the config cache. + */ + if (spa->spa_config_syncing != NULL) { + spa_config_set(spa, spa->spa_config_syncing); + spa->spa_config_txg = txg; + spa->spa_config_syncing = NULL; + } + + spa->spa_ubsync = spa->spa_uberblock; + + dsl_pool_sync_done(dp, txg); + + /* + * Update usable space statistics. + */ + while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) + vdev_sync_done(vd, txg); + + spa_update_dspace(spa); + + /* + * It had better be the case that we didn't dirty anything + * since vdev_config_sync(). 
+ */ + ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); + ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); + ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); + + spa->spa_sync_pass = 0; + + spa_config_exit(spa, SCL_CONFIG, FTAG); + + spa_handle_ignored_writes(spa); + + /* + * If any async tasks have been requested, kick them off. + */ + spa_async_dispatch(spa); +} + +/* + * Sync all pools. We don't want to hold the namespace lock across these + * operations, so we take a reference on the spa_t and drop the lock during the + * sync. + */ +void +spa_sync_allpools(void) +{ + spa_t *spa = NULL; + mutex_enter(&spa_namespace_lock); + while ((spa = spa_next(spa)) != NULL) { + if (spa_state(spa) != POOL_STATE_ACTIVE || + !spa_writeable(spa) || spa_suspended(spa)) + continue; + spa_open_ref(spa, FTAG); + mutex_exit(&spa_namespace_lock); + txg_wait_synced(spa_get_dsl(spa), 0); + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + } + mutex_exit(&spa_namespace_lock); +} + +/* + * ========================================================================== + * Miscellaneous routines + * ========================================================================== + */ + +/* + * Remove all pools in the system. + */ +void +spa_evict_all(void) +{ + spa_t *spa; + + /* + * Remove all cached state. All pools should be closed now, + * so every spa in the AVL tree should be unreferenced. + */ + mutex_enter(&spa_namespace_lock); + while ((spa = spa_next(NULL)) != NULL) { + /* + * Stop async tasks. The async thread may need to detach + * a device that's been replaced, which requires grabbing + * spa_namespace_lock, so we must drop it here. 
+ */ + spa_open_ref(spa, FTAG); + mutex_exit(&spa_namespace_lock); + spa_async_suspend(spa); + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + + if (spa->spa_state != POOL_STATE_UNINITIALIZED) { + spa_unload(spa); + spa_deactivate(spa); + } + spa_remove(spa); + } + mutex_exit(&spa_namespace_lock); +} + +vdev_t * +spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) +{ + vdev_t *vd; + int i; + + if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) + return (vd); + + if (aux) { + for (i = 0; i < spa->spa_l2cache.sav_count; i++) { + vd = spa->spa_l2cache.sav_vdevs[i]; + if (vd->vdev_guid == guid) + return (vd); + } + + for (i = 0; i < spa->spa_spares.sav_count; i++) { + vd = spa->spa_spares.sav_vdevs[i]; + if (vd->vdev_guid == guid) + return (vd); + } + } + + return (NULL); +} + +void +spa_upgrade(spa_t *spa, uint64_t version) +{ + ASSERT(spa_writeable(spa)); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + /* + * This should only be called for a non-faulted pool, and since a + * future version would result in an unopenable pool, this shouldn't be + * possible. + */ + ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); + ASSERT(version >= spa->spa_uberblock.ub_version); + + spa->spa_uberblock.ub_version = version; + vdev_config_dirty(spa->spa_root_vdev); + + spa_config_exit(spa, SCL_ALL, FTAG); + + txg_wait_synced(spa_get_dsl(spa), 0); +} + +boolean_t +spa_has_spare(spa_t *spa, uint64_t guid) +{ + int i; + uint64_t spareguid; + spa_aux_vdev_t *sav = &spa->spa_spares; + + for (i = 0; i < sav->sav_count; i++) + if (sav->sav_vdevs[i]->vdev_guid == guid) + return (B_TRUE); + + for (i = 0; i < sav->sav_npending; i++) { + if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, + &spareguid) == 0 && spareguid == guid) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Check if a pool has an active shared spare device. 
+ * Note: reference count of an active spare is 2, as a spare and as a replace + */ +static boolean_t +spa_has_active_shared_spare(spa_t *spa) +{ + int i, refcnt; + uint64_t pool; + spa_aux_vdev_t *sav = &spa->spa_spares; + + for (i = 0; i < sav->sav_count; i++) { + if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, + &refcnt) && pool != 0ULL && pool == spa_guid(spa) && + refcnt > 2) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Post a sysevent corresponding to the given event. The 'name' must be one of + * the event definitions in sys/sysevent/eventdefs.h. The payload will be + * filled in from the spa and (optionally) the vdev. This doesn't do anything + * in the userland libzpool, as we don't want consumers to misinterpret ztest + * or zdb as real changes. + */ +void +spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) +{ +#ifdef _KERNEL + sysevent_t *ev; + sysevent_attr_list_t *attr = NULL; + sysevent_value_t value; + sysevent_id_t eid; + + ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", + SE_SLEEP); + + value.value_type = SE_DATA_TYPE_STRING; + value.value.sv_string = spa_name(spa); + if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) + goto done; + + value.value_type = SE_DATA_TYPE_UINT64; + value.value.sv_uint64 = spa_guid(spa); + if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) + goto done; + + if (vd) { + value.value_type = SE_DATA_TYPE_UINT64; + value.value.sv_uint64 = vd->vdev_guid; + if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, + SE_SLEEP) != 0) + goto done; + + if (vd->vdev_path) { + value.value_type = SE_DATA_TYPE_STRING; + value.value.sv_string = vd->vdev_path; + if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, + &value, SE_SLEEP) != 0) + goto done; + } + } + + if (sysevent_attach_attributes(ev, attr) != 0) + goto done; + attr = NULL; + + (void) log_sysevent(ev, SE_SLEEP, &eid); + +done: + if (attr) + sysevent_free_attr(attr); + sysevent_free(ev); +#endif +} 
diff --git a/uts/common/fs/zfs/spa_config.c b/uts/common/fs/zfs/spa_config.c new file mode 100644 index 000000000000..69d57f66dbb6 --- /dev/null +++ b/uts/common/fs/zfs/spa_config.c @@ -0,0 +1,487 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/nvpair.h> +#include <sys/uio.h> +#include <sys/fs/zfs.h> +#include <sys/vdev_impl.h> +#include <sys/zfs_ioctl.h> +#include <sys/utsname.h> +#include <sys/systeminfo.h> +#include <sys/sunddi.h> +#ifdef _KERNEL +#include <sys/kobj.h> +#include <sys/zone.h> +#endif + +/* + * Pool configuration repository. + * + * Pool configuration is stored as a packed nvlist on the filesystem. By + * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot + * (when the ZFS module is loaded). Pools can also have the 'cachefile' + * property set that allows them to be stored in an alternate location under + * the control of external software. + * + * For each cache file, we have a single nvlist which holds all the + * configuration information. 
When the module loads, we read this information + * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is + * maintained independently in spa.c. Whenever the namespace is modified, or + * the configuration of a pool is changed, we call spa_config_sync(), which + * walks through all the active pools and writes the configuration to disk. + */ + +static uint64_t spa_config_generation = 1; + +/* + * This can be overridden in userland to preserve an alternate namespace for + * userland pools when doing testing. + */ +const char *spa_config_path = ZPOOL_CACHE; + +/* + * Called when the module is first loaded, this routine loads the configuration + * file into the SPA namespace. It does not actually open or load the pools; it + * only populates the namespace. + */ +void +spa_config_load(void) +{ + void *buf = NULL; + nvlist_t *nvlist, *child; + nvpair_t *nvpair; + char *pathname; + struct _buf *file; + uint64_t fsize; + + /* + * Open the configuration file. + */ + pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) snprintf(pathname, MAXPATHLEN, "%s%s", + (rootdir != NULL) ? "./" : "", spa_config_path); + + file = kobj_open_file(pathname); + + kmem_free(pathname, MAXPATHLEN); + + if (file == (struct _buf *)-1) + return; + + if (kobj_get_filesize(file, &fsize) != 0) + goto out; + + buf = kmem_alloc(fsize, KM_SLEEP); + + /* + * Read the nvlist from the file. + */ + if (kobj_read_file(file, buf, fsize, 0) < 0) + goto out; + + /* + * Unpack the nvlist. + */ + if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) + goto out; + + /* + * Iterate over all elements in the nvlist, creating a new spa_t for + * each one with the specified configuration. 
+ */ + mutex_enter(&spa_namespace_lock); + nvpair = NULL; + while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { + if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) + continue; + + VERIFY(nvpair_value_nvlist(nvpair, &child) == 0); + + if (spa_lookup(nvpair_name(nvpair)) != NULL) + continue; + (void) spa_add(nvpair_name(nvpair), child, NULL); + } + mutex_exit(&spa_namespace_lock); + + nvlist_free(nvlist); + +out: + if (buf != NULL) + kmem_free(buf, fsize); + + kobj_close_file(file); +} + +static void +spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) +{ + size_t buflen; + char *buf; + vnode_t *vp; + int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; + char *temp; + + /* + * If the nvlist is empty (NULL), then remove the old cachefile. + */ + if (nvl == NULL) { + (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); + return; + } + + /* + * Pack the configuration into a buffer. + */ + VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); + + buf = kmem_alloc(buflen, KM_SLEEP); + temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + + VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, + KM_SLEEP) == 0); + + /* + * Write the configuration to disk. We need to do the traditional + * 'write to temporary file, sync, move over original' to make sure we + * always have a consistent view of the data. + */ + (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); + + if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) { + if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, NULL) == 0 && + VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { + (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); + } + (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); + VN_RELE(vp); + } + + (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); + + kmem_free(buf, buflen); + kmem_free(temp, MAXPATHLEN); +} + +/* + * Synchronize pool configuration to disk. This must be called with the + * namespace lock held. 
+ */ +void +spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) +{ + spa_config_dirent_t *dp, *tdp; + nvlist_t *nvl; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + if (rootdir == NULL || !(spa_mode_global & FWRITE)) + return; + + /* + * Iterate over all cachefiles for the pool, past or present. When the + * cachefile is changed, the new one is pushed onto this list, allowing + * us to update previous cachefiles that no longer contain this pool. + */ + for (dp = list_head(&target->spa_config_list); dp != NULL; + dp = list_next(&target->spa_config_list, dp)) { + spa_t *spa = NULL; + if (dp->scd_path == NULL) + continue; + + /* + * Iterate over all pools, adding any matching pools to 'nvl'. + */ + nvl = NULL; + while ((spa = spa_next(spa)) != NULL) { + if (spa == target && removing) + continue; + + mutex_enter(&spa->spa_props_lock); + tdp = list_head(&spa->spa_config_list); + if (spa->spa_config == NULL || + tdp->scd_path == NULL || + strcmp(tdp->scd_path, dp->scd_path) != 0) { + mutex_exit(&spa->spa_props_lock); + continue; + } + + if (nvl == NULL) + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + + VERIFY(nvlist_add_nvlist(nvl, spa->spa_name, + spa->spa_config) == 0); + mutex_exit(&spa->spa_props_lock); + } + + spa_config_write(dp, nvl); + nvlist_free(nvl); + } + + /* + * Remove any config entries older than the current one. + */ + dp = list_head(&target->spa_config_list); + while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) { + list_remove(&target->spa_config_list, tdp); + if (tdp->scd_path != NULL) + spa_strfree(tdp->scd_path); + kmem_free(tdp, sizeof (spa_config_dirent_t)); + } + + spa_config_generation++; + + if (postsysevent) + spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC); +} + +/* + * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache, + * and we don't want to allow the local zone to see all the pools anyway. 
+ * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration + * information for all pool visible within the zone. + */ +nvlist_t * +spa_all_configs(uint64_t *generation) +{ + nvlist_t *pools; + spa_t *spa = NULL; + + if (*generation == spa_config_generation) + return (NULL); + + VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + mutex_enter(&spa_namespace_lock); + while ((spa = spa_next(spa)) != NULL) { + if (INGLOBALZONE(curproc) || + zone_dataset_visible(spa_name(spa), NULL)) { + mutex_enter(&spa->spa_props_lock); + VERIFY(nvlist_add_nvlist(pools, spa_name(spa), + spa->spa_config) == 0); + mutex_exit(&spa->spa_props_lock); + } + } + *generation = spa_config_generation; + mutex_exit(&spa_namespace_lock); + + return (pools); +} + +void +spa_config_set(spa_t *spa, nvlist_t *config) +{ + mutex_enter(&spa->spa_props_lock); + if (spa->spa_config != NULL) + nvlist_free(spa->spa_config); + spa->spa_config = config; + mutex_exit(&spa->spa_props_lock); +} + +/* + * Generate the pool's configuration based on the current in-core state. + * We infer whether to generate a complete config or just one top-level config + * based on whether vd is the root vdev. + */ +nvlist_t * +spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) +{ + nvlist_t *config, *nvroot; + vdev_t *rvd = spa->spa_root_vdev; + unsigned long hostid = 0; + boolean_t locked = B_FALSE; + uint64_t split_guid; + + if (vd == NULL) { + vd = rvd; + locked = B_TRUE; + spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); + } + + ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == + (SCL_CONFIG | SCL_STATE)); + + /* + * If txg is -1, report the current value of spa->spa_config_txg. 
+ */ + if (txg == -1ULL) + txg = spa->spa_config_txg; + + VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, + spa_version(spa)) == 0); + VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, + spa_name(spa)) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, + spa_state(spa)) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, + txg) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, + spa_guid(spa)) == 0); +#ifdef _KERNEL + hostid = zone_get_hostid(NULL); +#else /* _KERNEL */ + /* + * We're emulating the system's hostid in userland, so we can't use + * zone_get_hostid(). + */ + (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); +#endif /* _KERNEL */ + if (hostid != 0) { + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, + hostid) == 0); + } + VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, + utsname.nodename) == 0); + + if (vd != rvd) { + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, + vd->vdev_top->vdev_guid) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + if (vd->vdev_isspare) + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, + 1ULL) == 0); + if (vd->vdev_islog) + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, + 1ULL) == 0); + vd = vd->vdev_top; /* label contains top config */ + } else { + /* + * Only add the (potentially large) split information + * in the mos config, and not in the vdev labels + */ + if (spa->spa_config_splitting != NULL) + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, + spa->spa_config_splitting) == 0); + } + + /* + * Add the top-level config. We even add this on pools which + * don't support holes in the namespace. + */ + vdev_top_config_generate(spa, config); + + /* + * If we're splitting, record the original pool's guid. 
+ */ + if (spa->spa_config_splitting != NULL && + nvlist_lookup_uint64(spa->spa_config_splitting, + ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) { + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, + split_guid) == 0); + } + + nvroot = vdev_config_generate(spa, vd, getstats, 0); + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); + nvlist_free(nvroot); + + if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { + ddt_histogram_t *ddh; + ddt_stat_t *dds; + ddt_object_t *ddo; + + ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); + ddt_get_dedup_histogram(spa, ddh); + VERIFY(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_DDT_HISTOGRAM, + (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0); + kmem_free(ddh, sizeof (ddt_histogram_t)); + + ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); + ddt_get_dedup_object_stats(spa, ddo); + VERIFY(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_DDT_OBJ_STATS, + (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); + kmem_free(ddo, sizeof (ddt_object_t)); + + dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); + ddt_get_dedup_stats(spa, dds); + VERIFY(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_DDT_STATS, + (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0); + kmem_free(dds, sizeof (ddt_stat_t)); + } + + if (locked) + spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); + + return (config); +} + +/* + * Update all disk labels, generate a fresh config based on the current + * in-core state, and sync the global config cache (do not sync the config + * cache if this is a booting rootpool). 
+ */ +void +spa_config_update(spa_t *spa, int what) +{ + vdev_t *rvd = spa->spa_root_vdev; + uint64_t txg; + int c; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + txg = spa_last_synced_txg(spa) + 1; + if (what == SPA_CONFIG_UPDATE_POOL) { + vdev_config_dirty(rvd); + } else { + /* + * If we have top-level vdevs that were added but have + * not yet been prepared for allocation, do that now. + * (It's safe now because the config cache is up to date, + * so it will be able to translate the new DVAs.) + * See comments in spa_vdev_add() for full details. + */ + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + if (tvd->vdev_ms_array == 0) + vdev_metaslab_set_size(tvd); + vdev_expand(tvd, txg); + } + } + spa_config_exit(spa, SCL_ALL, FTAG); + + /* + * Wait for the mosconfig to be regenerated and synced. + */ + txg_wait_synced(spa->spa_dsl_pool, txg); + + /* + * Update the global config cache to reflect the new mosconfig. + */ + if (!spa->spa_is_root) + spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL); + + if (what == SPA_CONFIG_UPDATE_POOL) + spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); +} diff --git a/uts/common/fs/zfs/spa_errlog.c b/uts/common/fs/zfs/spa_errlog.c new file mode 100644 index 000000000000..282140b3bd65 --- /dev/null +++ b/uts/common/fs/zfs/spa_errlog.c @@ -0,0 +1,403 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Routines to manage the on-disk persistent error log. + * + * Each pool stores a log of all logical data errors seen during normal + * operation. This is actually the union of two distinct logs: the last log, + * and the current log. All errors seen are logged to the current log. When a + * scrub completes, the current log becomes the last log, the last log is thrown + * out, and the current log is reinitialized. This way, if an error is somehow + * corrected, a new scrub will show that it no longer exists, and it will be + * deleted from the log when the scrub completes. + * + * The log is stored using a ZAP object whose key is a string form of the + * zbookmark tuple (objset, object, level, blkid), and whose contents is an + * optional 'objset:object' human-readable string describing the data. When an + * error is first logged, this string will be empty, indicating that no name is + * known. This prevents us from having to issue a potentially large amount of + * I/O to discover the object name during an error path. Instead, we do the + * calculation when the data is requested, storing the result so future queries + * will be faster. + * + * This log is then shipped into an nvlist where the key is the dataset name and + * the value is the object name. Userland is then responsible for uniquifying + * this list and displaying it to the user. 
+ */ + +#include <sys/dmu_tx.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/zap.h> +#include <sys/zio.h> + + +/* + * Convert a bookmark to a string. + */ +static void +bookmark_to_name(zbookmark_t *zb, char *buf, size_t len) +{ + (void) snprintf(buf, len, "%llx:%llx:%llx:%llx", + (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, + (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid); +} + +/* + * Convert a string to a bookmark + */ +#ifdef _KERNEL +static void +name_to_bookmark(char *buf, zbookmark_t *zb) +{ + zb->zb_objset = strtonum(buf, &buf); + ASSERT(*buf == ':'); + zb->zb_object = strtonum(buf + 1, &buf); + ASSERT(*buf == ':'); + zb->zb_level = (int)strtonum(buf + 1, &buf); + ASSERT(*buf == ':'); + zb->zb_blkid = strtonum(buf + 1, &buf); + ASSERT(*buf == '\0'); +} +#endif + +/* + * Log an uncorrectable error to the persistent error log. We add it to the + * spa's list of pending errors. The changes are actually synced out to disk + * during spa_errlog_sync(). + */ +void +spa_log_error(spa_t *spa, zio_t *zio) +{ + zbookmark_t *zb = &zio->io_logical->io_bookmark; + spa_error_entry_t search; + spa_error_entry_t *new; + avl_tree_t *tree; + avl_index_t where; + + /* + * If we are trying to import a pool, ignore any errors, as we won't be + * writing to the pool any time soon. + */ + if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT) + return; + + mutex_enter(&spa->spa_errlist_lock); + + /* + * If we have had a request to rotate the log, log it to the next list + * instead of the current one. 
+ */ + if (spa->spa_scrub_active || spa->spa_scrub_finished) + tree = &spa->spa_errlist_scrub; + else + tree = &spa->spa_errlist_last; + + search.se_bookmark = *zb; + if (avl_find(tree, &search, &where) != NULL) { + mutex_exit(&spa->spa_errlist_lock); + return; + } + + new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP); + new->se_bookmark = *zb; + avl_insert(tree, new, where); + + mutex_exit(&spa->spa_errlist_lock); +} + +/* + * Return the number of errors currently in the error log. This is actually the + * sum of both the last log and the current log, since we don't know the union + * of these logs until we reach userland. + */ +uint64_t +spa_get_errlog_size(spa_t *spa) +{ + uint64_t total = 0, count; + + mutex_enter(&spa->spa_errlog_lock); + if (spa->spa_errlog_scrub != 0 && + zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub, + &count) == 0) + total += count; + + if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished && + zap_count(spa->spa_meta_objset, spa->spa_errlog_last, + &count) == 0) + total += count; + mutex_exit(&spa->spa_errlog_lock); + + mutex_enter(&spa->spa_errlist_lock); + total += avl_numnodes(&spa->spa_errlist_last); + total += avl_numnodes(&spa->spa_errlist_scrub); + mutex_exit(&spa->spa_errlist_lock); + + return (total); +} + +#ifdef _KERNEL +static int +process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count) +{ + zap_cursor_t zc; + zap_attribute_t za; + zbookmark_t zb; + + if (obj == 0) + return (0); + + for (zap_cursor_init(&zc, spa->spa_meta_objset, obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + + if (*count == 0) { + zap_cursor_fini(&zc); + return (ENOMEM); + } + + name_to_bookmark(za.za_name, &zb); + + if (copyout(&zb, (char *)addr + + (*count - 1) * sizeof (zbookmark_t), + sizeof (zbookmark_t)) != 0) + return (EFAULT); + + *count -= 1; + } + + zap_cursor_fini(&zc); + + return (0); +} + +static int +process_error_list(avl_tree_t *list, void *addr, size_t *count) +{ + spa_error_entry_t 
*se; + + for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { + + if (*count == 0) + return (ENOMEM); + + if (copyout(&se->se_bookmark, (char *)addr + + (*count - 1) * sizeof (zbookmark_t), + sizeof (zbookmark_t)) != 0) + return (EFAULT); + + *count -= 1; + } + + return (0); +} +#endif + +/* + * Copy all known errors to userland as an array of bookmarks. This is + * actually a union of the on-disk last log and current log, as well as any + * pending error requests. + * + * Because the act of reading the on-disk log could cause errors to be + * generated, we have two separate locks: one for the error log and one for the + * in-core error lists. We only need the error list lock to log and error, so + * we grab the error log lock while we read the on-disk logs, and only pick up + * the error list lock when we are finished. + */ +int +spa_get_errlog(spa_t *spa, void *uaddr, size_t *count) +{ + int ret = 0; + +#ifdef _KERNEL + mutex_enter(&spa->spa_errlog_lock); + + ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count); + + if (!ret && !spa->spa_scrub_finished) + ret = process_error_log(spa, spa->spa_errlog_last, uaddr, + count); + + mutex_enter(&spa->spa_errlist_lock); + if (!ret) + ret = process_error_list(&spa->spa_errlist_scrub, uaddr, + count); + if (!ret) + ret = process_error_list(&spa->spa_errlist_last, uaddr, + count); + mutex_exit(&spa->spa_errlist_lock); + + mutex_exit(&spa->spa_errlog_lock); +#endif + + return (ret); +} + +/* + * Called when a scrub completes. This simply set a bit which tells which AVL + * tree to add new errors. spa_errlog_sync() is responsible for actually + * syncing the changes to the underlying objects. + */ +void +spa_errlog_rotate(spa_t *spa) +{ + mutex_enter(&spa->spa_errlist_lock); + spa->spa_scrub_finished = B_TRUE; + mutex_exit(&spa->spa_errlist_lock); +} + +/* + * Discard any pending errors from the spa_t. 
Called when unloading a faulted + * pool, as the errors encountered during the open cannot be synced to disk. + */ +void +spa_errlog_drain(spa_t *spa) +{ + spa_error_entry_t *se; + void *cookie; + + mutex_enter(&spa->spa_errlist_lock); + + cookie = NULL; + while ((se = avl_destroy_nodes(&spa->spa_errlist_last, + &cookie)) != NULL) + kmem_free(se, sizeof (spa_error_entry_t)); + cookie = NULL; + while ((se = avl_destroy_nodes(&spa->spa_errlist_scrub, + &cookie)) != NULL) + kmem_free(se, sizeof (spa_error_entry_t)); + + mutex_exit(&spa->spa_errlist_lock); +} + +/* + * Process a list of errors into the current on-disk log. + */ +static void +sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) +{ + spa_error_entry_t *se; + char buf[64]; + void *cookie; + + if (avl_numnodes(t) != 0) { + /* create log if necessary */ + if (*obj == 0) + *obj = zap_create(spa->spa_meta_objset, + DMU_OT_ERROR_LOG, DMU_OT_NONE, + 0, tx); + + /* add errors to the current log */ + for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) { + char *name = se->se_name ? se->se_name : ""; + + bookmark_to_name(&se->se_bookmark, buf, sizeof (buf)); + + (void) zap_update(spa->spa_meta_objset, + *obj, buf, 1, strlen(name) + 1, name, tx); + } + + /* purge the error list */ + cookie = NULL; + while ((se = avl_destroy_nodes(t, &cookie)) != NULL) + kmem_free(se, sizeof (spa_error_entry_t)); + } +} + +/* + * Sync the error log out to disk. This is a little tricky because the act of + * writing the error log requires the spa_errlist_lock. So, we need to lock the + * error lists, take a copy of the lists, and then reinitialize them. Then, we + * drop the error list lock and take the error log lock, at which point we + * do the errlog processing. Then, if we encounter an I/O error during this + * process, we can successfully add the error to the list. 
Note that this will + * result in the perpetual recycling of errors, but it is an unlikely situation + * and not a performance critical operation. + */ +void +spa_errlog_sync(spa_t *spa, uint64_t txg) +{ + dmu_tx_t *tx; + avl_tree_t scrub, last; + int scrub_finished; + + mutex_enter(&spa->spa_errlist_lock); + + /* + * Bail out early under normal circumstances. + */ + if (avl_numnodes(&spa->spa_errlist_scrub) == 0 && + avl_numnodes(&spa->spa_errlist_last) == 0 && + !spa->spa_scrub_finished) { + mutex_exit(&spa->spa_errlist_lock); + return; + } + + spa_get_errlists(spa, &last, &scrub); + scrub_finished = spa->spa_scrub_finished; + spa->spa_scrub_finished = B_FALSE; + + mutex_exit(&spa->spa_errlist_lock); + mutex_enter(&spa->spa_errlog_lock); + + tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); + + /* + * Sync out the current list of errors. + */ + sync_error_list(spa, &last, &spa->spa_errlog_last, tx); + + /* + * Rotate the log if necessary. + */ + if (scrub_finished) { + if (spa->spa_errlog_last != 0) + VERIFY(dmu_object_free(spa->spa_meta_objset, + spa->spa_errlog_last, tx) == 0); + spa->spa_errlog_last = spa->spa_errlog_scrub; + spa->spa_errlog_scrub = 0; + + sync_error_list(spa, &scrub, &spa->spa_errlog_last, tx); + } + + /* + * Sync out any pending scrub errors. + */ + sync_error_list(spa, &scrub, &spa->spa_errlog_scrub, tx); + + /* + * Update the MOS to reflect the new values. 
+ */ + (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ERRLOG_LAST, sizeof (uint64_t), 1, + &spa->spa_errlog_last, tx); + (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ERRLOG_SCRUB, sizeof (uint64_t), 1, + &spa->spa_errlog_scrub, tx); + + dmu_tx_commit(tx); + + mutex_exit(&spa->spa_errlog_lock); +} diff --git a/uts/common/fs/zfs/spa_history.c b/uts/common/fs/zfs/spa_history.c new file mode 100644 index 000000000000..212abae5b80c --- /dev/null +++ b/uts/common/fs/zfs/spa_history.c @@ -0,0 +1,502 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/zap.h> +#include <sys/dsl_synctask.h> +#include <sys/dmu_tx.h> +#include <sys/dmu_objset.h> +#include <sys/utsname.h> +#include <sys/cmn_err.h> +#include <sys/sunddi.h> +#include "zfs_comutil.h" +#ifdef _KERNEL +#include <sys/zone.h> +#endif + +/* + * Routines to manage the on-disk history log. + * + * The history log is stored as a dmu object containing + * <packed record length, record nvlist> tuples. 
+ * + * Where "record nvlist" is an nvlist containing uint64_ts and strings, and + * "packed record length" is the packed length of the "record nvlist" stored + * as a little endian uint64_t. + * + * The log is implemented as a ring buffer, though the original creation + * of the pool ('zpool create') is never overwritten. + * + * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer + * of 'spa_history' stores the offsets for logging/retrieving history as + * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of + * where the 'zpool create' record is stored. This allows us to never + * overwrite the original creation of the pool. 'sh_phys_max_off' is the + * physical ending offset in bytes of the log. This tells you the length of + * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record + * is added, 'sh_eof' is incremented by the size of the record. + * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). + * This is where the consumer should start reading from after reading in + * the 'zpool create' portion of the log. + * + * 'sh_records_lost' keeps track of how many records have been overwritten + * and permanently lost. 
+ */
+
+/*
+ * Map a logical log offset onto its physical offset in the history object.
+ * The bytes between 'sh_pool_create_len' and 'sh_phys_max_off' are treated
+ * as a ring; the region below 'sh_pool_create_len' is never mapped to.
+ */
+static uint64_t
+spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp)
+{
+	uint64_t ring_start = shpp->sh_pool_create_len;
+	uint64_t ring_len = shpp->sh_phys_max_off - ring_start;
+
+	return (ring_start + (log_off - ring_start) % ring_len);
+}
+
+/*
+ * Allocate the history object in the MOS, record it in the pool directory
+ * under DMU_POOL_HISTORY, and initialize 'sh_phys_max_off' in its bonus
+ * buffer.  'spa->spa_history' must be 0 on entry.
+ */
+void
+spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
+{
+	objset_t *mos = spa->spa_meta_objset;
+	spa_history_phys_t *shpp;
+	dmu_buf_t *dbp;
+	uint64_t max_off;
+
+	ASSERT(spa->spa_history == 0);
+	spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY,
+	    SPA_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS,
+	    sizeof (spa_history_phys_t), tx);
+
+	VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_HISTORY, sizeof (uint64_t), 1,
+	    &spa->spa_history, tx) == 0);
+
+	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
+	ASSERT(dbp->db_size >= sizeof (spa_history_phys_t));
+
+	shpp = dbp->db_data;
+	dmu_buf_will_dirty(dbp, tx);
+
+	/*
+	 * Size the log at 1% of the pool, clamped to the range
+	 * [128KB, 32MB].
+	 */
+	max_off = metaslab_class_get_dspace(spa_normal_class(spa)) / 100;
+	if (max_off > (32<<20))
+		max_off = 32<<20;
+	if (max_off < (128<<10))
+		max_off = 128<<10;
+	shpp->sh_phys_max_off = max_off;
+
+	dmu_buf_rele(dbp, FTAG);
+}
+
+/*
+ * Change 'sh_bof' to the beginning of the next record. 
+ */ +static int +spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) +{ + objset_t *mos = spa->spa_meta_objset; + uint64_t firstread, reclen, phys_bof; + char buf[sizeof (reclen)]; + int err; + + phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); + firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); + + if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, + buf, DMU_READ_PREFETCH)) != 0) + return (err); + if (firstread != sizeof (reclen)) { + if ((err = dmu_read(mos, spa->spa_history, + shpp->sh_pool_create_len, sizeof (reclen) - firstread, + buf + firstread, DMU_READ_PREFETCH)) != 0) + return (err); + } + + reclen = LE_64(*((uint64_t *)buf)); + shpp->sh_bof += reclen + sizeof (reclen); + shpp->sh_records_lost++; + return (0); +} + +static int +spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, + dmu_tx_t *tx) +{ + uint64_t firstwrite, phys_eof; + objset_t *mos = spa->spa_meta_objset; + int err; + + ASSERT(MUTEX_HELD(&spa->spa_history_lock)); + + /* see if we need to reset logical BOF */ + while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - + (shpp->sh_eof - shpp->sh_bof) <= len) { + if ((err = spa_history_advance_bof(spa, shpp)) != 0) { + return (err); + } + } + + phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); + firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); + shpp->sh_eof += len; + dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); + + len -= firstwrite; + if (len > 0) { + /* write out the rest at the beginning of physical file */ + dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, + len, (char *)buf + firstwrite, tx); + } + + return (0); +} + +static char * +spa_history_zone() +{ +#ifdef _KERNEL + return (curproc->p_zone->zone_name); +#else + return ("global"); +#endif +} + +/* + * Write out a history event. 
+ */ +/*ARGSUSED*/ +static void +spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + spa_t *spa = arg1; + history_arg_t *hap = arg2; + const char *history_str = hap->ha_history_str; + objset_t *mos = spa->spa_meta_objset; + dmu_buf_t *dbp; + spa_history_phys_t *shpp; + size_t reclen; + uint64_t le_len; + nvlist_t *nvrecord; + char *record_packed = NULL; + int ret; + + /* + * If we have an older pool that doesn't have a command + * history object, create it now. + */ + mutex_enter(&spa->spa_history_lock); + if (!spa->spa_history) + spa_history_create_obj(spa, tx); + mutex_exit(&spa->spa_history_lock); + + /* + * Get the offset of where we need to write via the bonus buffer. + * Update the offset when the write completes. + */ + VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); + shpp = dbp->db_data; + + dmu_buf_will_dirty(dbp, tx); + +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(dbp, &doi); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); + } +#endif + + VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, + gethrestime_sec()) == 0); + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); + if (hap->ha_zone != NULL) + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, + hap->ha_zone) == 0); +#ifdef _KERNEL + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, + utsname.nodename) == 0); +#endif + if (hap->ha_log_type == LOG_CMD_POOL_CREATE || + hap->ha_log_type == LOG_CMD_NORMAL) { + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, + history_str) == 0); + + zfs_dbgmsg("command: %s", history_str); + } else { + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, + hap->ha_event) == 0); + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, + tx->tx_txg) == 0); + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, + history_str) == 0); + + zfs_dbgmsg("internal %s pool:%s txg:%llu %s", + 
zfs_history_event_names[hap->ha_event], spa_name(spa), + (longlong_t)tx->tx_txg, history_str); + + } + + VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); + record_packed = kmem_alloc(reclen, KM_SLEEP); + + VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, + NV_ENCODE_XDR, KM_SLEEP) == 0); + + mutex_enter(&spa->spa_history_lock); + if (hap->ha_log_type == LOG_CMD_POOL_CREATE) + VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); + + /* write out the packed length as little endian */ + le_len = LE_64((uint64_t)reclen); + ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); + if (!ret) + ret = spa_history_write(spa, record_packed, reclen, shpp, tx); + + if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { + shpp->sh_pool_create_len += sizeof (le_len) + reclen; + shpp->sh_bof = shpp->sh_pool_create_len; + } + + mutex_exit(&spa->spa_history_lock); + nvlist_free(nvrecord); + kmem_free(record_packed, reclen); + dmu_buf_rele(dbp, FTAG); + + strfree(hap->ha_history_str); + if (hap->ha_zone != NULL) + strfree(hap->ha_zone); + kmem_free(hap, sizeof (history_arg_t)); +} + +/* + * Write out a history event. + */ +int +spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) +{ + history_arg_t *ha; + int err = 0; + dmu_tx_t *tx; + + ASSERT(what != LOG_INTERNAL); + + tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); + } + + ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); + ha->ha_history_str = strdup(history_str); + ha->ha_zone = strdup(spa_history_zone()); + ha->ha_log_type = what; + ha->ha_uid = crgetuid(CRED()); + + /* Kick this off asynchronously; errors are ignored. */ + dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, + spa_history_log_sync, spa, ha, 0, tx); + dmu_tx_commit(tx); + + /* spa_history_log_sync will free ha and strings */ + return (err); +} + +/* + * Read out the command history. 
+ */ +int +spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) +{ + objset_t *mos = spa->spa_meta_objset; + dmu_buf_t *dbp; + uint64_t read_len, phys_read_off, phys_eof; + uint64_t leftover = 0; + spa_history_phys_t *shpp; + int err; + + /* + * If the command history doesn't exist (older pool), + * that's ok, just return ENOENT. + */ + if (!spa->spa_history) + return (ENOENT); + + /* + * The history is logged asynchronously, so when they request + * the first chunk of history, make sure everything has been + * synced to disk so that we get it. + */ + if (*offp == 0 && spa_writeable(spa)) + txg_wait_synced(spa_get_dsl(spa), 0); + + if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) + return (err); + shpp = dbp->db_data; + +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(dbp, &doi); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); + } +#endif + + mutex_enter(&spa->spa_history_lock); + phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); + + if (*offp < shpp->sh_pool_create_len) { + /* read in just the zpool create history */ + phys_read_off = *offp; + read_len = MIN(*len, shpp->sh_pool_create_len - + phys_read_off); + } else { + /* + * Need to reset passed in offset to BOF if the passed in + * offset has since been overwritten. + */ + *offp = MAX(*offp, shpp->sh_bof); + phys_read_off = spa_history_log_to_phys(*offp, shpp); + + /* + * Read up to the minimum of what the user passed down or + * the EOF (physical or logical). If we hit physical EOF, + * use 'leftover' to read from the physical BOF. 
+ */ + if (phys_read_off <= phys_eof) { + read_len = MIN(*len, phys_eof - phys_read_off); + } else { + read_len = MIN(*len, + shpp->sh_phys_max_off - phys_read_off); + if (phys_read_off + *len > shpp->sh_phys_max_off) { + leftover = MIN(*len - read_len, + phys_eof - shpp->sh_pool_create_len); + } + } + } + + /* offset for consumer to use next */ + *offp += read_len + leftover; + + /* tell the consumer how much you actually read */ + *len = read_len + leftover; + + if (read_len == 0) { + mutex_exit(&spa->spa_history_lock); + dmu_buf_rele(dbp, FTAG); + return (0); + } + + err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, + DMU_READ_PREFETCH); + if (leftover && err == 0) { + err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, + leftover, buf + read_len, DMU_READ_PREFETCH); + } + mutex_exit(&spa->spa_history_lock); + + dmu_buf_rele(dbp, FTAG); + return (err); +} + +static void +log_internal(history_internal_events_t event, spa_t *spa, + dmu_tx_t *tx, const char *fmt, va_list adx) +{ + history_arg_t *ha; + + /* + * If this is part of creating a pool, not everything is + * initialized yet, so don't bother logging the internal events. + */ + if (tx->tx_txg == TXG_INITIAL) + return; + + ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); + ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, + KM_SLEEP); + + (void) vsprintf(ha->ha_history_str, fmt, adx); + + ha->ha_log_type = LOG_INTERNAL; + ha->ha_event = event; + ha->ha_zone = NULL; + ha->ha_uid = 0; + + if (dmu_tx_is_syncing(tx)) { + spa_history_log_sync(spa, ha, tx); + } else { + dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, + spa_history_log_sync, spa, ha, 0, tx); + } + /* spa_history_log_sync() will free ha and strings */ +} + +void +spa_history_log_internal(history_internal_events_t event, spa_t *spa, + dmu_tx_t *tx, const char *fmt, ...) 
+{ + dmu_tx_t *htx = tx; + va_list adx; + + /* create a tx if we didn't get one */ + if (tx == NULL) { + htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); + if (dmu_tx_assign(htx, TXG_WAIT) != 0) { + dmu_tx_abort(htx); + return; + } + } + + va_start(adx, fmt); + log_internal(event, spa, htx, fmt, adx); + va_end(adx); + + /* if we didn't get a tx from the caller, commit the one we made */ + if (tx == NULL) + dmu_tx_commit(htx); +} + +void +spa_history_log_version(spa_t *spa, history_internal_events_t event) +{ +#ifdef _KERNEL + uint64_t current_vers = spa_version(spa); + + if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { + spa_history_log_internal(event, spa, NULL, + "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", + (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, + utsname.nodename, utsname.release, utsname.version, + utsname.machine); + } + cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", + event == LOG_POOL_IMPORT ? "imported" : + event == LOG_POOL_CREATE ? "created" : "accessed", + (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); +#endif +} diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c new file mode 100644 index 000000000000..1b54afb0be5e --- /dev/null +++ b/uts/common/fs/zfs/spa_misc.c @@ -0,0 +1,1672 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/spa_impl.h> +#include <sys/zio.h> +#include <sys/zio_checksum.h> +#include <sys/zio_compress.h> +#include <sys/dmu.h> +#include <sys/dmu_tx.h> +#include <sys/zap.h> +#include <sys/zil.h> +#include <sys/vdev_impl.h> +#include <sys/metaslab.h> +#include <sys/uberblock_impl.h> +#include <sys/txg.h> +#include <sys/avl.h> +#include <sys/unique.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_scan.h> +#include <sys/fs/zfs.h> +#include <sys/metaslab_impl.h> +#include <sys/arc.h> +#include <sys/ddt.h> +#include "zfs_prop.h" + +/* + * SPA locking + * + * There are four basic locks for managing spa_t structures: + * + * spa_namespace_lock (global mutex) + * + * This lock must be acquired to do any of the following: + * + * - Lookup a spa_t by name + * - Add or remove a spa_t from the namespace + * - Increase spa_refcount from non-zero + * - Check if spa_refcount is zero + * - Rename a spa_t + * - add/remove/attach/detach devices + * - Held for the duration of create/destroy/import/export + * + * It does not need to handle recursion. A create or destroy may + * reference objects (files or zvols) in other pools, but by + * definition they must have an existing reference, and will never need + * to lookup a spa_t by name. + * + * spa_refcount (per-spa refcount_t protected by mutex) + * + * This reference count keep track of any active users of the spa_t. The + * spa_t cannot be destroyed or freed while this is non-zero. Internally, + * the refcount is never really 'zero' - opening a pool implicitly keeps + * some references in the DMU. 
Internally we check against spa_minref, but + * present the image of a zero/non-zero value to consumers. + * + * spa_config_lock[] (per-spa array of rwlocks) + * + * This protects the spa_t from config changes, and must be held in + * the following circumstances: + * + * - RW_READER to perform I/O to the spa + * - RW_WRITER to change the vdev config + * + * The locking order is fairly straightforward: + * + * spa_namespace_lock -> spa_refcount + * + * The namespace lock must be acquired to increase the refcount from 0 + * or to check if it is zero. + * + * spa_refcount -> spa_config_lock[] + * + * There must be at least one valid reference on the spa_t to acquire + * the config lock. + * + * spa_namespace_lock -> spa_config_lock[] + * + * The namespace lock must always be taken before the config lock. + * + * + * The spa_namespace_lock can be acquired directly and is globally visible. + * + * The namespace is manipulated using the following functions, all of which + * require the spa_namespace_lock to be held. + * + * spa_lookup() Lookup a spa_t by name. + * + * spa_add() Create a new spa_t in the namespace. + * + * spa_remove() Remove a spa_t from the namespace. This also + * frees up any memory associated with the spa_t. + * + * spa_next() Returns the next spa_t in the system, or the + * first if NULL is passed. + * + * spa_evict_all() Shutdown and remove all spa_t structures in + * the system. + * + * spa_guid_exists() Determine whether a pool/device guid exists. + * + * The spa_refcount is manipulated using the following functions: + * + * spa_open_ref() Adds a reference to the given spa_t. Must be + * called with spa_namespace_lock held if the + * refcount is currently zero. + * + * spa_close() Remove a reference from the spa_t. This will + * not free the spa_t or remove it from the + * namespace. No locking is required. + * + * spa_refcount_zero() Returns true if the refcount is currently + * zero. Must be called with spa_namespace_lock + * held. 
+ * + * The spa_config_lock[] is an array of rwlocks, ordered as follows: + * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV. + * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}(). + * + * To read the configuration, it suffices to hold one of these locks as reader. + * To modify the configuration, you must hold all locks as writer. To modify + * vdev state without altering the vdev tree's topology (e.g. online/offline), + * you must hold SCL_STATE and SCL_ZIO as writer. + * + * We use these distinct config locks to avoid recursive lock entry. + * For example, spa_sync() (which holds SCL_CONFIG as reader) induces + * block allocations (SCL_ALLOC), which may require reading space maps + * from disk (dmu_read() -> zio_read() -> SCL_ZIO). + * + * The spa config locks cannot be normal rwlocks because we need the + * ability to hand off ownership. For example, SCL_ZIO is acquired + * by the issuing thread and later released by an interrupt thread. + * They do, however, obey the usual write-wanted semantics to prevent + * writer (i.e. system administrator) starvation. + * + * The lock acquisition rules are as follows: + * + * SCL_CONFIG + * Protects changes to the vdev tree topology, such as vdev + * add/remove/attach/detach. Protects the dirty config list + * (spa_config_dirty_list) and the set of spares and l2arc devices. + * + * SCL_STATE + * Protects changes to pool state and vdev state, such as vdev + * online/offline/fault/degrade/clear. Protects the dirty state list + * (spa_state_dirty_list) and global pool state (spa_state). + * + * SCL_ALLOC + * Protects changes to metaslab groups and classes. + * Held as reader by metaslab_alloc() and metaslab_claim(). + * + * SCL_ZIO + * Held by bp-level zios (those which have no io_vd upon entry) + * to prevent changes to the vdev tree. The bp-level zio implicitly + * protects all of its vdev child zios, which do not hold SCL_ZIO. 
+ * + * SCL_FREE + * Protects changes to metaslab groups and classes. + * Held as reader by metaslab_free(). SCL_FREE is distinct from + * SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free + * blocks in zio_done() while another i/o that holds either + * SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete. + * + * SCL_VDEV + * Held as reader to prevent changes to the vdev tree during trivial + * inquiries such as bp_get_dsize(). SCL_VDEV is distinct from the + * other locks, and lower than all of them, to ensure that it's safe + * to acquire regardless of caller context. + * + * In addition, the following rules apply: + * + * (a) spa_props_lock protects pool properties, spa_config and spa_config_list. + * The lock ordering is SCL_CONFIG > spa_props_lock. + * + * (b) I/O operations on leaf vdevs. For any zio operation that takes + * an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(), + * or zio_write_phys() -- the caller must ensure that the config cannot + * change in the interim, and that the vdev cannot be reopened. + * SCL_STATE as reader suffices for both. + * + * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit(). + * + * spa_vdev_enter() Acquire the namespace lock and the config lock + * for writing. + * + * spa_vdev_exit() Release the config lock, wait for all I/O + * to complete, sync the updated configs to the + * cache, and release the namespace lock. + * + * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit(). + * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual + * locking is, always, based on spa_namespace_lock and spa_config_lock[]. + * + * spa_rename() is also implemented within this file since it requires + * manipulation of the namespace.
+ */ + +static avl_tree_t spa_namespace_avl; +kmutex_t spa_namespace_lock; +static kcondvar_t spa_namespace_cv; +static int spa_active_count; +int spa_max_replication_override = SPA_DVAS_PER_BP; + +static kmutex_t spa_spare_lock; +static avl_tree_t spa_spare_avl; +static kmutex_t spa_l2cache_lock; +static avl_tree_t spa_l2cache_avl; + +kmem_cache_t *spa_buffer_pool; +int spa_mode_global; + +#ifdef ZFS_DEBUG +/* Everything except dprintf is on by default in debug builds */ +int zfs_flags = ~ZFS_DEBUG_DPRINTF; +#else +int zfs_flags = 0; +#endif + +/* + * zfs_recover can be set to nonzero to attempt to recover from + * otherwise-fatal errors, typically caused by on-disk corruption. When + * set, calls to zfs_panic_recover() will turn into warning messages. + */ +int zfs_recover = 0; + + +/* + * ========================================================================== + * SPA config locking + * ========================================================================== + */ +static void +spa_config_lock_init(spa_t *spa) +{ + for (int i = 0; i < SCL_LOCKS; i++) { + spa_config_lock_t *scl = &spa->spa_config_lock[i]; + mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); + refcount_create(&scl->scl_count); + scl->scl_writer = NULL; + scl->scl_write_wanted = 0; + } +} + +static void +spa_config_lock_destroy(spa_t *spa) +{ + for (int i = 0; i < SCL_LOCKS; i++) { + spa_config_lock_t *scl = &spa->spa_config_lock[i]; + mutex_destroy(&scl->scl_lock); + cv_destroy(&scl->scl_cv); + refcount_destroy(&scl->scl_count); + ASSERT(scl->scl_writer == NULL); + ASSERT(scl->scl_write_wanted == 0); + } +} + +int +spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) +{ + for (int i = 0; i < SCL_LOCKS; i++) { + spa_config_lock_t *scl = &spa->spa_config_lock[i]; + if (!(locks & (1 << i))) + continue; + mutex_enter(&scl->scl_lock); + if (rw == RW_READER) { + if (scl->scl_writer || scl->scl_write_wanted) { + 
mutex_exit(&scl->scl_lock); + spa_config_exit(spa, locks ^ (1 << i), tag); + return (0); + } + } else { + ASSERT(scl->scl_writer != curthread); + if (!refcount_is_zero(&scl->scl_count)) { + mutex_exit(&scl->scl_lock); + spa_config_exit(spa, locks ^ (1 << i), tag); + return (0); + } + scl->scl_writer = curthread; + } + (void) refcount_add(&scl->scl_count, tag); + mutex_exit(&scl->scl_lock); + } + return (1); +} + +void +spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) +{ + int wlocks_held = 0; + + for (int i = 0; i < SCL_LOCKS; i++) { + spa_config_lock_t *scl = &spa->spa_config_lock[i]; + if (scl->scl_writer == curthread) + wlocks_held |= (1 << i); + if (!(locks & (1 << i))) + continue; + mutex_enter(&scl->scl_lock); + if (rw == RW_READER) { + while (scl->scl_writer || scl->scl_write_wanted) { + cv_wait(&scl->scl_cv, &scl->scl_lock); + } + } else { + ASSERT(scl->scl_writer != curthread); + while (!refcount_is_zero(&scl->scl_count)) { + scl->scl_write_wanted++; + cv_wait(&scl->scl_cv, &scl->scl_lock); + scl->scl_write_wanted--; + } + scl->scl_writer = curthread; + } + (void) refcount_add(&scl->scl_count, tag); + mutex_exit(&scl->scl_lock); + } + ASSERT(wlocks_held <= locks); +} + +void +spa_config_exit(spa_t *spa, int locks, void *tag) +{ + for (int i = SCL_LOCKS - 1; i >= 0; i--) { + spa_config_lock_t *scl = &spa->spa_config_lock[i]; + if (!(locks & (1 << i))) + continue; + mutex_enter(&scl->scl_lock); + ASSERT(!refcount_is_zero(&scl->scl_count)); + if (refcount_remove(&scl->scl_count, tag) == 0) { + ASSERT(scl->scl_writer == NULL || + scl->scl_writer == curthread); + scl->scl_writer = NULL; /* OK in either case */ + cv_broadcast(&scl->scl_cv); + } + mutex_exit(&scl->scl_lock); + } +} + +int +spa_config_held(spa_t *spa, int locks, krw_t rw) +{ + int locks_held = 0; + + for (int i = 0; i < SCL_LOCKS; i++) { + spa_config_lock_t *scl = &spa->spa_config_lock[i]; + if (!(locks & (1 << i))) + continue; + if ((rw == RW_READER && 
!refcount_is_zero(&scl->scl_count)) || + (rw == RW_WRITER && scl->scl_writer == curthread)) + locks_held |= 1 << i; + } + + return (locks_held); +} + +/* + * ========================================================================== + * SPA namespace functions + * ========================================================================== + */ + +/* + * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. + * Returns NULL if no matching spa_t is found. + */ +spa_t * +spa_lookup(const char *name) +{ + static spa_t search; /* spa_t is large; don't allocate on stack */ + spa_t *spa; + avl_index_t where; + char c; + char *cp; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + /* + * If it's a full dataset name, figure out the pool name and + * just use that. + */ + cp = strpbrk(name, "/@"); + if (cp) { + c = *cp; + *cp = '\0'; + } + + (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); + spa = avl_find(&spa_namespace_avl, &search, &where); + + if (cp) + *cp = c; + + return (spa); +} + +/* + * Create an uninitialized spa_t with the given name. Requires + * spa_namespace_lock. The caller must ensure that the spa_t doesn't already + * exist by calling spa_lookup() first. 
+ */ +spa_t * +spa_add(const char *name, nvlist_t *config, const char *altroot) +{ + spa_t *spa; + spa_config_dirent_t *dp; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); + + mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL); + + cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); + cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); + cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); + cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); + + for (int t = 0; t < TXG_SIZE; t++) + bplist_create(&spa->spa_free_bplist[t]); + + (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name)); + spa->spa_state = POOL_STATE_UNINITIALIZED; + spa->spa_freeze_txg = UINT64_MAX; + spa->spa_final_txg = UINT64_MAX; + spa->spa_load_max_txg = UINT64_MAX; + spa->spa_proc = &p0; + spa->spa_proc_state = SPA_PROC_NONE; + + refcount_create(&spa->spa_refcount); + spa_config_lock_init(spa); + + avl_add(&spa_namespace_avl, spa); + + /* + * Set the alternate root, if there is one. + */ + if (altroot) { + spa->spa_root = spa_strdup(altroot); + spa_active_count++; + } + + /* + * Every pool starts with the default cachefile + */ + list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), + offsetof(spa_config_dirent_t, scd_link)); + + dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP); + dp->scd_path = altroot ? 
NULL : spa_strdup(spa_config_path); + list_insert_head(&spa->spa_config_list, dp); + + VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + + if (config != NULL) + VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); + + return (spa); +} + +/* + * Removes a spa_t from the namespace, freeing up any memory used. Requires + * spa_namespace_lock. This is called only after the spa_t has been closed and + * deactivated. + */ +void +spa_remove(spa_t *spa) +{ + spa_config_dirent_t *dp; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); + + nvlist_free(spa->spa_config_splitting); + + avl_remove(&spa_namespace_avl, spa); + cv_broadcast(&spa_namespace_cv); + + if (spa->spa_root) { + spa_strfree(spa->spa_root); + spa_active_count--; + } + + while ((dp = list_head(&spa->spa_config_list)) != NULL) { + list_remove(&spa->spa_config_list, dp); + if (dp->scd_path != NULL) + spa_strfree(dp->scd_path); + kmem_free(dp, sizeof (spa_config_dirent_t)); + } + + list_destroy(&spa->spa_config_list); + + nvlist_free(spa->spa_load_info); + spa_config_set(spa, NULL); + + refcount_destroy(&spa->spa_refcount); + + spa_config_lock_destroy(spa); + + for (int t = 0; t < TXG_SIZE; t++) + bplist_destroy(&spa->spa_free_bplist[t]); + + cv_destroy(&spa->spa_async_cv); + cv_destroy(&spa->spa_proc_cv); + cv_destroy(&spa->spa_scrub_io_cv); + cv_destroy(&spa->spa_suspend_cv); + + mutex_destroy(&spa->spa_async_lock); + mutex_destroy(&spa->spa_errlist_lock); + mutex_destroy(&spa->spa_errlog_lock); + mutex_destroy(&spa->spa_history_lock); + mutex_destroy(&spa->spa_proc_lock); + mutex_destroy(&spa->spa_props_lock); + mutex_destroy(&spa->spa_scrub_lock); + mutex_destroy(&spa->spa_suspend_lock); + mutex_destroy(&spa->spa_vdev_top_lock); + + kmem_free(spa, sizeof (spa_t)); +} + +/* + * Given a pool, return the next pool in the namespace, or NULL if there is + * none. If 'prev' is NULL, return the first pool. 
+ */ +spa_t * +spa_next(spa_t *prev) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + if (prev) + return (AVL_NEXT(&spa_namespace_avl, prev)); + else + return (avl_first(&spa_namespace_avl)); +} + +/* + * ========================================================================== + * SPA refcount functions + * ========================================================================== + */ + +/* + * Add a reference to the given spa_t. Must have at least one reference, or + * have the namespace lock held. + */ +void +spa_open_ref(spa_t *spa, void *tag) +{ + ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref || + MUTEX_HELD(&spa_namespace_lock)); + (void) refcount_add(&spa->spa_refcount, tag); +} + +/* + * Remove a reference to the given spa_t. Must have at least one reference, or + * have the namespace lock held. + */ +void +spa_close(spa_t *spa, void *tag) +{ + ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref || + MUTEX_HELD(&spa_namespace_lock)); + (void) refcount_remove(&spa->spa_refcount, tag); +} + +/* + * Check to see if the spa refcount is zero. Must be called with + * spa_namespace_lock held. We really compare against spa_minref, which is the + * number of references acquired when opening a pool + */ +boolean_t +spa_refcount_zero(spa_t *spa) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + return (refcount_count(&spa->spa_refcount) == spa->spa_minref); +} + +/* + * ========================================================================== + * SPA spare and l2cache tracking + * ========================================================================== + */ + +/* + * Hot spares and cache devices are tracked using the same code below, + * for 'auxiliary' devices. 
+ */ + +typedef struct spa_aux { + uint64_t aux_guid; + uint64_t aux_pool; + avl_node_t aux_avl; + int aux_count; +} spa_aux_t; + +static int +spa_aux_compare(const void *a, const void *b) +{ + const spa_aux_t *sa = a; + const spa_aux_t *sb = b; + + if (sa->aux_guid < sb->aux_guid) + return (-1); + else if (sa->aux_guid > sb->aux_guid) + return (1); + else + return (0); +} + +void +spa_aux_add(vdev_t *vd, avl_tree_t *avl) +{ + avl_index_t where; + spa_aux_t search; + spa_aux_t *aux; + + search.aux_guid = vd->vdev_guid; + if ((aux = avl_find(avl, &search, &where)) != NULL) { + aux->aux_count++; + } else { + aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP); + aux->aux_guid = vd->vdev_guid; + aux->aux_count = 1; + avl_insert(avl, aux, where); + } +} + +void +spa_aux_remove(vdev_t *vd, avl_tree_t *avl) +{ + spa_aux_t search; + spa_aux_t *aux; + avl_index_t where; + + search.aux_guid = vd->vdev_guid; + aux = avl_find(avl, &search, &where); + + ASSERT(aux != NULL); + + if (--aux->aux_count == 0) { + avl_remove(avl, aux); + kmem_free(aux, sizeof (spa_aux_t)); + } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) { + aux->aux_pool = 0ULL; + } +} + +boolean_t +spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl) +{ + spa_aux_t search, *found; + + search.aux_guid = guid; + found = avl_find(avl, &search, NULL); + + if (pool) { + if (found) + *pool = found->aux_pool; + else + *pool = 0ULL; + } + + if (refcnt) { + if (found) + *refcnt = found->aux_count; + else + *refcnt = 0; + } + + return (found != NULL); +} + +void +spa_aux_activate(vdev_t *vd, avl_tree_t *avl) +{ + spa_aux_t search, *found; + avl_index_t where; + + search.aux_guid = vd->vdev_guid; + found = avl_find(avl, &search, &where); + ASSERT(found != NULL); + ASSERT(found->aux_pool == 0ULL); + + found->aux_pool = spa_guid(vd->vdev_spa); +} + +/* + * Spares are tracked globally due to the following constraints: + * + * - A spare may be part of multiple pools. 
+ * - A spare may be added to a pool even if it's actively in use within + * another pool. + * - A spare in use in any pool can only be the source of a replacement if + * the target is a spare in the same pool. + * + * We keep track of all spares on the system through the use of a reference + * counted AVL tree. When a vdev is added as a spare, or used as a replacement + * spare, then we bump the reference count in the AVL tree. In addition, we set + * the 'vdev_isspare' member to indicate that the device is a spare (active or + * inactive). When a spare is made active (used to replace a device in the + * pool), we also keep track of which pool its been made a part of. + * + * The 'spa_spare_lock' protects the AVL tree. These functions are normally + * called under the spa_namespace lock as part of vdev reconfiguration. The + * separate spare lock exists for the status query path, which does not need to + * be completely consistent with respect to other vdev configuration changes. + */ + +static int +spa_spare_compare(const void *a, const void *b) +{ + return (spa_aux_compare(a, b)); +} + +void +spa_spare_add(vdev_t *vd) +{ + mutex_enter(&spa_spare_lock); + ASSERT(!vd->vdev_isspare); + spa_aux_add(vd, &spa_spare_avl); + vd->vdev_isspare = B_TRUE; + mutex_exit(&spa_spare_lock); +} + +void +spa_spare_remove(vdev_t *vd) +{ + mutex_enter(&spa_spare_lock); + ASSERT(vd->vdev_isspare); + spa_aux_remove(vd, &spa_spare_avl); + vd->vdev_isspare = B_FALSE; + mutex_exit(&spa_spare_lock); +} + +boolean_t +spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt) +{ + boolean_t found; + + mutex_enter(&spa_spare_lock); + found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl); + mutex_exit(&spa_spare_lock); + + return (found); +} + +void +spa_spare_activate(vdev_t *vd) +{ + mutex_enter(&spa_spare_lock); + ASSERT(vd->vdev_isspare); + spa_aux_activate(vd, &spa_spare_avl); + mutex_exit(&spa_spare_lock); +} + +/* + * Level 2 ARC devices are tracked globally for the same reasons 
as spares. + * Cache devices currently only support one pool per cache device, and so + * for these devices the aux reference count is currently unused beyond 1. + */ + +static int +spa_l2cache_compare(const void *a, const void *b) +{ + return (spa_aux_compare(a, b)); +} + +void +spa_l2cache_add(vdev_t *vd) +{ + mutex_enter(&spa_l2cache_lock); + ASSERT(!vd->vdev_isl2cache); + spa_aux_add(vd, &spa_l2cache_avl); + vd->vdev_isl2cache = B_TRUE; + mutex_exit(&spa_l2cache_lock); +} + +void +spa_l2cache_remove(vdev_t *vd) +{ + mutex_enter(&spa_l2cache_lock); + ASSERT(vd->vdev_isl2cache); + spa_aux_remove(vd, &spa_l2cache_avl); + vd->vdev_isl2cache = B_FALSE; + mutex_exit(&spa_l2cache_lock); +} + +boolean_t +spa_l2cache_exists(uint64_t guid, uint64_t *pool) +{ + boolean_t found; + + mutex_enter(&spa_l2cache_lock); + found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl); + mutex_exit(&spa_l2cache_lock); + + return (found); +} + +void +spa_l2cache_activate(vdev_t *vd) +{ + mutex_enter(&spa_l2cache_lock); + ASSERT(vd->vdev_isl2cache); + spa_aux_activate(vd, &spa_l2cache_avl); + mutex_exit(&spa_l2cache_lock); +} + +/* + * ========================================================================== + * SPA vdev locking + * ========================================================================== + */ + +/* + * Lock the given spa_t for the purpose of adding or removing a vdev. + * Grabs the global spa_namespace_lock plus the spa config lock for writing. + * It returns the next transaction group for the spa_t. + */ +uint64_t +spa_vdev_enter(spa_t *spa) +{ + mutex_enter(&spa->spa_vdev_top_lock); + mutex_enter(&spa_namespace_lock); + return (spa_vdev_config_enter(spa)); +} + +/* + * Internal implementation for spa_vdev_enter(). Used when a vdev + * operation requires multiple syncs (i.e. removing a device) while + * keeping the spa_namespace_lock held. 
+ */ +uint64_t +spa_vdev_config_enter(spa_t *spa) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); + + return (spa_last_synced_txg(spa) + 1); +} + +/* + * Used in combination with spa_vdev_config_enter() to allow the syncing + * of multiple transactions without releasing the spa_namespace_lock. + */ +void +spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + int config_changed = B_FALSE; + + ASSERT(txg > spa_last_synced_txg(spa)); + + spa->spa_pending_vdev = NULL; + + /* + * Reassess the DTLs. + */ + vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE); + + if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) { + config_changed = B_TRUE; + spa->spa_config_generation++; + } + + /* + * Verify the metaslab classes. + */ + ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0); + ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0); + + spa_config_exit(spa, SCL_ALL, spa); + + /* + * Panic the system if the specified tag requires it. This + * is useful for ensuring that configurations are updated + * transactionally. + */ + if (zio_injection_enabled) + zio_handle_panic_injection(spa, tag, 0); + + /* + * Note: this txg_wait_synced() is important because it ensures + * that there won't be more than one config change per txg. + * This allows us to use the txg as the generation number. + */ + if (error == 0) + txg_wait_synced(spa->spa_dsl_pool, txg); + + if (vd != NULL) { + ASSERT(!vd->vdev_detached || vd->vdev_dtl_smo.smo_object == 0); + spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); + vdev_free(vd); + spa_config_exit(spa, SCL_ALL, spa); + } + + /* + * If the config changed, update the config cache. + */ + if (config_changed) + spa_config_sync(spa, B_FALSE, B_TRUE); +} + +/* + * Unlock the spa_t after adding or removing a vdev.
Besides undoing the + * locking of spa_vdev_enter(), we also want to make sure the transactions have + * synced to disk, and then update the global configuration cache with the new + * information. + */ +int +spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) +{ + spa_vdev_config_exit(spa, vd, txg, error, FTAG); + mutex_exit(&spa_namespace_lock); + mutex_exit(&spa->spa_vdev_top_lock); + + return (error); +} + +/* + * Lock the given spa_t for the purpose of changing vdev state. + */ +void +spa_vdev_state_enter(spa_t *spa, int oplocks) +{ + int locks = SCL_STATE_ALL | oplocks; + + /* + * Root pools may need to read from the underlying devfs filesystem + * when opening up a vdev. Unfortunately if we're holding the + * SCL_ZIO lock it will result in a deadlock when we try to issue + * the read from the root filesystem. Instead we "prefetch" + * the associated vnodes that we need prior to opening the + * underlying devices and cache them so that we can prevent + * any I/O when we are doing the actual open. + */ + if (spa_is_root(spa)) { + int low = locks & ~(SCL_ZIO - 1); + int high = locks & ~low; + + spa_config_enter(spa, high, spa, RW_WRITER); + vdev_hold(spa->spa_root_vdev); + spa_config_enter(spa, low, spa, RW_WRITER); + } else { + spa_config_enter(spa, locks, spa, RW_WRITER); + } + spa->spa_vdev_locks = locks; +} + +/* + * Unlock the spa_t after changing vdev state. Reassesses the DTLs, marks the + * top-level vdev's state dirty when vd is non-NULL, and drops the config locks + * recorded in spa_vdev_locks by spa_vdev_state_enter(). Returns 'error'. + */ +int +spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) +{ + boolean_t config_changed = B_FALSE; + + if (vd != NULL || error == 0) + vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev, + 0, 0, B_FALSE); + + if (vd != NULL) { + vdev_state_dirty(vd->vdev_top); + config_changed = B_TRUE; + spa->spa_config_generation++; + } + + if (spa_is_root(spa)) + vdev_rele(spa->spa_root_vdev); + + ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); + spa_config_exit(spa, spa->spa_vdev_locks, spa); + + /* + * If anything changed, wait for it to sync.
This ensures that, + * from the system administrator's perspective, zpool(1M) commands + * are synchronous. This is important for things like zpool offline: + * when the command completes, you expect no further I/O from ZFS. + */ + if (vd != NULL) + txg_wait_synced(spa->spa_dsl_pool, 0); + + /* + * If the config changed, update the config cache. + */ + if (config_changed) { + mutex_enter(&spa_namespace_lock); + spa_config_sync(spa, B_FALSE, B_TRUE); + mutex_exit(&spa_namespace_lock); + } + + return (error); +} + +/* + * ========================================================================== + * Miscellaneous functions + * ========================================================================== + */ + +/* + * Rename a spa_t. + */ +int +spa_rename(const char *name, const char *newname) +{ + spa_t *spa; + int err; + + /* + * Lookup the spa_t and grab the config lock for writing. We need to + * actually open the pool so that we can sync out the necessary labels. + * It's OK to call spa_open() with the namespace lock held because we + * allow recursive calls for other reasons. + */ + mutex_enter(&spa_namespace_lock); + if ((err = spa_open(name, &spa, FTAG)) != 0) { + mutex_exit(&spa_namespace_lock); + return (err); + } + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + avl_remove(&spa_namespace_avl, spa); + (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name)); + avl_add(&spa_namespace_avl, spa); + + /* + * Sync all labels to disk with the new names by marking the root vdev + * dirty and waiting for it to sync. It will pick up the new pool name + * during the sync. + */ + vdev_config_dirty(spa->spa_root_vdev); + + spa_config_exit(spa, SCL_ALL, FTAG); + + txg_wait_synced(spa->spa_dsl_pool, 0); + + /* + * Sync the updated config cache. + */ + spa_config_sync(spa, B_FALSE, B_TRUE); + + spa_close(spa, FTAG); + + mutex_exit(&spa_namespace_lock); + + return (0); +} + +/* + * Return the spa_t associated with given pool_guid, if it exists. 
If + * device_guid is non-zero, determine whether the pool exists *and* contains + * a device with the specified device_guid. + */ +spa_t * +spa_by_guid(uint64_t pool_guid, uint64_t device_guid) +{ + spa_t *spa; + avl_tree_t *t = &spa_namespace_avl; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { + if (spa->spa_state == POOL_STATE_UNINITIALIZED) + continue; + if (spa->spa_root_vdev == NULL) + continue; + if (spa_guid(spa) == pool_guid) { + if (device_guid == 0) + break; + + if (vdev_lookup_by_guid(spa->spa_root_vdev, + device_guid) != NULL) + break; + + /* + * Check any devices we may be in the process of adding. + */ + if (spa->spa_pending_vdev) { + if (vdev_lookup_by_guid(spa->spa_pending_vdev, + device_guid) != NULL) + break; + } + } + } + + return (spa); +} + +/* + * Determine whether a pool with the given pool_guid exists. + */ +boolean_t +spa_guid_exists(uint64_t pool_guid, uint64_t device_guid) +{ + return (spa_by_guid(pool_guid, device_guid) != NULL); +} + +char * +spa_strdup(const char *s) +{ + size_t len; + char *new; + + len = strlen(s); + new = kmem_alloc(len + 1, KM_SLEEP); + bcopy(s, new, len); + new[len] = '\0'; + + return (new); +} + +void +spa_strfree(char *s) +{ + kmem_free(s, strlen(s) + 1); +} + +uint64_t +spa_get_random(uint64_t range) +{ + uint64_t r; + + ASSERT(range != 0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t)); + + return (r % range); +} + +uint64_t +spa_generate_guid(spa_t *spa) +{ + uint64_t guid = spa_get_random(-1ULL); + + if (spa != NULL) { + while (guid == 0 || spa_guid_exists(spa_guid(spa), guid)) + guid = spa_get_random(-1ULL); + } else { + while (guid == 0 || spa_guid_exists(guid, 0)) + guid = spa_get_random(-1ULL); + } + + return (guid); +} + +void +sprintf_blkptr(char *buf, const blkptr_t *bp) +{ + char *type = NULL; + char *checksum = NULL; + char *compress = NULL; + + if (bp != NULL) { + type = dmu_ot[BP_GET_TYPE(bp)].ot_name; + checksum 
= zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; + compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; + } + + SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress); +} + +void +spa_freeze(spa_t *spa) +{ + uint64_t freeze_txg = 0; + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + if (spa->spa_freeze_txg == UINT64_MAX) { + freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE; + spa->spa_freeze_txg = freeze_txg; + } + spa_config_exit(spa, SCL_ALL, FTAG); + if (freeze_txg != 0) + txg_wait_synced(spa_get_dsl(spa), freeze_txg); +} + +void +zfs_panic_recover(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); + va_end(adx); +} + +/* + * This is a stripped-down version of strtoull, suitable only for converting + * lowercase hexadecimal numbers that don't overflow. + */ +uint64_t +strtonum(const char *str, char **nptr) +{ + uint64_t val = 0; + char c; + int digit; + + while ((c = *str) != '\0') { + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + else + break; + + val *= 16; + val += digit; + + str++; + } + + if (nptr) + *nptr = (char *)str; + + return (val); +} + +/* + * ========================================================================== + * Accessor functions + * ========================================================================== + */ + +boolean_t +spa_shutting_down(spa_t *spa) +{ + return (spa->spa_async_suspended); +} + +dsl_pool_t * +spa_get_dsl(spa_t *spa) +{ + return (spa->spa_dsl_pool); +} + +blkptr_t * +spa_get_rootblkptr(spa_t *spa) +{ + return (&spa->spa_ubsync.ub_rootbp); +} + +void +spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp) +{ + spa->spa_uberblock.ub_rootbp = *bp; +} + +/* + * Copy spa_root into buf, or store the empty string when spa_root is NULL. + * strlcpy() is used so that buf is always NUL-terminated, even when + * spa_root is buflen bytes or longer; strncpy() would leave it unterminated. + */ +void +spa_altroot(spa_t *spa, char *buf, size_t buflen) +{ + if (spa->spa_root == NULL) + buf[0] = '\0'; + else + (void) strlcpy(buf, spa->spa_root, buflen); +} + +int +spa_sync_pass(spa_t *spa) +{ + return (spa->spa_sync_pass);
+} + +char * +spa_name(spa_t *spa) +{ + return (spa->spa_name); +} + +uint64_t +spa_guid(spa_t *spa) +{ + /* + * If we fail to parse the config during spa_load(), we can go through + * the error path (which posts an ereport) and end up here with no root + * vdev. We stash the original pool guid in 'spa_load_guid' to handle + * this case. + */ + if (spa->spa_root_vdev != NULL) + return (spa->spa_root_vdev->vdev_guid); + else + return (spa->spa_load_guid); +} + +uint64_t +spa_last_synced_txg(spa_t *spa) +{ + return (spa->spa_ubsync.ub_txg); +} + +uint64_t +spa_first_txg(spa_t *spa) +{ + return (spa->spa_first_txg); +} + +uint64_t +spa_syncing_txg(spa_t *spa) +{ + return (spa->spa_syncing_txg); +} + +pool_state_t +spa_state(spa_t *spa) +{ + return (spa->spa_state); +} + +spa_load_state_t +spa_load_state(spa_t *spa) +{ + return (spa->spa_load_state); +} + +uint64_t +spa_freeze_txg(spa_t *spa) +{ + return (spa->spa_freeze_txg); +} + +/* ARGSUSED */ +uint64_t +spa_get_asize(spa_t *spa, uint64_t lsize) +{ + /* + * The worst case is single-sector max-parity RAID-Z blocks, in which + * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) + * times the size; so just assume that. Add to this the fact that + * we can have up to 3 DVAs per bp, and one more factor of 2 because + * the block may be dittoed with up to 3 DVAs by ddt_sync(). + */ + return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2); +} + +uint64_t +spa_get_dspace(spa_t *spa) +{ + return (spa->spa_dspace); +} + +void +spa_update_dspace(spa_t *spa) +{ + spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + + ddt_get_dedup_dspace(spa); +} + +/* + * Return the failure mode that has been set to this pool. The default + * behavior will be to block all I/Os when a complete failure occurs. 
+ */ +uint8_t +spa_get_failmode(spa_t *spa) +{ + return (spa->spa_failmode); +} + +boolean_t +spa_suspended(spa_t *spa) +{ + return (spa->spa_suspended); +} + +uint64_t +spa_version(spa_t *spa) +{ + return (spa->spa_ubsync.ub_version); +} + +boolean_t +spa_deflate(spa_t *spa) +{ + return (spa->spa_deflate); +} + +metaslab_class_t * +spa_normal_class(spa_t *spa) +{ + return (spa->spa_normal_class); +} + +metaslab_class_t * +spa_log_class(spa_t *spa) +{ + return (spa->spa_log_class); +} + +int +spa_max_replication(spa_t *spa) +{ + /* + * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to + * handle BPs with more than one DVA allocated. Set our max + * replication level accordingly. + */ + if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS) + return (1); + return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); +} + +int +spa_prev_software_version(spa_t *spa) +{ + return (spa->spa_prev_software_version); +} + +uint64_t +dva_get_dsize_sync(spa_t *spa, const dva_t *dva) +{ + uint64_t asize = DVA_GET_ASIZE(dva); + uint64_t dsize = asize; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); + + if (asize != 0 && spa->spa_deflate) { + vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); + dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio; + } + + return (dsize); +} + +uint64_t +bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp) +{ + uint64_t dsize = 0; + + for (int d = 0; d < SPA_DVAS_PER_BP; d++) + dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); + + return (dsize); +} + +uint64_t +bp_get_dsize(spa_t *spa, const blkptr_t *bp) +{ + uint64_t dsize = 0; + + spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); + + for (int d = 0; d < SPA_DVAS_PER_BP; d++) + dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); + + spa_config_exit(spa, SCL_VDEV, FTAG); + + return (dsize); +} + +/* + * ========================================================================== + * Initialization and Termination + * 
========================================================================== + */ + +static int +spa_name_compare(const void *a1, const void *a2) +{ + const spa_t *s1 = a1; + const spa_t *s2 = a2; + int s; + + s = strcmp(s1->spa_name, s2->spa_name); + if (s > 0) + return (1); + if (s < 0) + return (-1); + return (0); +} + +int +spa_busy(void) +{ + return (spa_active_count); +} + +/* + * Takes no arguments, so the parameter list is spelled (void) like + * spa_busy() and spa_fini(); the body only calls spa_config_load(). + */ +void +spa_boot_init(void) +{ + spa_config_load(); +} + +void +spa_init(int mode) +{ + mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); + + avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t), + offsetof(spa_t, spa_avl)); + + avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t), + offsetof(spa_aux_t, aux_avl)); + + avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), + offsetof(spa_aux_t, aux_avl)); + + spa_mode_global = mode; + + refcount_init(); + unique_init(); + zio_init(); + dmu_init(); + zil_init(); + vdev_cache_stat_init(); + zfs_prop_init(); + zpool_prop_init(); + spa_config_load(); + l2arc_start(); +} + +void +spa_fini(void) +{ + l2arc_stop(); + + spa_evict_all(); + + vdev_cache_stat_fini(); + zil_fini(); + dmu_fini(); + zio_fini(); + unique_fini(); + refcount_fini(); + + avl_destroy(&spa_namespace_avl); + avl_destroy(&spa_spare_avl); + avl_destroy(&spa_l2cache_avl); + + cv_destroy(&spa_namespace_cv); + mutex_destroy(&spa_namespace_lock); + mutex_destroy(&spa_spare_lock); + mutex_destroy(&spa_l2cache_lock); +} + +/* + * Return whether this pool has slogs. No locking needed.
+ * It's not a problem if the wrong answer is returned as it's only for + * performance and not correctness + */ +boolean_t +spa_has_slogs(spa_t *spa) +{ + return (spa->spa_log_class->mc_rotor != NULL); +} + +spa_log_state_t +spa_get_log_state(spa_t *spa) +{ + return (spa->spa_log_state); +} + +void +spa_set_log_state(spa_t *spa, spa_log_state_t state) +{ + spa->spa_log_state = state; +} + +boolean_t +spa_is_root(spa_t *spa) +{ + return (spa->spa_is_root); +} + +boolean_t +spa_writeable(spa_t *spa) +{ + return (!!(spa->spa_mode & FWRITE)); +} + +int +spa_mode(spa_t *spa) +{ + return (spa->spa_mode); +} + +uint64_t +spa_bootfs(spa_t *spa) +{ + return (spa->spa_bootfs); +} + +uint64_t +spa_delegation(spa_t *spa) +{ + return (spa->spa_delegation); +} + +objset_t * +spa_meta_objset(spa_t *spa) +{ + return (spa->spa_meta_objset); +} + +enum zio_checksum +spa_dedup_checksum(spa_t *spa) +{ + return (spa->spa_dedup_checksum); +} + +/* + * Reset pool scan stat per scan pass (or reboot). + */ +void +spa_scan_stat_init(spa_t *spa) +{ + /* data not stored on disk */ + spa->spa_scan_pass_start = gethrestime_sec(); + spa->spa_scan_pass_exam = 0; + vdev_scan_stat_init(spa->spa_root_vdev); +} + +/* + * Get scan stats for zpool status reports + */ +int +spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) +{ + dsl_scan_t *scn = spa->spa_dsl_pool ? 
spa->spa_dsl_pool->dp_scan : NULL; + + if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE) + return (ENOENT); + bzero(ps, sizeof (pool_scan_stat_t)); + + /* data stored on disk */ + ps->pss_func = scn->scn_phys.scn_func; + ps->pss_start_time = scn->scn_phys.scn_start_time; + ps->pss_end_time = scn->scn_phys.scn_end_time; + ps->pss_to_examine = scn->scn_phys.scn_to_examine; + ps->pss_examined = scn->scn_phys.scn_examined; + ps->pss_to_process = scn->scn_phys.scn_to_process; + ps->pss_processed = scn->scn_phys.scn_processed; + ps->pss_errors = scn->scn_phys.scn_errors; + ps->pss_state = scn->scn_phys.scn_state; + + /* data not stored on disk */ + ps->pss_pass_start = spa->spa_scan_pass_start; + ps->pss_pass_exam = spa->spa_scan_pass_exam; + + return (0); +} diff --git a/uts/common/fs/zfs/space_map.c b/uts/common/fs/zfs/space_map.c new file mode 100644 index 000000000000..1ce7b2a3d466 --- /dev/null +++ b/uts/common/fs/zfs/space_map.c @@ -0,0 +1,616 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/dmu.h> +#include <sys/zio.h> +#include <sys/space_map.h> + +/* + * Space map routines. + * NOTE: caller is responsible for all locking. + */ +static int +space_map_seg_compare(const void *x1, const void *x2) +{ + const space_seg_t *s1 = x1; + const space_seg_t *s2 = x2; + + if (s1->ss_start < s2->ss_start) { + if (s1->ss_end > s2->ss_start) + return (0); + return (-1); + } + if (s1->ss_start > s2->ss_start) { + if (s1->ss_start < s2->ss_end) + return (0); + return (1); + } + return (0); +} + +void +space_map_create(space_map_t *sm, uint64_t start, uint64_t size, uint8_t shift, + kmutex_t *lp) +{ + bzero(sm, sizeof (*sm)); + + cv_init(&sm->sm_load_cv, NULL, CV_DEFAULT, NULL); + + avl_create(&sm->sm_root, space_map_seg_compare, + sizeof (space_seg_t), offsetof(struct space_seg, ss_node)); + + sm->sm_start = start; + sm->sm_size = size; + sm->sm_shift = shift; + sm->sm_lock = lp; +} + +void +space_map_destroy(space_map_t *sm) +{ + ASSERT(!sm->sm_loaded && !sm->sm_loading); + VERIFY3U(sm->sm_space, ==, 0); + avl_destroy(&sm->sm_root); + cv_destroy(&sm->sm_load_cv); +} + +/* + * Add the segment [start, start + size) to the map, merging it with an + * adjacent segment on either side. The range must lie within the map, be + * aligned to 1 << sm_shift, and not already be present. + */ +void +space_map_add(space_map_t *sm, uint64_t start, uint64_t size) +{ + avl_index_t where; + space_seg_t ssearch, *ss_before, *ss_after, *ss; + uint64_t end = start + size; + int merge_before, merge_after; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + VERIFY(size != 0); + VERIFY3U(start, >=, sm->sm_start); + VERIFY3U(end, <=, sm->sm_start + sm->sm_size); + VERIFY(sm->sm_space + size <= sm->sm_size); + VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); + VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); + + ssearch.ss_start = start; + ssearch.ss_end = end; + ss = avl_find(&sm->sm_root, &ssearch, &where); + + if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) { + zfs_panic_recover("zfs: allocating allocated segment " + "(offset=%llu size=%llu)\n", + (longlong_t)start, (longlong_t)size); + return; + } + + /* Make sure we don't
overlap with either of our neighbors */ + VERIFY(ss == NULL); + + ss_before = avl_nearest(&sm->sm_root, where, AVL_BEFORE); + ss_after = avl_nearest(&sm->sm_root, where, AVL_AFTER); + + merge_before = (ss_before != NULL && ss_before->ss_end == start); + merge_after = (ss_after != NULL && ss_after->ss_start == end); + + if (merge_before && merge_after) { + avl_remove(&sm->sm_root, ss_before); + if (sm->sm_pp_root) { + avl_remove(sm->sm_pp_root, ss_before); + avl_remove(sm->sm_pp_root, ss_after); + } + ss_after->ss_start = ss_before->ss_start; + kmem_free(ss_before, sizeof (*ss_before)); + ss = ss_after; + } else if (merge_before) { + ss_before->ss_end = end; + if (sm->sm_pp_root) + avl_remove(sm->sm_pp_root, ss_before); + ss = ss_before; + } else if (merge_after) { + ss_after->ss_start = start; + if (sm->sm_pp_root) + avl_remove(sm->sm_pp_root, ss_after); + ss = ss_after; + } else { + ss = kmem_alloc(sizeof (*ss), KM_SLEEP); + ss->ss_start = start; + ss->ss_end = end; + avl_insert(&sm->sm_root, ss, where); + } + + if (sm->sm_pp_root) + avl_add(sm->sm_pp_root, ss); + + sm->sm_space += size; +} + +void +space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) +{ + avl_index_t where; + space_seg_t ssearch, *ss, *newseg; + uint64_t end = start + size; + int left_over, right_over; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + VERIFY(size != 0); + VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); + VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); + + ssearch.ss_start = start; + ssearch.ss_end = end; + ss = avl_find(&sm->sm_root, &ssearch, &where); + + /* Make sure we completely overlap with someone */ + if (ss == NULL) { + zfs_panic_recover("zfs: freeing free segment " + "(offset=%llu size=%llu)", + (longlong_t)start, (longlong_t)size); + return; + } + VERIFY3U(ss->ss_start, <=, start); + VERIFY3U(ss->ss_end, >=, end); + VERIFY(sm->sm_space - size <= sm->sm_size); + + left_over = (ss->ss_start != start); + right_over = (ss->ss_end != end); + + if (sm->sm_pp_root) + 
avl_remove(sm->sm_pp_root, ss); + + if (left_over && right_over) { + newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP); + newseg->ss_start = end; + newseg->ss_end = ss->ss_end; + ss->ss_end = start; + avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER); + if (sm->sm_pp_root) + avl_add(sm->sm_pp_root, newseg); + } else if (left_over) { + ss->ss_end = start; + } else if (right_over) { + ss->ss_start = end; + } else { + avl_remove(&sm->sm_root, ss); + kmem_free(ss, sizeof (*ss)); + ss = NULL; + } + + if (sm->sm_pp_root && ss != NULL) + avl_add(sm->sm_pp_root, ss); + + sm->sm_space -= size; +} + +boolean_t +space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) +{ + avl_index_t where; + space_seg_t ssearch, *ss; + uint64_t end = start + size; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + VERIFY(size != 0); + VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); + VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); + + ssearch.ss_start = start; + ssearch.ss_end = end; + ss = avl_find(&sm->sm_root, &ssearch, &where); + + return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end); +} + +void +space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) +{ + space_seg_t *ss; + void *cookie = NULL; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + + while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { + if (func != NULL) + func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); + kmem_free(ss, sizeof (*ss)); + } + sm->sm_space = 0; +} + +void +space_map_walk(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) +{ + space_seg_t *ss; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + + for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) + func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); +} + +/* + * Wait for any in-progress space_map_load() to complete. 
+ */ +void +space_map_load_wait(space_map_t *sm) +{ + ASSERT(MUTEX_HELD(sm->sm_lock)); + + while (sm->sm_loading) { + ASSERT(!sm->sm_loaded); + cv_wait(&sm->sm_load_cv, sm->sm_lock); + } +} + +/* + * Note: space_map_load() will drop sm_lock across dmu_read() calls. + * The caller must be OK with this. + */ +int +space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype, + space_map_obj_t *smo, objset_t *os) +{ + uint64_t *entry, *entry_map, *entry_map_end; + uint64_t bufsize, size, offset, end, space; + uint64_t mapstart = sm->sm_start; + int error = 0; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + ASSERT(!sm->sm_loaded); + ASSERT(!sm->sm_loading); + + sm->sm_loading = B_TRUE; + end = smo->smo_objsize; + space = smo->smo_alloc; + + ASSERT(sm->sm_ops == NULL); + VERIFY3U(sm->sm_space, ==, 0); + + if (maptype == SM_FREE) { + space_map_add(sm, sm->sm_start, sm->sm_size); + space = sm->sm_size - space; + } + + bufsize = 1ULL << SPACE_MAP_BLOCKSHIFT; + entry_map = zio_buf_alloc(bufsize); + + mutex_exit(sm->sm_lock); + if (end > bufsize) + dmu_prefetch(os, smo->smo_object, bufsize, end - bufsize); + mutex_enter(sm->sm_lock); + + for (offset = 0; offset < end; offset += bufsize) { + size = MIN(end - offset, bufsize); + VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0); + VERIFY(size != 0); + + dprintf("object=%llu offset=%llx size=%llx\n", + smo->smo_object, offset, size); + + mutex_exit(sm->sm_lock); + error = dmu_read(os, smo->smo_object, offset, size, entry_map, + DMU_READ_PREFETCH); + mutex_enter(sm->sm_lock); + if (error != 0) + break; + + entry_map_end = entry_map + (size / sizeof (uint64_t)); + for (entry = entry_map; entry < entry_map_end; entry++) { + uint64_t e = *entry; + + if (SM_DEBUG_DECODE(e)) /* Skip debug entries */ + continue; + + (SM_TYPE_DECODE(e) == maptype ? 
+ space_map_add : space_map_remove)(sm, + (SM_OFFSET_DECODE(e) << sm->sm_shift) + mapstart, + SM_RUN_DECODE(e) << sm->sm_shift); + } + } + + if (error == 0) { + VERIFY3U(sm->sm_space, ==, space); + + sm->sm_loaded = B_TRUE; + sm->sm_ops = ops; + if (ops != NULL) + ops->smop_load(sm); + } else { + space_map_vacate(sm, NULL, NULL); + } + + zio_buf_free(entry_map, bufsize); + + sm->sm_loading = B_FALSE; + + cv_broadcast(&sm->sm_load_cv); + + return (error); +} + +void +space_map_unload(space_map_t *sm) +{ + ASSERT(MUTEX_HELD(sm->sm_lock)); + + if (sm->sm_loaded && sm->sm_ops != NULL) + sm->sm_ops->smop_unload(sm); + + sm->sm_loaded = B_FALSE; + sm->sm_ops = NULL; + + space_map_vacate(sm, NULL, NULL); +} + +uint64_t +space_map_maxsize(space_map_t *sm) +{ + ASSERT(sm->sm_ops != NULL); + return (sm->sm_ops->smop_max(sm)); +} + +uint64_t +space_map_alloc(space_map_t *sm, uint64_t size) +{ + uint64_t start; + + start = sm->sm_ops->smop_alloc(sm, size); + if (start != -1ULL) + space_map_remove(sm, start, size); + return (start); +} + +void +space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) +{ + sm->sm_ops->smop_claim(sm, start, size); + space_map_remove(sm, start, size); +} + +void +space_map_free(space_map_t *sm, uint64_t start, uint64_t size) +{ + space_map_add(sm, start, size); + sm->sm_ops->smop_free(sm, start, size); +} + +/* + * Note: space_map_sync() will drop sm_lock across dmu_write() calls. + */ +void +space_map_sync(space_map_t *sm, uint8_t maptype, + space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) +{ + spa_t *spa = dmu_objset_spa(os); + void *cookie = NULL; + space_seg_t *ss; + uint64_t bufsize, start, size, run_len; + uint64_t *entry, *entry_map, *entry_map_end; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + + if (sm->sm_space == 0) + return; + + dprintf("object %4llu, txg %llu, pass %d, %c, count %lu, space %llx\n", + smo->smo_object, dmu_tx_get_txg(tx), spa_sync_pass(spa), + maptype == SM_ALLOC ? 
'A' : 'F', avl_numnodes(&sm->sm_root), + sm->sm_space); + + if (maptype == SM_ALLOC) + smo->smo_alloc += sm->sm_space; + else + smo->smo_alloc -= sm->sm_space; + + bufsize = (8 + avl_numnodes(&sm->sm_root)) * sizeof (uint64_t); + bufsize = MIN(bufsize, 1ULL << SPACE_MAP_BLOCKSHIFT); + entry_map = zio_buf_alloc(bufsize); + entry_map_end = entry_map + (bufsize / sizeof (uint64_t)); + entry = entry_map; + + *entry++ = SM_DEBUG_ENCODE(1) | + SM_DEBUG_ACTION_ENCODE(maptype) | + SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | + SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); + + while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { + size = ss->ss_end - ss->ss_start; + start = (ss->ss_start - sm->sm_start) >> sm->sm_shift; + + sm->sm_space -= size; + size >>= sm->sm_shift; + + while (size) { + run_len = MIN(size, SM_RUN_MAX); + + if (entry == entry_map_end) { + mutex_exit(sm->sm_lock); + dmu_write(os, smo->smo_object, smo->smo_objsize, + bufsize, entry_map, tx); + mutex_enter(sm->sm_lock); + smo->smo_objsize += bufsize; + entry = entry_map; + } + + *entry++ = SM_OFFSET_ENCODE(start) | + SM_TYPE_ENCODE(maptype) | + SM_RUN_ENCODE(run_len); + + start += run_len; + size -= run_len; + } + kmem_free(ss, sizeof (*ss)); + } + + if (entry != entry_map) { + size = (entry - entry_map) * sizeof (uint64_t); + mutex_exit(sm->sm_lock); + dmu_write(os, smo->smo_object, smo->smo_objsize, + size, entry_map, tx); + mutex_enter(sm->sm_lock); + smo->smo_objsize += size; + } + + zio_buf_free(entry_map, bufsize); + + VERIFY3U(sm->sm_space, ==, 0); +} + +void +space_map_truncate(space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) +{ + VERIFY(dmu_free_range(os, smo->smo_object, 0, -1ULL, tx) == 0); + + smo->smo_objsize = 0; + smo->smo_alloc = 0; +} + +/* + * Space map reference trees. + * + * A space map is a collection of integers. Every integer is either + * in the map, or it's not. 
A space map reference tree generalizes + * the idea: it allows its members to have arbitrary reference counts, + * as opposed to the implicit reference count of 0 or 1 in a space map. + * This representation comes in handy when computing the union or + * intersection of multiple space maps. For example, the union of + * N space maps is the subset of the reference tree with refcnt >= 1. + * The intersection of N space maps is the subset with refcnt >= N. + * + * [It's very much like a Fourier transform. Unions and intersections + * are hard to perform in the 'space map domain', so we convert the maps + * into the 'reference count domain', where it's trivial, then invert.] + * + * vdev_dtl_reassess() uses computations of this form to determine + * DTL_MISSING and DTL_OUTAGE for interior vdevs -- e.g. a RAID-Z vdev + * has an outage wherever refcnt >= vdev_nparity + 1, and a mirror vdev + * has an outage wherever refcnt >= vdev_children. + */ +static int +space_map_ref_compare(const void *x1, const void *x2) +{ + const space_ref_t *sr1 = x1; + const space_ref_t *sr2 = x2; + + if (sr1->sr_offset < sr2->sr_offset) + return (-1); + if (sr1->sr_offset > sr2->sr_offset) + return (1); + + if (sr1 < sr2) + return (-1); + if (sr1 > sr2) + return (1); + + return (0); +} + +void +space_map_ref_create(avl_tree_t *t) +{ + avl_create(t, space_map_ref_compare, + sizeof (space_ref_t), offsetof(space_ref_t, sr_node)); +} + +void +space_map_ref_destroy(avl_tree_t *t) +{ + space_ref_t *sr; + void *cookie = NULL; + + while ((sr = avl_destroy_nodes(t, &cookie)) != NULL) + kmem_free(sr, sizeof (*sr)); + + avl_destroy(t); +} + +static void +space_map_ref_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt) +{ + space_ref_t *sr; + + sr = kmem_alloc(sizeof (*sr), KM_SLEEP); + sr->sr_offset = offset; + sr->sr_refcnt = refcnt; + + avl_add(t, sr); +} + +void +space_map_ref_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, + int64_t refcnt) +{ + space_map_ref_add_node(t, start, refcnt); + 
space_map_ref_add_node(t, end, -refcnt); +} + +/* + * Convert (or add) a space map into a reference tree. + */ +void +space_map_ref_add_map(avl_tree_t *t, space_map_t *sm, int64_t refcnt) +{ + space_seg_t *ss; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + + for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) + space_map_ref_add_seg(t, ss->ss_start, ss->ss_end, refcnt); +} + +/* + * Convert a reference tree into a space map. The space map will contain + * all members of the reference tree for which refcnt >= minref. + */ +void +space_map_ref_generate_map(avl_tree_t *t, space_map_t *sm, int64_t minref) +{ + uint64_t start = -1ULL; + int64_t refcnt = 0; + space_ref_t *sr; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + + space_map_vacate(sm, NULL, NULL); + + for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) { + refcnt += sr->sr_refcnt; + if (refcnt >= minref) { + if (start == -1ULL) { + start = sr->sr_offset; + } + } else { + if (start != -1ULL) { + uint64_t end = sr->sr_offset; + ASSERT(start <= end); + if (end > start) + space_map_add(sm, start, end - start); + start = -1ULL; + } + } + } + ASSERT(refcnt == 0); + ASSERT(start == -1ULL); +} diff --git a/uts/common/fs/zfs/sys/arc.h b/uts/common/fs/zfs/sys/arc.h new file mode 100644 index 000000000000..8f189c62d31d --- /dev/null +++ b/uts/common/fs/zfs/sys/arc.h @@ -0,0 +1,142 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ARC_H +#define _SYS_ARC_H + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/zio.h> +#include <sys/dmu.h> +#include <sys/spa.h> + +typedef struct arc_buf_hdr arc_buf_hdr_t; +typedef struct arc_buf arc_buf_t; +typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); +typedef int arc_evict_func_t(void *private); + +/* generic arc_done_func_t's which you can use */ +arc_done_func_t arc_bcopy_func; +arc_done_func_t arc_getbuf_func; + +struct arc_buf { + arc_buf_hdr_t *b_hdr; + arc_buf_t *b_next; + kmutex_t b_evict_lock; + krwlock_t b_data_lock; + void *b_data; + arc_evict_func_t *b_efunc; + void *b_private; +}; + +typedef enum arc_buf_contents { + ARC_BUFC_DATA, /* buffer contains data */ + ARC_BUFC_METADATA, /* buffer contains metadata */ + ARC_BUFC_NUMTYPES +} arc_buf_contents_t; +/* + * These are the flags we pass into calls to the arc + */ +#define ARC_WAIT (1 << 1) /* perform I/O synchronously */ +#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */ +#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */ +#define ARC_CACHED (1 << 4) /* I/O was already in cache */ +#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */ + +/* + * The following breakdowns of arc_size exist for kstat only.
+ */ +typedef enum arc_space_type { + ARC_SPACE_DATA, + ARC_SPACE_HDRS, + ARC_SPACE_L2HDRS, + ARC_SPACE_OTHER, + ARC_SPACE_NUMTYPES +} arc_space_type_t; + +void arc_space_consume(uint64_t space, arc_space_type_t type); +void arc_space_return(uint64_t space, arc_space_type_t type); +void *arc_data_buf_alloc(uint64_t space); +void arc_data_buf_free(void *buf, uint64_t space); +arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag, + arc_buf_contents_t type); +arc_buf_t *arc_loan_buf(spa_t *spa, int size); +void arc_return_buf(arc_buf_t *buf, void *tag); +void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); +void arc_buf_add_ref(arc_buf_t *buf, void *tag); +int arc_buf_remove_ref(arc_buf_t *buf, void *tag); +int arc_buf_size(arc_buf_t *buf); +void arc_release(arc_buf_t *buf, void *tag); +int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa, + zbookmark_t *zb); +int arc_released(arc_buf_t *buf); +int arc_has_callback(arc_buf_t *buf); +void arc_buf_freeze(arc_buf_t *buf); +void arc_buf_thaw(arc_buf_t *buf); +#ifdef ZFS_DEBUG +int arc_referenced(arc_buf_t *buf); +#endif + +int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb); +int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, + arc_done_func_t *done, void *private, int priority, int flags, + uint32_t *arc_flags, const zbookmark_t *zb); +zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, + blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, + arc_done_func_t *ready, arc_done_func_t *done, void *private, + int priority, int zio_flags, const zbookmark_t *zb); + +void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); +int arc_buf_evict(arc_buf_t *buf); + +void arc_flush(spa_t *spa); +void arc_tempreserve_clear(uint64_t reserve); +int arc_tempreserve_space(uint64_t reserve, uint64_t txg); + +void arc_init(void); 
+void arc_fini(void); + +/* + * Level 2 ARC + */ + +void l2arc_add_vdev(spa_t *spa, vdev_t *vd); +void l2arc_remove_vdev(vdev_t *vd); +boolean_t l2arc_vdev_present(vdev_t *vd); +void l2arc_init(void); +void l2arc_fini(void); +void l2arc_start(void); +void l2arc_stop(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ARC_H */ diff --git a/uts/common/fs/zfs/sys/bplist.h b/uts/common/fs/zfs/sys/bplist.h new file mode 100644 index 000000000000..471be9047ec2 --- /dev/null +++ b/uts/common/fs/zfs/sys/bplist.h @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_BPLIST_H +#define _SYS_BPLIST_H + +#include <sys/zfs_context.h> +#include <sys/spa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct bplist_entry { + blkptr_t bpe_blk; + list_node_t bpe_node; +} bplist_entry_t; + +typedef struct bplist { + kmutex_t bpl_lock; + list_t bpl_list; +} bplist_t; + +typedef int bplist_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx); + +void bplist_create(bplist_t *bpl); +void bplist_destroy(bplist_t *bpl); +void bplist_append(bplist_t *bpl, const blkptr_t *bp); +void bplist_iterate(bplist_t *bpl, bplist_itor_t *func, + void *arg, dmu_tx_t *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_BPLIST_H */ diff --git a/uts/common/fs/zfs/sys/bpobj.h b/uts/common/fs/zfs/sys/bpobj.h new file mode 100644 index 000000000000..3771a9541aa7 --- /dev/null +++ b/uts/common/fs/zfs/sys/bpobj.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_BPOBJ_H +#define _SYS_BPOBJ_H + +#include <sys/dmu.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/zio.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct bpobj_phys { + /* + * This is the bonus buffer for the dead lists. The object's + * contents are an array of bpo_num_blkptrs blkptr_t's, representing + * a total of bpo_bytes physical space. + */ + uint64_t bpo_num_blkptrs; + uint64_t bpo_bytes; + uint64_t bpo_comp; + uint64_t bpo_uncomp; + uint64_t bpo_subobjs; + uint64_t bpo_num_subobjs; +} bpobj_phys_t; + +/* + * Byte sizes covering the first two and the first four uint64_t fields + * of bpobj_phys_t (presumably the bonus sizes of older on-disk layouts -- + * TODO confirm against the bpobj implementation). + */ +#define BPOBJ_SIZE_V0 (2 * sizeof (uint64_t)) +#define BPOBJ_SIZE_V1 (4 * sizeof (uint64_t)) + +typedef struct bpobj { + kmutex_t bpo_lock; + objset_t *bpo_os; + uint64_t bpo_object; + int bpo_epb; + uint8_t bpo_havecomp; + uint8_t bpo_havesubobj; + bpobj_phys_t *bpo_phys; + dmu_buf_t *bpo_dbuf; + dmu_buf_t *bpo_cached_dbuf; +} bpobj_t; + +typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx); + +uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx); +void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx); + +int bpobj_open(bpobj_t *bpo, objset_t *mos, uint64_t object); +void bpobj_close(bpobj_t *bpo); + +int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx); +int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *); +int bpobj_iterate_dbg(bpobj_t *bpo, uint64_t *itorp, blkptr_t *bp); + +void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx); +void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx); + +int bpobj_space(bpobj_t *bpo, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); +int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_BPOBJ_H */ diff --git a/uts/common/fs/zfs/sys/dbuf.h b/uts/common/fs/zfs/sys/dbuf.h new file mode 100644 index
000000000000..cf1bbc030f45 --- /dev/null +++ b/uts/common/fs/zfs/sys/dbuf.h @@ -0,0 +1,375 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DBUF_H +#define _SYS_DBUF_H + +#include <sys/dmu.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/zio.h> +#include <sys/arc.h> +#include <sys/zfs_context.h> +#include <sys/refcount.h> +#include <sys/zrlock.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define IN_DMU_SYNC 2 + +/* + * define flags for dbuf_read + */ + +#define DB_RF_MUST_SUCCEED (1 << 0) +#define DB_RF_CANFAIL (1 << 1) +#define DB_RF_HAVESTRUCT (1 << 2) +#define DB_RF_NOPREFETCH (1 << 3) +#define DB_RF_NEVERWAIT (1 << 4) +#define DB_RF_CACHED (1 << 5) + +/* + * The simplified state transition diagram for dbufs looks like: + * + * +----> READ ----+ + * | | + * | V + * (alloc)-->UNCACHED CACHED-->EVICTING-->(free) + * | ^ ^ + * | | | + * +----> FILL ----+ | + * | | + * | | + * +--------> NOFILL -------+ + */ +typedef enum dbuf_states { + DB_UNCACHED, + DB_FILL, + DB_NOFILL, + DB_READ, + DB_CACHED, + DB_EVICTING +} dbuf_states_t; + +struct dnode; +struct dmu_tx; + +/* + * level = 0 means the user data + * level = 1 means the single indirect block + * etc. 
+ */ + +struct dmu_buf_impl; + +typedef enum override_states { + DR_NOT_OVERRIDDEN, + DR_IN_DMU_SYNC, + DR_OVERRIDDEN +} override_states_t; + +typedef struct dbuf_dirty_record { + /* link on our parent's dirty list */ + list_node_t dr_dirty_node; + + /* transaction group this data will sync in */ + uint64_t dr_txg; + + /* zio of outstanding write IO */ + zio_t *dr_zio; + + /* pointer back to our dbuf */ + struct dmu_buf_impl *dr_dbuf; + + /* pointer to next dirty record */ + struct dbuf_dirty_record *dr_next; + + /* pointer to parent dirty record */ + struct dbuf_dirty_record *dr_parent; + + union dirty_types { + struct dirty_indirect { + + /* protect access to list */ + kmutex_t dr_mtx; + + /* Our list of dirty children */ + list_t dr_children; + } di; + struct dirty_leaf { + + /* + * dr_data is set when we dirty the buffer + * so that we can retain the pointer even if it + * gets COW'd in a subsequent transaction group. + */ + arc_buf_t *dr_data; + blkptr_t dr_overridden_by; + override_states_t dr_override_state; + uint8_t dr_copies; + } dl; + } dt; +} dbuf_dirty_record_t; + +typedef struct dmu_buf_impl { + /* + * The following members are immutable, with the exception of + * db.db_data, which is protected by db_mtx. + */ + + /* the publicly visible structure */ + dmu_buf_t db; + + /* the objset we belong to */ + struct objset *db_objset; + + /* + * handle to safely access the dnode we belong to (NULL when evicted) + */ + struct dnode_handle *db_dnode_handle; + + /* + * our parent buffer; if the dnode points to us directly, + * db_parent == db_dnode_handle->dnh_dnode->dn_dbuf + * only accessed by sync thread ???
+ * (NULL when evicted) + * May change from NULL to non-NULL under the protection of db_mtx + * (see dbuf_check_blkptr()) + */ + struct dmu_buf_impl *db_parent; + + /* + * link for hash table of all dmu_buf_impl_t's + */ + struct dmu_buf_impl *db_hash_next; + + /* our block number */ + uint64_t db_blkid; + + /* + * Pointer to the blkptr_t which points to us. May be NULL if we + * don't have one yet. (NULL when evicted) + */ + blkptr_t *db_blkptr; + + /* + * Our indirection level. Data buffers have db_level==0. + * Indirect buffers which point to data buffers have + * db_level==1. etc. Buffers which contain dnodes have + * db_level==0, since the dnodes are stored in a file. + */ + uint8_t db_level; + + /* db_mtx protects the members below */ + kmutex_t db_mtx; + + /* + * Current state of the buffer + */ + dbuf_states_t db_state; + + /* + * Refcount accessed by dmu_buf_{hold,rele}. + * If nonzero, the buffer can't be destroyed. + * Protected by db_mtx. + */ + refcount_t db_holds; + + /* buffer holding our data */ + arc_buf_t *db_buf; + + kcondvar_t db_changed; + dbuf_dirty_record_t *db_data_pending; + + /* pointer to most recent dirty record for this buffer */ + dbuf_dirty_record_t *db_last_dirty; + + /* + * Our link on the owner dnode's dn_dbufs list. + * Protected by its dn_dbufs_mtx.
+ */ + list_node_t db_link; + + /* Data which is unique to data (leaf) blocks: */ + + /* stuff we store for the user (see dmu_buf_set_user) */ + void *db_user_ptr; + void **db_user_data_ptr_ptr; + dmu_buf_evict_func_t *db_evict_func; + + uint8_t db_immediate_evict; + uint8_t db_freed_in_flight; + + uint8_t db_dirtycnt; +} dmu_buf_impl_t; + +/* Note: the dbuf hash table is exposed only for the mdb module */ +#define DBUF_MUTEXES 256 +#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) +typedef struct dbuf_hash_table { + uint64_t hash_table_mask; + dmu_buf_impl_t **hash_table; + kmutex_t hash_mutexes[DBUF_MUTEXES]; +} dbuf_hash_table_t; + + +uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset); + +dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data); +void dbuf_create_bonus(struct dnode *dn); +int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx); +void dbuf_spill_hold(struct dnode *dn, dmu_buf_impl_t **dbp, void *tag); + +void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx); + +dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag); +dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid, + void *tag); +int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, + void *tag, dmu_buf_impl_t **dbp); + +void dbuf_prefetch(struct dnode *dn, uint64_t blkid); + +void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); +uint64_t dbuf_refcount(dmu_buf_impl_t *db); + +void dbuf_rele(dmu_buf_impl_t *db, void *tag); +void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag); + +dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid); + +int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); +void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx); +void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_fill_done(dmu_buf_t 
*db, dmu_tx_t *tx); +void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx); +dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db); + +void dbuf_clear(dmu_buf_impl_t *db); +void dbuf_evict(dmu_buf_impl_t *db); + +void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +void dbuf_unoverride(dbuf_dirty_record_t *dr); +void dbuf_sync_list(list_t *list, dmu_tx_t *tx); +void dbuf_release_bp(dmu_buf_impl_t *db); + +void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end, + struct dmu_tx *); + +void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); + +#define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode) +#define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock) +#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db))) +#define DB_DNODE_EXIT(_db) (zrl_remove(&DB_DNODE_LOCK(_db))) +#define DB_DNODE_HELD(_db) (!zrl_is_zero(&DB_DNODE_LOCK(_db))) + +/* + * DB_GET_SPA() stores the os_spa of the objset of _db's dnode through + * _spa_p; DB_GET_OBJSET() stores that dnode's dn_objset through _os_p. + * The dnode is only dereferenced between DB_DNODE_ENTER() and + * DB_DNODE_EXIT(). Each macro is wrapped in do/while (0) so that it + * expands to a single statement and is safe in an unbraced if/else body; + * the invocation must be terminated with a semicolon. + */ +#define DB_GET_SPA(_spa_p, _db) do { \ + dnode_t *__dn; \ + DB_DNODE_ENTER(_db); \ + __dn = DB_DNODE(_db); \ + *(_spa_p) = __dn->dn_objset->os_spa; \ + DB_DNODE_EXIT(_db); \ +_NOTE(CONSTCOND) } while (0) +#define DB_GET_OBJSET(_os_p, _db) do { \ + dnode_t *__dn; \ + DB_DNODE_ENTER(_db); \ + __dn = DB_DNODE(_db); \ + *(_os_p) = __dn->dn_objset; \ + DB_DNODE_EXIT(_db); \ +_NOTE(CONSTCOND) } while (0) + +void dbuf_init(void); +void dbuf_fini(void); + +boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); + +#define DBUF_IS_METADATA(_db) \ + (dbuf_is_metadata(_db)) + +#define DBUF_GET_BUFC_TYPE(_db) \ + (DBUF_IS_METADATA(_db) ?
ARC_BUFC_METADATA : ARC_BUFC_DATA) + +#define DBUF_IS_CACHEABLE(_db) \ + ((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \ + (DBUF_IS_METADATA(_db) && \ + ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) + +#define DBUF_IS_L2CACHEABLE(_db) \ + ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \ + (DBUF_IS_METADATA(_db) && \ + ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))) + +#ifdef ZFS_DEBUG + +/* + * There should be a ## between the string literal and fmt, to make it + * clear that we're joining two strings together, but gcc does not + * support that preprocessor token. + */ +#define dprintf_dbuf(dbuf, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char __db_buf[32]; \ + uint64_t __db_obj = (dbuf)->db.db_object; \ + if (__db_obj == DMU_META_DNODE_OBJECT) \ + (void) strcpy(__db_buf, "mdn"); \ + else \ + (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \ + (u_longlong_t)__db_obj); \ + dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \ + "obj=%s lvl=%u blkid=%lld " fmt, \ + __db_buf, (dbuf)->db_level, \ + (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \ + } \ +_NOTE(CONSTCOND) } while (0) + +#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \ + sprintf_blkptr(__blkbuf, bp); \ + dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \ + kmem_free(__blkbuf, BP_SPRINTF_LEN); \ + } \ +_NOTE(CONSTCOND) } while (0) + +#define DBUF_VERIFY(db) dbuf_verify(db) + +#else + +#define dprintf_dbuf(db, fmt, ...) +#define dprintf_dbuf_bp(db, bp, fmt, ...) 
+#define DBUF_VERIFY(db) + +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DBUF_H */ diff --git a/uts/common/fs/zfs/sys/ddt.h b/uts/common/fs/zfs/sys/ddt.h new file mode 100644 index 000000000000..9724d6ecebb0 --- /dev/null +++ b/uts/common/fs/zfs/sys/ddt.h @@ -0,0 +1,246 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_DDT_H +#define _SYS_DDT_H + +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/fs/zfs.h> +#include <sys/zio.h> +#include <sys/dmu.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * On-disk DDT formats, in the desired search order (newest version first). + */ +enum ddt_type { + DDT_TYPE_ZAP = 0, + DDT_TYPES +}; + +/* + * DDT classes, in the desired search order (highest replication level first). + */ +enum ddt_class { + DDT_CLASS_DITTO = 0, + DDT_CLASS_DUPLICATE, + DDT_CLASS_UNIQUE, + DDT_CLASSES +}; + +#define DDT_TYPE_CURRENT 0 + +#define DDT_COMPRESS_BYTEORDER_MASK 0x80 +#define DDT_COMPRESS_FUNCTION_MASK 0x7f + +/* + * On-disk ddt entry: key (name) and physical storage (value). 
+ */ +typedef struct ddt_key { + zio_cksum_t ddk_cksum; /* 256-bit block checksum */ + uint64_t ddk_prop; /* LSIZE, PSIZE, compression */ +} ddt_key_t; + +/* + * ddk_prop layout: + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | 0 | 0 | 0 | comp | PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +#define DDK_GET_LSIZE(ddk) \ + BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) +#define DDK_SET_LSIZE(ddk, x) \ + BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define DDK_GET_PSIZE(ddk) \ + BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) +#define DDK_SET_PSIZE(ddk, x) \ + BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) +#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) + +#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) + +typedef struct ddt_phys { + dva_t ddp_dva[SPA_DVAS_PER_BP]; + uint64_t ddp_refcnt; + uint64_t ddp_phys_birth; +} ddt_phys_t; + +enum ddt_phys_type { + DDT_PHYS_DITTO = 0, + DDT_PHYS_SINGLE = 1, + DDT_PHYS_DOUBLE = 2, + DDT_PHYS_TRIPLE = 3, + DDT_PHYS_TYPES +}; + +/* + * In-core ddt entry + */ +struct ddt_entry { + ddt_key_t dde_key; + ddt_phys_t dde_phys[DDT_PHYS_TYPES]; + zio_t *dde_lead_zio[DDT_PHYS_TYPES]; + void *dde_repair_data; + enum ddt_type dde_type; + enum ddt_class dde_class; + uint8_t dde_loading; + uint8_t dde_loaded; + kcondvar_t dde_cv; + avl_node_t dde_node; +}; + +/* + * In-core ddt + */ +struct ddt { + kmutex_t ddt_lock; + avl_tree_t ddt_tree; + avl_tree_t ddt_repair_tree; + enum zio_checksum ddt_checksum; + spa_t *ddt_spa; + objset_t *ddt_os; + uint64_t ddt_stat_object; + uint64_t ddt_object[DDT_TYPES][DDT_CLASSES]; + ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES]; + ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES]; + ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES]; + avl_node_t ddt_node; 
+}; + +/* + * In-core and on-disk bookmark for DDT walks + */ +typedef struct ddt_bookmark { + uint64_t ddb_class; + uint64_t ddb_type; + uint64_t ddb_checksum; + uint64_t ddb_cursor; +} ddt_bookmark_t; + +/* + * Ops vector to access a specific DDT object type. + */ +typedef struct ddt_ops { + char ddt_op_name[32]; + int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx, + boolean_t prehash); + int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx); + int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde); + void (*ddt_op_prefetch)(objset_t *os, uint64_t object, + ddt_entry_t *dde); + int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde, + dmu_tx_t *tx); + int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde, + dmu_tx_t *tx); + int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde, + uint64_t *walk); + uint64_t (*ddt_op_count)(objset_t *os, uint64_t object); +} ddt_ops_t; + +#define DDT_NAMELEN 80 + +extern void ddt_object_name(ddt_t *ddt, enum ddt_type type, + enum ddt_class class, char *name); +extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type, + enum ddt_class class, uint64_t *walk, ddt_entry_t *dde); +extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type, + enum ddt_class class); +extern int ddt_object_info(ddt_t *ddt, enum ddt_type type, + enum ddt_class class, dmu_object_info_t *); +extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type, + enum ddt_class class); + +extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, + uint64_t txg); +extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk, + const ddt_phys_t *ddp, blkptr_t *bp); + +extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp); + +extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp); +extern void ddt_phys_clear(ddt_phys_t *ddp); +extern void ddt_phys_addref(ddt_phys_t *ddp); +extern void ddt_phys_decref(ddt_phys_t *ddp); +extern void 
ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, + uint64_t txg); +extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp); +extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde); + +extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg); + +extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); +extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); +extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh); +extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo); +extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh); +extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total); + +extern uint64_t ddt_get_dedup_dspace(spa_t *spa); +extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa); + +extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, + ddt_phys_t *ddp_willref); +extern int ddt_ditto_copies_present(ddt_entry_t *dde); + +extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len); +extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len); + +extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); +extern void ddt_enter(ddt_t *ddt); +extern void ddt_exit(ddt_t *ddt); +extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add); +extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp); +extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde); + +extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class, + const blkptr_t *bp); + +extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp); +extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde); + +extern int ddt_entry_compare(const void *x1, const void *x2); + +extern void ddt_create(spa_t *spa); +extern int ddt_load(spa_t *spa); +extern void ddt_unload(spa_t *spa); +extern void ddt_sync(spa_t *spa, uint64_t txg); +extern int ddt_walk(spa_t *spa, 
ddt_bookmark_t *ddb, ddt_entry_t *dde); +extern int ddt_object_update(ddt_t *ddt, enum ddt_type type, + enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx); + +extern const ddt_ops_t ddt_zap_ops; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DDT_H */ diff --git a/uts/common/fs/zfs/sys/dmu.h b/uts/common/fs/zfs/sys/dmu.h new file mode 100644 index 000000000000..07f5949ebfea --- /dev/null +++ b/uts/common/fs/zfs/sys/dmu.h @@ -0,0 +1,740 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#ifndef _SYS_DMU_H +#define _SYS_DMU_H + +/* + * This file describes the interface that the DMU provides for its + * consumers. + * + * The DMU also interacts with the SPA. That interface is described in + * dmu_spa.h. 
+ */ + +#include <sys/inttypes.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/cred.h> +#include <sys/time.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct uio; +struct xuio; +struct page; +struct vnode; +struct spa; +struct zilog; +struct zio; +struct blkptr; +struct zap_cursor; +struct dsl_dataset; +struct dsl_pool; +struct dnode; +struct drr_begin; +struct drr_end; +struct zbookmark; +struct spa; +struct nvlist; +struct arc_buf; +struct zio_prop; +struct sa_handle; + +typedef struct objset objset_t; +typedef struct dmu_tx dmu_tx_t; +typedef struct dsl_dir dsl_dir_t; + +typedef enum dmu_object_type { + DMU_OT_NONE, + /* general: */ + DMU_OT_OBJECT_DIRECTORY, /* ZAP */ + DMU_OT_OBJECT_ARRAY, /* UINT64 */ + DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ + DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ + DMU_OT_BPOBJ, /* UINT64 */ + DMU_OT_BPOBJ_HDR, /* UINT64 */ + /* spa: */ + DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ + DMU_OT_SPACE_MAP, /* UINT64 */ + /* zil: */ + DMU_OT_INTENT_LOG, /* UINT64 */ + /* dmu: */ + DMU_OT_DNODE, /* DNODE */ + DMU_OT_OBJSET, /* OBJSET */ + /* dsl: */ + DMU_OT_DSL_DIR, /* UINT64 */ + DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ + DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ + DMU_OT_DSL_PROPS, /* ZAP */ + DMU_OT_DSL_DATASET, /* UINT64 */ + /* zpl: */ + DMU_OT_ZNODE, /* ZNODE */ + DMU_OT_OLDACL, /* Old ACL */ + DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ + DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ + DMU_OT_MASTER_NODE, /* ZAP */ + DMU_OT_UNLINKED_SET, /* ZAP */ + /* zvol: */ + DMU_OT_ZVOL, /* UINT8 */ + DMU_OT_ZVOL_PROP, /* ZAP */ + /* other; for testing only! 
*/ + DMU_OT_PLAIN_OTHER, /* UINT8 */ + DMU_OT_UINT64_OTHER, /* UINT64 */ + DMU_OT_ZAP_OTHER, /* ZAP */ + /* new object types: */ + DMU_OT_ERROR_LOG, /* ZAP */ + DMU_OT_SPA_HISTORY, /* UINT8 */ + DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ + DMU_OT_POOL_PROPS, /* ZAP */ + DMU_OT_DSL_PERMS, /* ZAP */ + DMU_OT_ACL, /* ACL */ + DMU_OT_SYSACL, /* SYSACL */ + DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ + DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ + DMU_OT_NEXT_CLONES, /* ZAP */ + DMU_OT_SCAN_QUEUE, /* ZAP */ + DMU_OT_USERGROUP_USED, /* ZAP */ + DMU_OT_USERGROUP_QUOTA, /* ZAP */ + DMU_OT_USERREFS, /* ZAP */ + DMU_OT_DDT_ZAP, /* ZAP */ + DMU_OT_DDT_STATS, /* ZAP */ + DMU_OT_SA, /* System attr */ + DMU_OT_SA_MASTER_NODE, /* ZAP */ + DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ + DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ + DMU_OT_SCAN_XLATE, /* ZAP */ + DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ + DMU_OT_DEADLIST, /* ZAP */ + DMU_OT_DEADLIST_HDR, /* UINT64 */ + DMU_OT_DSL_CLONES, /* ZAP */ + DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ + DMU_OT_NUMTYPES +} dmu_object_type_t; + +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! */ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + +void byteswap_uint64_array(void *buf, size_t size); +void byteswap_uint32_array(void *buf, size_t size); +void byteswap_uint16_array(void *buf, size_t size); +void byteswap_uint8_array(void *buf, size_t size); +void zap_byteswap(void *buf, size_t size); +void zfs_oldacl_byteswap(void *buf, size_t size); +void zfs_acl_byteswap(void *buf, size_t size); +void zfs_znode_byteswap(void *buf, size_t size); + +#define DS_FIND_SNAPSHOTS (1<<0) +#define DS_FIND_CHILDREN (1<<1) + +/* + * The maximum number of bytes that can be accessed as part of one + * operation, including metadata. 
+ */ +#define DMU_MAX_ACCESS (10<<20) /* 10MB */ +#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ + +#define DMU_USERUSED_OBJECT (-1ULL) +#define DMU_GROUPUSED_OBJECT (-2ULL) +#define DMU_DEADLIST_OBJECT (-3ULL) + +/* + * artificial blkids for bonus buffer and spill blocks + */ +#define DMU_BONUS_BLKID (-1ULL) +#define DMU_SPILL_BLKID (-2ULL) +/* + * Public routines to create, destroy, open, and close objsets. + */ +int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, void *tag, objset_t **osp); +void dmu_objset_rele(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, void *tag); +int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); + +int dmu_objset_evict_dbufs(objset_t *os); +int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, + void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); +int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, + uint64_t flags); +int dmu_objset_destroy(const char *name, boolean_t defer); +int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); +int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, + struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); +int dmu_objset_rename(const char *name, const char *newname, + boolean_t recursive); +int dmu_objset_find(char *name, int func(const char *, void *), void *arg, + int flags); +void dmu_objset_byteswap(void *buf, size_t size); + +typedef struct dmu_buf { + uint64_t db_object; /* object that this buffer is part of */ + uint64_t db_offset; /* byte offset in this object */ + uint64_t db_size; /* size of buffer in bytes */ + void *db_data; /* data in buffer */ +} dmu_buf_t; + +typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); + +/* + * The names of zap entries in the DIRECTORY_OBJECT of the MOS. 
+ */ +#define DMU_POOL_DIRECTORY_OBJECT 1 +#define DMU_POOL_CONFIG "config" +#define DMU_POOL_ROOT_DATASET "root_dataset" +#define DMU_POOL_SYNC_BPOBJ "sync_bplist" +#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" +#define DMU_POOL_ERRLOG_LAST "errlog_last" +#define DMU_POOL_SPARES "spares" +#define DMU_POOL_DEFLATE "deflate" +#define DMU_POOL_HISTORY "history" +#define DMU_POOL_PROPS "pool_props" +#define DMU_POOL_L2CACHE "l2cache" +#define DMU_POOL_TMP_USERREFS "tmp_userrefs" +#define DMU_POOL_DDT "DDT-%s-%s-%s" +#define DMU_POOL_DDT_STATS "DDT-statistics" +#define DMU_POOL_CREATION_VERSION "creation_version" +#define DMU_POOL_SCAN "scan" +#define DMU_POOL_FREE_BPOBJ "free_bpobj" + +/* + * Allocate an object from this objset. The range of object numbers + * available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode. + * + * The transaction must be assigned to a txg. The newly allocated + * object will be "held" in the transaction (ie. you can modify the + * newly allocated object in this transaction). + * + * dmu_object_alloc() chooses an object and returns it in *objectp. + * + * dmu_object_claim() allocates a specific object number. If that + * number is already allocated, it fails and returns EEXIST. + * + * Return 0 on success, or ENOSPC or EEXIST as specified above. + */ +uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot, + int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); +int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, + int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); +int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, + int blocksize, dmu_object_type_t bonustype, int bonuslen); + +/* + * Free an object from this objset. + * + * The object's data will be freed as well (ie. you don't need to call + * dmu_free(object, 0, -1, tx)). + * + * The object need not be held in the transaction. 
+ * + * If there are any holds on this object's buffers (via dmu_buf_hold()), + * or tx holds on the object (via dmu_tx_hold_object()), you can not + * free it; it fails and returns EBUSY. + * + * If the object is not allocated, it fails and returns ENOENT. + * + * Return 0 on success, or EBUSY or ENOENT as specified above. + */ +int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx); + +/* + * Find the next allocated or free object. + * + * The objectp parameter is in-out. It will be updated to be the next + * object which is allocated. Ignore objects which have not been + * modified since txg. + * + * XXX Can only be called on an objset with no dirty data. + * + * Returns 0 on success, or ENOENT if there are no more objects. + */ +int dmu_object_next(objset_t *os, uint64_t *objectp, + boolean_t hole, uint64_t txg); + +/* + * Set the data blocksize for an object. + * + * The object cannot have any blocks allocated beyond the first. If + * the first block is allocated already, the new size must be greater + * than the current block size. If these conditions are not met, + * ENOTSUP will be returned. + * + * Returns 0 on success, or EBUSY if there are any holds on the object + * contents, or ENOTSUP as described above. + */ +int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, + int ibs, dmu_tx_t *tx); + +/* + * Set the checksum property on a dnode. The new checksum algorithm will + * apply to all newly written blocks; existing blocks will not be affected. + */ +void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, + dmu_tx_t *tx); + +/* + * Set the compress property on a dnode. The new compression algorithm will + * apply to all newly written blocks; existing blocks will not be affected. + */ +void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, + dmu_tx_t *tx); + +/* + * Decide how to write a block: checksum, compression, number of copies, etc. 
+ */ +#define WP_NOFILL 0x1 +#define WP_DMU_SYNC 0x2 +#define WP_SPILL 0x4 + +void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp, + struct zio_prop *zp); +/* + * The bonus data is accessed more or less like a regular buffer. + * You must dmu_bonus_hold() to get the buffer, which will give you a + * dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus + * data. As with any normal buffer, you must call dmu_buf_read() to + * read db_data, dmu_buf_will_dirty() before modifying it, and the + * object must be held in an assigned transaction before calling + * dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus + * buffer as well. You must release your hold with dmu_buf_rele(). + */ +int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); +int dmu_bonus_max(void); +int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); +int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *); +dmu_object_type_t dmu_get_bonustype(dmu_buf_t *); +int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *); + +/* + * Special spill buffer support used by "SA" framework + */ + +int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); +int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags, + void *tag, dmu_buf_t **dbp); +int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); + +/* + * Obtain the DMU buffer from the specified object which contains the + * specified offset. dmu_buf_hold() puts a "hold" on the buffer, so + * that it will remain in memory. You must release the hold with + * dmu_buf_rele(). You mustn't access the dmu_buf_t after releasing your + * hold. You must have a hold on any dmu_buf_t* you pass to the DMU. + * + * You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill + * on the returned buffer before reading or writing the buffer's + * db_data. The comments for those routines describe what particular + * operations are valid after calling them. 
+ * + * The object number must be a valid, allocated object number. + */ +int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, + void *tag, dmu_buf_t **, int flags); +void dmu_buf_add_ref(dmu_buf_t *db, void* tag); +void dmu_buf_rele(dmu_buf_t *db, void *tag); +uint64_t dmu_buf_refcount(dmu_buf_t *db); + +/* + * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a + * range of an object. A pointer to an array of dmu_buf_t*'s is + * returned (in *dbpp). + * + * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and + * frees the array. The hold on the array of buffers MUST be released + * with dmu_buf_rele_array. You can NOT release the hold on each buffer + * individually with dmu_buf_rele. + */ +int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, + uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp); +void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag); + +/* + * Returns NULL on success, or the existing user ptr if it's already + * been set. + * + * user_ptr is for use by the user and can be obtained via dmu_buf_get_user(). + * + * user_data_ptr_ptr should be NULL, or a pointer to a pointer which + * will be set to db->db_data when you are allowed to access it. Note + * that db->db_data (the pointer) can change when you do dmu_buf_read(), + * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill(). + * *user_data_ptr_ptr will be set to the new value when it changes. + * + * If non-NULL, pageout func will be called when this buffer is being + * excised from the cache, so that you can clean up the data structure + * pointed to by user_ptr. + * + * dmu_evict_user() will call the pageout func for all buffers in a + * objset with a given pageout func. 
+ */ +void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, + dmu_buf_evict_func_t *pageout_func); +/* + * set_user_ie is the same as set_user, but request immediate eviction + * when hold count goes to zero. + */ +void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr, + void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); +void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, + void *user_ptr, void *user_data_ptr_ptr, + dmu_buf_evict_func_t *pageout_func); +void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func); + +/* + * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set. + */ +void *dmu_buf_get_user(dmu_buf_t *db); + +/* + * Indicate that you are going to modify the buffer's data (db_data). + * + * The transaction (tx) must be assigned to a txg (ie. you've called + * dmu_tx_assign()). The buffer's object must be held in the tx + * (ie. you've called dmu_tx_hold_object(tx, db->db_object)). + */ +void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); + +/* + * Tells if the given dbuf is freeable. + */ +boolean_t dmu_buf_freeable(dmu_buf_t *); + +/* + * You must create a transaction, then hold the objects which you will + * (or might) modify as part of this transaction. Then you must assign + * the transaction to a transaction group. Once the transaction has + * been assigned, you can modify buffers which belong to held objects as + * part of this transaction. You can't modify buffers before the + * transaction has been assigned; you can't modify buffers which don't + * belong to objects which this transaction holds; you can't hold + * objects once the transaction has been assigned. You may hold an + * object which you are going to free (with dmu_object_free()), but you + * don't have to. + * + * You can abort the transaction before it has been assigned. + * + * Note that you may hold buffers (with dmu_buf_hold) at any time, + * regardless of transaction state. 
+ */ + +#define DMU_NEW_OBJECT (-1ULL) +#define DMU_OBJECT_END (-1ULL) + +dmu_tx_t *dmu_tx_create(objset_t *os); +void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len); +void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, + uint64_t len); +void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name); +void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object); +void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); +void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); +void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); +void dmu_tx_abort(dmu_tx_t *tx); +int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); +void dmu_tx_wait(dmu_tx_t *tx); +void dmu_tx_commit(dmu_tx_t *tx); + +/* + * To register a commit callback, dmu_tx_callback_register() must be called. + * + * dcb_data is a pointer to caller private data that is passed on as a + * callback parameter. The caller is responsible for properly allocating and + * freeing it. + * + * When registering a callback, the transaction must be already created, but + * it cannot be committed or aborted. It can be assigned to a txg or not. + * + * The callback will be called after the transaction has been safely written + * to stable storage and will also be called if the dmu_tx is aborted. + * If there is any error which prevents the transaction from being committed to + * disk, the callback will be called with a value of error != 0. + */ +typedef void dmu_tx_callback_func_t(void *dcb_data, int error); + +void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, + void *dcb_data); + +/* + * Free up the data blocks for a defined range of a file. If size is + * zero, the range from offset to end-of-file is freed. 
+ */ +int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size, dmu_tx_t *tx); +int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size); +int dmu_free_object(objset_t *os, uint64_t object); + +/* + * Convenience functions. + * + * Canfail routines will return 0 on success, or an errno if there is a + * nonrecoverable I/O error. + */ +#define DMU_READ_PREFETCH 0 /* prefetch */ +#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ +int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + void *buf, uint32_t flags); +void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx); +void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + dmu_tx_t *tx); +int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); +int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, + dmu_tx_t *tx); +int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, + dmu_tx_t *tx); +int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size, struct page *pp, dmu_tx_t *tx); +struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); +void dmu_return_arcbuf(struct arc_buf *buf); +void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, + dmu_tx_t *tx); +int dmu_xuio_init(struct xuio *uio, int niov); +void dmu_xuio_fini(struct xuio *uio); +int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, + size_t n); +int dmu_xuio_cnt(struct xuio *uio); +struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i); +void dmu_xuio_clear(struct xuio *uio, int i); +void xuio_stat_wbuf_copied(); +void xuio_stat_wbuf_nocopy(); + +extern int zfs_prefetch_disable; + +/* + * Asynchronously try to read in the data. 
+ */ +void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, + uint64_t len); + +typedef struct dmu_object_info { + /* All sizes are in bytes unless otherwise indicated. */ + uint32_t doi_data_block_size; + uint32_t doi_metadata_block_size; + dmu_object_type_t doi_type; + dmu_object_type_t doi_bonus_type; + uint64_t doi_bonus_size; + uint8_t doi_indirection; /* 2 = dnode->indirect->data */ + uint8_t doi_checksum; + uint8_t doi_compress; + uint8_t doi_pad[5]; + uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */ + uint64_t doi_max_offset; + uint64_t doi_fill_count; /* number of non-empty blocks */ +} dmu_object_info_t; + +typedef void arc_byteswap_func_t(void *buf, size_t size); + +typedef struct dmu_object_type_info { + arc_byteswap_func_t *ot_byteswap; + boolean_t ot_metadata; + char *ot_name; +} dmu_object_type_info_t; + +extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES]; + +/* + * Get information on a DMU object. + * + * Return 0 on success or ENOENT if object is not allocated. + * + * If doi is NULL, just indicates whether the object exists. + */ +int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi); +void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi); +void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi); +void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, + u_longlong_t *nblk512); + +typedef struct dmu_objset_stats { + uint64_t dds_num_clones; /* number of clones of this */ + uint64_t dds_creation_txg; + uint64_t dds_guid; + dmu_objset_type_t dds_type; + uint8_t dds_is_snapshot; + uint8_t dds_inconsistent; + char dds_origin[MAXNAMELEN]; +} dmu_objset_stats_t; + +/* + * Get stats on a dataset. + */ +void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); + +/* + * Add entries to the nvlist for all the objset's properties. See + * zfs_prop_table[] and zfs(1m) for details on the properties. 
+ */ +void dmu_objset_stats(objset_t *os, struct nvlist *nv); + +/* + * Get the space usage statistics for statvfs(). + * + * refdbytes is the amount of space "referenced" by this objset. + * availbytes is the amount of space available to this objset, taking + * into account quotas & reservations, assuming that no other objsets + * use the space first. These values correspond to the 'referenced' and + * 'available' properties, described in the zfs(1m) manpage. + * + * usedobjs and availobjs are the number of objects currently allocated, + * and available. + */ +void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, + uint64_t *usedobjsp, uint64_t *availobjsp); + +/* + * The fsid_guid is a 56-bit ID that can change to avoid collisions. + * (Contrast with the ds_guid which is a 64-bit ID that will never + * change, so there is a small probability that it will collide.) + */ +uint64_t dmu_objset_fsid_guid(objset_t *os); + +/* + * Get the [cm]time for an objset's snapshot dir + */ +timestruc_t dmu_objset_snap_cmtime(objset_t *os); + +int dmu_objset_is_snapshot(objset_t *os); + +extern struct spa *dmu_objset_spa(objset_t *os); +extern struct zilog *dmu_objset_zil(objset_t *os); +extern struct dsl_pool *dmu_objset_pool(objset_t *os); +extern struct dsl_dataset *dmu_objset_ds(objset_t *os); +extern void dmu_objset_name(objset_t *os, char *buf); +extern dmu_objset_type_t dmu_objset_type(objset_t *os); +extern uint64_t dmu_objset_id(objset_t *os); +extern uint64_t dmu_objset_syncprop(objset_t *os); +extern uint64_t dmu_objset_logbias(objset_t *os); +extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name, + uint64_t *id, uint64_t *offp, boolean_t *case_conflict); +extern int dmu_snapshot_realname(objset_t *os, char *name, char *real, + int maxlen, boolean_t *conflict); +extern int dmu_dir_list_next(objset_t *os, int namelen, char *name, + uint64_t *idp, uint64_t *offp); + +typedef int objset_used_cb_t(dmu_object_type_t bonustype, + 
void *bonus, uint64_t *userp, uint64_t *groupp); +extern void dmu_objset_register_type(dmu_objset_type_t ost, + objset_used_cb_t *cb); +extern void dmu_objset_set_user(objset_t *os, void *user_ptr); +extern void *dmu_objset_get_user(objset_t *os); + +/* + * Return the txg number for the given assigned transaction. + */ +uint64_t dmu_tx_get_txg(dmu_tx_t *tx); + +/* + * Synchronous write. + * If a parent zio is provided this function initiates a write on the + * provided buffer as a child of the parent zio. + * In the absence of a parent zio, the write is completed synchronously. + * At write completion, blk is filled with the bp of the written block. + * Note that while the data covered by this function will be on stable + * storage when the write completes this new data does not become a + * permanent part of the file until the associated transaction commits. + */ + +/* + * {zfs,zvol,ztest}_get_done() args + */ +typedef struct zgd { + struct zilog *zgd_zilog; + struct blkptr *zgd_bp; + dmu_buf_t *zgd_db; + struct rl *zgd_rl; + void *zgd_private; +} zgd_t; + +typedef void dmu_sync_cb_t(zgd_t *arg, int error); +int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd); + +/* + * Find the next hole or data block in file starting at *off + * Return found offset in *off. Return ESRCH for end of file. + */ +int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, + uint64_t *off); + +/* + * Initial setup and final teardown. + */ +extern void dmu_init(void); +extern void dmu_fini(void); + +typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, + uint64_t object, uint64_t offset, int len); +void dmu_traverse_objset(objset_t *os, uint64_t txg_start, + dmu_traverse_cb_t cb, void *arg); + +int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + struct vnode *vp, offset_t *off); + +typedef struct dmu_recv_cookie { + /* + * This structure is opaque! 
+ * + * If logical and real are different, we are recving the stream + * into the "real" temporary clone, and then switching it with + * the "logical" target. + */ + struct dsl_dataset *drc_logical_ds; + struct dsl_dataset *drc_real_ds; + struct drr_begin *drc_drrb; + char *drc_tosnap; + char *drc_top_ds; + boolean_t drc_newfs; + boolean_t drc_force; +} dmu_recv_cookie_t; + +int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, + boolean_t force, objset_t *origin, dmu_recv_cookie_t *); +int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, + int cleanup_fd, uint64_t *action_handlep); +int dmu_recv_end(dmu_recv_cookie_t *drc); + +int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, + offset_t *off); + +/* CRC64 table */ +#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ +extern uint64_t zfs_crc64_table[256]; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DMU_H */ diff --git a/uts/common/fs/zfs/sys/dmu_impl.h b/uts/common/fs/zfs/sys/dmu_impl.h new file mode 100644 index 000000000000..22f9f5f8c88c --- /dev/null +++ b/uts/common/fs/zfs/sys/dmu_impl.h @@ -0,0 +1,272 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DMU_IMPL_H +#define _SYS_DMU_IMPL_H + +#include <sys/txg_impl.h> +#include <sys/zio.h> +#include <sys/dnode.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This is the locking strategy for the DMU. Numbers in parentheses are + * cases that use that lock order, referenced below: + * + * ARC is self-contained + * bplist is self-contained + * refcount is self-contained + * txg is self-contained (hopefully!) + * zst_lock + * zf_rwlock + * + * XXX try to improve evicting path? + * + * dp_config_rwlock > os_obj_lock > dn_struct_rwlock > + * dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leafs + * + * dp_config_rwlock + * must be held before: everything + * protects dd namespace changes + * protects property changes globally + * held from: + * dsl_dir_open/r: + * dsl_dir_create_sync/w: + * dsl_dir_sync_destroy/w: + * dsl_dir_rename_sync/w: + * dsl_prop_changed_notify/r: + * + * os_obj_lock + * must be held before: + * everything except dp_config_rwlock + * protects os_obj_next + * held from: + * dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock + * + * dn_struct_rwlock + * must be held before: + * everything except dp_config_rwlock and os_obj_lock + * protects structure of dnode (eg. nlevels) + * db_blkptr can change when syncing out change to nlevels + * dn_maxblkid + * dn_nlevels + * dn_*blksz* + * phys nlevels, maxblkid, physical blkptr_t's (?) 
+ * held from: + * callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch + * dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz) + * dmu_tx_count_free: + * dbuf_read_impl: db_mtx, dmu_zfetch() + * dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch() + * dbuf_new_size: db_mtx + * dbuf_dirty: db_mtx + * dbuf_findbp: (callers, phys? - the real need) + * dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?) + * dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx + * dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp() + * dnode_sync/w (increase_indirection): db_mtx (phys) + * dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*) + * dnode_new_blkid/w: (dn_maxblkid) + * dnode_free_range/w: dn_dirty_mtx (dn_maxblkid) + * dnode_next_offset: (phys) + * + * dn_dbufs_mtx + * must be held before: + * db_mtx, hash_mutexes + * protects: + * dn_dbufs + * dn_evicted + * held from: + * dmu_evict_user: db_mtx (dn_dbufs) + * dbuf_free_range: db_mtx (dn_dbufs) + * dbuf_remove_ref: db_mtx, callees: + * dbuf_hash_remove: hash_mutexes, db_mtx + * dbuf_create: hash_mutexes, db_mtx (dn_dbufs) + * dnode_set_blksz: (dn_dbufs) + * + * hash_mutexes (global) + * must be held before: + * db_mtx + * protects dbuf_hash_table (global) and db_hash_next + * held from: + * dbuf_find: db_mtx + * dbuf_hash_insert: db_mtx + * dbuf_hash_remove: db_mtx + * + * db_mtx (meta-leaf) + * must be held before: + * dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes) + * protects: + * db_state + * db_holds + * db_buf + * db_changed + * db_data_pending + * db_dirtied + * db_link + * db_dirty_node (??) 
+ * db_dirtycnt + * db_d.* + * db.* + * held from: + * dbuf_dirty: dn_mtx, dn_dirty_mtx + * dbuf_dirty->dsl_dir_willuse_space: dd_lock + * dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock + * dbuf_undirty: dn_dirty_mtx (db_d) + * dbuf_write_done: dn_dirty_mtx (db_state) + * dbuf_* + * dmu_buf_update_user: none (db_d) + * dmu_evict_user: none (db_d) (maybe can eliminate) + * dbuf_find: none (db_holds) + * dbuf_hash_insert: none (db_holds) + * dmu_buf_read_array_impl: none (db_state, db_changed) + * dmu_sync: none (db_dirty_node, db_d) + * dnode_reallocate: none (db) + * + * dn_mtx (leaf) + * protects: + * dn_dirty_dbufs + * dn_ranges + * phys accounting + * dn_allocated_txg + * dn_free_txg + * dn_assigned_txg + * dd_assigned_tx + * dn_notxholds + * dn_dirtyctx + * dn_dirtyctx_firstset + * (dn_phys copy fields?) + * (dn_phys contents?) + * held from: + * dnode_* + * dbuf_dirty: none + * dbuf_sync: none (phys accounting) + * dbuf_undirty: none (dn_ranges, dn_dirty_dbufs) + * dbuf_write_done: none (phys accounting) + * dmu_object_info_from_dnode: none (accounting) + * dmu_tx_commit: none + * dmu_tx_hold_object_impl: none + * dmu_tx_try_assign: dn_notxholds(cv) + * dmu_tx_unassign: none + * + * dd_lock + * must be held before: + * ds_lock + * ancestors' dd_lock + * protects: + * dd_prop_cbs + * dd_sync_* + * dd_used_bytes + * dd_tempreserved + * dd_space_towrite + * dd_myname + * dd_phys accounting? 
+ * held from: + * dsl_dir_* + * dsl_prop_changed_notify: none (dd_prop_cbs) + * dsl_prop_register: none (dd_prop_cbs) + * dsl_prop_unregister: none (dd_prop_cbs) + * dsl_dataset_block_freeable: none (dd_sync_*) + * + * os_lock (leaf) + * protects: + * os_dirty_dnodes + * os_free_dnodes + * os_dnodes + * os_downgraded_dbufs + * dn_dirtyblksz + * dn_dirty_link + * held from: + * dnode_create: none (os_dnodes) + * dnode_destroy: none (os_dnodes) + * dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes) + * dnode_free: none (dn_dirtyblksz, os_*_dnodes) + * + * ds_lock + * protects: + * ds_objset + * ds_open_refcount + * ds_snapname + * ds_phys accounting + * ds_phys userrefs zapobj + * ds_reserved + * held from: + * dsl_dataset_* + * + * dr_mtx (leaf) + * protects: + * dr_children + * held from: + * dbuf_dirty + * dbuf_undirty + * dbuf_sync_indirect + * dnode_new_blkid + */ + +struct objset; +struct dmu_pool; + +typedef struct dmu_xuio { + int next; + int cnt; + struct arc_buf **bufs; + iovec_t *iovp; +} dmu_xuio_t; + +typedef struct xuio_stats { + /* loaned yet not returned arc_buf */ + kstat_named_t xuiostat_onloan_rbuf; + kstat_named_t xuiostat_onloan_wbuf; + /* whether a copy is made when loaning out a read buffer */ + kstat_named_t xuiostat_rbuf_copied; + kstat_named_t xuiostat_rbuf_nocopy; + /* whether a copy is made when assigning a write buffer */ + kstat_named_t xuiostat_wbuf_copied; + kstat_named_t xuiostat_wbuf_nocopy; +} xuio_stats_t; + +static xuio_stats_t xuio_stats = { + { "onloan_read_buf", KSTAT_DATA_UINT64 }, + { "onloan_write_buf", KSTAT_DATA_UINT64 }, + { "read_buf_copied", KSTAT_DATA_UINT64 }, + { "read_buf_nocopy", KSTAT_DATA_UINT64 }, + { "write_buf_copied", KSTAT_DATA_UINT64 }, + { "write_buf_nocopy", KSTAT_DATA_UINT64 } +}; + +#define XUIOSTAT_INCR(stat, val) \ + atomic_add_64(&xuio_stats.stat.value.ui64, (val)) +#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1) + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DMU_IMPL_H */ diff --git 
a/uts/common/fs/zfs/sys/dmu_objset.h b/uts/common/fs/zfs/sys/dmu_objset.h new file mode 100644 index 000000000000..c6d202e2e81a --- /dev/null +++ b/uts/common/fs/zfs/sys/dmu_objset.h @@ -0,0 +1,183 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#ifndef _SYS_DMU_OBJSET_H +#define _SYS_DMU_OBJSET_H + +#include <sys/spa.h> +#include <sys/arc.h> +#include <sys/txg.h> +#include <sys/zfs_context.h> +#include <sys/dnode.h> +#include <sys/zio.h> +#include <sys/zil.h> +#include <sys/sa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern krwlock_t os_lock; + +struct dsl_dataset; +struct dmu_tx; + +#define OBJSET_PHYS_SIZE 2048 +#define OBJSET_OLD_PHYS_SIZE 1024 + +#define OBJSET_BUF_HAS_USERUSED(buf) \ + (arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE) + +#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) + +typedef struct objset_phys { + dnode_phys_t os_meta_dnode; + zil_header_t os_zil_header; + uint64_t os_type; + uint64_t os_flags; + char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - + sizeof (zil_header_t) - sizeof (uint64_t)*2]; + dnode_phys_t os_userused_dnode; + dnode_phys_t os_groupused_dnode; +} objset_phys_t; + +struct objset { + /* Immutable: */ + struct dsl_dataset *os_dsl_dataset; + spa_t *os_spa; + arc_buf_t *os_phys_buf; + objset_phys_t *os_phys; + /* + * The following "special" dnodes have no parent and are exempt from + * dnode_move(), but they root their descendents in this objset using + * handles anyway, so that all access to dnodes from dbufs consistently + * uses handles. 
+ */ + dnode_handle_t os_meta_dnode; + dnode_handle_t os_userused_dnode; + dnode_handle_t os_groupused_dnode; + zilog_t *os_zil; + + /* can change, under dsl_dir's locks: */ + uint8_t os_checksum; + uint8_t os_compress; + uint8_t os_copies; + uint8_t os_dedup_checksum; + uint8_t os_dedup_verify; + uint8_t os_logbias; + uint8_t os_primary_cache; + uint8_t os_secondary_cache; + uint8_t os_sync; + + /* no lock needed: */ + struct dmu_tx *os_synctx; /* XXX sketchy */ + blkptr_t *os_rootbp; + zil_header_t os_zil_header; + list_t os_synced_dnodes; + uint64_t os_flags; + + /* Protected by os_obj_lock */ + kmutex_t os_obj_lock; + uint64_t os_obj_next; + + /* Protected by os_lock */ + kmutex_t os_lock; + list_t os_dirty_dnodes[TXG_SIZE]; + list_t os_free_dnodes[TXG_SIZE]; + list_t os_dnodes; + list_t os_downgraded_dbufs; + + /* stuff we store for the user */ + kmutex_t os_user_ptr_lock; + void *os_user_ptr; + + /* SA layout/attribute registration */ + sa_os_t *os_sa; +}; + +#define DMU_META_OBJSET 0 +#define DMU_META_DNODE_OBJECT 0 +#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0) +#define DMU_META_DNODE(os) ((os)->os_meta_dnode.dnh_dnode) +#define DMU_USERUSED_DNODE(os) ((os)->os_userused_dnode.dnh_dnode) +#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode) + +#define DMU_OS_IS_L2CACHEABLE(os) \ + ((os)->os_secondary_cache == ZFS_CACHE_ALL || \ + (os)->os_secondary_cache == ZFS_CACHE_METADATA) + +/* called from zpl */ +int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, void *tag, objset_t **osp); +void dmu_objset_rele(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, void *tag); +int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); + +int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, + void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); +int dmu_objset_clone(const char 
*name, struct dsl_dataset *clone_origin, + uint64_t flags); +int dmu_objset_destroy(const char *name, boolean_t defer); +int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, + struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); +void dmu_objset_stats(objset_t *os, nvlist_t *nv); +void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); +void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, + uint64_t *usedobjsp, uint64_t *availobjsp); +uint64_t dmu_objset_fsid_guid(objset_t *os); +int dmu_objset_find(char *name, int func(const char *, void *), void *arg, + int flags); +int dmu_objset_find_spa(spa_t *spa, const char *name, + int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); +int dmu_objset_prefetch(const char *name, void *arg); +void dmu_objset_byteswap(void *buf, size_t size); +int dmu_objset_evict_dbufs(objset_t *os); +timestruc_t dmu_objset_snap_cmtime(objset_t *os); + +/* called from dsl */ +void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); +boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); +boolean_t dmu_objset_is_dirty_anywhere(objset_t *os); +objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, + blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); +int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, + objset_t **osp); +void dmu_objset_evict(objset_t *os); +void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx); +void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx); +boolean_t dmu_objset_userused_enabled(objset_t *os); +int dmu_objset_userspace_upgrade(objset_t *os); +boolean_t dmu_objset_userspace_present(objset_t *os); + +void dmu_objset_init(void); +void dmu_objset_fini(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DMU_OBJSET_H */ diff --git a/uts/common/fs/zfs/sys/dmu_traverse.h b/uts/common/fs/zfs/sys/dmu_traverse.h new file mode 100644 index 
000000000000..5b326cd99c09 --- /dev/null +++ b/uts/common/fs/zfs/sys/dmu_traverse.h @@ -0,0 +1,64 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DMU_TRAVERSE_H +#define _SYS_DMU_TRAVERSE_H + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/zio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dnode_phys; +struct dsl_dataset; +struct zilog; +struct arc_buf; + +typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, + struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp, + void *arg); + +#define TRAVERSE_PRE (1<<0) +#define TRAVERSE_POST (1<<1) +#define TRAVERSE_PREFETCH_METADATA (1<<2) +#define TRAVERSE_PREFETCH_DATA (1<<3) +#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) +#define TRAVERSE_HARD (1<<4) + +/* Special traverse error return value to indicate skipping of children */ +#define TRAVERSE_VISIT_NO_CHILDREN -1 + +int traverse_dataset(struct dsl_dataset *ds, + uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); +int traverse_pool(spa_t *spa, + uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DMU_TRAVERSE_H */ diff --git a/uts/common/fs/zfs/sys/dmu_tx.h b/uts/common/fs/zfs/sys/dmu_tx.h new file mode 100644 index 000000000000..c5ea50fa8d82 --- /dev/null +++ b/uts/common/fs/zfs/sys/dmu_tx.h @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DMU_TX_H +#define _SYS_DMU_TX_H + +#include <sys/inttypes.h> +#include <sys/dmu.h> +#include <sys/txg.h> +#include <sys/refcount.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dmu_buf_impl; +struct dmu_tx_hold; +struct dnode_link; +struct dsl_pool; +struct dnode; +struct dsl_dir; + +struct dmu_tx { + /* + * No synchronization is needed because a tx can only be handled + * by one thread. + */ + list_t tx_holds; /* list of dmu_tx_hold_t */ + objset_t *tx_objset; + struct dsl_dir *tx_dir; + struct dsl_pool *tx_pool; + uint64_t tx_txg; + uint64_t tx_lastsnap_txg; + uint64_t tx_lasttried_txg; + txg_handle_t tx_txgh; + void *tx_tempreserve_cookie; + struct dmu_tx_hold *tx_needassign_txh; + list_t tx_callbacks; /* list of dmu_tx_callback_t on this dmu_tx */ + uint8_t tx_anyobj; + int tx_err; +#ifdef ZFS_DEBUG + uint64_t tx_space_towrite; + uint64_t tx_space_tofree; + uint64_t tx_space_tooverwrite; + uint64_t tx_space_tounref; + refcount_t tx_space_written; + refcount_t tx_space_freed; +#endif +}; + +enum dmu_tx_hold_type { + THT_NEWOBJECT, + THT_WRITE, + THT_BONUS, + THT_FREE, + THT_ZAP, + THT_SPACE, + THT_SPILL, + THT_NUMTYPES +}; + +typedef struct dmu_tx_hold { + dmu_tx_t *txh_tx; + list_node_t txh_node; + struct dnode *txh_dnode; + uint64_t txh_space_towrite; + uint64_t txh_space_tofree; + uint64_t txh_space_tooverwrite; + uint64_t txh_space_tounref; + uint64_t txh_memory_tohold; + uint64_t txh_fudge; +#ifdef ZFS_DEBUG + enum dmu_tx_hold_type txh_type; + uint64_t txh_arg1; + uint64_t txh_arg2; +#endif +} dmu_tx_hold_t; + +typedef struct dmu_tx_callback { + list_node_t dcb_node; /* linked to 
tx_callbacks list */ + dmu_tx_callback_func_t *dcb_func; /* caller function pointer */ + void *dcb_data; /* caller private data */ +} dmu_tx_callback_t; + +/* + * These routines are defined in dmu.h, and are called by the user. + */ +dmu_tx_t *dmu_tx_create(objset_t *dd); +int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); +void dmu_tx_commit(dmu_tx_t *tx); +void dmu_tx_abort(dmu_tx_t *tx); +uint64_t dmu_tx_get_txg(dmu_tx_t *tx); +void dmu_tx_wait(dmu_tx_t *tx); + +void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, + void *dcb_data); +void dmu_tx_do_callbacks(list_t *cb_list, int error); + +/* + * These routines are defined in dmu_spa.h, and are called by the SPA. + */ +extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg); + +/* + * These routines are only called by the DMU. + */ +dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd); +int dmu_tx_is_syncing(dmu_tx_t *tx); +int dmu_tx_private_ok(dmu_tx_t *tx); +void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object); +void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta); +void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db); +int dmu_tx_holds(dmu_tx_t *tx, uint64_t object); +void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space); + +#ifdef ZFS_DEBUG +#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db) +#else +#define DMU_TX_DIRTY_BUF(tx, db) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DMU_TX_H */ diff --git a/uts/common/fs/zfs/sys/dmu_zfetch.h b/uts/common/fs/zfs/sys/dmu_zfetch.h new file mode 100644 index 000000000000..78cadd2b1ee1 --- /dev/null +++ b/uts/common/fs/zfs/sys/dmu_zfetch.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DFETCH_H +#define _DFETCH_H + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern uint64_t zfetch_array_rd_sz; + +struct dnode; /* so we can reference dnode */ + +typedef enum zfetch_dirn { + ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */ + ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */ +} zfetch_dirn_t; + +typedef struct zstream { + uint64_t zst_offset; /* offset of starting block in range */ + uint64_t zst_len; /* length of range, in blocks */ + zfetch_dirn_t zst_direction; /* direction of prefetch */ + uint64_t zst_stride; /* length of stride, in blocks */ + uint64_t zst_ph_offset; /* prefetch offset, in blocks */ + uint64_t zst_cap; /* prefetch limit (cap), in blocks */ + kmutex_t zst_lock; /* protects stream */ + clock_t zst_last; /* lbolt of last prefetch */ + avl_node_t zst_node; /* embed avl node here */ +} zstream_t; + +typedef struct zfetch { + krwlock_t zf_rwlock; /* protects zfetch structure */ + list_t zf_stream; /* AVL tree of zstream_t's */ + struct dnode *zf_dnode; /* dnode that owns this zfetch */ + uint32_t zf_stream_cnt; /* # of active streams */ + uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */ +} zfetch_t; + +void zfetch_init(void); +void 
zfetch_fini(void); + +void dmu_zfetch_init(zfetch_t *, struct dnode *); +void dmu_zfetch_rele(zfetch_t *); +void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int); + + +#ifdef __cplusplus +} +#endif + +#endif /* _DFETCH_H */ diff --git a/uts/common/fs/zfs/sys/dnode.h b/uts/common/fs/zfs/sys/dnode.h new file mode 100644 index 000000000000..9ad4be36bf85 --- /dev/null +++ b/uts/common/fs/zfs/sys/dnode.h @@ -0,0 +1,329 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_DNODE_H +#define _SYS_DNODE_H + +#include <sys/zfs_context.h> +#include <sys/avl.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/zio.h> +#include <sys/refcount.h> +#include <sys/dmu_zfetch.h> +#include <sys/zrlock.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * dnode_hold() flags. + */ +#define DNODE_MUST_BE_ALLOCATED 1 +#define DNODE_MUST_BE_FREE 2 + +/* + * dnode_next_offset() flags. + */ +#define DNODE_FIND_HOLE 1 +#define DNODE_FIND_BACKWARDS 2 +#define DNODE_FIND_HAVELOCK 4 + +/* + * Fixed constants. 
+ */ +#define DNODE_SHIFT 9 /* 512 bytes */ +#define DN_MIN_INDBLKSHIFT 10 /* 1k */ +#define DN_MAX_INDBLKSHIFT 14 /* 16k */ +#define DNODE_BLOCK_SHIFT 14 /* 16k */ +#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ +#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ +#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ + +/* + * dnode id flags + * + * Note: a file will never ever have its + * ids moved from bonus->spill + * and only in a crypto environment would it be on spill + */ +#define DN_ID_CHKED_BONUS 0x1 +#define DN_ID_CHKED_SPILL 0x2 +#define DN_ID_OLD_EXIST 0x4 +#define DN_ID_NEW_EXIST 0x8 + +/* + * Derived constants. + */ +#define DNODE_SIZE (1 << DNODE_SHIFT) +#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) +#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) +#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) +#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1) +#define DN_KILL_SPILLBLK (1) + +#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) +#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) +#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) +#define DNODES_PER_LEVEL (1ULL << DNODES_PER_LEVEL_SHIFT) + +/* The +2 here is a cheesy way to round up */ +#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \ + (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT))) + +#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ + (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) + +#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ + (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) + +#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) + +struct dmu_buf_impl; +struct objset; +struct zio; + +enum dnode_dirtycontext { + DN_UNDIRTIED, + DN_DIRTY_OPEN, + DN_DIRTY_SYNC +}; + +/* Is dn_used in bytes? 
if not, it's in multiples of SPA_MINBLOCKSIZE */ +#define DNODE_FLAG_USED_BYTES (1<<0) +#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1) + +/* Does dnode have a SA spill blkptr in bonus? */ +#define DNODE_FLAG_SPILL_BLKPTR (1<<2) + +typedef struct dnode_phys { + uint8_t dn_type; /* dmu_object_type_t */ + uint8_t dn_indblkshift; /* ln2(indirect block size) */ + uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */ + uint8_t dn_nblkptr; /* length of dn_blkptr */ + uint8_t dn_bonustype; /* type of data in bonus buffer */ + uint8_t dn_checksum; /* ZIO_CHECKSUM type */ + uint8_t dn_compress; /* ZIO_COMPRESS type */ + uint8_t dn_flags; /* DNODE_FLAG_* */ + uint16_t dn_datablkszsec; /* data block size in 512b sectors */ + uint16_t dn_bonuslen; /* length of dn_bonus */ + uint8_t dn_pad2[4]; + + /* accounting is protected by dn_dirty_mtx */ + uint64_t dn_maxblkid; /* largest allocated block ID */ + uint64_t dn_used; /* bytes (or sectors) of disk space */ + + uint64_t dn_pad3[4]; + + blkptr_t dn_blkptr[1]; + uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)]; + blkptr_t dn_spill; +} dnode_phys_t; + +typedef struct dnode { + /* + * dn_struct_rwlock protects the structure of the dnode, + * including the number of levels of indirection (dn_nlevels), + * dn_maxblkid, and dn_next_* + */ + krwlock_t dn_struct_rwlock; + + /* Our link on dn_objset->os_dnodes list; protected by os_lock. */ + list_node_t dn_link; + + /* immutable: */ + struct objset *dn_objset; + uint64_t dn_object; + struct dmu_buf_impl *dn_dbuf; + struct dnode_handle *dn_handle; + dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */ + + /* + * Copies of stuff in dn_phys. They're valid in the open + * context (eg. even before the dnode is first synced). + * Where necessary, these are protected by dn_struct_rwlock. 
+ */ + dmu_object_type_t dn_type; /* object type */ + uint16_t dn_bonuslen; /* bonus length */ + uint8_t dn_bonustype; /* bonus type */ + uint8_t dn_nblkptr; /* number of blkptrs (immutable) */ + uint8_t dn_checksum; /* ZIO_CHECKSUM type */ + uint8_t dn_compress; /* ZIO_COMPRESS type */ + uint8_t dn_nlevels; + uint8_t dn_indblkshift; + uint8_t dn_datablkshift; /* zero if blksz not power of 2! */ + uint8_t dn_moved; /* Has this dnode been moved? */ + uint16_t dn_datablkszsec; /* in 512b sectors */ + uint32_t dn_datablksz; /* in bytes */ + uint64_t dn_maxblkid; + uint8_t dn_next_nblkptr[TXG_SIZE]; + uint8_t dn_next_nlevels[TXG_SIZE]; + uint8_t dn_next_indblkshift[TXG_SIZE]; + uint8_t dn_next_bonustype[TXG_SIZE]; + uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */ + uint16_t dn_next_bonuslen[TXG_SIZE]; + uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */ + + /* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */ + uint32_t dn_dbufs_count; /* count of dn_dbufs */ + + /* protected by os_lock: */ + list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */ + + /* protected by dn_mtx: */ + kmutex_t dn_mtx; + list_t dn_dirty_records[TXG_SIZE]; + avl_tree_t dn_ranges[TXG_SIZE]; + uint64_t dn_allocated_txg; + uint64_t dn_free_txg; + uint64_t dn_assigned_txg; + kcondvar_t dn_notxholds; + enum dnode_dirtycontext dn_dirtyctx; + uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */ + + /* protected by own devices */ + refcount_t dn_tx_holds; + refcount_t dn_holds; + + kmutex_t dn_dbufs_mtx; + list_t dn_dbufs; /* descendent dbufs */ + + /* protected by dn_struct_rwlock */ + struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */ + + boolean_t dn_have_spill; /* have spill or are spilling */ + + /* parent IO for current sync write */ + zio_t *dn_zio; + + /* used in syncing context */ + uint64_t dn_oldused; /* old phys used bytes */ + uint64_t dn_oldflags; /* old phys dn_flags */ + uint64_t dn_olduid, dn_oldgid; + uint64_t 
dn_newuid, dn_newgid; + int dn_id_flags; + + /* holds prefetch structure */ + struct zfetch dn_zfetch; +} dnode_t; + +/* + * Adds a level of indirection between the dbuf and the dnode to avoid + * iterating descendent dbufs in dnode_move(). Handles are not allocated + * individually, but as an array of child dnodes in dnode_hold_impl(). + */ +typedef struct dnode_handle { + /* Protects dnh_dnode from modification by dnode_move(). */ + zrlock_t dnh_zrlock; + dnode_t *dnh_dnode; +} dnode_handle_t; + +typedef struct dnode_children { + size_t dnc_count; /* number of children */ + dnode_handle_t dnc_children[1]; /* sized dynamically */ +} dnode_children_t; + +typedef struct free_range { + avl_node_t fr_node; + uint64_t fr_blkid; + uint64_t fr_nblks; +} free_range_t; + +dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp, + uint64_t object, dnode_handle_t *dnh); +void dnode_special_close(dnode_handle_t *dnh); + +void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx); +void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx); +void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx); + +int dnode_hold(struct objset *dd, uint64_t object, + void *ref, dnode_t **dnp); +int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, + void *ref, dnode_t **dnp); +boolean_t dnode_add_ref(dnode_t *dn, void *ref); +void dnode_rele(dnode_t *dn, void *ref); +void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); +void dnode_sync(dnode_t *dn, dmu_tx_t *tx); +void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); +void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); +void dnode_free(dnode_t *dn, dmu_tx_t *tx); +void dnode_byteswap(dnode_phys_t *dnp); +void dnode_buf_byteswap(void *buf, size_t size); +void dnode_verify(dnode_t *dn); +int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); 
+uint64_t dnode_current_max_length(dnode_t *dn); +void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); +void dnode_clear_range(dnode_t *dn, uint64_t blkid, + uint64_t nblks, dmu_tx_t *tx); +void dnode_diduse_space(dnode_t *dn, int64_t space); +void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx); +void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t); +uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid); +void dnode_init(void); +void dnode_fini(void); +int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, + int minlvl, uint64_t blkfill, uint64_t txg); +void dnode_evict_dbufs(dnode_t *dn); + +#ifdef ZFS_DEBUG + +/* + * There should be a ## between the string literal and fmt, to make it + * clear that we're joining two strings together, but that piece of shit + * gcc doesn't support that preprocessor token. + */ +#define dprintf_dnode(dn, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char __db_buf[32]; \ + uint64_t __db_obj = (dn)->dn_object; \ + if (__db_obj == DMU_META_DNODE_OBJECT) \ + (void) strcpy(__db_buf, "mdn"); \ + else \ + (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \ + (u_longlong_t)__db_obj);\ + dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \ + __db_buf, __VA_ARGS__); \ + } \ +_NOTE(CONSTCOND) } while (0) + +#define DNODE_VERIFY(dn) dnode_verify(dn) +#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx) + +#else + +#define dprintf_dnode(db, fmt, ...) 
+#define DNODE_VERIFY(dn) +#define FREE_VERIFY(db, start, end, tx) + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DNODE_H */ diff --git a/uts/common/fs/zfs/sys/dsl_dataset.h b/uts/common/fs/zfs/sys/dsl_dataset.h new file mode 100644 index 000000000000..22733d070e8b --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_dataset.h @@ -0,0 +1,283 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_DSL_DATASET_H +#define _SYS_DSL_DATASET_H + +#include <sys/dmu.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/zio.h> +#include <sys/bplist.h> +#include <sys/dsl_synctask.h> +#include <sys/zfs_context.h> +#include <sys/dsl_deadlist.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_dataset; +struct dsl_dir; +struct dsl_pool; + +#define DS_FLAG_INCONSISTENT (1ULL<<0) +#define DS_IS_INCONSISTENT(ds) \ + ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) +/* + * NB: nopromote can not yet be set, but we want support for it in this + * on-disk version, so that we don't need to upgrade for it later. 
It + * will be needed when we implement 'zfs split' (where the split off + * clone should not be promoted). + */ +#define DS_FLAG_NOPROMOTE (1ULL<<1) + +/* + * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly + * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE, + * refquota/refreservations). + */ +#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2) + +/* + * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called + * on a dataset. This allows the dataset to be destroyed using 'zfs release'. + */ +#define DS_FLAG_DEFER_DESTROY (1ULL<<3) +#define DS_IS_DEFER_DESTROY(ds) \ + ((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY) + +/* + * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose + * name lookups should be performed case-insensitively. + */ +#define DS_FLAG_CI_DATASET (1ULL<<16) + +typedef struct dsl_dataset_phys { + uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */ + uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */ + uint64_t ds_prev_snap_txg; + uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */ + uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */ + uint64_t ds_num_children; /* clone/snap children; ==0 for head */ + uint64_t ds_creation_time; /* seconds since 1970 */ + uint64_t ds_creation_txg; + uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */ + uint64_t ds_used_bytes; + uint64_t ds_compressed_bytes; + uint64_t ds_uncompressed_bytes; + uint64_t ds_unique_bytes; /* only relevant to snapshots */ + /* + * The ds_fsid_guid is a 56-bit ID that can change to avoid + * collisions. The ds_guid is a 64-bit ID that will never + * change, so there is a small probability that it will collide. 
+ */ + uint64_t ds_fsid_guid; + uint64_t ds_guid; + uint64_t ds_flags; /* DS_FLAG_* */ + blkptr_t ds_bp; + uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */ + uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */ + uint64_t ds_userrefs_obj; /* DMU_OT_USERREFS */ + uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */ +} dsl_dataset_phys_t; + +typedef struct dsl_dataset { + /* Immutable: */ + struct dsl_dir *ds_dir; + dsl_dataset_phys_t *ds_phys; + dmu_buf_t *ds_dbuf; + uint64_t ds_object; + uint64_t ds_fsid_guid; + + /* only used in syncing context, only valid for non-snapshots: */ + struct dsl_dataset *ds_prev; + + /* has internal locking: */ + dsl_deadlist_t ds_deadlist; + bplist_t ds_pending_deadlist; + + /* to protect against multiple concurrent incremental recv */ + kmutex_t ds_recvlock; + + /* protected by lock on pool's dp_dirty_datasets list */ + txg_node_t ds_dirty_link; + list_node_t ds_synced_link; + + /* + * ds_phys->ds_<accounting> is also protected by ds_lock. + * Protected by ds_lock: + */ + kmutex_t ds_lock; + objset_t *ds_objset; + uint64_t ds_userrefs; + + /* + * ds_owner is protected by the ds_rwlock and the ds_lock + */ + krwlock_t ds_rwlock; + kcondvar_t ds_exclusive_cv; + void *ds_owner; + + /* no locking; only for making guesses */ + uint64_t ds_trysnap_txg; + + /* for objset_open() */ + kmutex_t ds_opening_lock; + + uint64_t ds_reserved; /* cached refreservation */ + uint64_t ds_quota; /* cached refquota */ + + /* Protected by ds_lock; keep at end of struct for better locality */ + char ds_snapname[MAXNAMELEN]; +} dsl_dataset_t; + +struct dsl_ds_destroyarg { + dsl_dataset_t *ds; /* ds to destroy */ + dsl_dataset_t *rm_origin; /* also remove our origin? */ + boolean_t is_origin_rm; /* set if removing origin snap */ + boolean_t defer; /* destroy -d requested? */ + boolean_t releasing; /* destroying due to release? */ + boolean_t need_prep; /* do we need to retry due to EBUSY? 
*/ +}; + +/* + * The max length of a temporary tag prefix is the number of hex digits + * required to express UINT64_MAX plus one for the hyphen. + */ +#define MAX_TAG_PREFIX_LEN 17 + +struct dsl_ds_holdarg { + dsl_sync_task_group_t *dstg; + char *htag; + char *snapname; + boolean_t recursive; + boolean_t gotone; + boolean_t temphold; + char failed[MAXPATHLEN]; +}; + +#define dsl_dataset_is_snapshot(ds) \ + ((ds)->ds_phys->ds_num_children != 0) + +#define DS_UNIQUE_IS_ACCURATE(ds) \ + (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) + +int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); +int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, + void *tag, dsl_dataset_t **); +int dsl_dataset_own(const char *name, boolean_t inconsistentok, + void *tag, dsl_dataset_t **dsp); +int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, + boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); +void dsl_dataset_name(dsl_dataset_t *ds, char *name); +void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); +void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); +boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, + void *tag); +void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); +void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, + minor_t minor); +uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, + dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); +uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, + uint64_t flags, dmu_tx_t *tx); +int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); +int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); +dsl_checkfunc_t dsl_dataset_destroy_check; +dsl_syncfunc_t dsl_dataset_destroy_sync; +dsl_checkfunc_t dsl_dataset_snapshot_check; +dsl_syncfunc_t dsl_dataset_snapshot_sync; +dsl_syncfunc_t 
dsl_dataset_user_hold_sync; +int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); +int dsl_dataset_promote(const char *name, char *conflsnap); +int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, + boolean_t force); +int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, + boolean_t recursive, boolean_t temphold, int cleanup_fd); +int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, + boolean_t temphold); +int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, + boolean_t recursive); +int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, + char *htag, boolean_t retry); +int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); + +blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); +void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); + +spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds); + +boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds); + +void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx); + +void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, + dmu_tx_t *tx); +int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, + dmu_tx_t *tx, boolean_t async); +boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, + uint64_t blk_birth); +uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds); + +void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx); +void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv); +void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat); +void dsl_dataset_space(dsl_dataset_t *ds, + uint64_t *refdbytesp, uint64_t *availbytesp, + uint64_t *usedobjsp, uint64_t *availobjsp); +uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); + +int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); + +int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, + uint64_t asize, uint64_t inflight, uint64_t *used, + uint64_t 
*ref_rsrv); +int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, + uint64_t quota); +dsl_syncfunc_t dsl_dataset_set_quota_sync; +int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, + uint64_t reservation); + +int dsl_destroy_inconsistent(const char *dsname, void *arg); + +#ifdef ZFS_DEBUG /* dprintf_ds(): dprintf() with ds=<dataset name> prepended; only active when ZFS_DEBUG_DPRINTF is set in zfs_flags */ +#define dprintf_ds(ds, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \ + dsl_dataset_name(ds, __ds_name); \ + dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \ + kmem_free(__ds_name, MAXNAMELEN); \ + } \ +_NOTE(CONSTCOND) } while (0) +#else +#define dprintf_ds(dd, fmt, ...) /* expands to nothing when ZFS_DEBUG is not defined */ +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_DATASET_H */ diff --git a/uts/common/fs/zfs/sys/dsl_deadlist.h b/uts/common/fs/zfs/sys/dsl_deadlist.h new file mode 100644 index 000000000000..d2c16d72c17e --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_deadlist.h @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_DEADLIST_H +#define _SYS_DSL_DEADLIST_H + +#include <sys/bpobj.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dmu_buf; +struct dsl_dataset; + +typedef struct dsl_deadlist_phys { + uint64_t dl_used; + uint64_t dl_comp; + uint64_t dl_uncomp; + uint64_t dl_pad[37]; /* pad out to 320b for future expansion */ +} dsl_deadlist_phys_t; + +typedef struct dsl_deadlist { + objset_t *dl_os; + uint64_t dl_object; + avl_tree_t dl_tree; + boolean_t dl_havetree; + struct dmu_buf *dl_dbuf; + dsl_deadlist_phys_t *dl_phys; + kmutex_t dl_lock; + + /* if it's the old on-disk format: */ + bpobj_t dl_bpobj; + boolean_t dl_oldfmt; +} dsl_deadlist_t; + +typedef struct dsl_deadlist_entry { + avl_node_t dle_node; + uint64_t dle_mintxg; + bpobj_t dle_bpobj; +} dsl_deadlist_entry_t; + +void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object); +void dsl_deadlist_close(dsl_deadlist_t *dl); +uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx); +void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx); +void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx); +void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx); +void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx); +uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, + uint64_t mrs_obj, dmu_tx_t *tx); +void dsl_deadlist_space(dsl_deadlist_t *dl, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); +void dsl_deadlist_space_range(dsl_deadlist_t *dl, + uint64_t mintxg, uint64_t maxtxg, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); +void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx); +void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, + dmu_tx_t *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_DEADLIST_H */ diff --git a/uts/common/fs/zfs/sys/dsl_deleg.h b/uts/common/fs/zfs/sys/dsl_deleg.h new 
file mode 100644 index 000000000000..73c43bd23879 --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_deleg.h @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_DELEG_H +#define _SYS_DSL_DELEG_H + +#include <sys/dmu.h> +#include <sys/dsl_pool.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZFS_DELEG_PERM_NONE "" +#define ZFS_DELEG_PERM_CREATE "create" +#define ZFS_DELEG_PERM_DESTROY "destroy" +#define ZFS_DELEG_PERM_SNAPSHOT "snapshot" +#define ZFS_DELEG_PERM_ROLLBACK "rollback" +#define ZFS_DELEG_PERM_CLONE "clone" +#define ZFS_DELEG_PERM_PROMOTE "promote" +#define ZFS_DELEG_PERM_RENAME "rename" +#define ZFS_DELEG_PERM_MOUNT "mount" +#define ZFS_DELEG_PERM_SHARE "share" +#define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_RECEIVE "receive" +#define ZFS_DELEG_PERM_ALLOW "allow" +#define ZFS_DELEG_PERM_USERPROP "userprop" +#define ZFS_DELEG_PERM_VSCAN "vscan" +#define ZFS_DELEG_PERM_USERQUOTA "userquota" +#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" +#define ZFS_DELEG_PERM_USERUSED "userused" +#define ZFS_DELEG_PERM_GROUPUSED "groupused" +#define ZFS_DELEG_PERM_HOLD "hold" +#define ZFS_DELEG_PERM_RELEASE "release" +#define ZFS_DELEG_PERM_DIFF "diff" + +/* + * Note: the names of properties that are marked delegatable are also + * valid delegated permissions + */ + +int dsl_deleg_get(const char *ddname, nvlist_t **nvp); +int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); +int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); +void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); +int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); +int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); +int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx); +boolean_t dsl_delegation_on(objset_t *os); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_DELEG_H */ diff --git a/uts/common/fs/zfs/sys/dsl_dir.h b/uts/common/fs/zfs/sys/dsl_dir.h new file mode 100644 index 
000000000000..2191635dd813 --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_dir.h @@ -0,0 +1,167 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_DIR_H +#define _SYS_DSL_DIR_H + +#include <sys/dmu.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> +#include <sys/refcount.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_dataset; + +typedef enum dd_used { + DD_USED_HEAD, + DD_USED_SNAP, + DD_USED_CHILD, + DD_USED_CHILD_RSRV, + DD_USED_REFRSRV, + DD_USED_NUM +} dd_used_t; + +#define DD_FLAG_USED_BREAKDOWN (1<<0) + +typedef struct dsl_dir_phys { + uint64_t dd_creation_time; /* not actually used */ + uint64_t dd_head_dataset_obj; + uint64_t dd_parent_obj; + uint64_t dd_origin_obj; + uint64_t dd_child_dir_zapobj; + /* + * how much space our children are accounting for; for leaf + * datasets, == physical space used by fs + snaps + */ + uint64_t dd_used_bytes; + uint64_t dd_compressed_bytes; + uint64_t dd_uncompressed_bytes; + /* Administrative quota setting */ + uint64_t dd_quota; + /* Administrative reservation setting */ + uint64_t dd_reserved; + uint64_t dd_props_zapobj; + uint64_t dd_deleg_zapobj; /* dataset delegation permissions */ + uint64_t dd_flags; + uint64_t dd_used_breakdown[DD_USED_NUM]; + uint64_t dd_clones; /* dsl_dir objects */ + uint64_t dd_pad[13]; /* pad out to 256 bytes for good measure */ +} dsl_dir_phys_t; + +struct dsl_dir { + /* These are immutable; no lock needed: */ + uint64_t dd_object; + dsl_dir_phys_t *dd_phys; + dmu_buf_t *dd_dbuf; + dsl_pool_t *dd_pool; + + /* protected by lock on pool's dp_dirty_dirs list */ + txg_node_t dd_dirty_link; + + /* protected by dp_config_rwlock */ + dsl_dir_t *dd_parent; + + /* Protected by dd_lock */ + kmutex_t dd_lock; + list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */ + timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */ + uint64_t dd_origin_txg; + + /* gross estimate of space used by in-flight tx's */ + uint64_t dd_tempreserved[TXG_SIZE]; + /* amount of space we expect to write; == amount of dirty data */ + int64_t dd_space_towrite[TXG_SIZE]; + + /* 
protected by dd_lock; keep at end of struct for better locality */ + char dd_myname[MAXNAMELEN]; +}; + +void dsl_dir_close(dsl_dir_t *dd, void *tag); +int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail); +int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **, + const char **tailp); +int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, + const char *tail, void *tag, dsl_dir_t **); +void dsl_dir_name(dsl_dir_t *dd, char *buf); +int dsl_dir_namelen(dsl_dir_t *dd); +uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, + const char *name, dmu_tx_t *tx); +dsl_checkfunc_t dsl_dir_destroy_check; +dsl_syncfunc_t dsl_dir_destroy_sync; +void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv); +uint64_t dsl_dir_space_available(dsl_dir_t *dd, + dsl_dir_t *ancestor, int64_t delta, int ondiskonly); +void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx); +void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx); +int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem, + uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep, + dmu_tx_t *tx); +void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx); +void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx); +void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, + int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx); +void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, + dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx); +int dsl_dir_set_quota(const char *ddname, zprop_source_t source, + uint64_t quota); +int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, + uint64_t reservation); +int dsl_dir_rename(dsl_dir_t *dd, const char *newname); +int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space); +int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx); +boolean_t dsl_dir_is_clone(dsl_dir_t *dd); +void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds, + uint64_t 
reservation, cred_t *cr, dmu_tx_t *tx); +void dsl_dir_snap_cmtime_update(dsl_dir_t *dd); +timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd); + +/* internal reserved dir name */ +#define MOS_DIR_NAME "$MOS" +#define ORIGIN_DIR_NAME "$ORIGIN" +#define XLATION_DIR_NAME "$XLATION" +#define FREE_DIR_NAME "$FREE" + +#ifdef ZFS_DEBUG +#define dprintf_dd(dd, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \ + KM_SLEEP); \ + dsl_dir_name(dd, __ds_name); \ + dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \ + kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \ + } \ +_NOTE(CONSTCOND) } while (0) +#else +#define dprintf_dd(dd, fmt, ...) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_DIR_H */ diff --git a/uts/common/fs/zfs/sys/dsl_pool.h b/uts/common/fs/zfs/sys/dsl_pool.h new file mode 100644 index 000000000000..7d25bd7c020d --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_pool.h @@ -0,0 +1,151 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_POOL_H +#define _SYS_DSL_POOL_H + +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/txg_impl.h> +#include <sys/zfs_context.h> +#include <sys/zio.h> +#include <sys/dnode.h> +#include <sys/ddt.h> +#include <sys/arc.h> +#include <sys/bpobj.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct objset; +struct dsl_dir; +struct dsl_dataset; +struct dsl_pool; +struct dmu_tx; +struct dsl_scan; + +/* These macros are for indexing into the zfs_all_blkstats_t. */ +#define DMU_OT_DEFERRED DMU_OT_NONE +#define DMU_OT_TOTAL DMU_OT_NUMTYPES + +typedef struct zfs_blkstat { + uint64_t zb_count; + uint64_t zb_asize; + uint64_t zb_lsize; + uint64_t zb_psize; + uint64_t zb_gangs; + uint64_t zb_ditto_2_of_2_samevdev; + uint64_t zb_ditto_2_of_3_samevdev; + uint64_t zb_ditto_3_of_3_samevdev; +} zfs_blkstat_t; + +typedef struct zfs_all_blkstats { + zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1]; /* presumably indexed [block level][DMU object type, incl. DMU_OT_DEFERRED/DMU_OT_TOTAL] -- TODO confirm */ +} zfs_all_blkstats_t; + + +typedef struct dsl_pool { + /* Immutable */ + spa_t *dp_spa; + struct objset *dp_meta_objset; + struct dsl_dir *dp_root_dir; + struct dsl_dir *dp_mos_dir; + struct dsl_dir *dp_free_dir; + struct dsl_dataset *dp_origin_snap; + uint64_t dp_root_dir_obj; + struct taskq *dp_vnrele_taskq; + + /* No lock needed - sync context only */ + blkptr_t dp_meta_rootbp; + list_t dp_synced_datasets; + hrtime_t dp_read_overhead; + uint64_t dp_throughput; /* bytes per millisec */ + uint64_t dp_write_limit; + uint64_t dp_tmp_userrefs_obj; + bpobj_t dp_free_bpobj; + + struct dsl_scan *dp_scan; + + /* Uses dp_lock */ + kmutex_t dp_lock; + uint64_t dp_space_towrite[TXG_SIZE]; + uint64_t dp_tempreserved[TXG_SIZE]; + + /* Has its own locking */ + tx_state_t dp_tx; + txg_list_t dp_dirty_datasets; + txg_list_t dp_dirty_dirs; + txg_list_t dp_sync_tasks; + + /* + * Protects administrative changes (properties, namespace). + * It is only held for write in syncing context. 
Therefore + * syncing context does not need to ever have it for read, since + * nobody else could possibly have it for write. + */ + krwlock_t dp_config_rwlock; + + zfs_all_blkstats_t *dp_blkstats; +} dsl_pool_t; + +int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); +void dsl_pool_close(dsl_pool_t *dp); +dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); +void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); +void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); +int dsl_pool_sync_context(dsl_pool_t *dp); +uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree); +uint64_t dsl_pool_adjustedfree(dsl_pool_t *dp, boolean_t netfree); +int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx); +void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx); +void dsl_pool_memory_pressure(dsl_pool_t *dp); +void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx); +void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp); +void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, + const blkptr_t *bpp); +int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb); +int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp, + arc_done_func_t *done, void *private, int priority, int zio_flags, + uint32_t *arc_flags, const zbookmark_t *zb); +void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx); +void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); +void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx); + +taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp); + +extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, + const char *tag, uint64_t *now, dmu_tx_t *tx); +extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, + const char *tag, dmu_tx_t *tx); +extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); 
+int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_POOL_H */ diff --git a/uts/common/fs/zfs/sys/dsl_prop.h b/uts/common/fs/zfs/sys/dsl_prop.h new file mode 100644 index 000000000000..a636ad35096b --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_prop.h @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_DSL_PROP_H +#define _SYS_DSL_PROP_H + +#include <sys/dmu.h> +#include <sys/dsl_pool.h> +#include <sys/zfs_context.h> +#include <sys/dsl_synctask.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_dataset; +struct dsl_dir; + +/* The callback func may not call into the DMU or DSL! 
*/ +typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval); + +typedef struct dsl_prop_cb_record { + list_node_t cbr_node; /* link on dd_prop_cbs */ + struct dsl_dataset *cbr_ds; + const char *cbr_propname; + dsl_prop_changed_cb_t *cbr_func; + void *cbr_arg; +} dsl_prop_cb_record_t; + +typedef struct dsl_props_arg { + nvlist_t *pa_props; + zprop_source_t pa_source; +} dsl_props_arg_t; + +typedef struct dsl_prop_set_arg { + const char *psa_name; + zprop_source_t psa_source; + int psa_intsz; + int psa_numints; + const void *psa_value; + + /* + * Used to handle the special requirements of the quota and reservation + * properties. + */ + uint64_t psa_effective_value; +} dsl_prop_setarg_t; + +int dsl_prop_register(struct dsl_dataset *ds, const char *propname, + dsl_prop_changed_cb_t *callback, void *cbarg); +int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname, + dsl_prop_changed_cb_t *callback, void *cbarg); +int dsl_prop_numcb(struct dsl_dataset *ds); + +int dsl_prop_get(const char *ddname, const char *propname, + int intsz, int numints, void *buf, char *setpoint); +int dsl_prop_get_integer(const char *ddname, const char *propname, + uint64_t *valuep, char *setpoint); +int dsl_prop_get_all(objset_t *os, nvlist_t **nvp); +int dsl_prop_get_received(objset_t *os, nvlist_t **nvp); +int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname, + int intsz, int numints, void *buf, char *setpoint); +int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname, + int intsz, int numints, void *buf, char *setpoint, + boolean_t snapshot); + +dsl_syncfunc_t dsl_props_set_sync; +int dsl_prop_set(const char *ddname, const char *propname, + zprop_source_t source, int intsz, int numints, const void *buf); +int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); +void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, + dmu_tx_t *tx); + +void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char 
*propname, + zprop_source_t source, uint64_t *value); +int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa); +#ifdef ZFS_DEBUG +void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa); +#define DSL_PROP_CHECK_PREDICTION(dd, psa) \ + dsl_prop_check_prediction((dd), (psa)) +#else +#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */ +#endif + +/* flag first receive on or after SPA_VERSION_RECVD_PROPS */ +boolean_t dsl_prop_get_hasrecvd(objset_t *os); +void dsl_prop_set_hasrecvd(objset_t *os); +void dsl_prop_unset_hasrecvd(objset_t *os); + +void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value); +void dsl_prop_nvlist_add_string(nvlist_t *nv, + zfs_prop_t prop, const char *value); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_PROP_H */ diff --git a/uts/common/fs/zfs/sys/dsl_scan.h b/uts/common/fs/zfs/sys/dsl_scan.h new file mode 100644 index 000000000000..c79666e67de0 --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_scan.h @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_SCAN_H +#define _SYS_DSL_SCAN_H + +#include <sys/zfs_context.h> +#include <sys/zio.h> +#include <sys/ddt.h> +#include <sys/bplist.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct objset; +struct dsl_dir; +struct dsl_dataset; +struct dsl_pool; +struct dmu_tx; + +/* + * All members of this structure must be uint64_t, for byteswap + * purposes. + */ +typedef struct dsl_scan_phys { + uint64_t scn_func; /* pool_scan_func_t */ + uint64_t scn_state; /* dsl_scan_state_t */ + uint64_t scn_queue_obj; + uint64_t scn_min_txg; + uint64_t scn_max_txg; + uint64_t scn_cur_min_txg; + uint64_t scn_cur_max_txg; + uint64_t scn_start_time; + uint64_t scn_end_time; + uint64_t scn_to_examine; /* total bytes to be scanned */ + uint64_t scn_examined; /* bytes scanned so far */ + uint64_t scn_to_process; + uint64_t scn_processed; + uint64_t scn_errors; /* scan I/O error count */ + uint64_t scn_ddt_class_max; + ddt_bookmark_t scn_ddt_bookmark; + zbookmark_t scn_bookmark; + uint64_t scn_flags; /* dsl_scan_flags_t */ +} dsl_scan_phys_t; + +#define SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t)) /* size of dsl_scan_phys_t counted in uint64_t words */ + +typedef enum dsl_scan_flags { + DSF_VISIT_DS_AGAIN = 1<<0, +} dsl_scan_flags_t; + +typedef struct dsl_scan { + struct dsl_pool *scn_dp; + + boolean_t scn_pausing; + uint64_t scn_restart_txg; + uint64_t scn_sync_start_time; + zio_t *scn_zio_root; + + /* for debugging / information */ + uint64_t scn_visited_this_txg; + + dsl_scan_phys_t scn_phys; +} dsl_scan_t; + +int dsl_scan_init(struct dsl_pool *dp, uint64_t txg); +void dsl_scan_fini(struct dsl_pool *dp); +void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); +int dsl_scan_cancel(struct dsl_pool *); +int dsl_scan(struct dsl_pool *, pool_scan_func_t); +void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); +boolean_t dsl_scan_resilvering(struct dsl_pool *dp); +boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); +void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, + ddt_entry_t 
*dde, dmu_tx_t *tx); +void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx); +void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx); +void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, + struct dmu_tx *tx); +boolean_t dsl_scan_active(dsl_scan_t *scn); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_SCAN_H */ diff --git a/uts/common/fs/zfs/sys/dsl_synctask.h b/uts/common/fs/zfs/sys/dsl_synctask.h new file mode 100644 index 000000000000..9126290cdb5b --- /dev/null +++ b/uts/common/fs/zfs/sys/dsl_synctask.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_SYNCTASK_H +#define _SYS_DSL_SYNCTASK_H + +#include <sys/txg.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_pool; + +typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *); +typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *); + +typedef struct dsl_sync_task { + list_node_t dst_node; + dsl_checkfunc_t *dst_checkfunc; + dsl_syncfunc_t *dst_syncfunc; + void *dst_arg1; + void *dst_arg2; + int dst_err; +} dsl_sync_task_t; + +typedef struct dsl_sync_task_group { + txg_node_t dstg_node; + list_t dstg_tasks; + struct dsl_pool *dstg_pool; + uint64_t dstg_txg; + int dstg_err; + int dstg_space; + boolean_t dstg_nowaiter; +} dsl_sync_task_group_t; + +dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp); +void dsl_sync_task_create(dsl_sync_task_group_t *dstg, + dsl_checkfunc_t *, dsl_syncfunc_t *, + void *arg1, void *arg2, int blocks_modified); +int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg); +void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); +void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg); +void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); + +int dsl_sync_task_do(struct dsl_pool *dp, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified); +void dsl_sync_task_do_nowait(struct dsl_pool *dp, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_SYNCTASK_H */ diff --git a/uts/common/fs/zfs/sys/metaslab.h b/uts/common/fs/zfs/sys/metaslab.h new file mode 100644 index 000000000000..583d6303bd5a --- /dev/null +++ b/uts/common/fs/zfs/sys/metaslab.h @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_METASLAB_H +#define _SYS_METASLAB_H + +#include <sys/spa.h> +#include <sys/space_map.h> +#include <sys/txg.h> +#include <sys/zio.h> +#include <sys/avl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern space_map_ops_t *zfs_metaslab_ops; + +extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, + uint64_t start, uint64_t size, uint64_t txg); +extern void metaslab_fini(metaslab_t *msp); +extern void metaslab_sync(metaslab_t *msp, uint64_t txg); +extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg); +extern void metaslab_sync_reassess(metaslab_group_t *mg); + +#define METASLAB_HINTBP_FAVOR 0x0 +#define METASLAB_HINTBP_AVOID 0x1 +#define METASLAB_GANG_HEADER 0x2 + +extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, + blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags); +extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, + boolean_t now); +extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); + +extern metaslab_class_t *metaslab_class_create(spa_t *spa, + space_map_ops_t *ops); +extern void metaslab_class_destroy(metaslab_class_t *mc); +extern int 
metaslab_class_validate(metaslab_class_t *mc); + +extern void metaslab_class_space_update(metaslab_class_t *mc, + int64_t alloc_delta, int64_t defer_delta, + int64_t space_delta, int64_t dspace_delta); +extern uint64_t metaslab_class_get_alloc(metaslab_class_t *mc); +extern uint64_t metaslab_class_get_space(metaslab_class_t *mc); +extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc); +extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc); + +extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc, + vdev_t *vd); +extern void metaslab_group_destroy(metaslab_group_t *mg); +extern void metaslab_group_activate(metaslab_group_t *mg); +extern void metaslab_group_passivate(metaslab_group_t *mg); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_METASLAB_H */ diff --git a/uts/common/fs/zfs/sys/metaslab_impl.h b/uts/common/fs/zfs/sys/metaslab_impl.h new file mode 100644 index 000000000000..07988dd51a73 --- /dev/null +++ b/uts/common/fs/zfs/sys/metaslab_impl.h @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_METASLAB_IMPL_H +#define _SYS_METASLAB_IMPL_H + +#include <sys/metaslab.h> +#include <sys/space_map.h> +#include <sys/vdev.h> +#include <sys/txg.h> +#include <sys/avl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct metaslab_class { + spa_t *mc_spa; + metaslab_group_t *mc_rotor; + space_map_ops_t *mc_ops; + uint64_t mc_aliquot; + uint64_t mc_alloc; /* total allocated space */ + uint64_t mc_deferred; /* total deferred frees */ + uint64_t mc_space; /* total space (alloc + free) */ + uint64_t mc_dspace; /* total deflated space */ +}; + +struct metaslab_group { + kmutex_t mg_lock; + avl_tree_t mg_metaslab_tree; + uint64_t mg_aliquot; + uint64_t mg_bonus_area; + int64_t mg_bias; + int64_t mg_activation_count; + metaslab_class_t *mg_class; + vdev_t *mg_vd; + metaslab_group_t *mg_prev; + metaslab_group_t *mg_next; +}; + +/* + * Each metaslab's free space is tracked in space map object in the MOS, + * which is only updated in syncing context. Each time we sync a txg, + * we append the allocs and frees from that txg to the space map object. + * When the txg is done syncing, metaslab_sync_done() updates ms_smo + * to ms_smo_syncing. Everything in ms_smo is always safe to allocate. + */ +struct metaslab { + kmutex_t ms_lock; /* metaslab lock */ + space_map_obj_t ms_smo; /* synced space map object */ + space_map_obj_t ms_smo_syncing; /* syncing space map object */ + space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */ + space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */ + space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */ + space_map_t ms_map; /* in-core free space map */ + int64_t ms_deferspace; /* sum of ms_defermap[] space */ + uint64_t ms_weight; /* weight vs. 
others in group */ + metaslab_group_t *ms_group; /* metaslab group */ + avl_node_t ms_group_node; /* node in metaslab group tree */ + txg_node_t ms_txg_node; /* per-txg dirty metaslab links */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_METASLAB_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/refcount.h b/uts/common/fs/zfs/sys/refcount.h new file mode 100644 index 000000000000..1752c64e3e8b --- /dev/null +++ b/uts/common/fs/zfs/sys/refcount.h @@ -0,0 +1,107 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_REFCOUNT_H +#define _SYS_REFCOUNT_H + +#include <sys/inttypes.h> +#include <sys/list.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * If the reference is held only by the calling function and not any + * particular object, use FTAG (which is a string) for the holder_tag. + * Otherwise, use the object that holds the reference. 
+ */ +#define FTAG ((char *)__func__) + +#ifdef ZFS_DEBUG +typedef struct reference { + list_node_t ref_link; + void *ref_holder; + uint64_t ref_number; + uint8_t *ref_removed; +} reference_t; + +typedef struct refcount { + kmutex_t rc_mtx; + list_t rc_list; + list_t rc_removed; + int64_t rc_count; + int64_t rc_removed_count; +} refcount_t; + +/* Note: refcount_t must be initialized with refcount_create() */ + +void refcount_create(refcount_t *rc); +void refcount_destroy(refcount_t *rc); +void refcount_destroy_many(refcount_t *rc, uint64_t number); +int refcount_is_zero(refcount_t *rc); +int64_t refcount_count(refcount_t *rc); +int64_t refcount_add(refcount_t *rc, void *holder_tag); +int64_t refcount_remove(refcount_t *rc, void *holder_tag); +int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag); +int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag); +void refcount_transfer(refcount_t *dst, refcount_t *src); + +void refcount_init(void); +void refcount_fini(void); + +#else /* ZFS_DEBUG */ + +typedef struct refcount { + uint64_t rc_count; +} refcount_t; + +#define refcount_create(rc) ((rc)->rc_count = 0) +#define refcount_destroy(rc) ((rc)->rc_count = 0) +#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0) +#define refcount_is_zero(rc) ((rc)->rc_count == 0) +#define refcount_count(rc) ((rc)->rc_count) +#define refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1) +#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1) +#define refcount_add_many(rc, number, holder) \ + atomic_add_64_nv(&(rc)->rc_count, (number)) +#define refcount_remove_many(rc, number, holder) \ + atomic_add_64_nv(&(rc)->rc_count, -(number)) +#define refcount_transfer(dst, src) { \ + uint64_t __tmp = (src)->rc_count; \ + atomic_add_64(&(src)->rc_count, -__tmp); \ + atomic_add_64(&(dst)->rc_count, __tmp); \ +} + +#define refcount_init() +#define refcount_fini() + +#endif /* ZFS_DEBUG */ + +#ifdef __cplusplus +} +#endif 
+ +#endif /* _SYS_REFCOUNT_H */ diff --git a/uts/common/fs/zfs/sys/rrwlock.h b/uts/common/fs/zfs/sys/rrwlock.h new file mode 100644 index 000000000000..19a43c97fc3c --- /dev/null +++ b/uts/common/fs/zfs/sys/rrwlock.h @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_RR_RW_LOCK_H +#define _SYS_RR_RW_LOCK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/inttypes.h> +#include <sys/zfs_context.h> +#include <sys/refcount.h> + +/* + * A reader-writer lock implementation that allows re-entrant reads, but + * still gives writers priority on "new" reads. + * + * See rrwlock.c for more details about the implementation. 
+ * + * Fields of the rrwlock_t structure: + * - rr_lock: protects modification and reading of rrwlock_t fields + * - rr_cv: cv for waking up readers or waiting writers + * - rr_writer: thread id of the current writer + * - rr_anon_rcount: number of active anonymous readers + * - rr_linked_rcount: total number of non-anonymous active readers + * - rr_writer_wanted: a writer wants the lock + */ +typedef struct rrwlock { + kmutex_t rr_lock; + kcondvar_t rr_cv; + kthread_t *rr_writer; + refcount_t rr_anon_rcount; + refcount_t rr_linked_rcount; + boolean_t rr_writer_wanted; +} rrwlock_t; + +/* + * 'tag' is used in reference counting tracking. The + * 'tag' must be the same in a rrw_enter() as in its + * corresponding rrw_exit(). + */ +void rrw_init(rrwlock_t *rrl); +void rrw_destroy(rrwlock_t *rrl); +void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); +void rrw_exit(rrwlock_t *rrl, void *tag); +boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); + +#define RRW_READ_HELD(x) rrw_held(x, RW_READER) +#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_RR_RW_LOCK_H */ diff --git a/uts/common/fs/zfs/sys/sa.h b/uts/common/fs/zfs/sys/sa.h new file mode 100644 index 000000000000..bc89fa07d222 --- /dev/null +++ b/uts/common/fs/zfs/sys/sa.h @@ -0,0 +1,170 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_SA_H +#define _SYS_SA_H + +#include <sys/dmu.h> + +/* + * Currently available byteswap functions. + * If at all possible, new attributes should use + * one of the already defined byteswap functions. + * If a new byteswap function is added then the + * ZPL/Pool version will need to be bumped. + */ + +typedef enum sa_bswap_type { + SA_UINT64_ARRAY, + SA_UINT32_ARRAY, + SA_UINT16_ARRAY, + SA_UINT8_ARRAY, + SA_ACL, +} sa_bswap_type_t; + +typedef uint16_t sa_attr_type_t; + +/* + * Attribute to register support for. + */ +typedef struct sa_attr_reg { + char *sa_name; /* attribute name */ + uint16_t sa_length; + sa_bswap_type_t sa_byteswap; /* bswap function enum */ + sa_attr_type_t sa_attr; /* filled in during registration */ +} sa_attr_reg_t; + + +typedef void (sa_data_locator_t)(void **, uint32_t *, uint32_t, + boolean_t, void *userptr); + +/* + * array of attributes to store. + * + * This array should be treated as opaque/private data. + * The SA_ADD_BULK_ATTR() macro should be used for manipulating + * the array. 
+ * + * When sa_replace_all_by_template() is used the attributes + * will be stored in the order defined in the array, except that + * the attributes may be split between the bonus and the spill buffer + * + */ +typedef struct sa_bulk_attr { + void *sa_data; + sa_data_locator_t *sa_data_func; + uint16_t sa_length; + sa_attr_type_t sa_attr; + /* the following are private to the sa framework */ + void *sa_addr; + uint16_t sa_buftype; + uint16_t sa_size; +} sa_bulk_attr_t; + + +/* + * special macro for adding entries for bulk attr support + * bulk - sa_bulk_attr_t + * count - integer that will be incremented during each add + * attr - attribute to manipulate + * func - function for accessing data. + * data - pointer to data. + * len - length of data + */ + +#define SA_ADD_BULK_ATTR(b, idx, attr, func, data, len) \ +{ \ + b[idx].sa_attr = attr;\ + b[idx].sa_data_func = func; \ + b[idx].sa_data = data; \ + b[idx++].sa_length = len; \ +} + +typedef struct sa_os sa_os_t; + +typedef enum sa_handle_type { + SA_HDL_SHARED, + SA_HDL_PRIVATE +} sa_handle_type_t; + +struct sa_handle; +typedef void *sa_lookup_tab_t; +typedef struct sa_handle sa_handle_t; + +typedef void (sa_update_cb_t)(sa_handle_t *, dmu_tx_t *tx); + +int sa_handle_get(objset_t *, uint64_t, void *userp, + sa_handle_type_t, sa_handle_t **); +int sa_handle_get_from_db(objset_t *, dmu_buf_t *, void *userp, + sa_handle_type_t, sa_handle_t **); +void sa_handle_destroy(sa_handle_t *); +int sa_buf_hold(objset_t *, uint64_t, void *, dmu_buf_t **); +void sa_buf_rele(dmu_buf_t *, void *); +int sa_lookup(sa_handle_t *, sa_attr_type_t, void *buf, uint32_t buflen); +int sa_update(sa_handle_t *, sa_attr_type_t, void *buf, + uint32_t buflen, dmu_tx_t *); +int sa_remove(sa_handle_t *, sa_attr_type_t, dmu_tx_t *); +int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count); +int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count); +int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *); 
+int sa_size(sa_handle_t *, sa_attr_type_t, int *); +int sa_update_from_cb(sa_handle_t *, sa_attr_type_t, + uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *); +void sa_object_info(sa_handle_t *, dmu_object_info_t *); +void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *); +void sa_update_user(sa_handle_t *, sa_handle_t *); +void *sa_get_userdata(sa_handle_t *); +void sa_set_userp(sa_handle_t *, void *); +dmu_buf_t *sa_get_db(sa_handle_t *); +uint64_t sa_handle_object(sa_handle_t *); +boolean_t sa_attr_would_spill(sa_handle_t *, sa_attr_type_t, int size); +void sa_register_update_callback(objset_t *, sa_update_cb_t *); +int sa_setup(objset_t *, uint64_t, sa_attr_reg_t *, int, sa_attr_type_t **); +void sa_tear_down(objset_t *); +int sa_replace_all_by_template(sa_handle_t *, sa_bulk_attr_t *, + int, dmu_tx_t *); +int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *, + int, dmu_tx_t *); +boolean_t sa_enabled(objset_t *); +void sa_cache_init(void); +void sa_cache_fini(void); +int sa_set_sa_object(objset_t *, uint64_t); +int sa_hdrsize(void *); +void sa_handle_lock(sa_handle_t *); +void sa_handle_unlock(sa_handle_t *); + +#ifdef _KERNEL +int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *); +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SA_H */ diff --git a/uts/common/fs/zfs/sys/sa_impl.h b/uts/common/fs/zfs/sys/sa_impl.h new file mode 100644 index 000000000000..6661e47cfc83 --- /dev/null +++ b/uts/common/fs/zfs/sys/sa_impl.h @@ -0,0 +1,287 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_SA_IMPL_H +#define _SYS_SA_IMPL_H + +#include <sys/dmu.h> +#include <sys/refcount.h> +#include <sys/list.h> + +/* + * Array of known attributes and their + * various characteristics. + */ +typedef struct sa_attr_table { + sa_attr_type_t sa_attr; + uint8_t sa_registered; + uint16_t sa_length; + sa_bswap_type_t sa_byteswap; + char *sa_name; +} sa_attr_table_t; + +/* + * Zap attribute format for attribute registration + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | unused | len | bswap | attr num | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Zap attribute format for layout information. + * + * layout information is stored as an array of attribute numbers + * The name of the attribute is the layout number (0, 1, 2, ...) + * + * 16 0 + * +---- ---+ + * | attr # | + * +--------+ + * | attr # | + * +--- ----+ + * ...... 
+ * + */ + +#define ATTR_BSWAP(x) BF32_GET(x, 16, 8) +#define ATTR_LENGTH(x) BF32_GET(x, 24, 16) +#define ATTR_NUM(x) BF32_GET(x, 0, 16) +#define ATTR_ENCODE(x, attr, length, bswap) \ +{ \ + BF64_SET(x, 24, 16, length); \ + BF64_SET(x, 16, 8, bswap); \ + BF64_SET(x, 0, 16, attr); \ +} + +#define TOC_OFF(x) BF32_GET(x, 0, 23) +#define TOC_ATTR_PRESENT(x) BF32_GET(x, 31, 1) +#define TOC_LEN_IDX(x) BF32_GET(x, 24, 4) +#define TOC_ATTR_ENCODE(x, len_idx, offset) \ +{ \ + BF32_SET(x, 31, 1, 1); \ + BF32_SET(x, 24, 7, len_idx); \ + BF32_SET(x, 0, 24, offset); \ +} + +#define SA_LAYOUTS "LAYOUTS" +#define SA_REGISTRY "REGISTRY" + +/* + * Each unique layout will have their own table + * sa_lot (layout_table) + */ +typedef struct sa_lot { + avl_node_t lot_num_node; + avl_node_t lot_hash_node; + uint64_t lot_num; + uint64_t lot_hash; + sa_attr_type_t *lot_attrs; /* array of attr #'s */ + uint32_t lot_var_sizes; /* how many aren't fixed size */ + uint32_t lot_attr_count; /* total attr count */ + list_t lot_idx_tab; /* should be only a couple of entries */ + int lot_instance; /* used with lot_hash to identify entry */ +} sa_lot_t; + +/* index table of offsets */ +typedef struct sa_idx_tab { + list_node_t sa_next; + sa_lot_t *sa_layout; + uint16_t *sa_variable_lengths; + refcount_t sa_refcount; + uint32_t *sa_idx_tab; /* array of offsets */ +} sa_idx_tab_t; + +/* + * Since the offset/index information into the actual data + * will usually be identical we can share that information with + * all handles that have the exact same offsets. + * + * You would typically only have a large number of different table of + * contents if you had a several variable sized attributes. + * + * Two AVL trees are used to track the attribute layout numbers. + * one is keyed by number and will be consulted when a DMU_OT_SA + * object is first read. 
The second tree is keyed by the hash signature + * of the attributes and will be consulted when an attribute is added + * to determine if we already have an instance of that layout. Both + * of these trees are interconnected. The only difference is that + * when an entry is found in the "hash" tree the list of attributes will + * need to be compared against the list of attributes you have in hand. + * The assumption is that typically attributes will just be updated and + * adding a completely new attribute is a very rare operation. + */ +struct sa_os { + kmutex_t sa_lock; + boolean_t sa_need_attr_registration; + boolean_t sa_force_spill; + uint64_t sa_master_obj; + uint64_t sa_reg_attr_obj; + uint64_t sa_layout_attr_obj; + int sa_num_attrs; + sa_attr_table_t *sa_attr_table; /* private attr table */ + sa_update_cb_t *sa_update_cb; + avl_tree_t sa_layout_num_tree; /* keyed by layout number */ + avl_tree_t sa_layout_hash_tree; /* keyed by layout hash value */ + int sa_user_table_sz; + sa_attr_type_t *sa_user_table; /* user name->attr mapping table */ +}; + +/* + * header for all bonus and spill buffers. + * The header has a fixed portion with a variable number + * of "lengths" depending on the number of variable sized + * attributes which are determined by the "layout number" + */ + +#define SA_MAGIC 0x2F505A /* ZFS SA */ +typedef struct sa_hdr_phys { + uint32_t sa_magic; + uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */ + uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */ + /* ... Data follows the lengths. */ +} sa_hdr_phys_t; + +/* + * sa_hdr_phys -> sa_layout_info + * + * 16 10 0 + * +--------+-------+ + * | hdrsz |layout | + * +--------+-------+ + * + * Bits 0-9 are the layout number + * Bits 10-15 are the size of the header. + * The hdrsize is the number * 8 + * + * For example. 
+ * hdrsz of 1 ==> 8 byte header + * 2 ==> 16 byte header + * + */ + +#define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) +#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) +#define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ +{ \ + BF32_SET_SB(x, 10, 6, 3, 0, size); \ + BF32_SET(x, 0, 10, num); \ +} + +typedef enum sa_buf_type { + SA_BONUS = 1, + SA_SPILL = 2 +} sa_buf_type_t; + +typedef enum sa_data_op { + SA_LOOKUP, + SA_UPDATE, + SA_ADD, + SA_REPLACE, + SA_REMOVE +} sa_data_op_t; + +/* + * Opaque handle used for most sa functions + * + * This needs to be kept as small as possible. + */ + +struct sa_handle { + kmutex_t sa_lock; + dmu_buf_t *sa_bonus; + dmu_buf_t *sa_spill; + objset_t *sa_os; + void *sa_userp; + sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */ + sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */ +}; + +#define SA_GET_DB(hdl, type) \ + (dmu_buf_impl_t *)((type == SA_BONUS) ? hdl->sa_bonus : hdl->sa_spill) + +#define SA_GET_HDR(hdl, type) \ + ((sa_hdr_phys_t *)((dmu_buf_impl_t *)(SA_GET_DB(hdl, \ + type))->db.db_data)) + +#define SA_IDX_TAB_GET(hdl, type) \ + (type == SA_BONUS ? hdl->sa_bonus_tab : hdl->sa_spill_tab) + +#define IS_SA_BONUSTYPE(a) \ + ((a == DMU_OT_SA) ? B_TRUE : B_FALSE) + +#define SA_BONUSTYPE_FROM_DB(db) \ + (dmu_get_bonustype((dmu_buf_t *)db)) + +#define SA_BLKPTR_SPACE (DN_MAX_BONUSLEN - sizeof (blkptr_t)) + +#define SA_LAYOUT_NUM(x, type) \ + ((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \ + ((SA_HDR_LAYOUT_NUM(x)) == 0)) ? 
1 : SA_HDR_LAYOUT_NUM(x)))) + + +#define SA_REGISTERED_LEN(sa, attr) sa->sa_attr_table[attr].sa_length + +#define SA_ATTR_LEN(sa, idx, attr, hdr) ((SA_REGISTERED_LEN(sa, attr) == 0) ?\ + hdr->sa_lengths[TOC_LEN_IDX(idx->sa_idx_tab[attr])] : \ + SA_REGISTERED_LEN(sa, attr)) + +#define SA_SET_HDR(hdr, num, size) \ + { \ + hdr->sa_magic = SA_MAGIC; \ + SA_HDR_LAYOUT_INFO_ENCODE(hdr->sa_layout_info, num, size); \ + } + +#define SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \ + { \ + bulk.sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \ + bulk.sa_buftype = type; \ + bulk.sa_addr = \ + (void *)((uintptr_t)TOC_OFF(idx->sa_idx_tab[attr]) + \ + (uintptr_t)hdr); \ +} + +#define SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \ + (SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \ + (tb->lot_var_sizes > 1 ? P2ROUNDUP((tb->lot_var_sizes - 1) * \ + sizeof (uint16_t), 8) : 0))) + +int sa_add_impl(sa_handle_t *, sa_attr_type_t, + uint32_t, sa_data_locator_t, void *, dmu_tx_t *); + +void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *); +int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *); + +void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *); +int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t, + uint16_t *, sa_hdr_phys_t *); + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SA_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/spa.h b/uts/common/fs/zfs/sys/spa.h new file mode 100644 index 000000000000..456ec06dc456 --- /dev/null +++ b/uts/common/fs/zfs/sys/spa.h @@ -0,0 +1,706 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_SPA_H +#define _SYS_SPA_H + +#include <sys/avl.h> +#include <sys/zfs_context.h> +#include <sys/nvpair.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/fs/zfs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Forward references that lots of things need. + */ +typedef struct spa spa_t; +typedef struct vdev vdev_t; +typedef struct metaslab metaslab_t; +typedef struct metaslab_group metaslab_group_t; +typedef struct metaslab_class metaslab_class_t; +typedef struct zio zio_t; +typedef struct zilog zilog_t; +typedef struct spa_aux_vdev spa_aux_vdev_t; +typedef struct ddt ddt_t; +typedef struct ddt_entry ddt_entry_t; +struct dsl_pool; + +/* + * General-purpose 32-bit and 64-bit bitfield encodings. 
+ */ +#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) +#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) +#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) +#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) + +#define BF32_GET(x, low, len) BF32_DECODE(x, low, len) +#define BF64_GET(x, low, len) BF64_DECODE(x, low, len) + +#define BF32_SET(x, low, len, val) \ + ((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len)) +#define BF64_SET(x, low, len, val) \ + ((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len)) + +#define BF32_GET_SB(x, low, len, shift, bias) \ + ((BF32_GET(x, low, len) + (bias)) << (shift)) +#define BF64_GET_SB(x, low, len, shift, bias) \ + ((BF64_GET(x, low, len) + (bias)) << (shift)) + +#define BF32_SET_SB(x, low, len, shift, bias, val) \ + BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) +#define BF64_SET_SB(x, low, len, shift, bias, val) \ + BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) + +/* + * We currently support nine block sizes, from 512 bytes to 128K. + * We could go higher, but the benefits are near-zero and the cost + * of COWing a giant block to modify one byte would become excessive. + */ +#define SPA_MINBLOCKSHIFT 9 +#define SPA_MAXBLOCKSHIFT 17 +#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) +#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) + +#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) + +/* + * Size of block to hold the configuration data (a packed nvlist) + */ +#define SPA_CONFIG_BLOCKSIZE (1 << 14) + +/* + * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. + * The ASIZE encoding should be at least 64 times larger (6 more bits) + * to support up to 4-way RAID-Z mirror mode with worst-case gang block + * overhead, three DVAs per bp, plus one more bit in case we do anything + * else that expands the ASIZE. 
+ */ +#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ +#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ +#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ + +/* + * All SPA data is represented by 128-bit data virtual addresses (DVAs). + * The members of the dva_t should be considered opaque outside the SPA. + */ +typedef struct dva { + uint64_t dva_word[2]; +} dva_t; + +/* + * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. + */ +typedef struct zio_cksum { + uint64_t zc_word[4]; +} zio_cksum_t; + +/* + * Each block is described by its DVAs, time of birth, checksum, etc. + * The word-by-word, bit-by-bit layout of the blkptr is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | vdev3 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 |G| offset3 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | physical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | logical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * 
+-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | checksum[2] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * f | checksum[3] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * vdev virtual device ID + * offset offset into virtual device + * LSIZE logical size + * PSIZE physical size (after compression) + * ASIZE allocated size (including RAID-Z parity and gang block headers) + * GRID RAID-Z layout information (reserved for future use) + * cksum checksum function + * comp compression function + * G gang block indicator + * B byteorder (endianness) + * D dedup + * X unused + * lvl level of indirection + * type DMU object type + * phys birth txg of block allocation; zero if same as logical birth txg + * log. birth transaction group in which the block was logically born + * fill count number of non-zero blocks under this bp + * checksum[4] 256-bit checksum of the data this bp describes + */ +#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ +#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ + +typedef struct blkptr { + dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ + uint64_t blk_prop; /* size, compression, type, etc */ + uint64_t blk_pad[2]; /* Extra space for the future */ + uint64_t blk_phys_birth; /* txg when block was allocated */ + uint64_t blk_birth; /* transaction group at birth */ + uint64_t blk_fill; /* fill count */ + zio_cksum_t blk_cksum; /* 256-bit checksum */ +} blkptr_t; + +/* + * Macros to get and set fields in a bp or DVA. 
+ */ +#define DVA_GET_ASIZE(dva) \ + BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_ASIZE(dva, x) \ + BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) +#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) + +#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) +#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) + +#define DVA_GET_OFFSET(dva) \ + BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_OFFSET(dva, x) \ + BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) +#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) + +#define BP_GET_LSIZE(bp) \ + BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) +#define BP_SET_LSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_PSIZE(bp) \ + BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) +#define BP_SET_PSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) + +#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) +#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) + +#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) +#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) + +#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) +#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) + +#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1) +#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) + +#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) +#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) + +#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) 
+#define	BP_SET_BYTEORDER(bp, x)		BF64_SET((bp)->blk_prop, 63, 1, x)
+
+/*
+ * A blk_phys_birth of 0 stands for "same as blk_birth"; BP_SET_BIRTH below
+ * stores 0 whenever the logical and physical values match.
+ */
+#define	BP_PHYSICAL_BIRTH(bp)		\
+	((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
+
+/*
+ * Expands to a bare brace block (not do/while) and evaluates 'logical' and
+ * 'physical' twice each, so pass side-effect-free arguments.
+ */
+#define	BP_SET_BIRTH(bp, logical, physical)	\
+{						\
+	(bp)->blk_birth = (logical);		\
+	(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
+}
+
+/* Sum of the ASIZE fields of all three DVAs. */
+#define	BP_GET_ASIZE(bp)	\
+	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
+		DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+
+/*
+ * PSIZE when the level is above 0 or the object type is flagged ot_metadata
+ * in dmu_ot[]; LSIZE otherwise.
+ */
+#define	BP_GET_UCSIZE(bp) \
+	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
+	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
+
+/* Number of DVAs (0-3) whose ASIZE is non-zero. */
+#define	BP_GET_NDVAS(bp)	\
+	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
+	!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
+	!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+
+/* Number of DVAs (0-3) with the gang bit set. */
+#define	BP_COUNT_GANG(bp)	\
+	(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
+	DVA_GET_GANG(&(bp)->blk_dva[1]) + \
+	DVA_GET_GANG(&(bp)->blk_dva[2]))
+
+/* Two DVAs are equal when both 64-bit words match. */
+#define	DVA_EQUAL(dva1, dva2)	\
+	((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
+	(dva1)->dva_word[0] == (dva2)->dva_word[0])
+
+/*
+ * Compares only the physical birth and the three DVAs; blk_prop, blk_fill
+ * and the checksum are not part of this comparison.
+ */
+#define	BP_EQUAL(bp1, bp2)	\
+	(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) &&	\
+	DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) &&	\
+	DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) &&	\
+	DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
+
+/*
+ * Takes the checksums by value (not by pointer).  The OR of the four word
+ * differences is zero only when every pair of words is equal.
+ */
+#define	ZIO_CHECKSUM_EQUAL(zc1, zc2) \
+	(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
+	((zc1).zc_word[1] - (zc2).zc_word[1]) | \
+	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
+	((zc1).zc_word[3] - (zc2).zc_word[3])))
+
+/* A DVA counts as valid when its ASIZE is non-zero. */
+#define	DVA_IS_VALID(dva)	(DVA_GET_ASIZE(dva) != 0)
+
+/* Stores the four words through a checksum pointer; bare brace block. */
+#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
+{						\
+	(zcp)->zc_word[0] = w0;			\
+	(zcp)->zc_word[1] = w1;			\
+	(zcp)->zc_word[2] = w2;			\
+	(zcp)->zc_word[3] = w3;			\
+}
+
+/* The first DVA doubles as the block pointer's identity. */
+#define	BP_IDENTITY(bp)		(&(bp)->blk_dva[0])
+#define	BP_IS_GANG(bp)		DVA_GET_GANG(BP_IDENTITY(bp))
+#define	BP_IS_HOLE(bp)		((bp)->blk_birth == 0)
+
+/* BP_IS_RAIDZ(bp) assumes no block compression */
+#define	BP_IS_RAIDZ(bp)		(DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
+				BP_GET_PSIZE(bp))
+
+/*
+ * Clears the DVAs, blk_prop, padding, both birth fields, blk_fill and the
+ * checksum, one field at a time.  Bare brace block.
+ */
+#define	BP_ZERO(bp)				\
+{						\
+	(bp)->blk_dva[0].dva_word[0] = 0;	\
+	(bp)->blk_dva[0].dva_word[1] = 0;	\
+	(bp)->blk_dva[1].dva_word[0] = 0;	\
+	(bp)->blk_dva[1].dva_word[1] = 0;	\
+	(bp)->blk_dva[2].dva_word[0] = 0;	\
+	(bp)->blk_dva[2].dva_word[1] = 0;	\
+	(bp)->blk_prop = 0;			\
+	(bp)->blk_pad[0] = 0;			\
+	(bp)->blk_pad[1] = 0;			\
+	(bp)->blk_phys_birth = 0;		\
+	(bp)->blk_birth = 0;			\
+	(bp)->blk_fill = 0;			\
+	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);	\
+}
+
+/*
+ * Note: the byteorder is either 0 or -1, both of which are palindromes.
+ * This simplifies the endianness handling a bit.
+ */
+#ifdef _BIG_ENDIAN
+#define	ZFS_HOST_BYTEORDER	(0ULL)
+#else
+#define	ZFS_HOST_BYTEORDER	(-1ULL)
+#endif
+
+/* True when the block's recorded byte order differs from the host's. */
+#define	BP_SHOULD_BYTESWAP(bp)	(BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
+
+/* Buffer size that SPRINTF_BLKPTR formats into. */
+#define	BP_SPRINTF_LEN	320
+
+/*
+ * This macro allows code sharing between zfs, libzpool, and mdb.
+ * 'func' is either snprintf() or mdb_snprintf().
+ * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
+ */ +#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \ +{ \ + static const char *copyname[] = \ + { "zero", "single", "double", "triple" }; \ + int size = BP_SPRINTF_LEN; \ + int len = 0; \ + int copies = 0; \ + \ + if (bp == NULL) { \ + len = func(buf + len, size - len, "<NULL>"); \ + } else if (BP_IS_HOLE(bp)) { \ + len = func(buf + len, size - len, "<hole>"); \ + } else { \ + for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \ + const dva_t *dva = &bp->blk_dva[d]; \ + if (DVA_IS_VALID(dva)) \ + copies++; \ + len += func(buf + len, size - len, \ + "DVA[%d]=<%llu:%llx:%llx>%c", d, \ + (u_longlong_t)DVA_GET_VDEV(dva), \ + (u_longlong_t)DVA_GET_OFFSET(dva), \ + (u_longlong_t)DVA_GET_ASIZE(dva), \ + ws); \ + } \ + if (BP_IS_GANG(bp) && \ + DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ + DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ + copies--; \ + len += func(buf + len, size - len, \ + "[L%llu %s] %s %s %s %s %s %s%c" \ + "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ + "cksum=%llx:%llx:%llx:%llx", \ + (u_longlong_t)BP_GET_LEVEL(bp), \ + type, \ + checksum, \ + compress, \ + BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ + BP_IS_GANG(bp) ? "gang" : "contiguous", \ + BP_GET_DEDUP(bp) ? "dedup" : "unique", \ + copyname[copies], \ + ws, \ + (u_longlong_t)BP_GET_LSIZE(bp), \ + (u_longlong_t)BP_GET_PSIZE(bp), \ + (u_longlong_t)bp->blk_birth, \ + (u_longlong_t)BP_PHYSICAL_BIRTH(bp), \ + (u_longlong_t)bp->blk_fill, \ + ws, \ + (u_longlong_t)bp->blk_cksum.zc_word[0], \ + (u_longlong_t)bp->blk_cksum.zc_word[1], \ + (u_longlong_t)bp->blk_cksum.zc_word[2], \ + (u_longlong_t)bp->blk_cksum.zc_word[3]); \ + } \ + ASSERT(len < size); \ +} + +#include <sys/dmu.h> + +#define BP_GET_BUFC_TYPE(bp) \ + (((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? 
\ + ARC_BUFC_METADATA : ARC_BUFC_DATA); + +typedef enum spa_import_type { + SPA_IMPORT_EXISTING, + SPA_IMPORT_ASSEMBLE +} spa_import_type_t; + +/* state manipulation functions */ +extern int spa_open(const char *pool, spa_t **, void *tag); +extern int spa_open_rewind(const char *pool, spa_t **, void *tag, + nvlist_t *policy, nvlist_t **config); +extern int spa_get_stats(const char *pool, nvlist_t **config, + char *altroot, size_t buflen); +extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, + const char *history_str, nvlist_t *zplprops); +extern int spa_import_rootpool(char *devpath, char *devid); +extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, + uint64_t flags); +extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); +extern int spa_destroy(char *pool); +extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, + boolean_t hardforce); +extern int spa_reset(char *pool); +extern void spa_async_request(spa_t *spa, int flag); +extern void spa_async_unrequest(spa_t *spa, int flag); +extern void spa_async_suspend(spa_t *spa); +extern void spa_async_resume(spa_t *spa); +extern spa_t *spa_inject_addref(char *pool); +extern void spa_inject_delref(spa_t *spa); +extern void spa_scan_stat_init(spa_t *spa); +extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps); + +#define SPA_ASYNC_CONFIG_UPDATE 0x01 +#define SPA_ASYNC_REMOVE 0x02 +#define SPA_ASYNC_PROBE 0x04 +#define SPA_ASYNC_RESILVER_DONE 0x08 +#define SPA_ASYNC_RESILVER 0x10 +#define SPA_ASYNC_AUTOEXPAND 0x20 +#define SPA_ASYNC_REMOVE_DONE 0x40 +#define SPA_ASYNC_REMOVE_STOP 0x80 + +/* + * Controls the behavior of spa_vdev_remove(). 
+ */ +#define SPA_REMOVE_UNSPARE 0x01 +#define SPA_REMOVE_DONE 0x02 + +/* device manipulation */ +extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot); +extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, + int replacing); +extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, + int replace_done); +extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare); +extern boolean_t spa_vdev_remove_active(spa_t *spa); +extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath); +extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru); +extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, + nvlist_t *props, boolean_t exp); + +/* spare state (which is global across all pools) */ +extern void spa_spare_add(vdev_t *vd); +extern void spa_spare_remove(vdev_t *vd); +extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt); +extern void spa_spare_activate(vdev_t *vd); + +/* L2ARC state (which is global across all pools) */ +extern void spa_l2cache_add(vdev_t *vd); +extern void spa_l2cache_remove(vdev_t *vd); +extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool); +extern void spa_l2cache_activate(vdev_t *vd); +extern void spa_l2cache_drop(spa_t *spa); + +/* scanning */ +extern int spa_scan(spa_t *spa, pool_scan_func_t func); +extern int spa_scan_stop(spa_t *spa); + +/* spa syncing */ +extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ +extern void spa_sync_allpools(void); + +/* + * DEFERRED_FREE must be large enough that regular blocks are not + * deferred. XXX so can't we change it back to 1? 
+ */ +#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */ +#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */ +#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */ + +/* spa namespace global mutex */ +extern kmutex_t spa_namespace_lock; + +/* + * SPA configuration functions in spa_config.c + */ + +#define SPA_CONFIG_UPDATE_POOL 0 +#define SPA_CONFIG_UPDATE_VDEVS 1 + +extern void spa_config_sync(spa_t *, boolean_t, boolean_t); +extern void spa_config_load(void); +extern nvlist_t *spa_all_configs(uint64_t *); +extern void spa_config_set(spa_t *spa, nvlist_t *config); +extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, + int getstats); +extern void spa_config_update(spa_t *spa, int what); + +/* + * Miscellaneous SPA routines in spa_misc.c + */ + +/* Namespace manipulation */ +extern spa_t *spa_lookup(const char *name); +extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot); +extern void spa_remove(spa_t *spa); +extern spa_t *spa_next(spa_t *prev); + +/* Refcount functions */ +extern void spa_open_ref(spa_t *spa, void *tag); +extern void spa_close(spa_t *spa, void *tag); +extern boolean_t spa_refcount_zero(spa_t *spa); + +#define SCL_NONE 0x00 +#define SCL_CONFIG 0x01 +#define SCL_STATE 0x02 +#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */ +#define SCL_ALLOC 0x08 +#define SCL_ZIO 0x10 +#define SCL_FREE 0x20 +#define SCL_VDEV 0x40 +#define SCL_LOCKS 7 +#define SCL_ALL ((1 << SCL_LOCKS) - 1) +#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO) + +/* Pool configuration locks */ +extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); +extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw); +extern void spa_config_exit(spa_t *spa, int locks, void *tag); +extern int spa_config_held(spa_t *spa, int locks, krw_t rw); + +/* Pool vdev add/remove lock */ +extern uint64_t spa_vdev_enter(spa_t *spa); +extern uint64_t 
spa_vdev_config_enter(spa_t *spa); +extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, + int error, char *tag); +extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error); + +/* Pool vdev state change lock */ +extern void spa_vdev_state_enter(spa_t *spa, int oplock); +extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error); + +/* Log state */ +typedef enum spa_log_state { + SPA_LOG_UNKNOWN = 0, /* unknown log state */ + SPA_LOG_MISSING, /* missing log(s) */ + SPA_LOG_CLEAR, /* clear the log(s) */ + SPA_LOG_GOOD, /* log(s) are good */ +} spa_log_state_t; + +extern spa_log_state_t spa_get_log_state(spa_t *spa); +extern void spa_set_log_state(spa_t *spa, spa_log_state_t state); +extern int spa_offline_log(spa_t *spa); + +/* Log claim callback */ +extern void spa_claim_notify(zio_t *zio); + +/* Accessor functions */ +extern boolean_t spa_shutting_down(spa_t *spa); +extern struct dsl_pool *spa_get_dsl(spa_t *spa); +extern blkptr_t *spa_get_rootblkptr(spa_t *spa); +extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp); +extern void spa_altroot(spa_t *, char *, size_t); +extern int spa_sync_pass(spa_t *spa); +extern char *spa_name(spa_t *spa); +extern uint64_t spa_guid(spa_t *spa); +extern uint64_t spa_last_synced_txg(spa_t *spa); +extern uint64_t spa_first_txg(spa_t *spa); +extern uint64_t spa_syncing_txg(spa_t *spa); +extern uint64_t spa_version(spa_t *spa); +extern pool_state_t spa_state(spa_t *spa); +extern spa_load_state_t spa_load_state(spa_t *spa); +extern uint64_t spa_freeze_txg(spa_t *spa); +extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize); +extern uint64_t spa_get_dspace(spa_t *spa); +extern void spa_update_dspace(spa_t *spa); +extern uint64_t spa_version(spa_t *spa); +extern boolean_t spa_deflate(spa_t *spa); +extern metaslab_class_t *spa_normal_class(spa_t *spa); +extern metaslab_class_t *spa_log_class(spa_t *spa); +extern int spa_max_replication(spa_t *spa); +extern int 
spa_prev_software_version(spa_t *spa); +extern int spa_busy(void); +extern uint8_t spa_get_failmode(spa_t *spa); +extern boolean_t spa_suspended(spa_t *spa); +extern uint64_t spa_bootfs(spa_t *spa); +extern uint64_t spa_delegation(spa_t *spa); +extern objset_t *spa_meta_objset(spa_t *spa); + +/* Miscellaneous support routines */ +extern int spa_rename(const char *oldname, const char *newname); +extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid); +extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid); +extern char *spa_strdup(const char *); +extern void spa_strfree(char *); +extern uint64_t spa_get_random(uint64_t range); +extern uint64_t spa_generate_guid(spa_t *spa); +extern void sprintf_blkptr(char *buf, const blkptr_t *bp); +extern void spa_freeze(spa_t *spa); +extern void spa_upgrade(spa_t *spa, uint64_t version); +extern void spa_evict_all(void); +extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid, + boolean_t l2cache); +extern boolean_t spa_has_spare(spa_t *, uint64_t guid); +extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva); +extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp); +extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp); +extern boolean_t spa_has_slogs(spa_t *spa); +extern boolean_t spa_is_root(spa_t *spa); +extern boolean_t spa_writeable(spa_t *spa); + +extern int spa_mode(spa_t *spa); +extern uint64_t strtonum(const char *str, char **nptr); + +/* history logging */ +typedef enum history_log_type { + LOG_CMD_POOL_CREATE, + LOG_CMD_NORMAL, + LOG_INTERNAL +} history_log_type_t; + +typedef struct history_arg { + char *ha_history_str; + history_log_type_t ha_log_type; + history_internal_events_t ha_event; + char *ha_zone; + uid_t ha_uid; +} history_arg_t; + +extern char *spa_his_ievent_table[]; + +extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx); +extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read, + char *his_buf); +extern int 
spa_history_log(spa_t *spa, const char *his_buf, + history_log_type_t what); +extern void spa_history_log_internal(history_internal_events_t event, + spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); + +/* error handling */ +struct zbookmark; +extern void spa_log_error(spa_t *spa, zio_t *zio); +extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, + zio_t *zio, uint64_t stateoroffset, uint64_t length); +extern void zfs_post_remove(spa_t *spa, vdev_t *vd); +extern void zfs_post_state_change(spa_t *spa, vdev_t *vd); +extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); +extern uint64_t spa_get_errlog_size(spa_t *spa); +extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count); +extern void spa_errlog_rotate(spa_t *spa); +extern void spa_errlog_drain(spa_t *spa); +extern void spa_errlog_sync(spa_t *spa, uint64_t txg); +extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub); + +/* vdev cache */ +extern void vdev_cache_stat_init(void); +extern void vdev_cache_stat_fini(void); + +/* Initialization and termination */ +extern void spa_init(int flags); +extern void spa_fini(void); +extern void spa_boot_init(); + +/* properties */ +extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); +extern int spa_prop_get(spa_t *spa, nvlist_t **nvp); +extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx); +extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t); + +/* asynchronous event notification */ +extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name); + +#ifdef ZFS_DEBUG +#define dprintf_bp(bp, fmt, ...) 
do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \ + sprintf_blkptr(__blkbuf, (bp)); \ + dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \ + kmem_free(__blkbuf, BP_SPRINTF_LEN); \ + } \ +_NOTE(CONSTCOND) } while (0) +#else +#define dprintf_bp(bp, fmt, ...) +#endif + +extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SPA_H */ diff --git a/uts/common/fs/zfs/sys/spa_boot.h b/uts/common/fs/zfs/sys/spa_boot.h new file mode 100644 index 000000000000..1d3622f5a108 --- /dev/null +++ b/uts/common/fs/zfs/sys/spa_boot.h @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_SPA_BOOT_H +#define _SYS_SPA_BOOT_H + +#include <sys/nvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern char *spa_get_bootprop(char *prop); +extern void spa_free_bootprop(char *prop); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SPA_BOOT_H */ diff --git a/uts/common/fs/zfs/sys/spa_impl.h b/uts/common/fs/zfs/sys/spa_impl.h new file mode 100644 index 000000000000..c965ffbbef87 --- /dev/null +++ b/uts/common/fs/zfs/sys/spa_impl.h @@ -0,0 +1,235 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_SPA_IMPL_H +#define _SYS_SPA_IMPL_H + +#include <sys/spa.h> +#include <sys/vdev.h> +#include <sys/metaslab.h> +#include <sys/dmu.h> +#include <sys/dsl_pool.h> +#include <sys/uberblock_impl.h> +#include <sys/zfs_context.h> +#include <sys/avl.h> +#include <sys/refcount.h> +#include <sys/bplist.h> +#include <sys/bpobj.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct spa_error_entry { + zbookmark_t se_bookmark; + char *se_name; + avl_node_t se_avl; +} spa_error_entry_t; + +typedef struct spa_history_phys { + uint64_t sh_pool_create_len; /* ending offset of zpool create */ + uint64_t sh_phys_max_off; /* physical EOF */ + uint64_t sh_bof; /* logical BOF */ + uint64_t sh_eof; /* logical EOF */ + uint64_t sh_records_lost; /* num of records overwritten */ +} spa_history_phys_t; + +struct spa_aux_vdev { + uint64_t sav_object; /* MOS object for device list */ + nvlist_t *sav_config; /* cached device config */ + vdev_t **sav_vdevs; /* devices */ + int sav_count; /* number devices */ + boolean_t sav_sync; /* sync the device list */ + nvlist_t **sav_pending; /* pending device additions */ + uint_t sav_npending; /* # pending devices */ +}; + +typedef struct spa_config_lock { + kmutex_t scl_lock; + kthread_t *scl_writer; + int scl_write_wanted; + kcondvar_t scl_cv; + refcount_t scl_count; +} spa_config_lock_t; + +typedef struct spa_config_dirent { + list_node_t scd_link; + char *scd_path; +} spa_config_dirent_t; + +enum zio_taskq_type { + ZIO_TASKQ_ISSUE = 0, + ZIO_TASKQ_ISSUE_HIGH, + ZIO_TASKQ_INTERRUPT, + ZIO_TASKQ_INTERRUPT_HIGH, + ZIO_TASKQ_TYPES +}; + +/* + * State machine for the zpool-poolname process. 
The states transitions + * are done as follows: + * + * From To Routine + * PROC_NONE -> PROC_CREATED spa_activate() + * PROC_CREATED -> PROC_ACTIVE spa_thread() + * PROC_ACTIVE -> PROC_DEACTIVATE spa_deactivate() + * PROC_DEACTIVATE -> PROC_GONE spa_thread() + * PROC_GONE -> PROC_NONE spa_deactivate() + */ +typedef enum spa_proc_state { + SPA_PROC_NONE, /* spa_proc = &p0, no process created */ + SPA_PROC_CREATED, /* spa_activate() has proc, is waiting */ + SPA_PROC_ACTIVE, /* taskqs created, spa_proc set */ + SPA_PROC_DEACTIVATE, /* spa_deactivate() requests process exit */ + SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */ +} spa_proc_state_t; + +struct spa { + /* + * Fields protected by spa_namespace_lock. + */ + char spa_name[MAXNAMELEN]; /* pool name */ + avl_node_t spa_avl; /* node in spa_namespace_avl */ + nvlist_t *spa_config; /* last synced config */ + nvlist_t *spa_config_syncing; /* currently syncing config */ + nvlist_t *spa_config_splitting; /* config for splitting */ + nvlist_t *spa_load_info; /* info and errors from load */ + uint64_t spa_config_txg; /* txg of last config change */ + int spa_sync_pass; /* iterate-to-convergence */ + pool_state_t spa_state; /* pool state */ + int spa_inject_ref; /* injection references */ + uint8_t spa_sync_on; /* sync threads are running */ + spa_load_state_t spa_load_state; /* current load operation */ + uint64_t spa_import_flags; /* import specific flags */ + taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; + dsl_pool_t *spa_dsl_pool; + metaslab_class_t *spa_normal_class; /* normal data class */ + metaslab_class_t *spa_log_class; /* intent log data class */ + uint64_t spa_first_txg; /* first txg after spa_open() */ + uint64_t spa_final_txg; /* txg of export/destroy */ + uint64_t spa_freeze_txg; /* freeze pool at this txg */ + uint64_t spa_load_max_txg; /* best initial ub_txg */ + uint64_t spa_claim_max_txg; /* highest claimed birth txg */ + timespec_t spa_loaded_ts; /* 1st successful open time */ + 
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ + txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ + vdev_t *spa_root_vdev; /* top-level vdev container */ + uint64_t spa_load_guid; /* initial guid for spa_load */ + list_t spa_config_dirty_list; /* vdevs with dirty config */ + list_t spa_state_dirty_list; /* vdevs with dirty state */ + spa_aux_vdev_t spa_spares; /* hot spares */ + spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ + uint64_t spa_config_object; /* MOS object for pool config */ + uint64_t spa_config_generation; /* config generation number */ + uint64_t spa_syncing_txg; /* txg currently syncing */ + bpobj_t spa_deferred_bpobj; /* deferred-free bplist */ + bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */ + uberblock_t spa_ubsync; /* last synced uberblock */ + uberblock_t spa_uberblock; /* current uberblock */ + boolean_t spa_extreme_rewind; /* rewind past deferred frees */ + uint64_t spa_last_io; /* lbolt of last non-scan I/O */ + kmutex_t spa_scrub_lock; /* resilver/scrub lock */ + uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */ + kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */ + uint8_t spa_scrub_active; /* active or suspended? 
*/ + uint8_t spa_scrub_type; /* type of scrub we're doing */ + uint8_t spa_scrub_finished; /* indicator to rotate logs */ + uint8_t spa_scrub_started; /* started since last boot */ + uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ + uint64_t spa_scan_pass_start; /* start time per pass/reboot */ + uint64_t spa_scan_pass_exam; /* examined bytes per pass */ + kmutex_t spa_async_lock; /* protect async state */ + kthread_t *spa_async_thread; /* thread doing async task */ + int spa_async_suspended; /* async tasks suspended */ + kcondvar_t spa_async_cv; /* wait for thread_exit() */ + uint16_t spa_async_tasks; /* async task mask */ + char *spa_root; /* alternate root directory */ + uint64_t spa_ena; /* spa-wide ereport ENA */ + int spa_last_open_failed; /* error if last open failed */ + uint64_t spa_last_ubsync_txg; /* "best" uberblock txg */ + uint64_t spa_last_ubsync_txg_ts; /* timestamp from that ub */ + uint64_t spa_load_txg; /* ub txg that loaded */ + uint64_t spa_load_txg_ts; /* timestamp from that ub */ + uint64_t spa_load_meta_errors; /* verify metadata err count */ + uint64_t spa_load_data_errors; /* verify data err count */ + uint64_t spa_verify_min_txg; /* start txg of verify scrub */ + kmutex_t spa_errlog_lock; /* error log lock */ + uint64_t spa_errlog_last; /* last error log object */ + uint64_t spa_errlog_scrub; /* scrub error log object */ + kmutex_t spa_errlist_lock; /* error list/ereport lock */ + avl_tree_t spa_errlist_last; /* last error list */ + avl_tree_t spa_errlist_scrub; /* scrub error list */ + uint64_t spa_deflate; /* should we deflate? 
*/ + uint64_t spa_history; /* history object */ + kmutex_t spa_history_lock; /* history lock */ + vdev_t *spa_pending_vdev; /* pending vdev additions */ + kmutex_t spa_props_lock; /* property lock */ + uint64_t spa_pool_props_object; /* object for properties */ + uint64_t spa_bootfs; /* default boot filesystem */ + uint64_t spa_failmode; /* failure mode for the pool */ + uint64_t spa_delegation; /* delegation on/off */ + list_t spa_config_list; /* previous cache file(s) */ + zio_t *spa_async_zio_root; /* root of all async I/O */ + zio_t *spa_suspend_zio_root; /* root of all suspended I/O */ + kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ + kcondvar_t spa_suspend_cv; /* notification of resume */ + uint8_t spa_suspended; /* pool is suspended */ + uint8_t spa_claiming; /* pool is doing zil_claim() */ + boolean_t spa_is_root; /* pool is root */ + int spa_minref; /* num refs when first opened */ + int spa_mode; /* FREAD | FWRITE */ + spa_log_state_t spa_log_state; /* log state */ + uint64_t spa_autoexpand; /* lun expansion on/off */ + ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */ + uint64_t spa_ddt_stat_object; /* DDT statistics */ + uint64_t spa_dedup_ditto; /* dedup ditto threshold */ + uint64_t spa_dedup_checksum; /* default dedup checksum */ + uint64_t spa_dspace; /* dspace in normal class */ + kmutex_t spa_vdev_top_lock; /* dueling offline/remove */ + kmutex_t spa_proc_lock; /* protects spa_proc* */ + kcondvar_t spa_proc_cv; /* spa_proc_state transitions */ + spa_proc_state_t spa_proc_state; /* see definition */ + struct proc *spa_proc; /* "zpool-poolname" process */ + uint64_t spa_did; /* if procp != p0, did of t1 */ + boolean_t spa_autoreplace; /* autoreplace set in open */ + int spa_vdev_locks; /* locks grabbed */ + uint64_t spa_creation_version; /* version at pool creation */ + uint64_t spa_prev_software_version; + /* + * spa_refcnt & spa_config_lock must be the last elements + * because refcount_t changes size based on compilation 
options. + * In order for the MDB module to function correctly, the other + * fields must remain in the same location. + */ + spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ + refcount_t spa_refcount; /* number of opens */ +}; + +extern const char *spa_config_path; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SPA_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/space_map.h b/uts/common/fs/zfs/sys/space_map.h new file mode 100644 index 000000000000..6f935c9db27e --- /dev/null +++ b/uts/common/fs/zfs/sys/space_map.h @@ -0,0 +1,179 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_SPACE_MAP_H +#define _SYS_SPACE_MAP_H + +#include <sys/avl.h> +#include <sys/dmu.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct space_map_ops space_map_ops_t; + +typedef struct space_map { + avl_tree_t sm_root; /* AVL tree of map segments */ + uint64_t sm_space; /* sum of all segments in the map */ + uint64_t sm_start; /* start of map */ + uint64_t sm_size; /* size of map */ + uint8_t sm_shift; /* unit shift */ + uint8_t sm_pad[3]; /* unused */ + uint8_t sm_loaded; /* map loaded? */ + uint8_t sm_loading; /* map loading? */ + kcondvar_t sm_load_cv; /* map load completion */ + space_map_ops_t *sm_ops; /* space map block picker ops vector */ + avl_tree_t *sm_pp_root; /* picker-private AVL tree */ + void *sm_ppd; /* picker-private data */ + kmutex_t *sm_lock; /* pointer to lock that protects map */ +} space_map_t; + +typedef struct space_seg { + avl_node_t ss_node; /* AVL node */ + avl_node_t ss_pp_node; /* AVL picker-private node */ + uint64_t ss_start; /* starting offset of this segment */ + uint64_t ss_end; /* ending offset (non-inclusive) */ +} space_seg_t; + +typedef struct space_ref { + avl_node_t sr_node; /* AVL node */ + uint64_t sr_offset; /* offset (start or end) */ + int64_t sr_refcnt; /* associated reference count */ +} space_ref_t; + +typedef struct space_map_obj { + uint64_t smo_object; /* on-disk space map object */ + uint64_t smo_objsize; /* size of the object */ + uint64_t smo_alloc; /* space allocated from the map */ +} space_map_obj_t; + +struct space_map_ops { + void (*smop_load)(space_map_t *sm); + void (*smop_unload)(space_map_t *sm); + uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size); + void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size); + void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size); + uint64_t (*smop_max)(space_map_t *sm); + boolean_t (*smop_fragmented)(space_map_t *sm); +}; + +/* + * debug entry + * + * 1 3 10 50 + * 
,---+--------+------------+---------------------------------. + * | 1 | action | syncpass | txg (lower bits) | + * `---+--------+------------+---------------------------------' + * 63 62 60 59 50 49 0 + * + * + * + * non-debug entry + * + * 1 47 1 15 + * ,-----------------------------------------------------------. + * | 0 | offset (sm_shift units) | type | run | + * `-----------------------------------------------------------' + * 63 62 17 16 15 0 + */ + +/* All this stuff takes and returns bytes */ +#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1) +#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15) +#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1) +#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1) +#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47) +#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47) +#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1) +#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1) + +#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3) +#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3) + +#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10) +#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10) + +#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50) +#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50) + +#define SM_RUN_MAX SM_RUN_DECODE(~0ULL) + +#define SM_ALLOC 0x0 +#define SM_FREE 0x1 + +/* + * The data for a given space map can be kept on blocks of any size. + * Larger blocks entail fewer i/o operations, but they also cause the + * DMU to keep more data in-core, and also to waste more i/o bandwidth + * when only a few blocks have changed since the last transaction group. + * This could use a lot more research, but for now, set the freelist + * block size to 4k (2^12). 
+ */ +#define SPACE_MAP_BLOCKSHIFT 12 + +typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size); + +extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size, + uint8_t shift, kmutex_t *lp); +extern void space_map_destroy(space_map_t *sm); +extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size); +extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size); +extern boolean_t space_map_contains(space_map_t *sm, + uint64_t start, uint64_t size); +extern void space_map_vacate(space_map_t *sm, + space_map_func_t *func, space_map_t *mdest); +extern void space_map_walk(space_map_t *sm, + space_map_func_t *func, space_map_t *mdest); + +extern void space_map_load_wait(space_map_t *sm); +extern int space_map_load(space_map_t *sm, space_map_ops_t *ops, + uint8_t maptype, space_map_obj_t *smo, objset_t *os); +extern void space_map_unload(space_map_t *sm); + +extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size); +extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size); +extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size); +extern uint64_t space_map_maxsize(space_map_t *sm); + +extern void space_map_sync(space_map_t *sm, uint8_t maptype, + space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx); +extern void space_map_truncate(space_map_obj_t *smo, + objset_t *os, dmu_tx_t *tx); + +extern void space_map_ref_create(avl_tree_t *t); +extern void space_map_ref_destroy(avl_tree_t *t); +extern void space_map_ref_add_seg(avl_tree_t *t, + uint64_t start, uint64_t end, int64_t refcnt); +extern void space_map_ref_add_map(avl_tree_t *t, + space_map_t *sm, int64_t refcnt); +extern void space_map_ref_generate_map(avl_tree_t *t, + space_map_t *sm, int64_t minref); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SPACE_MAP_H */ diff --git a/uts/common/fs/zfs/sys/txg.h b/uts/common/fs/zfs/sys/txg.h new file mode 100644 index 000000000000..e323d5efabb7 --- /dev/null +++ 
b/uts/common/fs/zfs/sys/txg.h @@ -0,0 +1,131 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_TXG_H +#define _SYS_TXG_H + +#include <sys/spa.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */ +#define TXG_SIZE 4 /* next power of 2 */ +#define TXG_MASK (TXG_SIZE - 1) /* mask for size */ +#define TXG_INITIAL TXG_SIZE /* initial txg */ +#define TXG_IDX (txg & TXG_MASK) + +/* Number of txgs worth of frees we defer adding to in-core spacemaps */ +#define TXG_DEFER_SIZE 2 + +#define TXG_WAIT 1ULL +#define TXG_NOWAIT 2ULL + +typedef struct tx_cpu tx_cpu_t; + +typedef struct txg_handle { + tx_cpu_t *th_cpu; + uint64_t th_txg; +} txg_handle_t; + +typedef struct txg_node { + struct txg_node *tn_next[TXG_SIZE]; + uint8_t tn_member[TXG_SIZE]; +} txg_node_t; + +typedef struct txg_list { + kmutex_t tl_lock; + size_t tl_offset; + txg_node_t *tl_head[TXG_SIZE]; +} txg_list_t; + +struct dsl_pool; + +extern void txg_init(struct dsl_pool *dp, uint64_t txg); +extern void txg_fini(struct 
dsl_pool *dp); +extern void txg_sync_start(struct dsl_pool *dp); +extern void txg_sync_stop(struct dsl_pool *dp); +extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp); +extern void txg_rele_to_quiesce(txg_handle_t *txghp); +extern void txg_rele_to_sync(txg_handle_t *txghp); +extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks); + +/* + * Delay the caller by the specified number of ticks or until + * the txg closes (whichever comes first). This is intended + * to be used to throttle writers when the system nears its + * capacity. + */ +extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks); + +/* + * Wait until the given transaction group has finished syncing. + * Try to make this happen as soon as possible (eg. kick off any + * necessary syncs immediately). If txg==0, wait for the currently open + * txg to finish syncing. + */ +extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg); + +/* + * Wait until the given transaction group, or one after it, is + * the open transaction group. Try to make this happen as soon + * as possible (eg. kick off any necessary syncs immediately). + * If txg == 0, wait for the next open txg. + */ +extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg); + +/* + * Returns TRUE if we are "backed up" waiting for the syncing + * transaction to complete; otherwise returns FALSE. + */ +extern boolean_t txg_stalled(struct dsl_pool *dp); + +/* returns TRUE if someone is waiting for the next txg to sync */ +extern boolean_t txg_sync_waiting(struct dsl_pool *dp); + +/* + * Per-txg object lists. 
+ */ + +#define TXG_CLEAN(txg) ((txg) - 1) + +extern void txg_list_create(txg_list_t *tl, size_t offset); +extern void txg_list_destroy(txg_list_t *tl); +extern int txg_list_empty(txg_list_t *tl, uint64_t txg); +extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg); +extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); +extern void *txg_list_remove(txg_list_t *tl, uint64_t txg); +extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg); +extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg); +extern void *txg_list_head(txg_list_t *tl, uint64_t txg); +extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TXG_H */ diff --git a/uts/common/fs/zfs/sys/txg_impl.h b/uts/common/fs/zfs/sys/txg_impl.h new file mode 100644 index 000000000000..7b356eac1293 --- /dev/null +++ b/uts/common/fs/zfs/sys/txg_impl.h @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_TXG_IMPL_H +#define _SYS_TXG_IMPL_H + +#include <sys/spa.h> +#include <sys/txg.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct tx_cpu { + kmutex_t tc_lock; + kcondvar_t tc_cv[TXG_SIZE]; + uint64_t tc_count[TXG_SIZE]; + list_t tc_callbacks[TXG_SIZE]; /* commit cb list */ + char tc_pad[16]; +}; + +typedef struct tx_state { + tx_cpu_t *tx_cpu; /* protects right to enter txg */ + kmutex_t tx_sync_lock; /* protects tx_state_t */ + uint64_t tx_open_txg; /* currently open txg id */ + uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */ + uint64_t tx_syncing_txg; /* currently syncing txg id */ + uint64_t tx_synced_txg; /* last synced txg id */ + + uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */ + uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */ + + kcondvar_t tx_sync_more_cv; + kcondvar_t tx_sync_done_cv; + kcondvar_t tx_quiesce_more_cv; + kcondvar_t tx_quiesce_done_cv; + kcondvar_t tx_timeout_cv; + kcondvar_t tx_exit_cv; /* wait for all threads to exit */ + + uint8_t tx_threads; /* number of threads */ + uint8_t tx_exiting; /* set when we're exiting */ + + kthread_t *tx_sync_thread; + kthread_t *tx_quiesce_thread; + + taskq_t *tx_commit_cb_taskq; /* commit callback taskq */ +} tx_state_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TXG_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/uberblock.h b/uts/common/fs/zfs/sys/uberblock.h new file mode 100644 index 000000000000..b5bb91573145 --- /dev/null +++ b/uts/common/fs/zfs/sys/uberblock.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_UBERBLOCK_H +#define _SYS_UBERBLOCK_H + +#include <sys/spa.h> +#include <sys/vdev.h> +#include <sys/zio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct uberblock uberblock_t; + +extern int uberblock_verify(uberblock_t *ub); +extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_UBERBLOCK_H */ diff --git a/uts/common/fs/zfs/sys/uberblock_impl.h b/uts/common/fs/zfs/sys/uberblock_impl.h new file mode 100644 index 000000000000..6ab6aa3135a2 --- /dev/null +++ b/uts/common/fs/zfs/sys/uberblock_impl.h @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_UBERBLOCK_IMPL_H +#define _SYS_UBERBLOCK_IMPL_H + +#include <sys/uberblock.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The uberblock version is incremented whenever an incompatible on-disk + * format change is made to the SPA, DMU, or ZAP. + * + * Note: the first two fields should never be moved. When a storage pool + * is opened, the uberblock must be read off the disk before the version + * can be checked. If the ub_version field is moved, we may not detect + * version mismatch. If the ub_magic field is moved, applications that + * expect the magic number in the first word won't work. + */ +#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */ +#define UBERBLOCK_SHIFT 10 /* up to 1K */ + +struct uberblock { + uint64_t ub_magic; /* UBERBLOCK_MAGIC */ + uint64_t ub_version; /* SPA_VERSION */ + uint64_t ub_txg; /* txg of last sync */ + uint64_t ub_guid_sum; /* sum of all vdev guids */ + uint64_t ub_timestamp; /* UTC time of last sync */ + blkptr_t ub_rootbp; /* MOS objset_phys_t */ + + /* highest SPA_VERSION supported by software that wrote this txg */ + uint64_t ub_software_version; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_UBERBLOCK_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/unique.h b/uts/common/fs/zfs/sys/unique.h new file mode 100644 index 000000000000..2ef3093edf1c --- /dev/null +++ b/uts/common/fs/zfs/sys/unique.h @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_UNIQUE_H +#define _SYS_UNIQUE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* The number of significant bits in each unique value. */ +#define UNIQUE_BITS 56 + +void unique_init(void); +void unique_fini(void); + +/* + * Return a new unique value (which will not be uniquified against until + * it is unique_insert()-ed). + */ +uint64_t unique_create(void); + +/* Return a unique value, which equals the one passed in if possible. */ +uint64_t unique_insert(uint64_t value); + +/* Indicate that this value no longer needs to be uniquified against. */ +void unique_remove(uint64_t value); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_UNIQUE_H */ diff --git a/uts/common/fs/zfs/sys/vdev.h b/uts/common/fs/zfs/sys/vdev.h new file mode 100644 index 000000000000..941f234dc68f --- /dev/null +++ b/uts/common/fs/zfs/sys/vdev.h @@ -0,0 +1,161 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_VDEV_H +#define _SYS_VDEV_H + +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/dmu.h> +#include <sys/space_map.h> +#include <sys/fs/zfs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum vdev_dtl_type { + DTL_MISSING, /* 0% replication: no copies of the data */ + DTL_PARTIAL, /* less than 100% replication: some copies missing */ + DTL_SCRUB, /* unable to fully repair during scrub/resilver */ + DTL_OUTAGE, /* temporarily missing (used to attempt detach) */ + DTL_TYPES +} vdev_dtl_type_t; + +extern boolean_t zfs_nocacheflush; + +extern int vdev_open(vdev_t *); +extern void vdev_open_children(vdev_t *); +extern boolean_t vdev_uses_zvols(vdev_t *); +extern int vdev_validate(vdev_t *); +extern void vdev_close(vdev_t *); +extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace); +extern void vdev_reopen(vdev_t *); +extern int vdev_validate_aux(vdev_t *vd); +extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio); + +extern boolean_t vdev_is_bootable(vdev_t *vd); +extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev); +extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid); +extern void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t d, + uint64_t txg, uint64_t size); +extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d, 
+ uint64_t txg, uint64_t size); +extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d); +extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, + int scrub_done); +extern boolean_t vdev_dtl_required(vdev_t *vd); +extern boolean_t vdev_resilver_needed(vdev_t *vd, + uint64_t *minp, uint64_t *maxp); + +extern void vdev_hold(vdev_t *); +extern void vdev_rele(vdev_t *); + +extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg); +extern void vdev_metaslab_fini(vdev_t *vd); +extern void vdev_metaslab_set_size(vdev_t *); +extern void vdev_expand(vdev_t *vd, uint64_t txg); +extern void vdev_split(vdev_t *vd); + + +extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs); +extern void vdev_clear_stats(vdev_t *vd); +extern void vdev_stat_update(zio_t *zio, uint64_t psize); +extern void vdev_scan_stat_init(vdev_t *vd); +extern void vdev_propagate_state(vdev_t *vd); +extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, + vdev_aux_t aux); + +extern void vdev_space_update(vdev_t *vd, + int64_t alloc_delta, int64_t defer_delta, int64_t space_delta); + +extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize); + +extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux); +extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux); +extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, + vdev_state_t *); +extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags); +extern void vdev_clear(spa_t *spa, vdev_t *vd); + +extern boolean_t vdev_is_dead(vdev_t *vd); +extern boolean_t vdev_readable(vdev_t *vd); +extern boolean_t vdev_writeable(vdev_t *vd); +extern boolean_t vdev_allocatable(vdev_t *vd); +extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio); + +extern void vdev_cache_init(vdev_t *vd); +extern void vdev_cache_fini(vdev_t *vd); +extern int vdev_cache_read(zio_t *zio); +extern void vdev_cache_write(zio_t *zio); +extern void vdev_cache_purge(vdev_t *vd); + +extern void 
vdev_queue_init(vdev_t *vd); +extern void vdev_queue_fini(vdev_t *vd); +extern zio_t *vdev_queue_io(zio_t *zio); +extern void vdev_queue_io_done(zio_t *zio); + +extern void vdev_config_dirty(vdev_t *vd); +extern void vdev_config_clean(vdev_t *vd); +extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, + boolean_t); + +extern void vdev_state_dirty(vdev_t *vd); +extern void vdev_state_clean(vdev_t *vd); + +typedef enum vdev_config_flag { + VDEV_CONFIG_SPARE = 1 << 0, + VDEV_CONFIG_L2CACHE = 1 << 1, + VDEV_CONFIG_REMOVING = 1 << 2 +} vdev_config_flag_t; + +extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config); +extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, + boolean_t getstats, vdev_config_flag_t flags); + +/* + * Label routines + */ +struct uberblock; +extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset); +extern int vdev_label_number(uint64_t psize, uint64_t offset); +extern nvlist_t *vdev_label_read_config(vdev_t *vd); +extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub); + +typedef enum { + VDEV_LABEL_CREATE, /* create/add a new device */ + VDEV_LABEL_REPLACE, /* replace an existing device */ + VDEV_LABEL_SPARE, /* add a new hot spare */ + VDEV_LABEL_REMOVE, /* remove an existing device */ + VDEV_LABEL_L2CACHE, /* add an L2ARC cache device */ + VDEV_LABEL_SPLIT /* generating new label for split-off dev */ +} vdev_labeltype_t; + +extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_H */ diff --git a/uts/common/fs/zfs/sys/vdev_disk.h b/uts/common/fs/zfs/sys/vdev_disk.h new file mode 100644 index 000000000000..b748571ea0c3 --- /dev/null +++ b/uts/common/fs/zfs/sys/vdev_disk.h @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_VDEV_DISK_H +#define _SYS_VDEV_DISK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/vdev.h> +#ifdef _KERNEL +#include <sys/buf.h> +#include <sys/ddi.h> +#include <sys/sunldi.h> +#include <sys/sunddi.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct vdev_disk { + ddi_devid_t vd_devid; + char *vd_minor; + ldi_handle_t vd_lh; +} vdev_disk_t; + +#ifdef _KERNEL +extern int vdev_disk_physio(ldi_handle_t, caddr_t, size_t, uint64_t, int); +#endif +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_DISK_H */ diff --git a/uts/common/fs/zfs/sys/vdev_file.h b/uts/common/fs/zfs/sys/vdev_file.h new file mode 100644 index 000000000000..cd496735778c --- /dev/null +++ b/uts/common/fs/zfs/sys/vdev_file.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_VDEV_FILE_H +#define _SYS_VDEV_FILE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/vdev.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct vdev_file { + vnode_t *vf_vnode; +} vdev_file_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_FILE_H */ diff --git a/uts/common/fs/zfs/sys/vdev_impl.h b/uts/common/fs/zfs/sys/vdev_impl.h new file mode 100644 index 000000000000..161bd21f05a6 --- /dev/null +++ b/uts/common/fs/zfs/sys/vdev_impl.h @@ -0,0 +1,322 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_VDEV_IMPL_H +#define _SYS_VDEV_IMPL_H + +#include <sys/avl.h> +#include <sys/dmu.h> +#include <sys/metaslab.h> +#include <sys/nvpair.h> +#include <sys/space_map.h> +#include <sys/vdev.h> +#include <sys/dkio.h> +#include <sys/uberblock_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Virtual device descriptors. + * + * All storage pool operations go through the virtual device framework, + * which provides data replication and I/O scheduling. + */ + +/* + * Forward declarations that lots of things need. + */ +typedef struct vdev_queue vdev_queue_t; +typedef struct vdev_cache vdev_cache_t; +typedef struct vdev_cache_entry vdev_cache_entry_t; + +/* + * Virtual device operations + */ +typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift); +typedef void vdev_close_func_t(vdev_t *vd); +typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); +typedef int vdev_io_start_func_t(zio_t *zio); +typedef void vdev_io_done_func_t(zio_t *zio); +typedef void vdev_state_change_func_t(vdev_t *vd, int, int); +typedef void vdev_hold_func_t(vdev_t *vd); +typedef void vdev_rele_func_t(vdev_t *vd); + +typedef struct vdev_ops { + vdev_open_func_t *vdev_op_open; + vdev_close_func_t *vdev_op_close; + vdev_asize_func_t *vdev_op_asize; + vdev_io_start_func_t *vdev_op_io_start; + vdev_io_done_func_t *vdev_op_io_done; + vdev_state_change_func_t *vdev_op_state_change; + vdev_hold_func_t *vdev_op_hold; + vdev_rele_func_t *vdev_op_rele; + char vdev_op_type[16]; + boolean_t vdev_op_leaf; +} vdev_ops_t; + +/* + * Virtual device properties + */ +struct vdev_cache_entry { + char *ve_data; + uint64_t ve_offset; + uint64_t 
ve_lastused; + avl_node_t ve_offset_node; + avl_node_t ve_lastused_node; + uint32_t ve_hits; + uint16_t ve_missed_update; + zio_t *ve_fill_io; +}; + +struct vdev_cache { + avl_tree_t vc_offset_tree; + avl_tree_t vc_lastused_tree; + kmutex_t vc_lock; +}; + +struct vdev_queue { + avl_tree_t vq_deadline_tree; + avl_tree_t vq_read_tree; + avl_tree_t vq_write_tree; + avl_tree_t vq_pending_tree; + kmutex_t vq_lock; +}; + +/* + * Virtual device descriptor + */ +struct vdev { + /* + * Common to all vdev types. + */ + uint64_t vdev_id; /* child number in vdev parent */ + uint64_t vdev_guid; /* unique ID for this vdev */ + uint64_t vdev_guid_sum; /* self guid + all child guids */ + uint64_t vdev_orig_guid; /* orig. guid prior to remove */ + uint64_t vdev_asize; /* allocatable device capacity */ + uint64_t vdev_min_asize; /* min acceptable asize */ + uint64_t vdev_ashift; /* block alignment shift */ + uint64_t vdev_state; /* see VDEV_STATE_* #defines */ + uint64_t vdev_prevstate; /* used when reopening a vdev */ + vdev_ops_t *vdev_ops; /* vdev operations */ + spa_t *vdev_spa; /* spa for this vdev */ + void *vdev_tsd; /* type-specific data */ + vnode_t *vdev_name_vp; /* vnode for pathname */ + vnode_t *vdev_devid_vp; /* vnode for devid */ + vdev_t *vdev_top; /* top-level vdev */ + vdev_t *vdev_parent; /* parent vdev */ + vdev_t **vdev_child; /* array of children */ + uint64_t vdev_children; /* number of children */ + space_map_t vdev_dtl[DTL_TYPES]; /* in-core dirty time logs */ + vdev_stat_t vdev_stat; /* virtual device statistics */ + boolean_t vdev_expanding; /* expand the vdev? */ + boolean_t vdev_reopening; /* reopen in progress? */ + int vdev_open_error; /* error on last open */ + kthread_t *vdev_open_thread; /* thread opening children */ + uint64_t vdev_crtxg; /* txg when top-level was added */ + + /* + * Top-level vdev state. 
+ */ + uint64_t vdev_ms_array; /* metaslab array object */ + uint64_t vdev_ms_shift; /* metaslab size shift */ + uint64_t vdev_ms_count; /* number of metaslabs */ + metaslab_group_t *vdev_mg; /* metaslab group */ + metaslab_t **vdev_ms; /* metaslab array */ + txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */ + txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */ + txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */ + boolean_t vdev_remove_wanted; /* async remove wanted? */ + boolean_t vdev_probe_wanted; /* async probe wanted? */ + uint64_t vdev_removing; /* device is being removed? */ + list_node_t vdev_config_dirty_node; /* config dirty list */ + list_node_t vdev_state_dirty_node; /* state dirty list */ + uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */ + uint64_t vdev_islog; /* is an intent log device */ + uint64_t vdev_ishole; /* is a hole in the namespace */ + + /* + * Leaf vdev state. + */ + uint64_t vdev_psize; /* physical device capacity */ + space_map_obj_t vdev_dtl_smo; /* dirty time log space map obj */ + txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */ + uint64_t vdev_wholedisk; /* true if this is a whole disk */ + uint64_t vdev_offline; /* persistent offline state */ + uint64_t vdev_faulted; /* persistent faulted state */ + uint64_t vdev_degraded; /* persistent degraded state */ + uint64_t vdev_removed; /* persistent removed state */ + uint64_t vdev_resilvering; /* persistent resilvering state */ + uint64_t vdev_nparity; /* number of parity devices for raidz */ + char *vdev_path; /* vdev path (if any) */ + char *vdev_devid; /* vdev devid (if any) */ + char *vdev_physpath; /* vdev device path (if any) */ + char *vdev_fru; /* physical FRU location */ + uint64_t vdev_not_present; /* not present during import */ + uint64_t vdev_unspare; /* unspare when resilvering done */ + hrtime_t vdev_last_try; /* last reopen time */ + boolean_t vdev_nowritecache; /* true if flushwritecache failed */ + boolean_t vdev_checkremove; /* 
temporary online test */ + boolean_t vdev_forcefault; /* force online fault */ + boolean_t vdev_splitting; /* split or repair in progress */ + boolean_t vdev_delayed_close; /* delayed device close? */ + uint8_t vdev_tmpoffline; /* device taken offline temporarily? */ + uint8_t vdev_detached; /* device detached? */ + uint8_t vdev_cant_read; /* vdev is failing all reads */ + uint8_t vdev_cant_write; /* vdev is failing all writes */ + uint64_t vdev_isspare; /* was a hot spare */ + uint64_t vdev_isl2cache; /* was a l2cache device */ + vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ + vdev_cache_t vdev_cache; /* physical block cache */ + spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */ + zio_t *vdev_probe_zio; /* root of current probe */ + vdev_aux_t vdev_label_aux; /* on-disk aux state */ + + /* + * For DTrace to work in userland (libzpool) context, these fields must + * remain at the end of the structure. DTrace will use the kernel's + * CTF definition for 'struct vdev', and since the size of a kmutex_t is + * larger in userland, the offsets for the rest fields would be + * incorrect. 
+ */ + kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */ + kmutex_t vdev_stat_lock; /* vdev_stat */ + kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */ +}; + +#define VDEV_RAIDZ_MAXPARITY 3 + +#define VDEV_PAD_SIZE (8 << 10) +/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ +#define VDEV_SKIP_SIZE (VDEV_PAD_SIZE * 2) +#define VDEV_PHYS_SIZE (112 << 10) +#define VDEV_UBERBLOCK_RING (128 << 10) + +#define VDEV_UBERBLOCK_SHIFT(vd) \ + MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT) +#define VDEV_UBERBLOCK_COUNT(vd) \ + (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd)) +#define VDEV_UBERBLOCK_OFFSET(vd, n) \ + offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)]) +#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd)) + +typedef struct vdev_phys { + char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; + zio_eck_t vp_zbt; +} vdev_phys_t; + +typedef struct vdev_label { + char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ + char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ + vdev_phys_t vl_vdev_phys; /* 112K */ + char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ +} vdev_label_t; /* 256K total */ + +/* + * vdev_dirty() flags + */ +#define VDD_METASLAB 0x01 +#define VDD_DTL 0x02 + +/* + * Size and offset of embedded boot loader region on each label. + * The total size of the first two labels plus the boot area is 4MB. + */ +#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) +#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ + +/* + * Size of label regions at the start and end of each leaf device.
+ */ +#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) +#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) +#define VDEV_LABELS 4 + +#define VDEV_ALLOC_LOAD 0 +#define VDEV_ALLOC_ADD 1 +#define VDEV_ALLOC_SPARE 2 +#define VDEV_ALLOC_L2CACHE 3 +#define VDEV_ALLOC_ROOTPOOL 4 +#define VDEV_ALLOC_SPLIT 5 + +/* + * Allocate or free a vdev + */ +extern vdev_t *vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, + vdev_ops_t *ops); +extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config, + vdev_t *parent, uint_t id, int alloctype); +extern void vdev_free(vdev_t *vd); + +/* + * Add or remove children and parents + */ +extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd); +extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd); +extern void vdev_compact_children(vdev_t *pvd); +extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops); +extern void vdev_remove_parent(vdev_t *cvd); + +/* + * vdev sync load and sync + */ +extern void vdev_load_log_state(vdev_t *nvd, vdev_t *ovd); +extern boolean_t vdev_log_state_valid(vdev_t *vd); +extern void vdev_load(vdev_t *vd); +extern void vdev_sync(vdev_t *vd, uint64_t txg); +extern void vdev_sync_done(vdev_t *vd, uint64_t txg); +extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg); + +/* + * Available vdev types. + */ +extern vdev_ops_t vdev_root_ops; +extern vdev_ops_t vdev_mirror_ops; +extern vdev_ops_t vdev_replacing_ops; +extern vdev_ops_t vdev_raidz_ops; +extern vdev_ops_t vdev_disk_ops; +extern vdev_ops_t vdev_file_ops; +extern vdev_ops_t vdev_missing_ops; +extern vdev_ops_t vdev_hole_ops; +extern vdev_ops_t vdev_spare_ops; + +/* + * Common size functions + */ +extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize); +extern uint64_t vdev_get_min_asize(vdev_t *vd); +extern void vdev_set_min_asize(vdev_t *vd); + +/* + * zdb uses this tunable, so it must be declared here to make lint happy. 
+ */ +extern int zfs_vdev_cache_size; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/zap.h b/uts/common/fs/zfs/sys/zap.h new file mode 100644 index 000000000000..a1130bbbaaae --- /dev/null +++ b/uts/common/fs/zfs/sys/zap.h @@ -0,0 +1,482 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZAP_H +#define _SYS_ZAP_H + +/* + * ZAP - ZFS Attribute Processor + * + * The ZAP is a module which sits on top of the DMU (Data Management + * Unit) and implements a higher-level storage primitive using DMU + * objects. Its primary consumer is the ZPL (ZFS Posix Layer). + * + * A "zapobj" is a DMU object which the ZAP uses to store attributes. + * Users should use only zap routines to access a zapobj - they should + * not access the DMU object directly using DMU routines. + * + * The attributes stored in a zapobj are name-value pairs. The name is + * a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including + * terminating NUL). The value is an array of integers, which may be + * 1, 2, 4, or 8 bytes long.
The total space used by the array (number + * of integers * integer length) can be up to ZAP_MAXVALUELEN bytes. + * Note that an 8-byte integer value can be used to store the location + * (object number) of another dmu object (which may be itself a zapobj). + * Note that you can use a zero-length attribute to store a single bit + * of information - the attribute is present or not. + * + * The ZAP routines are thread-safe. However, you must observe the + * DMU's restriction that a transaction may not be operated on + * concurrently. + * + * Any of the routines that return an int may return an I/O error (EIO + * or ECHECKSUM). + * + * + * Implementation / Performance Notes: + * + * The ZAP is intended to operate most efficiently on attributes with + * short (49 bytes or less) names and single 8-byte values, for which + * the microzap will be used. The ZAP should be efficient enough so + * that the user does not need to cache these attributes. + * + * The ZAP's locking scheme makes its routines thread-safe. Operations + * on different zapobjs will be processed concurrently. Operations on + * the same zapobj which only read data will be processed concurrently. + * Operations on the same zapobj which modify data will be processed + * concurrently when there are many attributes in the zapobj (because + * the ZAP uses per-block locking - more than 128 * (number of cpus) + * small attributes will suffice). + */ + +/* + * We're using zero-terminated byte strings (ie. ASCII or UTF-8 C + * strings) for the names of attributes, rather than a byte string + * bounded by an explicit length. If some day we want to support names + * in character sets which have embedded zeros (eg. UTF-16, UTF-32), + * we'll have to add routines for using length-bounded strings. + */ + +#include <sys/dmu.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The matchtype specifies which entry will be accessed. 
+ * MT_EXACT: only find an exact match (non-normalized) + * MT_FIRST: find the "first" normalized (case and Unicode + * form) match; the designated "first" match will not change as long + * as the set of entries with this normalization doesn't change + * MT_BEST: if there is an exact match, find that, otherwise find the + * first normalized match + */ +typedef enum matchtype +{ + MT_EXACT, + MT_BEST, + MT_FIRST +} matchtype_t; + +typedef enum zap_flags { + /* Use 64-bit hash value (serialized cursors will always use 64-bits) */ + ZAP_FLAG_HASH64 = 1 << 0, + /* Key is binary, not string (zap_add_uint64() can be used) */ + ZAP_FLAG_UINT64_KEY = 1 << 1, + /* + * First word of key (which must be an array of uint64) is + * already randomly distributed. + */ + ZAP_FLAG_PRE_HASHED_KEY = 1 << 2, +} zap_flags_t; + +/* + * Create a new zapobj with no attributes and return its object number. + * MT_EXACT will cause the zap object to only support MT_EXACT lookups, + * otherwise any matchtype can be used for lookups. + * + * normflags specifies what normalization will be done. values are: + * 0: no normalization (legacy on-disk format, supports MT_EXACT matching + * only) + * U8_TEXTPREP_TOLOWER: case normalization will be performed. + * MT_FIRST/MT_BEST matching will find entries that match without + * regard to case (eg. looking for "foo" can find an entry "Foo"). + * Eventually, other flags will permit unicode normalization as well. 
+ */ +uint64_t zap_create(objset_t *ds, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); +uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); +uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); + +/* + * Create a new zapobj with no attributes from the given (unallocated) + * object number. + */ +int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); +int zap_create_claim_norm(objset_t *ds, uint64_t obj, + int normflags, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); + +/* + * The zapobj passed in must be a valid ZAP object for all of the + * following routines. + */ + +/* + * Destroy this zapobj and all its attributes. + * + * Frees the object number using dmu_object_free. + */ +int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx); + +/* + * Manipulate attributes. + * + * 'integer_size' is in bytes, and must be 1, 2, 4, or 8. + */ + +/* + * Retrieve the contents of the attribute with the given name. + * + * If the requested attribute does not exist, the call will fail and + * return ENOENT. + * + * If 'integer_size' is smaller than the attribute's integer size, the + * call will fail and return EINVAL. + * + * If 'integer_size' is equal to or larger than the attribute's integer + * size, the call will succeed and return 0. When converting to a + * larger integer size, the integers will be treated as unsigned (ie. no + * sign-extension will be performed). + * + * 'num_integers' is the length (in integers) of 'buf'. + * + * If the attribute is longer than the buffer, as many integers as will + * fit will be transferred to 'buf'.
If the entire attribute was not + * transferred, the call will return EOVERFLOW. + * + * If rn_len is nonzero, realname will be set to the name of the found + * entry (which may be different from the requested name if matchtype is + * not MT_EXACT). + * + * If normalization_conflictp is not NULL, it will be set if there is + * another name with the same case/unicode normalized form. + */ +int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf); +int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf, + matchtype_t mt, char *realname, int rn_len, + boolean_t *normalization_conflictp); +int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf); +int zap_contains(objset_t *ds, uint64_t zapobj, const char *name); +int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints); + +int zap_count_write(objset_t *os, uint64_t zapobj, const char *name, + int add, uint64_t *towrite, uint64_t *tooverwrite); + +/* + * Create an attribute with the given name and value. + * + * If an attribute with the given name already exists, the call will + * fail and return EEXIST. + */ +int zap_add(objset_t *ds, uint64_t zapobj, const char *key, + int integer_size, uint64_t num_integers, + const void *val, dmu_tx_t *tx); +int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key, + int key_numints, int integer_size, uint64_t num_integers, + const void *val, dmu_tx_t *tx); + +/* + * Set the attribute with the given name to the given value. If an + * attribute with the given name does not exist, it will be created. If + * an attribute with the given name already exists, the previous value + * will be overwritten. 
The integer_size may be different from the + * existing attribute's integer size, in which case the attribute's + * integer size will be updated to the new value. + */ +int zap_update(objset_t *ds, uint64_t zapobj, const char *name, + int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); +int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, + int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); + +/* + * Get the length (in integers) and the integer size of the specified + * attribute. + * + * If the requested attribute does not exist, the call will fail and + * return ENOENT. + */ +int zap_length(objset_t *ds, uint64_t zapobj, const char *name, + uint64_t *integer_size, uint64_t *num_integers); +int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, uint64_t *integer_size, uint64_t *num_integers); + +/* + * Remove the specified attribute. + * + * If the specified attribute does not exist, the call will fail and + * return ENOENT. + */ +int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx); +int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name, + matchtype_t mt, dmu_tx_t *tx); +int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, dmu_tx_t *tx); + +/* + * Returns (in *count) the number of attributes in the specified zap + * object. + */ +int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count); + +/* + * Returns (in name) the name of the entry whose (value & mask) + * (za_first_integer) is value, or ENOENT if not found. The string + * pointed to by name must be at least 256 bytes long. If mask==0, the + * match must be exact (ie, same as mask=-1ULL). + */ +int zap_value_search(objset_t *os, uint64_t zapobj, + uint64_t value, uint64_t mask, char *name); + +/* + * Transfer all the entries from fromobj into intoobj. Only works on + * int_size=8 num_integers=1 values. 
Fails if there are any duplicated + * entries. + */ +int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx); + +/* Same as zap_join, but set the values to 'value'. */ +int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, + uint64_t value, dmu_tx_t *tx); + +/* Same as zap_join, but add together any duplicated entries. */ +int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, + dmu_tx_t *tx); + +/* + * Manipulate entries where the name + value are the "same" (the name is + * a stringified version of the value). + */ +int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx); +int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx); +int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value); +int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta, + dmu_tx_t *tx); + +/* Here the key is an int and the value is a different int. */ +int zap_add_int_key(objset_t *os, uint64_t obj, + uint64_t key, uint64_t value, dmu_tx_t *tx); +int zap_lookup_int_key(objset_t *os, uint64_t obj, + uint64_t key, uint64_t *valuep); + +/* + * The name is a stringified version of key; increment its value by + * delta. Zero values will be zap_remove()-ed. + */ +int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta, + dmu_tx_t *tx); +int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta, + dmu_tx_t *tx); + +struct zap; +struct zap_leaf; +typedef struct zap_cursor { + /* This structure is opaque! */ + objset_t *zc_objset; + struct zap *zc_zap; + struct zap_leaf *zc_leaf; + uint64_t zc_zapobj; + uint64_t zc_serialized; + uint64_t zc_hash; + uint32_t zc_cd; +} zap_cursor_t; + +typedef struct { + int za_integer_length; + /* + * za_normalization_conflict will be set if there are additional + * entries with this normalized form (eg, "foo" and "Foo").
+ */ + boolean_t za_normalization_conflict; + uint64_t za_num_integers; + uint64_t za_first_integer; /* no sign extension for <8byte ints */ + char za_name[MAXNAMELEN]; +} zap_attribute_t; + +/* + * The interface for listing all the attributes of a zapobj can be + * thought of as a cursor moving down a list of the attributes one by + * one. The cookie returned by the zap_cursor_serialize routine is + * persistent across system calls (and across reboot, even). + */ + +/* + * Initialize a zap cursor, pointing to the "first" attribute of the + * zapobj. You must _fini the cursor when you are done with it. + */ +void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj); +void zap_cursor_fini(zap_cursor_t *zc); + +/* + * Get the attribute currently pointed to by the cursor. Returns + * ENOENT if at the end of the attributes. + */ +int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za); + +/* + * Advance the cursor to the next attribute. + */ +void zap_cursor_advance(zap_cursor_t *zc); + +/* + * Get a persistent cookie pointing to the current position of the zap + * cursor. The low 4 bits in the cookie are always zero, and thus can + * be used to differentiate a serialized cookie from a different type + * of value. The cookie will be less than 2^32 as long as there are + * fewer than 2^22 (4.2 million) entries in the zap object. + */ +uint64_t zap_cursor_serialize(zap_cursor_t *zc); + +/* + * Advance the cursor to the attribute having the given key. + */ +int zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt); + +/* + * Initialize a zap cursor pointing to the position recorded by + * zap_cursor_serialize (in the "serialized" argument). You can also + * use a "serialized" argument of 0 to start at the beginning of the + * zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to + * zap_cursor_init(...).)
+ */ +void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds, + uint64_t zapobj, uint64_t serialized); + + +#define ZAP_HISTOGRAM_SIZE 10 + +typedef struct zap_stats { + /* + * Size of the pointer table (in number of entries). + * This is always a power of 2, or zero if it's a microzap. + * In general, it should be considerably greater than zs_num_leafs. + */ + uint64_t zs_ptrtbl_len; + + uint64_t zs_blocksize; /* size of zap blocks */ + + /* + * The number of blocks used. Note that some blocks may be + * wasted because old ptrtbl's and large name/value blocks are + * not reused. (Although their space is reclaimed, we don't + * reuse those offsets in the object.) + */ + uint64_t zs_num_blocks; + + /* + * Pointer table values from zap_ptrtbl in the zap_phys_t + */ + uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */ + uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */ + uint64_t zs_ptrtbl_zt_blk; /* starting block number */ + uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */ + uint64_t zs_ptrtbl_zt_shift; /* bits to index it */ + + /* + * Values of the other members of the zap_phys_t + */ + uint64_t zs_block_type; /* ZBT_HEADER */ + uint64_t zs_magic; /* ZAP_MAGIC */ + uint64_t zs_num_leafs; /* The number of leaf blocks */ + uint64_t zs_num_entries; /* The number of zap entries */ + uint64_t zs_salt; /* salt to stir into hash function */ + + /* + * Histograms. For all histograms, the last index + * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater + * than what can be represented. For example + * zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number + * of leafs with more than 45 entries. + */ + + /* + * zs_leafs_with_2n_pointers[n] is the number of leafs with + * 2^n pointers to it. + */ + uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE]; + + /* + * zs_blocks_with_n5_entries[n] is the number of leafs with + * [n*5, (n+1)*5) entries.
In the current implementation, there + * can be at most 55 entries in any block, but there may be + * fewer if the name or value is large, or the block is not + * completely full. + */ + uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE]; + + /* + * zs_blocks_n_tenths_full[n] is the number of leafs whose + * fullness is in the range [n/10, (n+1)/10). + */ + uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE]; + + /* + * zs_entries_using_n_chunks[n] is the number of entries which + * consume n 24-byte chunks. (Note, large names/values only use + * one chunk, but contribute to zs_num_blocks_large.) + */ + uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE]; + + /* + * zs_buckets_with_n_entries[n] is the number of buckets (each + * leaf has 64 buckets) with n entries. + * zs_buckets_with_n_entries[1] should be very close to + * zs_num_entries. + */ + uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE]; +} zap_stats_t; + +/* + * Get statistics about a ZAP object. Note: you need to be aware of the + * internal implementation of the ZAP to correctly interpret some of the + * statistics. This interface shouldn't be relied on unless you really + * know what you're doing. + */ +int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZAP_H */ diff --git a/uts/common/fs/zfs/sys/zap_impl.h b/uts/common/fs/zfs/sys/zap_impl.h new file mode 100644 index 000000000000..1dc322e02f6f --- /dev/null +++ b/uts/common/fs/zfs/sys/zap_impl.h @@ -0,0 +1,228 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZAP_IMPL_H +#define _SYS_ZAP_IMPL_H + +#include <sys/zap.h> +#include <sys/zfs_context.h> +#include <sys/avl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int fzap_default_block_shift; + +#define ZAP_MAGIC 0x2F52AB2ABULL + +#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift) + +#define MZAP_ENT_LEN 64 +#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) +#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT +#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) + +#define ZAP_NEED_CD (-1U) + +typedef struct mzap_ent_phys { + uint64_t mze_value; + uint32_t mze_cd; + uint16_t mze_pad; /* in case we want to chain them someday */ + char mze_name[MZAP_NAME_LEN]; +} mzap_ent_phys_t; + +typedef struct mzap_phys { + uint64_t mz_block_type; /* ZBT_MICRO */ + uint64_t mz_salt; + uint64_t mz_normflags; + uint64_t mz_pad[5]; + mzap_ent_phys_t mz_chunk[1]; + /* actually variable size depending on block size */ +} mzap_phys_t; + +typedef struct mzap_ent { + avl_node_t mze_node; + int mze_chunkid; + uint64_t mze_hash; + uint32_t mze_cd; /* copy from mze_phys->mze_cd */ +} mzap_ent_t; + +#define MZE_PHYS(zap, mze) \ + (&(zap)->zap_m.zap_phys->mz_chunk[(mze)->mze_chunkid]) + +/* + * The (fat) zap is stored in one object. It is an array of + * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of: + * + * ptrtbl fits in first block: + * [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ... + * + * ptrtbl too big for first block: + * [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ... 
+ * + */ + +struct dmu_buf; +struct zap_leaf; + +#define ZBT_LEAF ((1ULL << 63) + 0) +#define ZBT_HEADER ((1ULL << 63) + 1) +#define ZBT_MICRO ((1ULL << 63) + 3) +/* any other values are ptrtbl blocks */ + +/* + * the embedded pointer table takes up half a block: + * block size / entry size (2^3) / 2 + */ +#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1) + +/* + * The embedded pointer table starts half-way through the block. Since + * the pointer table itself is half the block, it starts at (64-bit) + * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)). + */ +#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \ + ((uint64_t *)(zap)->zap_f.zap_phys) \ + [(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))] + +/* + * TAKE NOTE: + * If zap_phys_t is modified, zap_byteswap() must be modified. + */ +typedef struct zap_phys { + uint64_t zap_block_type; /* ZBT_HEADER */ + uint64_t zap_magic; /* ZAP_MAGIC */ + + struct zap_table_phys { + uint64_t zt_blk; /* starting block number */ + uint64_t zt_numblks; /* number of blocks */ + uint64_t zt_shift; /* bits to index it */ + uint64_t zt_nextblk; /* next (larger) copy start block */ + uint64_t zt_blks_copied; /* number source blocks copied */ + } zap_ptrtbl; + + uint64_t zap_freeblk; /* the next free block */ + uint64_t zap_num_leafs; /* number of leafs */ + uint64_t zap_num_entries; /* number of entries */ + uint64_t zap_salt; /* salt to stir into hash function */ + uint64_t zap_normflags; /* flags for u8_textprep_str() */ + uint64_t zap_flags; /* zap_flags_t */ + /* + * This structure is followed by padding, and then the embedded + * pointer table. The embedded pointer table takes up second + * half of the block. It is accessed using the + * ZAP_EMBEDDED_PTRTBL_ENT() macro. 
+ */ +} zap_phys_t; + +typedef struct zap_table_phys zap_table_phys_t; + +typedef struct zap { + objset_t *zap_objset; + uint64_t zap_object; + struct dmu_buf *zap_dbuf; + krwlock_t zap_rwlock; + boolean_t zap_ismicro; + int zap_normflags; + uint64_t zap_salt; + union { + struct { + zap_phys_t *zap_phys; + + /* + * zap_num_entries_mtx protects + * zap_num_entries + */ + kmutex_t zap_num_entries_mtx; + int zap_block_shift; + } zap_fat; + struct { + mzap_phys_t *zap_phys; + int16_t zap_num_entries; + int16_t zap_num_chunks; + int16_t zap_alloc_next; + avl_tree_t zap_avl; + } zap_micro; + } zap_u; +} zap_t; + +typedef struct zap_name { + zap_t *zn_zap; + int zn_key_intlen; + const void *zn_key_orig; + int zn_key_orig_numints; + const void *zn_key_norm; + int zn_key_norm_numints; + uint64_t zn_hash; + matchtype_t zn_matchtype; + char zn_normbuf[ZAP_MAXNAMELEN]; +} zap_name_t; + +#define zap_f zap_u.zap_fat +#define zap_m zap_u.zap_micro + +boolean_t zap_match(zap_name_t *zn, const char *matchname); +int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, + krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp); +void zap_unlockdir(zap_t *zap); +void zap_evict(dmu_buf_t *db, void *vmzap); +zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt); +void zap_name_free(zap_name_t *zn); +int zap_hashbits(zap_t *zap); +uint32_t zap_maxcd(zap_t *zap); +uint64_t zap_getflags(zap_t *zap); + +#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 
0 : ((hash) >> (64 - (n)))) + +void fzap_byteswap(void *buf, size_t size); +int fzap_count(zap_t *zap, uint64_t *count); +int fzap_lookup(zap_name_t *zn, + uint64_t integer_size, uint64_t num_integers, void *buf, + char *realname, int rn_len, boolean_t *normalization_conflictp); +void fzap_prefetch(zap_name_t *zn); +int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, + uint64_t *tooverwrite); +int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers, + const void *val, dmu_tx_t *tx); +int fzap_update(zap_name_t *zn, + int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); +int fzap_length(zap_name_t *zn, + uint64_t *integer_size, uint64_t *num_integers); +int fzap_remove(zap_name_t *zn, dmu_tx_t *tx); +int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za); +void fzap_get_stats(zap_t *zap, zap_stats_t *zs); +void zap_put_leaf(struct zap_leaf *l); + +int fzap_add_cd(zap_name_t *zn, + uint64_t integer_size, uint64_t num_integers, + const void *val, uint32_t cd, dmu_tx_t *tx); +void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags); +int fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZAP_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/zap_leaf.h b/uts/common/fs/zfs/sys/zap_leaf.h new file mode 100644 index 000000000000..3a33636741d9 --- /dev/null +++ b/uts/common/fs/zfs/sys/zap_leaf.h @@ -0,0 +1,245 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZAP_LEAF_H +#define _SYS_ZAP_LEAF_H + +#include <sys/zap.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct zap; +struct zap_name; +struct zap_stats; + +#define ZAP_LEAF_MAGIC 0x2AB1EAF + +/* chunk size = 24 bytes */ +#define ZAP_LEAF_CHUNKSIZE 24 + +/* + * The amount of space available for chunks is: + * block size (1<<l->l_bs) - hash entry size (2) * number of hash + * entries - header space (2*chunksize) + */ +#define ZAP_LEAF_NUMCHUNKS(l) \ + (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \ + ZAP_LEAF_CHUNKSIZE - 2) + +/* + * The amount of space within the chunk available for the array is: + * chunk size - space for type (1) - space for next pointer (2) + */ +#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) + +#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \ + (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) + +/* + * Low water mark: when there are only this many chunks free, start + * growing the ptrtbl. Ideally, this should be larger than a + * "reasonably-sized" entry. 20 chunks is more than enough for the + * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value), + * while still being only around 3% for 16k blocks. + */ +#define ZAP_LEAF_LOW_WATER (20) + +/* + * The leaf hash table has block size / 2^5 (32) number of entries, + * which should be more than enough for the maximum number of entries, + * which is less than block size / CHUNKSIZE (24) / minimum number of + * chunks per entry (3). 
+ */ +#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5) +#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l)) + +/* + * The chunks start immediately after the hash table. The end of the + * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a + * chunk_t. + */ +#define ZAP_LEAF_CHUNK(l, idx) \ + ((zap_leaf_chunk_t *) \ + ((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx] +#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry) + +typedef enum zap_chunk_type { + ZAP_CHUNK_FREE = 253, + ZAP_CHUNK_ENTRY = 252, + ZAP_CHUNK_ARRAY = 251, + ZAP_CHUNK_TYPE_MAX = 250 +} zap_chunk_type_t; + +#define ZLF_ENTRIES_CDSORTED (1<<0) + +/* + * TAKE NOTE: + * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified. + */ +typedef struct zap_leaf_phys { + struct zap_leaf_header { + uint64_t lh_block_type; /* ZBT_LEAF */ + uint64_t lh_pad1; + uint64_t lh_prefix; /* hash prefix of this leaf */ + uint32_t lh_magic; /* ZAP_LEAF_MAGIC */ + uint16_t lh_nfree; /* number free chunks */ + uint16_t lh_nentries; /* number of entries */ + uint16_t lh_prefix_len; /* num bits used to id this */ + +/* above is accessible to zap, below is zap_leaf private */ + + uint16_t lh_freelist; /* chunk head of free list */ + uint8_t lh_flags; /* ZLF_* flags */ + uint8_t lh_pad2[11]; + } l_hdr; /* 2 24-byte chunks */ + + /* + * The header is followed by a hash table with + * ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is + * followed by an array of ZAP_LEAF_NUMCHUNKS(zap) + * zap_leaf_chunk structures. These structures are accessed + * with the ZAP_LEAF_CHUNK() macro.
+ */ + + uint16_t l_hash[1]; +} zap_leaf_phys_t; + +typedef union zap_leaf_chunk { + struct zap_leaf_entry { + uint8_t le_type; /* always ZAP_CHUNK_ENTRY */ + uint8_t le_value_intlen; /* size of value's ints */ + uint16_t le_next; /* next entry in hash chain */ + uint16_t le_name_chunk; /* first chunk of the name */ + uint16_t le_name_numints; /* ints in name (incl null) */ + uint16_t le_value_chunk; /* first chunk of the value */ + uint16_t le_value_numints; /* value length in ints */ + uint32_t le_cd; /* collision differentiator */ + uint64_t le_hash; /* hash value of the name */ + } l_entry; + struct zap_leaf_array { + uint8_t la_type; /* always ZAP_CHUNK_ARRAY */ + uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; + uint16_t la_next; /* next blk or CHAIN_END */ + } l_array; + struct zap_leaf_free { + uint8_t lf_type; /* always ZAP_CHUNK_FREE */ + uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; + uint16_t lf_next; /* next in free list, or CHAIN_END */ + } l_free; +} zap_leaf_chunk_t; + +typedef struct zap_leaf { + krwlock_t l_rwlock; + uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */ + int l_bs; /* block size shift */ + dmu_buf_t *l_dbuf; + zap_leaf_phys_t *l_phys; +} zap_leaf_t; + + +typedef struct zap_entry_handle { + /* below is set by zap_leaf.c and is public to zap.c */ + uint64_t zeh_num_integers; + uint64_t zeh_hash; + uint32_t zeh_cd; + uint8_t zeh_integer_size; + + /* below is private to zap_leaf.c */ + uint16_t zeh_fakechunk; + uint16_t *zeh_chunkp; + zap_leaf_t *zeh_leaf; +} zap_entry_handle_t; + +/* + * Return a handle to the named entry, or ENOENT if not found. The hash + * value must equal zap_hash(name). + */ +extern int zap_leaf_lookup(zap_leaf_t *l, + struct zap_name *zn, zap_entry_handle_t *zeh); + +/* + * Return a handle to the entry with this hash+cd, or the entry with the + * next closest hash+cd. 
+ */ +extern int zap_leaf_lookup_closest(zap_leaf_t *l, + uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh); + +/* + * Read the first num_integers in the attribute. Integer size + * conversion will be done without sign extension. Return EINVAL if + * integer_size is too small. Return EOVERFLOW if there are more than + * num_integers in the attribute. + */ +extern int zap_entry_read(const zap_entry_handle_t *zeh, + uint8_t integer_size, uint64_t num_integers, void *buf); + +extern int zap_entry_read_name(struct zap *zap, const zap_entry_handle_t *zeh, + uint16_t buflen, char *buf); + +/* + * Replace the value of an existing entry. + * + * zap_entry_update may fail if it runs out of space (ENOSPC). + */ +extern int zap_entry_update(zap_entry_handle_t *zeh, + uint8_t integer_size, uint64_t num_integers, const void *buf); + +/* + * Remove an entry. + */ +extern void zap_entry_remove(zap_entry_handle_t *zeh); + +/* + * Create an entry. An equal entry must not exist, and this entry must + * belong in this leaf (according to its hash value). Fills in the + * entry handle on success. Returns 0 on success or ENOSPC on failure. + */ +extern int zap_entry_create(zap_leaf_t *l, struct zap_name *zn, uint32_t cd, + uint8_t integer_size, uint64_t num_integers, const void *buf, + zap_entry_handle_t *zeh); + +/* + * Return true if there are additional entries with the same normalized + * form. + */ +extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh, + struct zap_name *zn, const char *name, struct zap *zap); + +/* + * Other stuff. 
+ */ + +extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort); +extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len); +extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort); +extern void zap_leaf_stats(struct zap *zap, zap_leaf_t *l, + struct zap_stats *zs); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZAP_LEAF_H */ diff --git a/uts/common/fs/zfs/sys/zfs_acl.h b/uts/common/fs/zfs/sys/zfs_acl.h new file mode 100644 index 000000000000..c1a0aeebdce4 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_acl.h @@ -0,0 +1,245 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FS_ZFS_ACL_H +#define _SYS_FS_ZFS_ACL_H + +#ifdef _KERNEL +#include <sys/isa_defs.h> +#include <sys/types32.h> +#endif +#include <sys/acl.h> +#include <sys/dmu.h> +#include <sys/zfs_fuid.h> +#include <sys/sa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct znode_phys; + +#define ACE_SLOT_CNT 6 +#define ZFS_ACL_VERSION_INITIAL 0ULL +#define ZFS_ACL_VERSION_FUID 1ULL +#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID + +/* + * ZFS ACLs are stored in various forms. 
+ * Files created with ACL version ZFS_ACL_VERSION_INITIAL + * will all be created with fixed length ACEs of type + * zfs_oldace_t. + * + * Files with ACL version ZFS_ACL_VERSION_FUID will be created + * with various sized ACEs. The abstraction entries will utilize + * zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t + * and some specialized CIFS ACEs will use zfs_object_ace_t. + */ + +/* + * All ACEs have a common header. For + * owner@, group@, and everyone@ this is all + * that's needed. + */ +typedef struct zfs_ace_hdr { + uint16_t z_type; + uint16_t z_flags; + uint32_t z_access_mask; +} zfs_ace_hdr_t; + +typedef zfs_ace_hdr_t zfs_ace_abstract_t; + +/* + * Standard ACE + */ +typedef struct zfs_ace { + zfs_ace_hdr_t z_hdr; + uint64_t z_fuid; +} zfs_ace_t; + +/* + * The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE + * and will only be set/retrieved in a CIFS context. + */ + +typedef struct zfs_object_ace { + zfs_ace_t z_ace; + uint8_t z_object_type[16]; /* object type */ + uint8_t z_inherit_type[16]; /* inherited object type */ +} zfs_object_ace_t; + +/* + * Fixed-length ACE used by files with ACL version ZFS_ACL_VERSION_INITIAL. + */ +typedef struct zfs_oldace { + uint32_t z_fuid; /* "who" */ + uint32_t z_access_mask; /* access mask */ + uint16_t z_flags; /* flags, i.e. inheritance */ + uint16_t z_type; /* type of entry allow/deny */ +} zfs_oldace_t; + +typedef struct zfs_acl_phys_v0 { + uint64_t z_acl_extern_obj; /* ext acl pieces */ + uint32_t z_acl_count; /* Number of ACEs */ + uint16_t z_acl_version; /* acl version */ + uint16_t z_acl_pad; /* pad */ + zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ +} zfs_acl_phys_v0_t; + +#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT) + +/* + * Size of ACL count is always 2 bytes. 
+ * Necessary for dealing with both V0 ACL and V1 ACL layout. + */ +#define ZFS_ACL_COUNT_SIZE (sizeof (uint16_t)) + +typedef struct zfs_acl_phys { + uint64_t z_acl_extern_obj; /* ext acl pieces */ + uint32_t z_acl_size; /* Number of bytes in ACL */ + uint16_t z_acl_version; /* acl version */ + uint16_t z_acl_count; /* ace count */ + uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */ +} zfs_acl_phys_t; + +/* + * Table of accessors that read and write the fields of an ACE through an + * untyped (void *) ACE pointer. + */ +typedef struct acl_ops { + uint32_t (*ace_mask_get) (void *acep); /* get access mask */ + void (*ace_mask_set) (void *acep, + uint32_t mask); /* set access mask */ + uint16_t (*ace_flags_get) (void *acep); /* get flags */ + void (*ace_flags_set) (void *acep, + uint16_t flags); /* set flags */ + uint16_t (*ace_type_get)(void *acep); /* get type */ + void (*ace_type_set)(void *acep, + uint16_t type); /* set type */ + uint64_t (*ace_who_get)(void *acep); /* get who/fuid */ + void (*ace_who_set)(void *acep, + uint64_t who); /* set who/fuid */ + size_t (*ace_size)(void *acep); /* how big is this ace */ + size_t (*ace_abstract_size)(void); /* sizeof abstract entry */ + int (*ace_mask_off)(void); /* offset of access mask in ace */ + int (*ace_data)(void *acep, void **datap); + /* ptr to data if any */ +} acl_ops_t; + +/* + * A zfs_acl_t structure is composed of a list of zfs_acl_node_t's. + * Each node will have one or more ACEs associated with it. You will + * only have multiple nodes during a chmod operation. Normally only + * one node is required. 
+ */ +typedef struct zfs_acl_node { + list_node_t z_next; /* Next chunk of ACEs */ + void *z_acldata; /* pointer into actual ACE(s) */ + void *z_allocdata; /* pointer to kmem allocated memory */ + size_t z_allocsize; /* Size of blob in bytes */ + size_t z_size; /* length of ACL data */ + uint64_t z_ace_count; /* number of ACEs in this acl node */ + int z_ace_idx; /* ace iterator positioned on */ +} zfs_acl_node_t; + +typedef struct zfs_acl { + uint64_t z_acl_count; /* Number of ACEs */ + size_t z_acl_bytes; /* Number of bytes in ACL */ + uint_t z_version; /* version of ACL */ + void *z_next_ace; /* pointer to next ACE */ + uint64_t z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */ + zfs_acl_node_t *z_curr_node; /* current node iterator is handling */ + list_t z_acl; /* chunks of ACE data */ + acl_ops_t z_ops; /* ACL operations */ +} zfs_acl_t; + +typedef struct acl_locator_cb { + zfs_acl_t *cb_aclp; + zfs_acl_node_t *cb_acl_node; +} zfs_acl_locator_cb_t; + +#define ACL_DATA_ALLOCED 0x1 +#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt)) + +struct zfs_fuid_info; + +typedef struct zfs_acl_ids { + uint64_t z_fuid; /* file owner fuid */ + uint64_t z_fgid; /* file group owner fuid */ + uint64_t z_mode; /* mode to set on create */ + zfs_acl_t *z_aclp; /* ACL to create with file */ + struct zfs_fuid_info *z_fuidp; /* for tracking fuids for log */ +} zfs_acl_ids_t; + +/* + * Property values for acl_mode and acl_inherit. + * + * acl_mode can take discard, noallow, groupmask and passthrough. + * whereas acl_inherit has secure instead of groupmask. 
+ */ + +#define ZFS_ACL_DISCARD 0 +#define ZFS_ACL_NOALLOW 1 +#define ZFS_ACL_GROUPMASK 2 +#define ZFS_ACL_PASSTHROUGH 3 +#define ZFS_ACL_RESTRICTED 4 +#define ZFS_ACL_PASSTHROUGH_X 5 + +struct znode; +struct zfsvfs; + +#ifdef _KERNEL +int zfs_acl_ids_create(struct znode *, int, vattr_t *, + cred_t *, vsecattr_t *, zfs_acl_ids_t *); +void zfs_acl_ids_free(zfs_acl_ids_t *); +boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *); +int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *); +int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *); +void zfs_acl_rele(void *); +void zfs_oldace_byteswap(ace_t *, int); +void zfs_ace_byteswap(void *, size_t, boolean_t); +extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr); +extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *); +int zfs_fastaccesschk_execute(struct znode *, cred_t *); +extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *); +extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *); +extern int zfs_acl_access(struct znode *, int, cred_t *); +void zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t); +int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *); +int zfs_zaccess_rename(struct znode *, struct znode *, + struct znode *, struct znode *, cred_t *cr); +void zfs_acl_free(zfs_acl_t *); +int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, cred_t *, + struct zfs_fuid_info **, zfs_acl_t **); +int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *); +uint64_t zfs_external_acl(struct znode *); +int zfs_znode_acl_version(struct znode *); +int zfs_acl_size(struct znode *, int *); +zfs_acl_t *zfs_acl_alloc(int); +zfs_acl_node_t *zfs_acl_node_alloc(size_t); +void zfs_acl_xform(struct znode *, zfs_acl_t *, cred_t *); +void zfs_acl_data_locator(void **, uint32_t *, uint32_t, boolean_t, void *); +uint64_t zfs_mode_compute(uint64_t, zfs_acl_t *, + uint64_t *, uint64_t, uint64_t); +int 
zfs_acl_chown_setattr(struct znode *); + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* _SYS_FS_ZFS_ACL_H */ diff --git a/uts/common/fs/zfs/sys/zfs_context.h b/uts/common/fs/zfs/sys/zfs_context.h new file mode 100644 index 000000000000..558e9e1884e3 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_context.h @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_ZFS_CONTEXT_H +#define _SYS_ZFS_CONTEXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/note.h> +#include <sys/types.h> +#include <sys/t_lock.h> +#include <sys/atomic.h> +#include <sys/sysmacros.h> +#include <sys/bitmap.h> +#include <sys/cmn_err.h> +#include <sys/kmem.h> +#include <sys/taskq.h> +#include <sys/buf.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/cpuvar.h> +#include <sys/kobj.h> +#include <sys/conf.h> +#include <sys/disp.h> +#include <sys/debug.h> +#include <sys/random.h> +#include <sys/byteorder.h> +#include <sys/systm.h> +#include <sys/list.h> +#include <sys/uio.h> +#include <sys/dirent.h> +#include <sys/time.h> +#include <vm/seg_kmem.h> +#include <sys/zone.h> +#include <sys/uio.h> +#include <sys/zfs_debug.h> +#include <sys/sysevent.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/sysevent/dev.h> +#include <sys/fm/util.h> +#include <sys/sunddi.h> + +#define CPU_SEQID (CPU->cpu_seqid) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_CONTEXT_H */ diff --git a/uts/common/fs/zfs/sys/zfs_ctldir.h b/uts/common/fs/zfs/sys/zfs_ctldir.h new file mode 100644 index 000000000000..f88ef95fdca8 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_ctldir.h @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ZFS_CTLDIR_H +#define _ZFS_CTLDIR_H + +#include <sys/pathname.h> +#include <sys/vnode.h> +#include <sys/zfs_vfsops.h> +#include <sys/zfs_znode.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZFS_CTLDIR_NAME ".zfs" + +/* + * zfs_has_ctldir(zdp) is true when zdp's z_id equals its filesystem's + * z_root and that filesystem's z_ctldir is non-NULL. + * zfs_show_ctldir(zdp) additionally requires z_show_ctldir to be set. + * Both macros evaluate zdp more than once. + */ +#define zfs_has_ctldir(zdp) \ + ((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \ + ((zdp)->z_zfsvfs->z_ctldir != NULL)) +#define zfs_show_ctldir(zdp) \ + (zfs_has_ctldir(zdp) && \ + ((zdp)->z_zfsvfs->z_show_ctldir)) + +void zfsctl_create(zfsvfs_t *); +void zfsctl_destroy(zfsvfs_t *); +vnode_t *zfsctl_root(znode_t *); +void zfsctl_init(void); +void zfsctl_fini(void); +boolean_t zfsctl_is_node(vnode_t *); + +int zfsctl_rename_snapshot(const char *from, const char *to); +int zfsctl_destroy_snapshot(const char *snapname, int force); +int zfsctl_umount_snapshots(vfs_t *, int, cred_t *); + +int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, + int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, + int *direntflags, pathname_t *realpnp); + +int zfsctl_make_fid(zfsvfs_t *zfsvfsp, uint64_t object, uint32_t gen, + fid_t *fidp); +int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp); + +#define ZFSCTL_INO_ROOT 0x1 +#define ZFSCTL_INO_SNAPDIR 0x2 +#define ZFSCTL_INO_SHARES 0x3 + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_CTLDIR_H */ diff --git a/uts/common/fs/zfs/sys/zfs_debug.h b/uts/common/fs/zfs/sys/zfs_debug.h new file mode 100644 index 000000000000..50ecf9b36249 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_debug.h @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZFS_DEBUG_H +#define _SYS_ZFS_DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/* + * ZFS debugging + */ + +#if defined(DEBUG) || !defined(_KERNEL) +#define ZFS_DEBUG +#endif + +extern int zfs_flags; + +/* Debug flag bits; dprintf() tests ZFS_DEBUG_DPRINTF against zfs_flags. */ +#define ZFS_DEBUG_DPRINTF 0x0001 +#define ZFS_DEBUG_DBUF_VERIFY 0x0002 +#define ZFS_DEBUG_DNODE_VERIFY 0x0004 +#define ZFS_DEBUG_SNAPNAMES 0x0008 +#define ZFS_DEBUG_MODIFY 0x0010 + +#ifdef ZFS_DEBUG +extern void __dprintf(const char *file, const char *func, + int line, const char *fmt, ...); +/* + * dprintf() calls __dprintf() with the caller's file, function and line + * only when ZFS_DEBUG_DPRINTF is set in zfs_flags. It expands to a single + * parenthesized void expression (like the non-debug variant) rather than a + * bare "if" statement, so a following "else" in the caller cannot bind to + * the macro. + */ +#define dprintf(...) \ + ((zfs_flags & ZFS_DEBUG_DPRINTF) ? \ + __dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__) : (void)0) +#else +#define dprintf(...) 
((void)0) +#endif /* ZFS_DEBUG */ + +extern void zfs_panic_recover(const char *fmt, ...); + +typedef struct zfs_dbgmsg { + list_node_t zdm_node; + time_t zdm_timestamp; + char zdm_msg[1]; /* variable length allocation */ +} zfs_dbgmsg_t; + +extern void zfs_dbgmsg_init(void); +extern void zfs_dbgmsg_fini(void); +extern void zfs_dbgmsg(const char *fmt, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_DEBUG_H */ diff --git a/uts/common/fs/zfs/sys/zfs_dir.h b/uts/common/fs/zfs/sys/zfs_dir.h new file mode 100644 index 000000000000..349f8ef37321 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_dir.h @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FS_ZFS_DIR_H +#define _SYS_FS_ZFS_DIR_H + +#include <sys/pathname.h> +#include <sys/dmu.h> +#include <sys/zfs_znode.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* zfs_dirent_lock() flags */ +#define ZNEW 0x0001 /* entry should not exist */ +#define ZEXISTS 0x0002 /* entry should exist */ +#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */ +#define ZXATTR 0x0008 /* we want the xattr dir */ +#define ZRENAMING 0x0010 /* znode is being renamed */ +#define ZCILOOK 0x0020 /* case-insensitive lookup requested */ +#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */ +#define ZHAVELOCK 0x0080 /* z_name_lock is already held */ + +/* mknode flags */ +#define IS_ROOT_NODE 0x01 /* create a root node */ +#define IS_XATTR 0x02 /* create an extended attribute node */ + +extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **, + int, int *, pathname_t *); +extern void zfs_dirent_unlock(zfs_dirlock_t *); +extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); +extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, + boolean_t *); +extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *, + pathname_t *); +extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *, + uint_t, znode_t **, zfs_acl_ids_t *); +extern void zfs_rmnode(znode_t *); +extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old); +extern boolean_t zfs_dirempty(znode_t *); +extern void zfs_unlinked_add(znode_t *, dmu_tx_t *); +extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs); +extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr); +extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int); +extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_DIR_H */ diff --git a/uts/common/fs/zfs/sys/zfs_fuid.h b/uts/common/fs/zfs/sys/zfs_fuid.h new file mode 100644 index 000000000000..0feb3ce4bb7c --- 
/dev/null +++ b/uts/common/fs/zfs/sys/zfs_fuid.h @@ -0,0 +1,131 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FS_ZFS_FUID_H +#define _SYS_FS_ZFS_FUID_H + +#ifdef _KERNEL +#include <sys/kidmap.h> +#include <sys/sid.h> +#include <sys/dmu.h> +#include <sys/zfs_vfsops.h> +#endif +#include <sys/avl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + ZFS_OWNER, + ZFS_GROUP, + ZFS_ACE_USER, + ZFS_ACE_GROUP +} zfs_fuid_type_t; + +/* + * Estimate the space needed for one more fuid table entry. + * For now, assume its current size + 1K (SPA_MINBLOCKSIZE << 1). + */ +#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1)) + +/* + * A FUID is a 64-bit value: the upper 32 bits are an index and the lower + * 32 bits are the rid. FUID_ENCODE() does not mask rid, so callers must + * pass a rid that fits in 32 bits. + */ +#define FUID_INDEX(x) ((x) >> 32) +#define FUID_RID(x) ((x) & 0xffffffff) +#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid)) +/* + * FUIDs cause problems for the intent log + * we need to replay the creation of the FUID, + * but we can't count on the idmapper to be around + * and during replay the FUID index may be different than + * before. 
Also, if an ACL has 100 ACEs and 12 different + * domains we don't want to log 100 domain strings, but rather + * just the unique 12. + */ + +/* + * The FUIDs in the log will index into + * domain string table and the bottom half will be the rid. + * Used for mapping ephemeral uid/gid during ACL setting to FUIDs + */ +typedef struct zfs_fuid { + list_node_t z_next; + uint64_t z_id; /* uid/gid being converted to fuid */ + uint64_t z_domidx; /* index in AVL domain table */ + uint64_t z_logfuid; /* index for domain in log */ +} zfs_fuid_t; + +/* list of unique domains */ +typedef struct zfs_fuid_domain { + list_node_t z_next; + uint64_t z_domidx; /* AVL tree idx */ + const char *z_domain; /* domain string */ +} zfs_fuid_domain_t; + +/* + * FUID information necessary for logging create, setattr, and setacl. + */ +typedef struct zfs_fuid_info { + list_t z_fuids; + list_t z_domains; + uint64_t z_fuid_owner; + uint64_t z_fuid_group; + char **z_domain_table; /* Used during replay */ + uint32_t z_fuid_cnt; /* How many fuids in z_fuids */ + uint32_t z_domain_cnt; /* How many domains */ + size_t z_domain_str_sz; /* len of domain strings z_domain list */ +} zfs_fuid_info_t; + +#ifdef _KERNEL +struct znode; +extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t); +extern void zfs_fuid_node_add(zfs_fuid_info_t **, const char *, uint32_t, + uint64_t, uint64_t, zfs_fuid_type_t); +extern void zfs_fuid_destroy(zfsvfs_t *); +extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t, + cred_t *, zfs_fuid_info_t **); +extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t, + zfs_fuid_info_t **); +extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, + uid_t *uid, uid_t *gid); +extern zfs_fuid_info_t *zfs_fuid_info_alloc(void); +extern void zfs_fuid_info_free(zfs_fuid_info_t *); +extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *); +void zfs_fuid_sync(zfsvfs_t *, dmu_tx_t *); +extern int 
zfs_fuid_find_by_domain(zfsvfs_t *, const char *domain, + char **retdomain, boolean_t addok); +extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx); +extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx); +#endif + +char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t); +void zfs_fuid_avl_tree_create(avl_tree_t *, avl_tree_t *); +uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *); +void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_FUID_H */ diff --git a/uts/common/fs/zfs/sys/zfs_ioctl.h b/uts/common/fs/zfs/sys/zfs_ioctl.h new file mode 100644 index 000000000000..84bf794fe5f0 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -0,0 +1,349 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_ZFS_IOCTL_H +#define _SYS_ZFS_IOCTL_H + +#include <sys/cred.h> +#include <sys/dmu.h> +#include <sys/zio.h> +#include <sys/dsl_deleg.h> +#include <sys/spa.h> +#include <sys/zfs_stat.h> + +#ifdef _KERNEL +#include <sys/nvpair.h> +#endif /* _KERNEL */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Property values for snapdir + */ +#define ZFS_SNAPDIR_HIDDEN 0 +#define ZFS_SNAPDIR_VISIBLE 1 + +/* + * Field manipulation macros for the drr_versioninfo field of the + * send stream header. + */ + +/* + * Header types for zfs send streams. + */ +typedef enum drr_headertype { + DMU_SUBSTREAM = 0x1, + DMU_COMPOUNDSTREAM = 0x2 +} drr_headertype_t; + +#define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2) +#define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x) + +#define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 30) +#define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 30, x) + +/* + * Feature flags for zfs send streams (flags in drr_versioninfo) + */ + +#define DMU_BACKUP_FEATURE_DEDUP (0x1) +#define DMU_BACKUP_FEATURE_DEDUPPROPS (0x2) +#define DMU_BACKUP_FEATURE_SA_SPILL (0x4) + +/* + * Mask of all supported backup features + */ +#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ + DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL) + +/* Are all features in the given flag word currently supported? */ +#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) + +/* + * The drr_versioninfo field of the dmu_replay_record has the + * following layout: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | reserved | feature-flags |C|S| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * The low order two bits indicate the header type: SUBSTREAM (0x1) + * or COMPOUNDSTREAM (0x2). Using two bits for this is historical: + * this field used to be a version number, where the two version types + * were 1 and 2. 
Using two bits for this allows earlier versions of + * the code to be able to recognize send streams that don't use any + * of the features indicated by feature flags. + */ + +#define DMU_BACKUP_MAGIC 0x2F5bacbacULL + +#define DRR_FLAG_CLONE (1<<0) +#define DRR_FLAG_CI_DATA (1<<1) + +/* + * flags in the drr_checksumflags field in the DRR_WRITE and + * DRR_WRITE_BYREF blocks + */ +#define DRR_CHECKSUM_DEDUP (1<<0) + +#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) + +/* + * zfs ioctl command structure + */ +typedef struct dmu_replay_record { + enum { + DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, + DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, + DRR_SPILL, DRR_NUMTYPES + } drr_type; + uint32_t drr_payloadlen; + union { + struct drr_begin { + uint64_t drr_magic; + uint64_t drr_versioninfo; /* was drr_version */ + uint64_t drr_creation_time; + dmu_objset_type_t drr_type; + uint32_t drr_flags; + uint64_t drr_toguid; + uint64_t drr_fromguid; + char drr_toname[MAXNAMELEN]; + } drr_begin; + struct drr_end { + zio_cksum_t drr_checksum; + uint64_t drr_toguid; + } drr_end; + struct drr_object { + uint64_t drr_object; + dmu_object_type_t drr_type; + dmu_object_type_t drr_bonustype; + uint32_t drr_blksz; + uint32_t drr_bonuslen; + uint8_t drr_checksumtype; + uint8_t drr_compress; + uint8_t drr_pad[6]; + uint64_t drr_toguid; + /* bonus content follows */ + } drr_object; + struct drr_freeobjects { + uint64_t drr_firstobj; + uint64_t drr_numobjs; + uint64_t drr_toguid; + } drr_freeobjects; + struct drr_write { + uint64_t drr_object; + dmu_object_type_t drr_type; + uint32_t drr_pad; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; + uint8_t drr_checksumtype; + uint8_t drr_checksumflags; + uint8_t drr_pad2[6]; + ddt_key_t drr_key; /* deduplication key */ + /* content follows */ + } drr_write; + struct drr_free { + uint64_t drr_object; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; + } drr_free; + struct drr_write_byref { + /* 
where to put the data */ + uint64_t drr_object; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; + /* where to find the prior copy of the data */ + uint64_t drr_refguid; + uint64_t drr_refobject; + uint64_t drr_refoffset; + /* properties of the data */ + uint8_t drr_checksumtype; + uint8_t drr_checksumflags; + uint8_t drr_pad2[6]; + ddt_key_t drr_key; /* deduplication key */ + } drr_write_byref; + struct drr_spill { + uint64_t drr_object; + uint64_t drr_length; + uint64_t drr_toguid; + uint64_t drr_pad[4]; /* needed for crypto */ + /* spill data follows */ + } drr_spill; + } drr_u; +} dmu_replay_record_t; + +/* diff record range types */ +typedef enum diff_type { + DDR_NONE = 0x1, + DDR_INUSE = 0x2, + DDR_FREE = 0x4 +} diff_type_t; + +/* + * The diff reports back ranges of free or in-use objects. + */ +typedef struct dmu_diff_record { + uint64_t ddr_type; + uint64_t ddr_first; + uint64_t ddr_last; +} dmu_diff_record_t; + +typedef struct zinject_record { + uint64_t zi_objset; + uint64_t zi_object; + uint64_t zi_start; + uint64_t zi_end; + uint64_t zi_guid; + uint32_t zi_level; + uint32_t zi_error; + uint64_t zi_type; + uint32_t zi_freq; + uint32_t zi_failfast; + char zi_func[MAXNAMELEN]; + uint32_t zi_iotype; + int32_t zi_duration; + uint64_t zi_timer; +} zinject_record_t; + +#define ZINJECT_NULL 0x1 +#define ZINJECT_FLUSH_ARC 0x2 +#define ZINJECT_UNLOAD_SPA 0x4 + +typedef struct zfs_share { + uint64_t z_exportdata; + uint64_t z_sharedata; + uint64_t z_sharetype; /* 0 = share, 1 = unshare */ + uint64_t z_sharemax; /* max length of share string */ +} zfs_share_t; + +/* + * ZFS file systems may behave the usual, POSIX-compliant way, where + * name lookups are case-sensitive. They may also be set up so that + * all the name lookups are case-insensitive, or so that only some + * lookups, the ones that set an FIGNORECASE flag, are case-insensitive. 
+ */ +typedef enum zfs_case { + ZFS_CASE_SENSITIVE, + ZFS_CASE_INSENSITIVE, + ZFS_CASE_MIXED +} zfs_case_t; + +typedef struct zfs_cmd { + char zc_name[MAXPATHLEN]; + char zc_value[MAXPATHLEN * 2]; + char zc_string[MAXNAMELEN]; + char zc_top_ds[MAXPATHLEN]; + uint64_t zc_guid; + uint64_t zc_nvlist_conf; /* really (char *) */ + uint64_t zc_nvlist_conf_size; + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + uint64_t zc_cookie; + uint64_t zc_objset_type; + uint64_t zc_perm_action; + uint64_t zc_history; /* really (char *) */ + uint64_t zc_history_len; + uint64_t zc_history_offset; + uint64_t zc_obj; + uint64_t zc_iflags; /* internal to zfs(7fs) */ + zfs_share_t zc_share; + dmu_objset_stats_t zc_objset_stats; + struct drr_begin zc_begin_record; + zinject_record_t zc_inject_record; + boolean_t zc_defer_destroy; + boolean_t zc_temphold; + uint64_t zc_action_handle; + int zc_cleanup_fd; + uint8_t zc_pad[4]; /* alignment */ + uint64_t zc_sendobj; + uint64_t zc_fromobj; + uint64_t zc_createtxg; + zfs_stat_t zc_stat; +} zfs_cmd_t; + +typedef struct zfs_useracct { + char zu_domain[256]; + uid_t zu_rid; + uint32_t zu_pad; + uint64_t zu_space; +} zfs_useracct_t; + +#define ZFSDEV_MAX_MINOR (1 << 16) +#define ZFS_MIN_MINOR (ZFSDEV_MAX_MINOR + 1) + +#define ZPOOL_EXPORT_AFTER_SPLIT 0x1 + +#ifdef _KERNEL + +typedef struct zfs_creat { + nvlist_t *zct_zplprops; + nvlist_t *zct_props; +} zfs_creat_t; + +extern dev_info_t *zfs_dip; + +extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); +extern int zfs_secpolicy_rename_perms(const char *from, + const char *to, cred_t *cr); +extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); +extern int zfs_busy(void); +extern int zfs_unmount_snap(const char *, void *); + +/* + * ZFS minor numbers can refer to either a control device instance or + * a zvol. 
Depending on the value of zss_type, zss_data points to either + * a zvol_state_t or a zfs_onexit_t. + */ +enum zfs_soft_state_type { + ZSST_ZVOL, + ZSST_CTLDEV +}; + +typedef struct zfs_soft_state { + enum zfs_soft_state_type zss_type; + void *zss_data; +} zfs_soft_state_t; + +extern void *zfsdev_get_soft_state(minor_t minor, + enum zfs_soft_state_type which); +extern minor_t zfsdev_minor_alloc(void); + +extern void *zfsdev_state; +extern kmutex_t zfsdev_state_lock; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_IOCTL_H */ diff --git a/uts/common/fs/zfs/sys/zfs_onexit.h b/uts/common/fs/zfs/sys/zfs_onexit.h new file mode 100644 index 000000000000..4982bd4d0afc --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_onexit.h @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_ZFS_ONEXIT_H +#define _SYS_ZFS_ONEXIT_H + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +typedef struct zfs_onexit { + kmutex_t zo_lock; + list_t zo_actions; +} zfs_onexit_t; + +typedef struct zfs_onexit_action_node { + list_node_t za_link; + void (*za_func)(void *); + void *za_data; +} zfs_onexit_action_node_t; + +extern void zfs_onexit_init(zfs_onexit_t **zo); +extern void zfs_onexit_destroy(zfs_onexit_t *zo); + +#endif + +extern int zfs_onexit_fd_hold(int fd, minor_t *minorp); +extern void zfs_onexit_fd_rele(int fd); +extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, + uint64_t *action_handle); +extern int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, + boolean_t fire); +extern int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, + void **data); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_ONEXIT_H */ diff --git a/uts/common/fs/zfs/sys/zfs_rlock.h b/uts/common/fs/zfs/sys/zfs_rlock.h new file mode 100644 index 000000000000..f302b663e22a --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_rlock.h @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FS_ZFS_RLOCK_H +#define _SYS_FS_ZFS_RLOCK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +#include <sys/zfs_znode.h> + +typedef enum { + RL_READER, + RL_WRITER, + RL_APPEND +} rl_type_t; + +typedef struct rl { + znode_t *r_zp; /* znode this lock applies to */ + avl_node_t r_node; /* avl node link */ + uint64_t r_off; /* file range offset */ + uint64_t r_len; /* file range length */ + uint_t r_cnt; /* range reference count in tree */ + rl_type_t r_type; /* range type */ + kcondvar_t r_wr_cv; /* cv for waiting writers */ + kcondvar_t r_rd_cv; /* cv for waiting readers */ + uint8_t r_proxy; /* acting for original range */ + uint8_t r_write_wanted; /* writer wants to lock this range */ + uint8_t r_read_wanted; /* reader wants to lock this range */ +} rl_t; + +/* + * Lock a range (offset, length) as either shared (READER) + * or exclusive (WRITER or APPEND). APPEND is a special type that + * is converted to WRITER and is specified to lock starting from the + * end of file. zfs_range_lock() returns the range lock structure. + */ +rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type); + +/* + * Unlock range and destroy range lock structure. + */ +void zfs_range_unlock(rl_t *rl); + +/* + * Reduce range locked as RL_WRITER from whole file to specified range. + * Asserts the whole file was previously locked. 
+ */ +void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len); + +/* + * AVL comparison function used to compare range locks + */ +int zfs_range_compare(const void *arg1, const void *arg2); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_RLOCK_H */ diff --git a/uts/common/fs/zfs/sys/zfs_sa.h b/uts/common/fs/zfs/sys/zfs_sa.h new file mode 100644 index 000000000000..cd312b27a94d --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_sa.h @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZFS_SA_H +#define _SYS_ZFS_SA_H + +#ifdef _KERNEL +#include <sys/types32.h> +#include <sys/list.h> +#include <sys/dmu.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_znode.h> +#include <sys/sa.h> +#include <sys/zil.h> + + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This is the list of known attributes + * to the ZPL. The values of the actual + * attributes are not defined by the order + * the enums. It is controlled by the attribute + * registration mechanism. 
Two different file systems + * could have different numeric values for the same + * attributes. This list is only used for dereferencing + * into the table that will hold the actual numeric value. + */ +typedef enum zpl_attr { + ZPL_ATIME, + ZPL_MTIME, + ZPL_CTIME, + ZPL_CRTIME, + ZPL_GEN, + ZPL_MODE, + ZPL_SIZE, + ZPL_PARENT, + ZPL_LINKS, + ZPL_XATTR, + ZPL_RDEV, + ZPL_FLAGS, + ZPL_UID, + ZPL_GID, + ZPL_PAD, + ZPL_ZNODE_ACL, + ZPL_DACL_COUNT, + ZPL_SYMLINK, + ZPL_SCANSTAMP, + ZPL_DACL_ACES, + ZPL_END +} zpl_attr_t; + +#define ZFS_OLD_ZNODE_PHYS_SIZE 0x108 +#define ZFS_SA_BASE_ATTR_SIZE (ZFS_OLD_ZNODE_PHYS_SIZE - \ + sizeof (zfs_acl_phys_t)) + +#define SA_MODE_OFFSET 0 +#define SA_SIZE_OFFSET 8 +#define SA_GEN_OFFSET 16 +#define SA_UID_OFFSET 24 +#define SA_GID_OFFSET 32 +#define SA_PARENT_OFFSET 40 + +extern sa_attr_reg_t zfs_attr_table[ZPL_END + 1]; +extern sa_attr_reg_t zfs_legacy_attr_table[ZPL_END + 1]; + +/* + * This is a deprecated data structure that only exists for + * dealing with file systems created prior to ZPL version 5. 
+ */ +typedef struct znode_phys { + uint64_t zp_atime[2]; /* 0 - last file access time */ + uint64_t zp_mtime[2]; /* 16 - last file modification time */ + uint64_t zp_ctime[2]; /* 32 - last file change time */ + uint64_t zp_crtime[2]; /* 48 - creation time */ + uint64_t zp_gen; /* 64 - generation (txg of creation) */ + uint64_t zp_mode; /* 72 - file mode bits */ + uint64_t zp_size; /* 80 - size of file */ + uint64_t zp_parent; /* 88 - directory parent (`..') */ + uint64_t zp_links; /* 96 - number of links to file */ + uint64_t zp_xattr; /* 104 - DMU object for xattrs */ + uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ + uint64_t zp_flags; /* 120 - persistent flags */ + uint64_t zp_uid; /* 128 - file owner */ + uint64_t zp_gid; /* 136 - owning group */ + uint64_t zp_zap; /* 144 - extra attributes */ + uint64_t zp_pad[3]; /* 152 - future */ + zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */ + /* + * Data may pad out any remaining bytes in the znode buffer, eg: + * + * |<---------------------- dnode_phys (512) ------------------------>| + * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| + * |<---- znode (264) ---->|<---- data (56) ---->| + * + * At present, we use this space for the following: + * - symbolic links + * - 32-byte anti-virus scanstamp (regular files only) + */ +} znode_phys_t; + +#ifdef _KERNEL +int zfs_sa_readlink(struct znode *, uio_t *); +void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *); +void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *); +void zfs_sa_get_scanstamp(struct znode *, xvattr_t *); +void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *); +void zfs_sa_uprade_pre(struct sa_handle *, void *, dmu_tx_t *); +void zfs_sa_upgrade_post(struct sa_handle *, void *, dmu_tx_t *); +void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_SA_H */ diff --git a/uts/common/fs/zfs/sys/zfs_stat.h b/uts/common/fs/zfs/sys/zfs_stat.h new 
file mode 100644 index 000000000000..465aefaa2063 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_stat.h @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FS_ZFS_STAT_H +#define _SYS_FS_ZFS_STAT_H + +#ifdef _KERNEL +#include <sys/isa_defs.h> +#include <sys/types32.h> +#include <sys/dmu.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A limited number of zpl level stats are retrievable + * with an ioctl. zfs diff is the current consumer. 
+ */ +typedef struct zfs_stat { + uint64_t zs_gen; + uint64_t zs_mode; + uint64_t zs_links; + uint64_t zs_ctime[2]; +} zfs_stat_t; + +extern int zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, + char *buf, int len); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_STAT_H */ diff --git a/uts/common/fs/zfs/sys/zfs_vfsops.h b/uts/common/fs/zfs/sys/zfs_vfsops.h new file mode 100644 index 000000000000..38c87df4300f --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_vfsops.h @@ -0,0 +1,159 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_FS_ZFS_VFSOPS_H +#define _SYS_FS_ZFS_VFSOPS_H + +#include <sys/isa_defs.h> +#include <sys/types32.h> +#include <sys/list.h> +#include <sys/vfs.h> +#include <sys/zil.h> +#include <sys/sa.h> +#include <sys/rrwlock.h> +#include <sys/zfs_ioctl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct zfsvfs zfsvfs_t; +struct znode; + +struct zfsvfs { + vfs_t *z_vfs; /* generic fs struct */ + zfsvfs_t *z_parent; /* parent fs */ + objset_t *z_os; /* objset reference */ + uint64_t z_root; /* id of root znode */ + uint64_t z_unlinkedobj; /* id of unlinked zapobj */ + uint64_t z_max_blksz; /* maximum block size for files */ + uint64_t z_fuid_obj; /* fuid table object number */ + uint64_t z_fuid_size; /* fuid table size */ + avl_tree_t z_fuid_idx; /* fuid tree keyed by index */ + avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */ + krwlock_t z_fuid_lock; /* fuid lock */ + boolean_t z_fuid_loaded; /* fuid tables are loaded */ + boolean_t z_fuid_dirty; /* need to sync fuid table ? 
*/ + struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */ + zilog_t *z_log; /* intent log pointer */ + uint_t z_acl_inherit; /* acl inheritance behavior */ + zfs_case_t z_case; /* case-sense */ + boolean_t z_utf8; /* utf8-only */ + int z_norm; /* normalization flags */ + boolean_t z_atime; /* enable atimes mount option */ + boolean_t z_unmounted; /* unmounted */ + rrwlock_t z_teardown_lock; + krwlock_t z_teardown_inactive_lock; + list_t z_all_znodes; /* all vnodes in the fs */ + kmutex_t z_znodes_lock; /* lock for z_all_znodes */ + vnode_t *z_ctldir; /* .zfs directory pointer */ + boolean_t z_show_ctldir; /* expose .zfs in the root dir */ + boolean_t z_issnap; /* true if this is a snapshot */ + boolean_t z_vscan; /* virus scan on/off */ + boolean_t z_use_fuids; /* version allows fuids */ + boolean_t z_replay; /* set during ZIL replay */ + boolean_t z_use_sa; /* version allow system attributes */ + uint64_t z_version; /* ZPL version */ + uint64_t z_shares_dir; /* hidden shares dir */ + kmutex_t z_lock; + uint64_t z_userquota_obj; + uint64_t z_groupquota_obj; + uint64_t z_replay_eof; /* New end of file - replay only */ + sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ +#define ZFS_OBJ_MTX_SZ 64 + kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */ +}; + +/* + * Normal filesystems (those not under .zfs/snapshot) have a total + * file ID size limited to 12 bytes (including the length field) due to + * NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical + * reasons, this same limit is being imposed by the Solaris NFSv3 implementation + * (although the NFSv3 protocol actually permits a maximum of 64 bytes). It + * is not possible to expand beyond 12 bytes without abandoning support + * of NFSv2. 
+ * + * For normal filesystems, we partition up the available space as follows: + * 2 bytes fid length (required) + * 6 bytes object number (48 bits) + * 4 bytes generation number (32 bits) + * + * We reserve only 48 bits for the object number, as this is the limit + * currently defined and imposed by the DMU. + */ +typedef struct zfid_short { + uint16_t zf_len; + uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */ + uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */ +} zfid_short_t; + +/* + * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes + * (including the length field). This makes files under .zfs/snapshot + * accessible by NFSv3 and NFSv4, but not NFSv2. + * + * For files under .zfs/snapshot, we partition up the available space + * as follows: + * 2 bytes fid length (required) + * 6 bytes object number (48 bits) + * 4 bytes generation number (32 bits) + * 6 bytes objset id (48 bits) + * 4 bytes currently just zero (32 bits) + * + * We reserve only 48 bits for the object number and objset id, as these are + * the limits currently defined and imposed by the DMU. 
+ */ +typedef struct zfid_long { + zfid_short_t z_fid; + uint8_t zf_setid[6]; /* obj[i] = obj >> (8 * i) */ + uint8_t zf_setgen[4]; /* gen[i] = gen >> (8 * i) */ +} zfid_long_t; + +#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t)) +#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t)) + +extern uint_t zfs_fsyncer_key; + +extern int zfs_suspend_fs(zfsvfs_t *zfsvfs); +extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname); +extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + const char *domain, uint64_t rid, uint64_t *valuep); +extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + uint64_t *cookiep, void *vbuf, uint64_t *bufsizep); +extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + const char *domain, uint64_t rid, uint64_t quota); +extern boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, struct znode *, + boolean_t isgroup); +extern boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, + uint64_t fuid); +extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers); +extern int zfsvfs_create(const char *name, zfsvfs_t **zfvp); +extern void zfsvfs_free(zfsvfs_t *zfsvfs); +extern int zfs_check_global_label(const char *dsname, const char *hexsl); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_VFSOPS_H */ diff --git a/uts/common/fs/zfs/sys/zfs_znode.h b/uts/common/fs/zfs/sys/zfs_znode.h new file mode 100644 index 000000000000..3e9621a0ee24 --- /dev/null +++ b/uts/common/fs/zfs/sys/zfs_znode.h @@ -0,0 +1,361 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FS_ZFS_ZNODE_H +#define _SYS_FS_ZFS_ZNODE_H + +#ifdef _KERNEL +#include <sys/isa_defs.h> +#include <sys/types32.h> +#include <sys/attr.h> +#include <sys/list.h> +#include <sys/dmu.h> +#include <sys/sa.h> +#include <sys/zfs_vfsops.h> +#include <sys/rrwlock.h> +#include <sys/zfs_sa.h> +#include <sys/zfs_stat.h> +#endif +#include <sys/zfs_acl.h> +#include <sys/zil.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Additional file level attributes, that are stored + * in the upper half of zp_flags + */ +#define ZFS_READONLY 0x0000000100000000 +#define ZFS_HIDDEN 0x0000000200000000 +#define ZFS_SYSTEM 0x0000000400000000 +#define ZFS_ARCHIVE 0x0000000800000000 +#define ZFS_IMMUTABLE 0x0000001000000000 +#define ZFS_NOUNLINK 0x0000002000000000 +#define ZFS_APPENDONLY 0x0000004000000000 +#define ZFS_NODUMP 0x0000008000000000 +#define ZFS_OPAQUE 0x0000010000000000 +#define ZFS_AV_QUARANTINED 0x0000020000000000 +#define ZFS_AV_MODIFIED 0x0000040000000000 +#define ZFS_REPARSE 0x0000080000000000 +#define ZFS_OFFLINE 0x0000100000000000 +#define ZFS_SPARSE 0x0000200000000000 + +#define ZFS_ATTR_SET(zp, attr, value, pflags, tx) \ +{ \ + if (value) \ + pflags |= attr; \ + else \ + pflags &= ~attr; \ + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zp->z_zfsvfs), \ + &pflags, sizeof (pflags), tx)); \ +} + +/* + * Define special zfs pflags + */ +#define ZFS_XATTR 0x1 /* is an extended 
attribute */ +#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ +#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ +#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */ +#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */ +#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */ +#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */ +#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */ +#define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */ + +#define SA_ZPL_ATIME(z) z->z_attr_table[ZPL_ATIME] +#define SA_ZPL_MTIME(z) z->z_attr_table[ZPL_MTIME] +#define SA_ZPL_CTIME(z) z->z_attr_table[ZPL_CTIME] +#define SA_ZPL_CRTIME(z) z->z_attr_table[ZPL_CRTIME] +#define SA_ZPL_GEN(z) z->z_attr_table[ZPL_GEN] +#define SA_ZPL_DACL_ACES(z) z->z_attr_table[ZPL_DACL_ACES] +#define SA_ZPL_XATTR(z) z->z_attr_table[ZPL_XATTR] +#define SA_ZPL_SYMLINK(z) z->z_attr_table[ZPL_SYMLINK] +#define SA_ZPL_RDEV(z) z->z_attr_table[ZPL_RDEV] +#define SA_ZPL_SCANSTAMP(z) z->z_attr_table[ZPL_SCANSTAMP] +#define SA_ZPL_UID(z) z->z_attr_table[ZPL_UID] +#define SA_ZPL_GID(z) z->z_attr_table[ZPL_GID] +#define SA_ZPL_PARENT(z) z->z_attr_table[ZPL_PARENT] +#define SA_ZPL_LINKS(z) z->z_attr_table[ZPL_LINKS] +#define SA_ZPL_MODE(z) z->z_attr_table[ZPL_MODE] +#define SA_ZPL_DACL_COUNT(z) z->z_attr_table[ZPL_DACL_COUNT] +#define SA_ZPL_FLAGS(z) z->z_attr_table[ZPL_FLAGS] +#define SA_ZPL_SIZE(z) z->z_attr_table[ZPL_SIZE] +#define SA_ZPL_ZNODE_ACL(z) z->z_attr_table[ZPL_ZNODE_ACL] +#define SA_ZPL_PAD(z) z->z_attr_table[ZPL_PAD] + +/* + * Is ID ephemeral? + */ +#define IS_EPHEMERAL(x) (x > MAXUID) + +/* + * Should we use FUIDs? + */ +#define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID && \ + spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID) +#define USE_SA(version, os) (version >= ZPL_VERSION_SA && \ + spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA) + +#define MASTER_NODE_OBJ 1 + +/* + * Special attributes for master node. 
+ * "userquota@" and "groupquota@" are also valid (from + * zfs_userquota_prop_prefixes[]). + */ +#define ZFS_FSID "FSID" +#define ZFS_UNLINKED_SET "DELETE_QUEUE" +#define ZFS_ROOT_OBJ "ROOT" +#define ZPL_VERSION_STR "VERSION" +#define ZFS_FUID_TABLES "FUID" +#define ZFS_SHARES_DIR "SHARES" +#define ZFS_SA_ATTRS "SA_ATTRS" + +#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE) + +/* Path component length */ +/* + * The generic fs code uses MAXNAMELEN to represent + * what the largest component length is. Unfortunately, + * this length includes the terminating NULL. ZFS needs + * to tell the users via pathconf() and statvfs() what the + * true maximum length of a component is, excluding the NULL. + */ +#define ZFS_MAXNAMELEN (MAXNAMELEN - 1) + +/* + * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in + * the directory entries. + */ +#define IFTODT(mode) (((mode) & S_IFMT) >> 12) + +/* + * The directory entry has the type (currently unused on Solaris) in the + * top 4 bits, and the object number in the low 48 bits. The "middle" + * 12 bits are unused. + */ +#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) +#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) + +/* + * Directory entry locks control access to directory entries. + * They are used to protect creates, deletes, and renames. + * Each directory znode has a mutex and a list of locked names. 
+ */ +#ifdef _KERNEL +typedef struct zfs_dirlock { + char *dl_name; /* directory entry being locked */ + uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */ + uint8_t dl_namelock; /* 1 if z_name_lock is NOT held */ + uint16_t dl_namesize; /* set if dl_name was allocated */ + kcondvar_t dl_cv; /* wait for entry to be unlocked */ + struct znode *dl_dzp; /* directory znode */ + struct zfs_dirlock *dl_next; /* next in z_dirlocks list */ +} zfs_dirlock_t; + +typedef struct znode { + struct zfsvfs *z_zfsvfs; + vnode_t *z_vnode; + uint64_t z_id; /* object ID for this znode */ + kmutex_t z_lock; /* znode modification lock */ + krwlock_t z_parent_lock; /* parent lock for directories */ + krwlock_t z_name_lock; /* "master" lock for dirent locks */ + zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ + kmutex_t z_range_lock; /* protects changes to z_range_avl */ + avl_tree_t z_range_avl; /* avl tree of file range locks */ + uint8_t z_unlinked; /* file has been unlinked */ + uint8_t z_atime_dirty; /* atime needs to be synced */ + uint8_t z_zn_prefetch; /* Prefetch znodes? */ + uint8_t z_moved; /* Has this znode been moved? */ + uint_t z_blksz; /* block size in bytes */ + uint_t z_seq; /* modification sequence number */ + uint64_t z_mapcnt; /* number of pages mapped to file */ + uint64_t z_gen; /* generation (cached) */ + uint64_t z_size; /* file size (cached) */ + uint64_t z_atime[2]; /* atime (cached) */ + uint64_t z_links; /* file links (cached) */ + uint64_t z_pflags; /* pflags (cached) */ + uint64_t z_uid; /* uid fuid (cached) */ + uint64_t z_gid; /* gid fuid (cached) */ + mode_t z_mode; /* mode (cached) */ + uint32_t z_sync_cnt; /* synchronous open count */ + kmutex_t z_acl_lock; /* acl data lock */ + zfs_acl_t *z_acl_cached; /* cached acl */ + list_node_t z_link_node; /* all znodes in fs link */ + sa_handle_t *z_sa_hdl; /* handle to sa data */ + boolean_t z_is_sa; /* are we native sa? 
*/ +} znode_t; + + +/* + * Range locking rules + * -------------------- + * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole + * file range needs to be locked as RL_WRITER. Only then can the pages be + * freed etc and zp_size reset. zp_size must be set within range lock. + * 2. For writes and punching holes (zfs_write & zfs_space) just the range + * being written or freed needs to be locked as RL_WRITER. + * Multiple writes at the end of the file must coordinate zp_size updates + * to ensure data isn't lost. A compare and swap loop is currently used + * to ensure the file size is at least the offset last written. + * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being + * read needs to be locked as RL_READER. A check against zp_size can then + * be made for reading beyond end of file. + */ + +/* + * Convert between znode pointers and vnode pointers + */ +#define ZTOV(ZP) ((ZP)->z_vnode) +#define VTOZ(VP) ((znode_t *)(VP)->v_data) + +/* + * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation. + * ZFS_EXIT() must be called before exiting the vop. + * ZFS_VERIFY_ZP() verifies the znode is valid. 
+ */ +#define ZFS_ENTER(zfsvfs) \ + { \ + rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \ + if ((zfsvfs)->z_unmounted) { \ + ZFS_EXIT(zfsvfs); \ + return (EIO); \ + } \ + } + +#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG) + +#define ZFS_VERIFY_ZP(zp) \ + if ((zp)->z_sa_hdl == NULL) { \ + ZFS_EXIT((zp)->z_zfsvfs); \ + return (EIO); \ + } \ + +/* + * Macros for dealing with dmu_buf_hold + */ +#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1)) +#define ZFS_OBJ_MUTEX(zfsvfs, obj_num) \ + (&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)]) +#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \ + mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num))) +#define ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \ + mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num))) +#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \ + mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num))) + +/* + * Macros to encode/decode ZFS stored time values from/to struct timespec + */ +#define ZFS_TIME_ENCODE(tp, stmp) \ +{ \ + (stmp)[0] = (uint64_t)(tp)->tv_sec; \ + (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ +} + +#define ZFS_TIME_DECODE(tp, stmp) \ +{ \ + (tp)->tv_sec = (time_t)(stmp)[0]; \ + (tp)->tv_nsec = (long)(stmp)[1]; \ +} + +/* + * Timestamp defines + */ +#define ACCESSED (AT_ATIME) +#define STATE_CHANGED (AT_CTIME) +#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME) + +#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \ + if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \ + zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE); + +extern int zfs_init_fs(zfsvfs_t *, znode_t **); +extern void zfs_set_dataprop(objset_t *); +extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *, + dmu_tx_t *tx); +extern void zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2], + uint64_t [2], boolean_t); +extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *); +extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t); +extern void zfs_znode_init(void); +extern void 
zfs_znode_fini(void); +extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **); +extern int zfs_rezget(znode_t *); +extern void zfs_zinactive(znode_t *); +extern void zfs_znode_delete(znode_t *, dmu_tx_t *); +extern void zfs_znode_free(znode_t *); +extern void zfs_remove_op_tables(); +extern int zfs_create_op_tables(); +extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr); +extern dev_t zfs_cmpldev(uint64_t); +extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value); +extern int zfs_get_stats(objset_t *os, nvlist_t *nv); +extern void zfs_znode_dmu_fini(znode_t *); + +extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *, + vattr_t *vap); +extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, + vattr_t *vap); +extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *dzp, char *name, uint64_t foid); +#define ZFS_NO_OBJECT 0 /* no object id */ +extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *dzp, znode_t *zp, char *name); +extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *dzp, znode_t *zp, char *name, char *link); +extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp); +extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, + znode_t *zp, offset_t off, ssize_t len, int ioflag); +extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, + znode_t *zp, uint64_t off, uint64_t len); +extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, + znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp); +extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, + vsecattr_t *vsecp, zfs_fuid_info_t *fuidp); +extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx); +extern void 
zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx); +extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx); + +extern caddr_t zfs_map_page(page_t *, enum seg_rw); +extern void zfs_unmap_page(page_t *, caddr_t); + +extern zil_get_data_t zfs_get_data; +extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; +extern int zfsfstype; + +#endif /* _KERNEL */ + +extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_ZNODE_H */ diff --git a/uts/common/fs/zfs/sys/zil.h b/uts/common/fs/zfs/sys/zil.h new file mode 100644 index 000000000000..a4c5575b2dba --- /dev/null +++ b/uts/common/fs/zfs/sys/zil.h @@ -0,0 +1,428 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#ifndef _SYS_ZIL_H +#define _SYS_ZIL_H + +#include <sys/types.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/dmu.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Intent log format: + * + * Each objset has its own intent log. 
The log header (zil_header_t) + * for objset N's intent log is kept in the Nth object of the SPA's + * intent_log objset. The log header points to a chain of log blocks, + * each of which contains log records (i.e., transactions) followed by + * a log block trailer (zil_trailer_t). The format of a log record + * depends on the record (or transaction) type, but all records begin + * with a common structure that defines the type, length, and txg. + */ + +/* + * Intent log header - this on disk structure holds fields to manage + * the log. All fields are 64 bit to easily handle cross architectures. + */ +typedef struct zil_header { + uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ + uint64_t zh_replay_seq; /* highest replayed sequence number */ + blkptr_t zh_log; /* log chain */ + uint64_t zh_claim_blk_seq; /* highest claimed block sequence number */ + uint64_t zh_flags; /* header flags */ + uint64_t zh_claim_lr_seq; /* highest claimed lr sequence number */ + uint64_t zh_pad[3]; +} zil_header_t; + +/* + * zh_flags bit settings + */ +#define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */ +#define ZIL_CLAIM_LR_SEQ_VALID 0x2 /* zh_claim_lr_seq field is valid */ + +/* + * Log block chaining. + * + * Log blocks are chained together. Originally they were chained at the + * end of the block. For performance reasons the chain was moved to the + * beginning of the block which allows writes for only the data being used. + * The older position is supported for backwards compatibility. + * + * The zio_eck_t contains a zec_cksum which for the intent log is + * the sequence number of this log block. A seq of 0 is invalid. 
+ * The zec_cksum is checked by the SPA against the sequence + * number passed in the blk_cksum field of the blkptr_t + */ +typedef struct zil_chain { + uint64_t zc_pad; + blkptr_t zc_next_blk; /* next block in chain */ + uint64_t zc_nused; /* bytes in log block used */ + zio_eck_t zc_eck; /* block trailer */ +} zil_chain_t; + +#define ZIL_MIN_BLKSZ 4096ULL +#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE + +/* + * The words of a log block checksum. + */ +#define ZIL_ZC_GUID_0 0 +#define ZIL_ZC_GUID_1 1 +#define ZIL_ZC_OBJSET 2 +#define ZIL_ZC_SEQ 3 + +typedef enum zil_create { + Z_FILE, + Z_DIR, + Z_XATTRDIR, +} zil_create_t; + +/* + * size of xvattr log section. + * it's composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps + * for create time and a single 64 bit integer for all of the attributes, + * and 4 64 bit integers (32 bytes) for the scanstamp. + * + * The expansion and the mapsize argument are parenthesized so the macro + * evaluates as a single value inside any larger expression. + */ + +#define ZIL_XVAT_SIZE(mapsize) \ + (sizeof (lr_attr_t) + (sizeof (uint32_t) * ((mapsize) - 1)) + \ + (sizeof (uint64_t) * 7)) + +/* + * Size of ACL in log. The ACE data is padded out to properly align + * on 8 byte boundary. 
+ */ + +#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t))) + +/* + * Intent log transaction types and record structures + */ +#define TX_CREATE 1 /* Create file */ +#define TX_MKDIR 2 /* Make directory */ +#define TX_MKXATTR 3 /* Make XATTR directory */ +#define TX_SYMLINK 4 /* Create symbolic link to a file */ +#define TX_REMOVE 5 /* Remove file */ +#define TX_RMDIR 6 /* Remove directory */ +#define TX_LINK 7 /* Create hard link to a file */ +#define TX_RENAME 8 /* Rename a file */ +#define TX_WRITE 9 /* File write */ +#define TX_TRUNCATE 10 /* Truncate a file */ +#define TX_SETATTR 11 /* Set file attributes */ +#define TX_ACL_V0 12 /* Set old formatted ACL */ +#define TX_ACL 13 /* Set ACL */ +#define TX_CREATE_ACL 14 /* create with ACL */ +#define TX_CREATE_ATTR 15 /* create + attrs */ +#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */ +#define TX_MKDIR_ACL 17 /* mkdir with ACL */ +#define TX_MKDIR_ATTR 18 /* mkdir with attr */ +#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */ +#define TX_WRITE2 20 /* dmu_sync EALREADY write */ +#define TX_MAX_TYPE 21 /* Max transaction type */ + +/* + * The transactions for mkdir, symlink, remove, rmdir, link, and rename + * may have the following bit set, indicating the original request + * specified case-insensitive handling of names. + */ +#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */ + +/* + * Transactions for write, truncate, setattr, acl_v0, and acl can be logged + * out of order. For convenience in the code, all such records must have + * lr_foid at the same offset. + */ +#define TX_OOO(txtype) \ + ((txtype) == TX_WRITE || \ + (txtype) == TX_TRUNCATE || \ + (txtype) == TX_SETATTR || \ + (txtype) == TX_ACL_V0 || \ + (txtype) == TX_ACL || \ + (txtype) == TX_WRITE2) + +/* + * Format of log records. + * The fields are carefully defined to allow them to be aligned + * and sized the same on sparc & intel architectures. 
+ * Each log record has a common structure at the beginning. + * + * The log record on disk (lrc_seq) holds the sequence number of all log + * records which is used to ensure we don't replay the same record. + */ +typedef struct { /* common log record header */ + uint64_t lrc_txtype; /* intent log transaction type */ + uint64_t lrc_reclen; /* transaction record length */ + uint64_t lrc_txg; /* dmu transaction group number */ + uint64_t lrc_seq; /* see comment above */ +} lr_t; + +/* + * Common start of all out-of-order record types (TX_OOO() above). + */ +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* object id */ +} lr_ooo_t; + +/* + * Handle optional extended vattr attributes. + * + * Whenever new attributes are added the version number + * will need to be updated as will code in + * zfs_log.c and zfs_replay.c + */ +typedef struct { + uint32_t lr_attr_masksize; /* number of elements in array */ + uint32_t lr_attr_bitmap; /* First entry of array */ + /* remainder of array and any additional fields */ +} lr_attr_t; + +/* + * log record for creates without optional ACL. + * This log record does support optional xvattr_t attributes. + */ +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_doid; /* object id of directory */ + uint64_t lr_foid; /* object id of created file object */ + uint64_t lr_mode; /* mode of object */ + uint64_t lr_uid; /* uid of object */ + uint64_t lr_gid; /* gid of object */ + uint64_t lr_gen; /* generation (txg of creation) */ + uint64_t lr_crtime[2]; /* creation time */ + uint64_t lr_rdev; /* rdev of object to create */ + /* name of object to create follows this */ + /* for symlinks, link content follows name */ + /* for creates with xvattr data, the name follows the xvattr info */ +} lr_create_t; + +/* + * FUID ACL record will be an array of ACEs from the original ACL. 
+ * If this array includes ephemeral IDs, the record will also include + * an array of log-specific FUIDs to replace the ephemeral IDs. + * Only one copy of each unique domain will be present, so the log-specific + * FUIDs will use an index into a compressed domain table. On replay this + * information will be used to construct real FUIDs (and bypass idmap, + * since it may not be available). + */ + +/* + * Log record for creates with optional ACL + * This log record is also used for recording any FUID + * information needed for replaying the create. If the + * file doesn't have any actual ACEs then the lr_aclcnt + * would be zero. + */ +typedef struct { + lr_create_t lr_create; /* common create portion */ + uint64_t lr_aclcnt; /* number of ACEs in ACL */ + uint64_t lr_domcnt; /* number of unique domains */ + uint64_t lr_fuidcnt; /* number of real fuids */ + uint64_t lr_acl_bytes; /* number of bytes in ACL */ + uint64_t lr_acl_flags; /* ACL flags */ + /* lr_acl_bytes number of variable sized ace's follows */ + /* if create is also setting xvattr's, then acl data follows xvattr */ + /* if ACE FUIDs are needed then they will follow the xvattr_t */ + /* Following the FUIDs will be the domain table information. */ + /* The FUIDs for the owner and group will be in the lr_create */ + /* portion of the record. 
*/ + /* name follows ACL data */ +} lr_acl_create_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_doid; /* obj id of directory */ + /* name of object to remove follows this */ +} lr_remove_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_doid; /* obj id of directory */ + uint64_t lr_link_obj; /* obj id of link */ + /* name of object to link follows this */ +} lr_link_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_sdoid; /* obj id of source directory */ + uint64_t lr_tdoid; /* obj id of target directory */ + /* 2 strings: names of source and destination follow this */ +} lr_rename_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* file object to write */ + uint64_t lr_offset; /* offset to write to */ + uint64_t lr_length; /* user data length to write */ + uint64_t lr_blkoff; /* no longer used */ + blkptr_t lr_blkptr; /* spa block pointer for replay */ + /* write data will follow for small writes */ +} lr_write_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* object id of file to truncate */ + uint64_t lr_offset; /* offset to truncate from */ + uint64_t lr_length; /* length to truncate */ +} lr_truncate_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* file object to change attributes */ + uint64_t lr_mask; /* mask of attributes to set */ + uint64_t lr_mode; /* mode to set */ + uint64_t lr_uid; /* uid to set */ + uint64_t lr_gid; /* gid to set */ + uint64_t lr_size; /* size to set */ + uint64_t lr_atime[2]; /* access time */ + uint64_t lr_mtime[2]; /* modification time */ + /* optional attribute lr_attr_t may be here */ +} lr_setattr_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* obj id of file */ + uint64_t lr_aclcnt; /* number of acl entries */ + 
/* lr_aclcnt number of ace_t entries follow this */ +} lr_acl_v0_t; + +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* obj id of file */ + uint64_t lr_aclcnt; /* number of ACEs in ACL */ + uint64_t lr_domcnt; /* number of unique domains */ + uint64_t lr_fuidcnt; /* number of real fuids */ + uint64_t lr_acl_bytes; /* number of bytes in ACL */ + uint64_t lr_acl_flags; /* ACL flags */ + /* lr_acl_bytes number of variable sized ace's follows */ +} lr_acl_t; + +/* + * ZIL structure definitions, interface function prototype and globals. + */ + +/* + * Writes are handled in three different ways: + * + * WR_INDIRECT: + * In this mode, if we need to commit the write later, then the block + * is immediately written into the file system (using dmu_sync), + * and a pointer to the block is put into the log record. + * When the txg commits the block is linked in. + * This saves additionally writing the data into the log record. + * There are a few requirements for this to occur: + * - write is greater than zfs/zvol_immediate_write_sz + * - not using slogs (as slogs are assumed to always be faster + * than writing into the main pool) + * - the write occupies only one block + * WR_COPIED: + * If we know we'll immediately be committing the + * transaction (FSYNC or FDSYNC), then we allocate a larger + * log record here for the data and copy the data in. + * WR_NEED_COPY: + * Otherwise we don't allocate a buffer, and *if* we need to + * flush the write later then a buffer is allocated and + * we retrieve the data using the dmu. 
+ */ +typedef enum { + WR_INDIRECT, /* indirect - a large write (dmu_sync() data */ + /* and put blkptr in log, rather than actual data) */ + WR_COPIED, /* immediate - data is copied into lr_write_t */ + WR_NEED_COPY, /* immediate - data needs to be copied if pushed */ + WR_NUM_STATES /* number of states */ +} itx_wr_state_t; + +typedef struct itx { + list_node_t itx_node; /* linkage on zl_itx_list */ + void *itx_private; /* type-specific opaque data */ + itx_wr_state_t itx_wr_state; /* write state */ + uint8_t itx_sync; /* synchronous transaction */ + uint64_t itx_sod; /* record size on disk */ + uint64_t itx_oid; /* object id */ + lr_t itx_lr; /* common part of log record */ + /* followed by type-specific part of lr_xx_t and its immediate data */ +} itx_t; + +typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg, + uint64_t txg); +typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg, + uint64_t txg); +typedef int zil_replay_func_t(); +typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); + +extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); + +extern void zil_init(void); +extern void zil_fini(void); + +extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys); +extern void zil_free(zilog_t *zilog); + +extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data); +extern void zil_close(zilog_t *zilog); + +extern void zil_replay(objset_t *os, void *arg, + zil_replay_func_t *replay_func[TX_MAX_TYPE]); +extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx); +extern void zil_destroy(zilog_t *zilog, boolean_t keep_first); +extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx); + +extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); +extern void zil_itx_destroy(itx_t *itx); +extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx); + +extern void zil_commit(zilog_t *zilog, 
uint64_t oid); + +extern int zil_vdev_offline(const char *osname, void *txarg); +extern int zil_claim(const char *osname, void *txarg); +extern int zil_check_log_chain(const char *osname, void *txarg); +extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx); +extern void zil_clean(zilog_t *zilog, uint64_t synced_txg); + +extern int zil_suspend(zilog_t *zilog); +extern void zil_resume(zilog_t *zilog); + +extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp); +extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp); + +extern void zil_set_sync(zilog_t *zilog, uint64_t syncval); + +extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); + +extern int zil_replay_disable; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIL_H */ diff --git a/uts/common/fs/zfs/sys/zil_impl.h b/uts/common/fs/zfs/sys/zil_impl.h new file mode 100644 index 000000000000..1d4c0cc6c1de --- /dev/null +++ b/uts/common/fs/zfs/sys/zil_impl.h @@ -0,0 +1,147 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#ifndef _SYS_ZIL_IMPL_H +#define _SYS_ZIL_IMPL_H + +#include <sys/zil.h> +#include <sys/dmu_objset.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Log write buffer. + */ +typedef struct lwb { + zilog_t *lwb_zilog; /* back pointer to log struct */ + blkptr_t lwb_blk; /* on disk address of this log blk */ + int lwb_nused; /* # used bytes in buffer */ + int lwb_sz; /* size of block and buffer */ + char *lwb_buf; /* log write buffer */ + zio_t *lwb_zio; /* zio for this buffer */ + dmu_tx_t *lwb_tx; /* tx for log block allocation */ + uint64_t lwb_max_txg; /* highest txg in this lwb */ + list_node_t lwb_node; /* zilog->zl_lwb_list linkage */ +} lwb_t; + +/* + * Intent log transaction lists + */ +typedef struct itxs { + list_t i_sync_list; /* list of synchronous itxs */ + avl_tree_t i_async_tree; /* tree of foids for async itxs */ +} itxs_t; + +typedef struct itxg { + kmutex_t itxg_lock; /* lock for this structure */ + uint64_t itxg_txg; /* txg for this chain */ + uint64_t itxg_sod; /* total size on disk for this txg */ + itxs_t *itxg_itxs; /* sync and async itxs */ +} itxg_t; + +/* for async nodes we build up an AVL tree of lists of async itxs per file */ +typedef struct itx_async_node { + uint64_t ia_foid; /* file object id */ + list_t ia_list; /* list of async itxs for this foid */ + avl_node_t ia_node; /* AVL tree linkage */ +} itx_async_node_t; + +/* + * Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs + * we've touched so we know which ones need a write cache flush at the end. + */ +typedef struct zil_vdev_node { + uint64_t zv_vdev; /* vdev to be flushed */ + avl_node_t zv_node; /* AVL tree linkage */ +} zil_vdev_node_t; + +#define ZIL_PREV_BLKS 16 + +/* + * Stable storage intent log management structure. One per dataset. 
+ */ +struct zilog { + kmutex_t zl_lock; /* protects most zilog_t fields */ + struct dsl_pool *zl_dmu_pool; /* DSL pool */ + spa_t *zl_spa; /* handle for read/write log */ + const zil_header_t *zl_header; /* log header buffer */ + objset_t *zl_os; /* object set we're logging */ + zil_get_data_t *zl_get_data; /* callback to get object content */ + zio_t *zl_root_zio; /* log writer root zio */ + uint64_t zl_lr_seq; /* on-disk log record sequence number */ + uint64_t zl_commit_lr_seq; /* last committed on-disk lr seq */ + uint64_t zl_destroy_txg; /* txg of last zil_destroy() */ + uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */ + uint64_t zl_replaying_seq; /* current replay seq number */ + uint32_t zl_suspend; /* log suspend count */ + kcondvar_t zl_cv_writer; /* log writer thread completion */ + kcondvar_t zl_cv_suspend; /* log suspend completion */ + uint8_t zl_suspending; /* log is currently suspending */ + uint8_t zl_keep_first; /* keep first log block in destroy */ + uint8_t zl_replay; /* replaying records while set */ + uint8_t zl_stop_sync; /* for debugging */ + uint8_t zl_writer; /* boolean: write setup in progress */ + uint8_t zl_logbias; /* latency or throughput */ + uint8_t zl_sync; /* synchronous or asynchronous */ + int zl_parse_error; /* last zil_parse() error */ + uint64_t zl_parse_blk_seq; /* highest blk seq on last parse */ + uint64_t zl_parse_lr_seq; /* highest lr seq on last parse */ + uint64_t zl_parse_blk_count; /* number of blocks parsed */ + uint64_t zl_parse_lr_count; /* number of log records parsed */ + uint64_t zl_next_batch; /* next batch number */ + uint64_t zl_com_batch; /* committed batch number */ + kcondvar_t zl_cv_batch[2]; /* batch condition variables */ + itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */ + list_t zl_itx_commit_list; /* itx list to be committed */ + uint64_t zl_itx_list_sz; /* total size of records on list */ + uint64_t zl_cur_used; /* current commit log size used */ + list_t zl_lwb_list; /* 
in-flight log write list */ + kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */ + avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */ + taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */ + avl_tree_t zl_bp_tree; /* track bps during log parse */ + clock_t zl_replay_time; /* lbolt of when replay started */ + uint64_t zl_replay_blks; /* number of log blocks replayed */ + zil_header_t zl_old_header; /* debugging aid */ + uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */ + uint_t zl_prev_rotor; /* rotor for zl_prev_blks[] */ +}; + +typedef struct zil_bp_node { + dva_t zn_dva; + avl_node_t zn_node; +} zil_bp_node_t; + +#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ + sizeof (lr_write_t)) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIL_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/zio.h b/uts/common/fs/zfs/sys/zio.h new file mode 100644 index 000000000000..97d8ec74d2e9 --- /dev/null +++ b/uts/common/fs/zfs/sys/zio.h @@ -0,0 +1,559 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _ZIO_H +#define _ZIO_H + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/avl.h> +#include <sys/fs/zfs.h> +#include <sys/zio_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Embedded checksum + */ +#define ZEC_MAGIC 0x210da7ab10c7a11ULL + +typedef struct zio_eck { + uint64_t zec_magic; /* for validation, endianness */ + zio_cksum_t zec_cksum; /* 256-bit checksum */ +} zio_eck_t; + +/* + * Gang block headers are self-checksumming and contain an array + * of block pointers. + */ +#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE +#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ + sizeof (zio_eck_t)) / sizeof (blkptr_t)) +#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ + sizeof (zio_eck_t) - \ + (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ + sizeof (uint64_t)) + +typedef struct zio_gbh { + blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; + uint64_t zg_filler[SPA_GBH_FILLER]; + zio_eck_t zg_tail; +} zio_gbh_phys_t; + +enum zio_checksum { + ZIO_CHECKSUM_INHERIT = 0, + ZIO_CHECKSUM_ON, + ZIO_CHECKSUM_OFF, + ZIO_CHECKSUM_LABEL, + ZIO_CHECKSUM_GANG_HEADER, + ZIO_CHECKSUM_ZILOG, + ZIO_CHECKSUM_FLETCHER_2, + ZIO_CHECKSUM_FLETCHER_4, + ZIO_CHECKSUM_SHA256, + ZIO_CHECKSUM_ZILOG2, + ZIO_CHECKSUM_FUNCTIONS +}; + +#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4 +#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON + +#define ZIO_CHECKSUM_MASK 0xffULL +#define ZIO_CHECKSUM_VERIFY (1 << 8) + +#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 +#define ZIO_DEDUPDITTO_MIN 100 + +enum zio_compress { + ZIO_COMPRESS_INHERIT = 0, + ZIO_COMPRESS_ON, + ZIO_COMPRESS_OFF, + ZIO_COMPRESS_LZJB, + ZIO_COMPRESS_EMPTY, + ZIO_COMPRESS_GZIP_1, + ZIO_COMPRESS_GZIP_2, + ZIO_COMPRESS_GZIP_3, + ZIO_COMPRESS_GZIP_4, + ZIO_COMPRESS_GZIP_5, + ZIO_COMPRESS_GZIP_6, + ZIO_COMPRESS_GZIP_7, + ZIO_COMPRESS_GZIP_8, + ZIO_COMPRESS_GZIP_9, + ZIO_COMPRESS_ZLE, + ZIO_COMPRESS_FUNCTIONS +}; + +#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB +#define ZIO_COMPRESS_DEFAULT 
ZIO_COMPRESS_OFF + +#define BOOTFS_COMPRESS_VALID(compress) \ + ((compress) == ZIO_COMPRESS_LZJB || \ + ((compress) == ZIO_COMPRESS_ON && \ + ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \ + (compress) == ZIO_COMPRESS_OFF) + +#define ZIO_FAILURE_MODE_WAIT 0 +#define ZIO_FAILURE_MODE_CONTINUE 1 +#define ZIO_FAILURE_MODE_PANIC 2 + +#define ZIO_PRIORITY_NOW (zio_priority_table[0]) +#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1]) +#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2]) +#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[3]) +#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[4]) +#define ZIO_PRIORITY_AGG (zio_priority_table[5]) +#define ZIO_PRIORITY_FREE (zio_priority_table[6]) +#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[7]) +#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8]) +#define ZIO_PRIORITY_RESILVER (zio_priority_table[9]) +#define ZIO_PRIORITY_SCRUB (zio_priority_table[10]) +#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11]) +#define ZIO_PRIORITY_TABLE_SIZE 12 + +#define ZIO_PIPELINE_CONTINUE 0x100 +#define ZIO_PIPELINE_STOP 0x101 + +enum zio_flag { + /* + * Flags inherited by gang, ddt, and vdev children, + * and that must be equal for two zios to aggregate + */ + ZIO_FLAG_DONT_AGGREGATE = 1 << 0, + ZIO_FLAG_IO_REPAIR = 1 << 1, + ZIO_FLAG_SELF_HEAL = 1 << 2, + ZIO_FLAG_RESILVER = 1 << 3, + ZIO_FLAG_SCRUB = 1 << 4, + ZIO_FLAG_SCAN_THREAD = 1 << 5, + +#define ZIO_FLAG_AGG_INHERIT (ZIO_FLAG_CANFAIL - 1) + + /* + * Flags inherited by ddt, gang, and vdev children. + */ + ZIO_FLAG_CANFAIL = 1 << 6, /* must be first for INHERIT */ + ZIO_FLAG_SPECULATIVE = 1 << 7, + ZIO_FLAG_CONFIG_WRITER = 1 << 8, + ZIO_FLAG_DONT_RETRY = 1 << 9, + ZIO_FLAG_DONT_CACHE = 1 << 10, + ZIO_FLAG_NODATA = 1 << 11, + ZIO_FLAG_INDUCE_DAMAGE = 1 << 12, + +#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1) +#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1) + + /* + * Flags inherited by vdev children. 
+ */ + ZIO_FLAG_IO_RETRY = 1 << 13, /* must be first for INHERIT */ + ZIO_FLAG_PROBE = 1 << 14, + ZIO_FLAG_TRYHARD = 1 << 15, + ZIO_FLAG_OPTIONAL = 1 << 16, + +#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1) + + /* + * Flags not inherited by any children. + */ + ZIO_FLAG_DONT_QUEUE = 1 << 17, /* must be first for INHERIT */ + ZIO_FLAG_DONT_PROPAGATE = 1 << 18, + ZIO_FLAG_IO_BYPASS = 1 << 19, + ZIO_FLAG_IO_REWRITE = 1 << 20, + ZIO_FLAG_RAW = 1 << 21, + ZIO_FLAG_GANG_CHILD = 1 << 22, + ZIO_FLAG_DDT_CHILD = 1 << 23, + ZIO_FLAG_GODFATHER = 1 << 24 +}; + +#define ZIO_FLAG_MUSTSUCCEED 0 + +#define ZIO_DDT_CHILD_FLAGS(zio) \ + (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ + ZIO_FLAG_DDT_CHILD | ZIO_FLAG_CANFAIL) + +#define ZIO_GANG_CHILD_FLAGS(zio) \ + (((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \ + ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL) + +#define ZIO_VDEV_CHILD_FLAGS(zio) \ + (((zio)->io_flags & ZIO_FLAG_VDEV_INHERIT) | \ + ZIO_FLAG_CANFAIL) + +enum zio_child { + ZIO_CHILD_VDEV = 0, + ZIO_CHILD_GANG, + ZIO_CHILD_DDT, + ZIO_CHILD_LOGICAL, + ZIO_CHILD_TYPES +}; + +enum zio_wait_type { + ZIO_WAIT_READY = 0, + ZIO_WAIT_DONE, + ZIO_WAIT_TYPES +}; + +/* + * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent + * graveyard) to indicate checksum errors and fragmentation. + */ +#define ECKSUM EBADE +#define EFRAGS EBADR + +typedef void zio_done_func_t(zio_t *zio); + +extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE]; +extern char *zio_type_name[ZIO_TYPES]; + +/* + * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely + * identifies any block in the pool. By convention, the meta-objset (MOS) + * is objset 0, and the meta-dnode is object 0. This covers all blocks + * except root blocks and ZIL blocks, which are defined as follows: + * + * Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>. + * ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>. 
+ * dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>. + * + * Note: this structure is called a bookmark because its original purpose + * was to remember where to resume a pool-wide traverse. + * + * Note: this structure is passed between userland and the kernel. + * Therefore it must not change size or alignment between 32/64 bit + * compilation options. + */ +typedef struct zbookmark { + uint64_t zb_objset; + uint64_t zb_object; + int64_t zb_level; + uint64_t zb_blkid; +} zbookmark_t; + +#define SET_BOOKMARK(zb, objset, object, level, blkid) \ +{ \ + (zb)->zb_objset = objset; \ + (zb)->zb_object = object; \ + (zb)->zb_level = level; \ + (zb)->zb_blkid = blkid; \ +} + +#define ZB_DESTROYED_OBJSET (-1ULL) + +#define ZB_ROOT_OBJECT (0ULL) +#define ZB_ROOT_LEVEL (-1LL) +#define ZB_ROOT_BLKID (0ULL) + +#define ZB_ZIL_OBJECT (0ULL) +#define ZB_ZIL_LEVEL (-2LL) + +typedef struct zio_prop { + enum zio_checksum zp_checksum; + enum zio_compress zp_compress; + dmu_object_type_t zp_type; + uint8_t zp_level; + uint8_t zp_copies; + uint8_t zp_dedup; + uint8_t zp_dedup_verify; +} zio_prop_t; + +typedef struct zio_cksum_report zio_cksum_report_t; + +typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, + const void *good_data); +typedef void zio_cksum_free_f(void *cbdata, size_t size); + +struct zio_bad_cksum; /* defined in zio_checksum.h */ + +struct zio_cksum_report { + struct zio_cksum_report *zcr_next; + nvlist_t *zcr_ereport; + nvlist_t *zcr_detector; + void *zcr_cbdata; + size_t zcr_cbinfo; /* passed to zcr_free() */ + uint64_t zcr_align; + uint64_t zcr_length; + zio_cksum_finish_f *zcr_finish; + zio_cksum_free_f *zcr_free; + + /* internal use only */ + struct zio_bad_cksum *zcr_ckinfo; /* information from failure */ +}; + +typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr, + void *arg); + +zio_vsd_cksum_report_f zio_vsd_default_cksum_report; + +typedef struct zio_vsd_ops { + zio_done_func_t *vsd_free; + zio_vsd_cksum_report_f 
*vsd_cksum_report; +} zio_vsd_ops_t; + +typedef struct zio_gang_node { + zio_gbh_phys_t *gn_gbh; + struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS]; +} zio_gang_node_t; + +typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, + zio_gang_node_t *gn, void *data); + +typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size); + +typedef struct zio_transform { + void *zt_orig_data; + uint64_t zt_orig_size; + uint64_t zt_bufsize; + zio_transform_func_t *zt_transform; + struct zio_transform *zt_next; +} zio_transform_t; + +typedef int zio_pipe_stage_t(zio_t *zio); + +/* + * The io_reexecute flags are distinct from io_flags because the child must + * be able to propagate them to the parent. The normal io_flags are local + * to the zio, not protected by any lock, and not modifiable by children; + * the reexecute flags are protected by io_lock, modifiable by children, + * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. + */ +#define ZIO_REEXECUTE_NOW 0x01 +#define ZIO_REEXECUTE_SUSPEND 0x02 + +typedef struct zio_link { + zio_t *zl_parent; + zio_t *zl_child; + list_node_t zl_parent_node; + list_node_t zl_child_node; +} zio_link_t; + +struct zio { + /* Core information about this I/O */ + zbookmark_t io_bookmark; + zio_prop_t io_prop; + zio_type_t io_type; + enum zio_child io_child_type; + int io_cmd; + uint8_t io_priority; + uint8_t io_reexecute; + uint8_t io_state[ZIO_WAIT_TYPES]; + uint64_t io_txg; + spa_t *io_spa; + blkptr_t *io_bp; + blkptr_t *io_bp_override; + blkptr_t io_bp_copy; + list_t io_parent_list; + list_t io_child_list; + zio_link_t *io_walk_link; + zio_t *io_logical; + zio_transform_t *io_transform_stack; + + /* Callback info */ + zio_done_func_t *io_ready; + zio_done_func_t *io_done; + void *io_private; + int64_t io_prev_space_delta; /* DMU private */ + blkptr_t io_bp_orig; + + /* Data represented by this I/O */ + void *io_data; + void *io_orig_data; + uint64_t io_size; + uint64_t io_orig_size; + + /* Stuff for the 
vdev stack */ + vdev_t *io_vd; + void *io_vsd; + const zio_vsd_ops_t *io_vsd_ops; + + uint64_t io_offset; + uint64_t io_deadline; + avl_node_t io_offset_node; + avl_node_t io_deadline_node; + avl_tree_t *io_vdev_tree; + + /* Internal pipeline state */ + enum zio_flag io_flags; + enum zio_stage io_stage; + enum zio_stage io_pipeline; + enum zio_flag io_orig_flags; + enum zio_stage io_orig_stage; + enum zio_stage io_orig_pipeline; + int io_error; + int io_child_error[ZIO_CHILD_TYPES]; + uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; + uint64_t io_child_count; + uint64_t io_parent_count; + uint64_t *io_stall; + zio_t *io_gang_leader; + zio_gang_node_t *io_gang_tree; + void *io_executor; + void *io_waiter; + kmutex_t io_lock; + kcondvar_t io_cv; + + /* FMA state */ + zio_cksum_report_t *io_cksum_report; + uint64_t io_ena; +}; + +extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, + zio_done_func_t *done, void *private, enum zio_flag flags); + +extern zio_t *zio_root(spa_t *spa, + zio_done_func_t *done, void *private, enum zio_flag flags); + +extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, + uint64_t size, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, const zbookmark_t *zb); + +extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, + void *data, uint64_t size, const zio_prop_t *zp, + zio_done_func_t *ready, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, const zbookmark_t *zb); + +extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, + void *data, uint64_t size, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, zbookmark_t *zb); + +extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies); + +extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); + +extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, + const blkptr_t *bp, + zio_done_func_t *done, void *private, enum 
zio_flag flags); + +extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, + zio_done_func_t *done, void *private, int priority, enum zio_flag flags); + +extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, + uint64_t size, void *data, int checksum, + zio_done_func_t *done, void *private, int priority, enum zio_flag flags, + boolean_t labels); + +extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, + uint64_t size, void *data, int checksum, + zio_done_func_t *done, void *private, int priority, enum zio_flag flags, + boolean_t labels); + +extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, + const blkptr_t *bp, enum zio_flag flags); + +extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, + blkptr_t *old_bp, uint64_t size, boolean_t use_slog); +extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp); +extern void zio_flush(zio_t *zio, vdev_t *vd); +extern void zio_shrink(zio_t *zio, uint64_t size); + +extern int zio_wait(zio_t *zio); +extern void zio_nowait(zio_t *zio); +extern void zio_execute(zio_t *zio); +extern void zio_interrupt(zio_t *zio); + +extern zio_t *zio_walk_parents(zio_t *cio); +extern zio_t *zio_walk_children(zio_t *pio); +extern zio_t *zio_unique_parent(zio_t *cio); +extern void zio_add_child(zio_t *pio, zio_t *cio); + +extern void *zio_buf_alloc(size_t size); +extern void zio_buf_free(void *buf, size_t size); +extern void *zio_data_buf_alloc(size_t size); +extern void zio_data_buf_free(void *buf, size_t size); + +extern void zio_resubmit_stage_async(void *); + +extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, + uint64_t offset, void *data, uint64_t size, int type, int priority, + enum zio_flag flags, zio_done_func_t *done, void *private); + +extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, + void *data, uint64_t size, int type, int priority, + enum zio_flag flags, zio_done_func_t *done, void *private); + +extern void 
zio_vdev_io_bypass(zio_t *zio); +extern void zio_vdev_io_reissue(zio_t *zio); +extern void zio_vdev_io_redone(zio_t *zio); + +extern void zio_checksum_verified(zio_t *zio); +extern int zio_worst_error(int e1, int e2); + +extern enum zio_checksum zio_checksum_select(enum zio_checksum child, + enum zio_checksum parent); +extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa, + enum zio_checksum child, enum zio_checksum parent); +extern enum zio_compress zio_compress_select(enum zio_compress child, + enum zio_compress parent); + +extern void zio_suspend(spa_t *spa, zio_t *zio); +extern int zio_resume(spa_t *spa); +extern void zio_resume_wait(spa_t *spa); + +/* + * Initial setup and teardown. + */ +extern void zio_init(void); +extern void zio_fini(void); + +/* + * Fault injection + */ +struct zinject_record; +extern uint32_t zio_injection_enabled; +extern int zio_inject_fault(char *name, int flags, int *id, + struct zinject_record *record); +extern int zio_inject_list_next(int *id, char *name, size_t buflen, + struct zinject_record *record); +extern int zio_clear_fault(int id); +extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type); +extern int zio_handle_fault_injection(zio_t *zio, int error); +extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); +extern int zio_handle_label_injection(zio_t *zio, int error); +extern void zio_handle_ignored_writes(zio_t *zio); + +/* + * Checksum ereport functions + */ +extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, + uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); +extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, + const void *good_data, const void *bad_data, boolean_t drop_if_identical); + +extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report); +extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); + +/* If we have the good data in hand, this function can be used */ +extern 
void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, + struct zio *zio, uint64_t offset, uint64_t length, + const void *good_data, const void *bad_data, struct zio_bad_cksum *info); + +/* Called from spa_sync(), but primarily an injection handler */ +extern void spa_handle_ignored_writes(spa_t *spa); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZIO_H */ diff --git a/uts/common/fs/zfs/sys/zio_checksum.h b/uts/common/fs/zfs/sys/zio_checksum.h new file mode 100644 index 000000000000..0956c04ab1b4 --- /dev/null +++ b/uts/common/fs/zfs/sys/zio_checksum.h @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZIO_CHECKSUM_H +#define _SYS_ZIO_CHECKSUM_H + +#include <sys/zio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Signature for checksum functions. + */ +typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp); + +/* + * Information about each checksum function. 
+ */ +typedef struct zio_checksum_info { + zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */ + int ci_correctable; /* number of correctable bits */ + int ci_eck; /* uses zio embedded checksum? */ + int ci_dedup; /* strong enough for dedup? */ + char *ci_name; /* descriptive name */ +} zio_checksum_info_t; + +typedef struct zio_bad_cksum { + zio_cksum_t zbc_expected; + zio_cksum_t zbc_actual; + const char *zbc_checksum_name; + uint8_t zbc_byteswapped; + uint8_t zbc_injected; + uint8_t zbc_has_cksum; /* expected/actual valid */ +} zio_bad_cksum_t; + +extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS]; + +/* + * Checksum routines. + */ +extern zio_checksum_t zio_checksum_SHA256; + +extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, + void *data, uint64_t size); +extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); +extern enum zio_checksum spa_dedup_checksum(spa_t *spa); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIO_CHECKSUM_H */ diff --git a/uts/common/fs/zfs/sys/zio_compress.h b/uts/common/fs/zfs/sys/zio_compress.h new file mode 100644 index 000000000000..30bed1a676e3 --- /dev/null +++ b/uts/common/fs/zfs/sys/zio_compress.h @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZIO_COMPRESS_H +#define _SYS_ZIO_COMPRESS_H + +#include <sys/zio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Common signature for all zio compress/decompress functions. + */ +typedef size_t zio_compress_func_t(void *src, void *dst, + size_t s_len, size_t d_len, int); +typedef int zio_decompress_func_t(void *src, void *dst, + size_t s_len, size_t d_len, int); + +/* + * Information about each compression function. + */ +typedef struct zio_compress_info { + zio_compress_func_t *ci_compress; /* compression function */ + zio_decompress_func_t *ci_decompress; /* decompression function */ + int ci_level; /* level parameter */ + char *ci_name; /* algorithm name */ +} zio_compress_info_t; + +extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS]; + +/* + * Compression routines. + */ +extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len, + int level); +extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len, + int level); +extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len, + int level); +extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len, + int level); +extern size_t zle_compress(void *src, void *dst, size_t s_len, size_t d_len, + int level); +extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len, + int level); + +/* + * Compress and decompress data if necessary. 
+ */ +extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst, + size_t s_len); +extern int zio_decompress_data(enum zio_compress c, void *src, void *dst, + size_t s_len, size_t d_len); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIO_COMPRESS_H */ diff --git a/uts/common/fs/zfs/sys/zio_impl.h b/uts/common/fs/zfs/sys/zio_impl.h new file mode 100644 index 000000000000..d90bd8bd5921 --- /dev/null +++ b/uts/common/fs/zfs/sys/zio_impl.h @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _ZIO_IMPL_H +#define _ZIO_IMPL_H + +#include <sys/zfs_context.h> +#include <sys/zio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * zio pipeline stage definitions + */ +enum zio_stage { + ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */ + + ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */ + ZIO_STAGE_FREE_BP_INIT = 1 << 2, /* --F-- */ + ZIO_STAGE_ISSUE_ASYNC = 1 << 3, /* RWF-- */ + ZIO_STAGE_WRITE_BP_INIT = 1 << 4, /* -W--- */ + + ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */ + + ZIO_STAGE_DDT_READ_START = 1 << 6, /* R---- */ + ZIO_STAGE_DDT_READ_DONE = 1 << 7, /* R---- */ + ZIO_STAGE_DDT_WRITE = 1 << 8, /* -W--- */ + ZIO_STAGE_DDT_FREE = 1 << 9, /* --F-- */ + + ZIO_STAGE_GANG_ASSEMBLE = 1 << 10, /* RWFC- */ + ZIO_STAGE_GANG_ISSUE = 1 << 11, /* RWFC- */ + + ZIO_STAGE_DVA_ALLOCATE = 1 << 12, /* -W--- */ + ZIO_STAGE_DVA_FREE = 1 << 13, /* --F-- */ + ZIO_STAGE_DVA_CLAIM = 1 << 14, /* ---C- */ + + ZIO_STAGE_READY = 1 << 15, /* RWFCI */ + + ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RW--I */ + ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RW--I */ + ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RW--I */ + + ZIO_STAGE_CHECKSUM_VERIFY = 1 << 19, /* R---- */ + + ZIO_STAGE_DONE = 1 << 20 /* RWFCI */ +}; + +#define ZIO_INTERLOCK_STAGES \ + (ZIO_STAGE_READY | \ + ZIO_STAGE_DONE) + +#define ZIO_INTERLOCK_PIPELINE \ + ZIO_INTERLOCK_STAGES + +#define ZIO_VDEV_IO_STAGES \ + (ZIO_STAGE_VDEV_IO_START | \ + ZIO_STAGE_VDEV_IO_DONE | \ + ZIO_STAGE_VDEV_IO_ASSESS) + +#define ZIO_VDEV_CHILD_PIPELINE \ + (ZIO_VDEV_IO_STAGES | \ + ZIO_STAGE_DONE) + +#define ZIO_READ_COMMON_STAGES \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_VDEV_IO_STAGES | \ + ZIO_STAGE_CHECKSUM_VERIFY) + +#define ZIO_READ_PHYS_PIPELINE \ + ZIO_READ_COMMON_STAGES + +#define ZIO_READ_PIPELINE \ + (ZIO_READ_COMMON_STAGES | \ + ZIO_STAGE_READ_BP_INIT) + +#define ZIO_DDT_CHILD_READ_PIPELINE \ + ZIO_READ_COMMON_STAGES + +#define ZIO_DDT_READ_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_READ_BP_INIT | \ + ZIO_STAGE_DDT_READ_START | \ + 
ZIO_STAGE_DDT_READ_DONE) + +#define ZIO_WRITE_COMMON_STAGES \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_VDEV_IO_STAGES | \ + ZIO_STAGE_ISSUE_ASYNC | \ + ZIO_STAGE_CHECKSUM_GENERATE) + +#define ZIO_WRITE_PHYS_PIPELINE \ + ZIO_WRITE_COMMON_STAGES + +#define ZIO_REWRITE_PIPELINE \ + (ZIO_WRITE_COMMON_STAGES | \ + ZIO_STAGE_WRITE_BP_INIT) + +#define ZIO_WRITE_PIPELINE \ + (ZIO_WRITE_COMMON_STAGES | \ + ZIO_STAGE_WRITE_BP_INIT | \ + ZIO_STAGE_DVA_ALLOCATE) + +#define ZIO_DDT_CHILD_WRITE_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_VDEV_IO_STAGES | \ + ZIO_STAGE_DVA_ALLOCATE) + +#define ZIO_DDT_WRITE_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_ISSUE_ASYNC | \ + ZIO_STAGE_WRITE_BP_INIT | \ + ZIO_STAGE_CHECKSUM_GENERATE | \ + ZIO_STAGE_DDT_WRITE) + +#define ZIO_GANG_STAGES \ + (ZIO_STAGE_GANG_ASSEMBLE | \ + ZIO_STAGE_GANG_ISSUE) + +#define ZIO_FREE_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_FREE_BP_INIT | \ + ZIO_STAGE_DVA_FREE) + +#define ZIO_DDT_FREE_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_FREE_BP_INIT | \ + ZIO_STAGE_ISSUE_ASYNC | \ + ZIO_STAGE_DDT_FREE) + +#define ZIO_CLAIM_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_DVA_CLAIM) + +#define ZIO_IOCTL_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_VDEV_IO_START | \ + ZIO_STAGE_VDEV_IO_ASSESS) + +#define ZIO_BLOCKING_STAGES \ + (ZIO_STAGE_DVA_ALLOCATE | \ + ZIO_STAGE_DVA_CLAIM | \ + ZIO_STAGE_VDEV_IO_START) + +extern void zio_inject_init(void); +extern void zio_inject_fini(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZIO_IMPL_H */ diff --git a/uts/common/fs/zfs/sys/zrlock.h b/uts/common/fs/zfs/sys/zrlock.h new file mode 100644 index 000000000000..dcd63f7b5b91 --- /dev/null +++ b/uts/common/fs/zfs/sys/zrlock.h @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZRLOCK_H +#define _SYS_ZRLOCK_H + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct zrlock { + kmutex_t zr_mtx; + volatile int32_t zr_refcount; + kcondvar_t zr_cv; + uint16_t zr_pad; +#ifdef ZFS_DEBUG + kthread_t *zr_owner; + const char *zr_caller; +#endif +} zrlock_t; + +extern void zrl_init(zrlock_t *); +extern void zrl_destroy(zrlock_t *); +#ifdef ZFS_DEBUG +#define zrl_add(_z) zrl_add_debug((_z), __func__) +extern void zrl_add_debug(zrlock_t *, const char *); +#else +extern void zrl_add(zrlock_t *); +#endif +extern void zrl_remove(zrlock_t *); +extern int zrl_tryenter(zrlock_t *); +extern void zrl_exit(zrlock_t *); +extern int zrl_is_zero(zrlock_t *); +extern int zrl_is_locked(zrlock_t *); +#ifdef ZFS_DEBUG +extern kthread_t *zrl_owner(zrlock_t *); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZRLOCK_H */ diff --git a/uts/common/fs/zfs/sys/zvol.h b/uts/common/fs/zfs/sys/zvol.h new file mode 100644 index 000000000000..0059bf510260 --- /dev/null +++ b/uts/common/fs/zfs/sys/zvol.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_ZVOL_H +#define _SYS_ZVOL_H + +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZVOL_OBJ 1ULL +#define ZVOL_ZAP_OBJ 2ULL + +#ifdef _KERNEL +extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize); +extern int zvol_check_volblocksize(uint64_t volblocksize); +extern int zvol_get_stats(objset_t *os, nvlist_t *nv); +extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); +extern int zvol_create_minor(const char *); +extern int zvol_remove_minor(const char *); +extern void zvol_remove_minors(const char *); +extern int zvol_set_volsize(const char *, major_t, uint64_t); + +extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr); +extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks); +extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr); +extern int zvol_strategy(buf_t *bp); +extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr); +extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr); +extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr); +extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr); +extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int 
flag, cred_t *cr, + int *rvalp); +extern int zvol_busy(void); +extern void zvol_init(void); +extern void zvol_fini(void); + +extern int zvol_get_volume_params(minor_t minor, uint64_t *blksize, + uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl, + void **rl_hdl, void **bonus_hdl); +extern uint64_t zvol_get_volume_size(void *minor_hdl); +extern int zvol_get_volume_wce(void *minor_hdl); +extern void zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, + ssize_t resid, boolean_t sync); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZVOL_H */ diff --git a/uts/common/fs/zfs/txg.c b/uts/common/fs/zfs/txg.c new file mode 100644 index 000000000000..9b308ca4e71a --- /dev/null +++ b/uts/common/fs/zfs/txg.c @@ -0,0 +1,724 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/txg_impl.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_scan.h> +#include <sys/callb.h> + +/* + * Pool-wide transaction groups. 
+ */ + +static void txg_sync_thread(dsl_pool_t *dp); +static void txg_quiesce_thread(dsl_pool_t *dp); + +int zfs_txg_timeout = 5; /* max seconds worth of delta per txg */ + +/* + * Prepare the txg subsystem. + */ +void +txg_init(dsl_pool_t *dp, uint64_t txg) +{ + tx_state_t *tx = &dp->dp_tx; + int c; + bzero(tx, sizeof (tx_state_t)); + + tx->tx_cpu = kmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP); + + for (c = 0; c < max_ncpus; c++) { + int i; + + mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL); + for (i = 0; i < TXG_SIZE; i++) { + cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT, + NULL); + list_create(&tx->tx_cpu[c].tc_callbacks[i], + sizeof (dmu_tx_callback_t), + offsetof(dmu_tx_callback_t, dcb_node)); + } + } + + mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL); + + cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL); + + tx->tx_open_txg = txg; +} + +/* + * Close down the txg subsystem. + */ +void +txg_fini(dsl_pool_t *dp) +{ + tx_state_t *tx = &dp->dp_tx; + int c; + + ASSERT(tx->tx_threads == 0); + + mutex_destroy(&tx->tx_sync_lock); + + cv_destroy(&tx->tx_sync_more_cv); + cv_destroy(&tx->tx_sync_done_cv); + cv_destroy(&tx->tx_quiesce_more_cv); + cv_destroy(&tx->tx_quiesce_done_cv); + cv_destroy(&tx->tx_exit_cv); + + for (c = 0; c < max_ncpus; c++) { + int i; + + mutex_destroy(&tx->tx_cpu[c].tc_lock); + for (i = 0; i < TXG_SIZE; i++) { + cv_destroy(&tx->tx_cpu[c].tc_cv[i]); + list_destroy(&tx->tx_cpu[c].tc_callbacks[i]); + } + } + + if (tx->tx_commit_cb_taskq != NULL) + taskq_destroy(tx->tx_commit_cb_taskq); + + kmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t)); + + bzero(tx, sizeof (tx_state_t)); +} + +/* + * Start syncing transaction groups. 
+ */ +void +txg_sync_start(dsl_pool_t *dp) +{ + tx_state_t *tx = &dp->dp_tx; + + mutex_enter(&tx->tx_sync_lock); + + dprintf("pool %p\n", dp); + + ASSERT(tx->tx_threads == 0); + + tx->tx_threads = 2; + + tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread, + dp, 0, &p0, TS_RUN, minclsyspri); + + /* + * The sync thread can need a larger-than-default stack size on + * 32-bit x86. This is due in part to nested pools and + * scrub_visitbp() recursion. + */ + tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread, + dp, 0, &p0, TS_RUN, minclsyspri); + + mutex_exit(&tx->tx_sync_lock); +} + +static void +txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr) +{ + CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG); + mutex_enter(&tx->tx_sync_lock); +} + +static void +txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp) +{ + ASSERT(*tpp != NULL); + *tpp = NULL; + tx->tx_threads--; + cv_broadcast(&tx->tx_exit_cv); + CALLB_CPR_EXIT(cpr); /* drops &tx->tx_sync_lock */ + thread_exit(); +} + +static void +txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time) +{ + CALLB_CPR_SAFE_BEGIN(cpr); + + if (time) + (void) cv_timedwait(cv, &tx->tx_sync_lock, + ddi_get_lbolt() + time); + else + cv_wait(cv, &tx->tx_sync_lock); + + CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock); +} + +/* + * Stop syncing transaction groups. + */ +void +txg_sync_stop(dsl_pool_t *dp) +{ + tx_state_t *tx = &dp->dp_tx; + + dprintf("pool %p\n", dp); + /* + * Finish off any work in progress. + */ + ASSERT(tx->tx_threads == 2); + + /* + * We need to ensure that we've vacated the deferred space_maps. + */ + txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE); + + /* + * Wake all sync threads and wait for them to die. 
+ */ + mutex_enter(&tx->tx_sync_lock); + + ASSERT(tx->tx_threads == 2); + + tx->tx_exiting = 1; + + cv_broadcast(&tx->tx_quiesce_more_cv); + cv_broadcast(&tx->tx_quiesce_done_cv); + cv_broadcast(&tx->tx_sync_more_cv); + + while (tx->tx_threads != 0) + cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock); + + tx->tx_exiting = 0; + + mutex_exit(&tx->tx_sync_lock); +} + +uint64_t +txg_hold_open(dsl_pool_t *dp, txg_handle_t *th) +{ + tx_state_t *tx = &dp->dp_tx; + tx_cpu_t *tc = &tx->tx_cpu[CPU_SEQID]; + uint64_t txg; + + mutex_enter(&tc->tc_lock); + + txg = tx->tx_open_txg; + tc->tc_count[txg & TXG_MASK]++; + + th->th_cpu = tc; + th->th_txg = txg; + + return (txg); +} + +void +txg_rele_to_quiesce(txg_handle_t *th) +{ + tx_cpu_t *tc = th->th_cpu; + + mutex_exit(&tc->tc_lock); +} + +void +txg_register_callbacks(txg_handle_t *th, list_t *tx_callbacks) +{ + tx_cpu_t *tc = th->th_cpu; + int g = th->th_txg & TXG_MASK; + + mutex_enter(&tc->tc_lock); + list_move_tail(&tc->tc_callbacks[g], tx_callbacks); + mutex_exit(&tc->tc_lock); +} + +void +txg_rele_to_sync(txg_handle_t *th) +{ + tx_cpu_t *tc = th->th_cpu; + int g = th->th_txg & TXG_MASK; + + mutex_enter(&tc->tc_lock); + ASSERT(tc->tc_count[g] != 0); + if (--tc->tc_count[g] == 0) + cv_broadcast(&tc->tc_cv[g]); + mutex_exit(&tc->tc_lock); + + th->th_cpu = NULL; /* defensive */ +} + +static void +txg_quiesce(dsl_pool_t *dp, uint64_t txg) +{ + tx_state_t *tx = &dp->dp_tx; + int g = txg & TXG_MASK; + int c; + + /* + * Grab all tx_cpu locks so nobody else can get into this txg. + */ + for (c = 0; c < max_ncpus; c++) + mutex_enter(&tx->tx_cpu[c].tc_lock); + + ASSERT(txg == tx->tx_open_txg); + tx->tx_open_txg++; + + /* + * Now that we've incremented tx_open_txg, we can let threads + * enter the next transaction group. + */ + for (c = 0; c < max_ncpus; c++) + mutex_exit(&tx->tx_cpu[c].tc_lock); + + /* + * Quiesce the transaction group by waiting for everyone to txg_exit(). 
+ */ + for (c = 0; c < max_ncpus; c++) { + tx_cpu_t *tc = &tx->tx_cpu[c]; + mutex_enter(&tc->tc_lock); + while (tc->tc_count[g] != 0) + cv_wait(&tc->tc_cv[g], &tc->tc_lock); + mutex_exit(&tc->tc_lock); + } +} + +static void +txg_do_callbacks(list_t *cb_list) +{ + dmu_tx_do_callbacks(cb_list, 0); + + list_destroy(cb_list); + + kmem_free(cb_list, sizeof (list_t)); +} + +/* + * Dispatch the commit callbacks registered on this txg to worker threads. + */ +static void +txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) +{ + int c; + tx_state_t *tx = &dp->dp_tx; + list_t *cb_list; + + for (c = 0; c < max_ncpus; c++) { + tx_cpu_t *tc = &tx->tx_cpu[c]; + /* No need to lock tx_cpu_t at this point */ + + int g = txg & TXG_MASK; + + if (list_is_empty(&tc->tc_callbacks[g])) + continue; + + if (tx->tx_commit_cb_taskq == NULL) { + /* + * Commit callback taskq hasn't been created yet. + */ + tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb", + max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2, + TASKQ_PREPOPULATE); + } + + cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP); + list_create(cb_list, sizeof (dmu_tx_callback_t), + offsetof(dmu_tx_callback_t, dcb_node)); + + list_move_tail(&tc->tc_callbacks[g], cb_list); + + (void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *) + txg_do_callbacks, cb_list, TQ_SLEEP); + } +} + +static void +txg_sync_thread(dsl_pool_t *dp) +{ + spa_t *spa = dp->dp_spa; + tx_state_t *tx = &dp->dp_tx; + callb_cpr_t cpr; + uint64_t start, delta; + + txg_thread_enter(tx, &cpr); + + start = delta = 0; + for (;;) { + uint64_t timer, timeout = zfs_txg_timeout * hz; + uint64_t txg; + + /* + * We sync when we're scanning, there's someone waiting + * on us, or the quiesce thread has handed off a txg to + * us, or we have reached our timeout. + */ + timer = (delta >= timeout ? 
0 : timeout - delta); + while (!dsl_scan_active(dp->dp_scan) && + !tx->tx_exiting && timer > 0 && + tx->tx_synced_txg >= tx->tx_sync_txg_waiting && + tx->tx_quiesced_txg == 0) { + dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n", + tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); + txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer); + delta = ddi_get_lbolt() - start; + timer = (delta > timeout ? 0 : timeout - delta); + } + + /* + * Wait until the quiesce thread hands off a txg to us, + * prompting it to do so if necessary. + */ + while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) { + if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1) + tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1; + cv_broadcast(&tx->tx_quiesce_more_cv); + txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0); + } + + if (tx->tx_exiting) + txg_thread_exit(tx, &cpr, &tx->tx_sync_thread); + + /* + * Consume the quiesced txg which has been handed off to + * us. This may cause the quiescing thread to now be + * able to quiesce another txg, so we must signal it. + */ + txg = tx->tx_quiesced_txg; + tx->tx_quiesced_txg = 0; + tx->tx_syncing_txg = txg; + cv_broadcast(&tx->tx_quiesce_more_cv); + + dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", + txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); + mutex_exit(&tx->tx_sync_lock); + + start = ddi_get_lbolt(); + spa_sync(spa, txg); + delta = ddi_get_lbolt() - start; + + mutex_enter(&tx->tx_sync_lock); + tx->tx_synced_txg = txg; + tx->tx_syncing_txg = 0; + cv_broadcast(&tx->tx_sync_done_cv); + + /* + * Dispatch commit callbacks to worker threads. + */ + txg_dispatch_callbacks(dp, txg); + } +} + +static void +txg_quiesce_thread(dsl_pool_t *dp) +{ + tx_state_t *tx = &dp->dp_tx; + callb_cpr_t cpr; + + txg_thread_enter(tx, &cpr); + + for (;;) { + uint64_t txg; + + /* + * We quiesce when there's someone waiting on us. + * However, we can only have one txg in "quiescing" or + * "quiesced, waiting to sync" state. 
So we wait until + * the "quiesced, waiting to sync" txg has been consumed + * by the sync thread. + */ + while (!tx->tx_exiting && + (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting || + tx->tx_quiesced_txg != 0)) + txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0); + + if (tx->tx_exiting) + txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread); + + txg = tx->tx_open_txg; + dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", + txg, tx->tx_quiesce_txg_waiting, + tx->tx_sync_txg_waiting); + mutex_exit(&tx->tx_sync_lock); + txg_quiesce(dp, txg); + mutex_enter(&tx->tx_sync_lock); + + /* + * Hand this txg off to the sync thread. + */ + dprintf("quiesce done, handing off txg %llu\n", txg); + tx->tx_quiesced_txg = txg; + cv_broadcast(&tx->tx_sync_more_cv); + cv_broadcast(&tx->tx_quiesce_done_cv); + } +} + +/* + * Delay this thread by 'ticks' if we are still in the open transaction + * group and there is already a waiting txg quiescing or quiesced. Abort + * the delay if this txg stalls or enters the quiescing state.
+ */ +void +txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) +{ + tx_state_t *tx = &dp->dp_tx; + /* + * ddi_get_lbolt() returns clock_t; keep the absolute deadline in the + * same type so it is not truncated to int on LP64 kernels. + */ + clock_t timeout = ddi_get_lbolt() + ticks; + + /* don't delay if this txg could transition to quiescing immediately */ + if (tx->tx_open_txg > txg || + tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1) + return; + + mutex_enter(&tx->tx_sync_lock); + /* re-check under tx_sync_lock; the test above was done unlocked */ + if (tx->tx_open_txg > txg || tx->tx_synced_txg == txg-1) { + mutex_exit(&tx->tx_sync_lock); + return; + } + + while (ddi_get_lbolt() < timeout && + tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) + (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, + timeout); + + mutex_exit(&tx->tx_sync_lock); +} + +/* + * Block until tx_synced_txg has reached 'txg'. A 'txg' of 0 means + * tx_open_txg + TXG_DEFER_SIZE. Raises tx_sync_txg_waiting to 'txg' if + * it is lower and broadcasts tx_sync_more_cv before each wait. + */ +void +txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +{ + tx_state_t *tx = &dp->dp_tx; + + mutex_enter(&tx->tx_sync_lock); + ASSERT(tx->tx_threads == 2); + if (txg == 0) + txg = tx->tx_open_txg + TXG_DEFER_SIZE; + if (tx->tx_sync_txg_waiting < txg) + tx->tx_sync_txg_waiting = txg; + dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", + txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); + while (tx->tx_synced_txg < txg) { + dprintf("broadcasting sync more " + "tx_synced=%llu waiting=%llu dp=%p\n", + tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); + cv_broadcast(&tx->tx_sync_more_cv); + cv_wait(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + } + mutex_exit(&tx->tx_sync_lock); +} + +/* + * Block until tx_open_txg has reached 'txg'. A 'txg' of 0 means + * tx_open_txg + 1. Raises tx_quiesce_txg_waiting to 'txg' if it is + * lower and broadcasts tx_quiesce_more_cv before each wait. + */ +void +txg_wait_open(dsl_pool_t *dp, uint64_t txg) +{ + tx_state_t *tx = &dp->dp_tx; + + mutex_enter(&tx->tx_sync_lock); + ASSERT(tx->tx_threads == 2); + if (txg == 0) + txg = tx->tx_open_txg + 1; + if (tx->tx_quiesce_txg_waiting < txg) + tx->tx_quiesce_txg_waiting = txg; + dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", + txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); + while (tx->tx_open_txg < txg) { + cv_broadcast(&tx->tx_quiesce_more_cv); + cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock); + } + mutex_exit(&tx->tx_sync_lock); +} + +boolean_t +txg_stalled(dsl_pool_t *dp) +{ + tx_state_t *tx = &dp->dp_tx; +
return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg); +} + +boolean_t +txg_sync_waiting(dsl_pool_t *dp) +{ + tx_state_t *tx = &dp->dp_tx; + + return (tx->tx_syncing_txg <= tx->tx_sync_txg_waiting || + tx->tx_quiesced_txg != 0); +} + +/* + * Per-txg object lists. + */ +void +txg_list_create(txg_list_t *tl, size_t offset) +{ + int t; + + mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL); + + tl->tl_offset = offset; + + for (t = 0; t < TXG_SIZE; t++) + tl->tl_head[t] = NULL; +} + +void +txg_list_destroy(txg_list_t *tl) +{ + int t; + + for (t = 0; t < TXG_SIZE; t++) + ASSERT(txg_list_empty(tl, t)); + + mutex_destroy(&tl->tl_lock); +} + +int +txg_list_empty(txg_list_t *tl, uint64_t txg) +{ + return (tl->tl_head[txg & TXG_MASK] == NULL); +} + +/* + * Add an entry to the list. + * Returns 0 if it's a new entry, 1 if it's already there. + */ +int +txg_list_add(txg_list_t *tl, void *p, uint64_t txg) +{ + int t = txg & TXG_MASK; + txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); + int already_on_list; + + mutex_enter(&tl->tl_lock); + already_on_list = tn->tn_member[t]; + if (!already_on_list) { + tn->tn_member[t] = 1; + tn->tn_next[t] = tl->tl_head[t]; + tl->tl_head[t] = tn; + } + mutex_exit(&tl->tl_lock); + + return (already_on_list); +} + +/* + * Add an entry to the end of the list (walks list to find end). + * Returns 0 if it's a new entry, 1 if it's already there. + */ +int +txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) +{ + int t = txg & TXG_MASK; + txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); + int already_on_list; + + mutex_enter(&tl->tl_lock); + already_on_list = tn->tn_member[t]; + if (!already_on_list) { + txg_node_t **tp; + + for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t]) + continue; + + tn->tn_member[t] = 1; + tn->tn_next[t] = NULL; + *tp = tn; + } + mutex_exit(&tl->tl_lock); + + return (already_on_list); +} + +/* + * Remove the head of the list and return it. 
 */
void *
txg_list_remove(txg_list_t *tl, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn;
	void *p = NULL;

	mutex_enter(&tl->tl_lock);
	if ((tn = tl->tl_head[t]) != NULL) {
		/* Convert the embedded node back to its containing object. */
		p = (char *)tn - tl->tl_offset;
		tl->tl_head[t] = tn->tn_next[t];
		tn->tn_next[t] = NULL;
		tn->tn_member[t] = 0;
	}
	mutex_exit(&tl->tl_lock);

	return (p);
}

/*
 * Remove a specific item from the list and return it.
 * Returns NULL if 'p' is not found on the list for this txg slot.
 */
void *
txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn, **tp;

	mutex_enter(&tl->tl_lock);

	/* 'tp' tracks the link that points at 'tn' so we can unlink in place. */
	for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
		if ((char *)tn - tl->tl_offset == p) {
			*tp = tn->tn_next[t];
			tn->tn_next[t] = NULL;
			tn->tn_member[t] = 0;
			mutex_exit(&tl->tl_lock);
			return (p);
		}
	}

	mutex_exit(&tl->tl_lock);

	return (NULL);
}

/*
 * Return the membership flag of 'p' for the slot selected by
 * (txg & TXG_MASK).  Reads tn_member without taking tl_lock.
 */
int
txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);

	return (tn->tn_member[t]);
}

/*
 * Walk a txg list -- only safe if you know it's not changing.
 */
void *
txg_list_head(txg_list_t *tl, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = tl->tl_head[t];

	return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
}

/*
 * Return the object that follows 'p' on the list for this txg slot,
 * or NULL if 'p' is the last entry.  Does not take tl_lock.
 */
void *
txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);

	tn = tn->tn_next[t];

	return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
}
diff --git a/uts/common/fs/zfs/uberblock.c b/uts/common/fs/zfs/uberblock.c
new file mode 100644
index 000000000000..692cda137f1a
--- /dev/null
+++ b/uts/common/fs/zfs/uberblock.c
@@ -0,0 +1,61 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/uberblock_impl.h>
#include <sys/vdev_impl.h>

/*
 * Check the uberblock's magic number.  If the magic matches only in
 * byte-swapped form, the whole uberblock is byte-swapped in place first.
 * Returns 0 if the (possibly swapped) magic is UBERBLOCK_MAGIC, else EINVAL.
 */
int
uberblock_verify(uberblock_t *ub)
{
	if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC))
		byteswap_uint64_array(ub, sizeof (uberblock_t));

	if (ub->ub_magic != UBERBLOCK_MAGIC)
		return (EINVAL);

	return (0);
}

/*
 * Update the uberblock and return a boolean value indicating whether
 * anything changed in this transaction group.
 */
int
uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
{
	ASSERT(ub->ub_txg < txg);

	/*
	 * We explicitly do not set ub_version here, so that older versions
	 * continue to be written with the previous uberblock version.
	 */
	ub->ub_magic = UBERBLOCK_MAGIC;
	ub->ub_txg = txg;
	ub->ub_guid_sum = rvd->vdev_guid_sum;
	ub->ub_timestamp = gethrestime_sec();
	ub->ub_software_version = SPA_VERSION;

	/* "Changed" means the root block pointer was born in this txg. */
	return (ub->ub_rootbp.blk_birth == txg);
}
diff --git a/uts/common/fs/zfs/unique.c b/uts/common/fs/zfs/unique.c
new file mode 100644
index 000000000000..fbe7b619a29a
--- /dev/null
+++ b/uts/common/fs/zfs/unique.c
@@ -0,0 +1,116 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/unique.h>

/* Set of values currently registered as in use; guarded by unique_mtx. */
static avl_tree_t unique_avl;
static kmutex_t unique_mtx;

/* One registered value, linked into unique_avl. */
typedef struct unique {
	avl_node_t un_link;
	uint64_t un_value;
} unique_t;

/* Mask selecting the low UNIQUE_BITS bits of a value. */
#define UNIQUE_MASK ((1ULL << UNIQUE_BITS) - 1)

/*
 * AVL comparator: orders unique_t entries by un_value.
 */
static int
unique_compare(const void *a, const void *b)
{
	const unique_t *una = a;
	const unique_t *unb = b;

	if (una->un_value < unb->un_value)
		return (-1);
	if (una->un_value > unb->un_value)
		return (+1);
	return (0);
}

/*
 * Create the AVL tree and the mutex that protects it.
 */
void
unique_init(void)
{
	avl_create(&unique_avl, unique_compare,
	    sizeof (unique_t), offsetof(unique_t, un_link));
	mutex_init(&unique_mtx, NULL, MUTEX_DEFAULT, NULL);
}

/*
 * Destroy the AVL tree and its mutex.
 */
void
unique_fini(void)
{
	avl_destroy(&unique_avl);
	mutex_destroy(&unique_mtx);
}

/*
 * Generate a value that is not currently registered, without leaving it
 * registered: inserts a fresh value and immediately removes it again.
 */
uint64_t
unique_create(void)
{
	uint64_t value = unique_insert(0);
	unique_remove(value);
	return (value);
}

/*
 * Register 'value' and return the value actually registered.  If 'value'
 * is 0, has bits set outside UNIQUE_MASK, or is already present in the
 * tree, a replacement is drawn from random_get_pseudo_bytes() (masked
 * with UNIQUE_MASK) until an acceptable, unused one is found.
 */
uint64_t
unique_insert(uint64_t value)
{
	avl_index_t idx;
	unique_t *un = kmem_alloc(sizeof (unique_t), KM_SLEEP);

	un->un_value = value;

	mutex_enter(&unique_mtx);
	/*
	 * The mutex is dropped only around the random-number call; the
	 * avl_find() that produces 'idx' and the avl_insert() below both
	 * run with the mutex held, with no release in between.
	 */
	while (un->un_value == 0 || un->un_value & ~UNIQUE_MASK ||
	    avl_find(&unique_avl, un, &idx)) {
		mutex_exit(&unique_mtx);
		(void) random_get_pseudo_bytes((void*)&un->un_value,
		    sizeof (un->un_value));
		un->un_value &= UNIQUE_MASK;
		mutex_enter(&unique_mtx);
	}

	avl_insert(&unique_avl, un, idx);
	mutex_exit(&unique_mtx);

	return (un->un_value);
}

/*
 * Unregister 'value' and free its entry.  A value that is not present
 * is silently ignored.
 */
void
unique_remove(uint64_t value)
{
	unique_t un_tofind;
	unique_t *un;

	un_tofind.un_value = value;
	mutex_enter(&unique_mtx);
	un = avl_find(&unique_avl, &un_tofind, NULL);
	if (un != NULL) {
		avl_remove(&unique_avl, un);
		kmem_free(un, sizeof (unique_t));
	}
	mutex_exit(&unique_mtx);
}
diff --git a/uts/common/fs/zfs/vdev.c b/uts/common/fs/zfs/vdev.c
new file mode 100644
index 000000000000..bac3e86054d6
--- /dev/null
+++ b/uts/common/fs/zfs/vdev.c
@@ -0,0 +1,3130 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/vdev_impl.h>
#include <sys/uberblock_impl.h>
#include <sys/metaslab.h>
#include <sys/metaslab_impl.h>
#include <sys/space_map.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/zil.h>
#include <sys/dsl_scan.h>

/*
 * Virtual device management.
 */

/* NULL-terminated table of the ops vectors searched by vdev_getops(). */
static vdev_ops_t *vdev_ops_table[] = {
	&vdev_root_ops,
	&vdev_raidz_ops,
	&vdev_mirror_ops,
	&vdev_replacing_ops,
	&vdev_spare_ops,
	&vdev_disk_ops,
	&vdev_file_ops,
	&vdev_missing_ops,
	&vdev_hole_ops,
	NULL
};

/* maximum scrub/resilver I/O queue per leaf vdev */
int zfs_scrub_limit = 10;

/*
 * Given a vdev type, return the appropriate ops vector.
 * Returns NULL if no table entry's vdev_op_type matches 'type'.
 */
static vdev_ops_t *
vdev_getops(const char *type)
{
	vdev_ops_t *ops, **opspp;

	/* On a miss the loop ends with ops == NULL (the table terminator). */
	for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++)
		if (strcmp(ops->vdev_op_type, type) == 0)
			break;

	return (ops);
}

/*
 * Default asize function: return the MAX of psize with the asize of
 * all children. This is what's used by anything other than RAID-Z.
 */
uint64_t
vdev_default_asize(vdev_t *vd, uint64_t psize)
{
	/* Start from psize rounded up to the top-level vdev's ashift. */
	uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
	uint64_t csize;

	for (int c = 0; c < vd->vdev_children; c++) {
		csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
		asize = MAX(asize, csize);
	}

	return (asize);
}

/*
 * Get the minimum allocatable size. We define the allocatable size as
 * the vdev's asize rounded to the nearest metaslab. This allows us to
 * replace or attach devices which don't have the same physical size but
 * can still satisfy the same number of allocations.
 */
uint64_t
vdev_get_min_asize(vdev_t *vd)
{
	vdev_t *pvd = vd->vdev_parent;

	/*
	 * If our parent is NULL (inactive spare or cache) or is the root,
	 * just return our own asize.
	 */
	if (pvd == NULL)
		return (vd->vdev_asize);

	/*
	 * The top-level vdev just returns the allocatable size rounded
	 * to the nearest metaslab.
	 */
	if (vd == vd->vdev_top)
		return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift));

	/*
	 * The allocatable space for a raidz vdev is N * sizeof(smallest child),
	 * so each child must provide at least 1/Nth of its asize.
	 */
	if (pvd->vdev_ops == &vdev_raidz_ops)
		return (pvd->vdev_min_asize / pvd->vdev_children);

	return (pvd->vdev_min_asize);
}

/*
 * Recompute vdev_min_asize for 'vd' and then for every descendant.
 * The parent is set before its children because vdev_get_min_asize()
 * on a child reads the parent's vdev_min_asize.
 */
void
vdev_set_min_asize(vdev_t *vd)
{
	vd->vdev_min_asize = vdev_get_min_asize(vd);

	for (int c = 0; c < vd->vdev_children; c++)
		vdev_set_min_asize(vd->vdev_child[c]);
}

/*
 * Return the child of the root vdev with index 'vdev', or NULL if the
 * index is out of range.  Asserts that some SCL_ALL config lock is held.
 */
vdev_t *
vdev_lookup_top(spa_t *spa, uint64_t vdev)
{
	vdev_t *rvd = spa->spa_root_vdev;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);

	if (vdev < rvd->vdev_children) {
		ASSERT(rvd->vdev_child[vdev] != NULL);
		return (rvd->vdev_child[vdev]);
	}

	return (NULL);
}

/*
 * Depth-first search of the subtree rooted at 'vd' for a vdev whose
 * vdev_guid equals 'guid'.  Returns the match, or NULL if none.
 */
vdev_t *
vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
{
	vdev_t *mvd;

	if (vd->vdev_guid == guid)
		return (vd);

	for (int c = 0; c < vd->vdev_children; c++)
		if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) !=
		    NULL)
			return (mvd);

	return (NULL);
}

/*
 * Attach 'cvd' to 'pvd' at slot cvd->vdev_id, growing the parent's
 * child array if needed, and add cvd's guid sum to every ancestor.
 * With a NULL 'pvd' only cvd->vdev_parent is set (to NULL).
 */
void
vdev_add_child(vdev_t *pvd, vdev_t *cvd)
{
	size_t oldsize, newsize;
	uint64_t id = cvd->vdev_id;
	vdev_t **newchild;

	ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
	ASSERT(cvd->vdev_parent == NULL);

	cvd->vdev_parent = pvd;

	if (pvd == NULL)
		return;

	ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL);

	/* The child array is always reallocated, even if its size is unchanged. */
	oldsize = pvd->vdev_children * sizeof (vdev_t *);
	pvd->vdev_children = MAX(pvd->vdev_children, id + 1);
	newsize = pvd->vdev_children * sizeof (vdev_t *);

	newchild = kmem_zalloc(newsize, KM_SLEEP);
	if (pvd->vdev_child != NULL) {
		bcopy(pvd->vdev_child, newchild, oldsize);
		kmem_free(pvd->vdev_child, oldsize);
	}

	pvd->vdev_child = newchild;
	pvd->vdev_child[id] = cvd;

	/* A parent with no vdev_top makes the child its own top-level vdev. */
	cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd);
	ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL);

	/*
	 * Walk up all ancestors to update guid sum.
	 */
	for (; pvd != NULL; pvd = pvd->vdev_parent)
		pvd->vdev_guid_sum += cvd->vdev_guid_sum;
}

/*
 * Detach 'cvd' from 'pvd': clear its slot, free the parent's child array
 * if that leaves no children at all, and subtract cvd's guid sum from
 * every ancestor.  A NULL 'pvd' is a no-op.
 */
void
vdev_remove_child(vdev_t *pvd, vdev_t *cvd)
{
	int c;
	uint_t id = cvd->vdev_id;

	ASSERT(cvd->vdev_parent == pvd);

	if (pvd == NULL)
		return;

	ASSERT(id < pvd->vdev_children);
	ASSERT(pvd->vdev_child[id] == cvd);

	pvd->vdev_child[id] = NULL;
	cvd->vdev_parent = NULL;

	/* Look for any remaining non-NULL child. */
	for (c = 0; c < pvd->vdev_children; c++)
		if (pvd->vdev_child[c])
			break;

	if (c == pvd->vdev_children) {
		kmem_free(pvd->vdev_child, c * sizeof (vdev_t *));
		pvd->vdev_child = NULL;
		pvd->vdev_children = 0;
	}

	/*
	 * Walk up all ancestors to update guid sum.
	 */
	for (; pvd != NULL; pvd = pvd->vdev_parent)
		pvd->vdev_guid_sum -= cvd->vdev_guid_sum;
}

/*
 * Remove any holes in the child array.
 * Surviving children are renumbered (vdev_id) to their new positions.
 */
void
vdev_compact_children(vdev_t *pvd)
{
	vdev_t **newchild, *cvd;
	int oldc = pvd->vdev_children;
	int newc;

	ASSERT(spa_config_held(pvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/* First pass: count the non-NULL children. */
	for (int c = newc = 0; c < oldc; c++)
		if (pvd->vdev_child[c])
			newc++;

	newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP);

	/* Second pass: pack them into the new array in order. */
	for (int c = newc = 0; c < oldc; c++) {
		if ((cvd = pvd->vdev_child[c]) != NULL) {
			newchild[newc] = cvd;
			cvd->vdev_id = newc++;
		}
	}

	kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *));
	pvd->vdev_child = newchild;
	pvd->vdev_children = newc;
}

/*
 * Allocate and minimally initialize a vdev_t.
 */
vdev_t *
vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
{
	vdev_t *vd;

	vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP);

	/* The first vdev allocated for a spa becomes its root vdev. */
	if (spa->spa_root_vdev == NULL) {
		ASSERT(ops == &vdev_root_ops);
		spa->spa_root_vdev = vd;
	}

	/* A guid of 0 means "generate one"; hole vdevs keep guid 0. */
	if (guid == 0 && ops != &vdev_hole_ops) {
		if (spa->spa_root_vdev == vd) {
			/*
			 * The root vdev's guid will also be the pool guid,
			 * which must be unique among all pools.
			 */
			guid = spa_generate_guid(NULL);
		} else {
			/*
			 * Any other vdev's guid must be unique within the pool.
			 */
			guid = spa_generate_guid(spa);
		}
		ASSERT(!spa_guid_exists(spa_guid(spa), guid));
	}

	vd->vdev_spa = spa;
	vd->vdev_id = id;
	vd->vdev_guid = guid;
	vd->vdev_guid_sum = guid;
	vd->vdev_ops = ops;
	vd->vdev_state = VDEV_STATE_CLOSED;
	vd->vdev_ishole = (ops == &vdev_hole_ops);

	mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
	for (int t = 0; t < DTL_TYPES; t++) {
		space_map_create(&vd->vdev_dtl[t], 0, -1ULL, 0,
		    &vd->vdev_dtl_lock);
	}
	txg_list_create(&vd->vdev_ms_list,
	    offsetof(struct metaslab, ms_txg_node));
	txg_list_create(&vd->vdev_dtl_list,
	    offsetof(struct vdev, vdev_dtl_node));
	vd->vdev_stat.vs_timestamp = gethrtime();
	vdev_queue_init(vd);
	vdev_cache_init(vd);

	return (vd);
}

/*
 * Allocate a new vdev. The 'alloctype' is used to control whether we are
 * creating a new vdev or loading an existing one - the behavior is slightly
 * different for each case.
 */
int
vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
    int alloctype)
{
	vdev_ops_t *ops;
	char *type;
	uint64_t guid = 0, islog, nparity;
	vdev_t *vd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EINVAL);

	if ((ops = vdev_getops(type)) == NULL)
		return (EINVAL);

	/*
	 * If this is a load, get the vdev guid from the nvlist.
	 * Otherwise, vdev_alloc_common() will generate one for us.
	 */
	if (alloctype == VDEV_ALLOC_LOAD) {
		uint64_t label_id;

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) ||
		    label_id != id)
			return (EINVAL);

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
			return (EINVAL);
	} else if (alloctype == VDEV_ALLOC_SPARE) {
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
			return (EINVAL);
	} else if (alloctype == VDEV_ALLOC_L2CACHE) {
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
			return (EINVAL);
	} else if (alloctype == VDEV_ALLOC_ROOTPOOL) {
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
			return (EINVAL);
	}

	/*
	 * The first allocated vdev must be of type 'root'.
	 */
	if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL)
		return (EINVAL);

	/*
	 * Determine whether we're a log vdev.
	 */
	islog = 0;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &islog);
	if (islog && spa_version(spa) < SPA_VERSION_SLOGS)
		return (ENOTSUP);

	if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES)
		return (ENOTSUP);

	/*
	 * Set the nparity property for RAID-Z vdevs.
	 */
	nparity = -1ULL;
	if (ops == &vdev_raidz_ops) {
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
		    &nparity) == 0) {
			if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
				return (EINVAL);
			/*
			 * Previous versions could only support 1 or 2 parity
			 * device.
			 */
			if (nparity > 1 &&
			    spa_version(spa) < SPA_VERSION_RAIDZ2)
				return (ENOTSUP);
			if (nparity > 2 &&
			    spa_version(spa) < SPA_VERSION_RAIDZ3)
				return (ENOTSUP);
		} else {
			/*
			 * We require the parity to be specified for SPAs that
			 * support multiple parity levels.
			 */
			if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
				return (EINVAL);
			/*
			 * Otherwise, we default to 1 parity device for RAID-Z.
			 */
			nparity = 1;
		}
	} else {
		nparity = 0;
	}
	ASSERT(nparity != -1ULL);

	/* All validation that can fail is done; nothing below returns an error. */
	vd = vdev_alloc_common(spa, id, guid, ops);

	vd->vdev_islog = islog;
	vd->vdev_nparity = nparity;

	/*
	 * Each lookup leaves the nvlist's own string in the field; replace
	 * it with a private copy made by spa_strdup().
	 */
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
		vd->vdev_path = spa_strdup(vd->vdev_path);
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
		vd->vdev_devid = spa_strdup(vd->vdev_devid);
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
	    &vd->vdev_physpath) == 0)
		vd->vdev_physpath = spa_strdup(vd->vdev_physpath);
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0)
		vd->vdev_fru = spa_strdup(vd->vdev_fru);

	/*
	 * Set the whole_disk property. If it's not specified, leave the value
	 * as -1.
	 */
	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
	    &vd->vdev_wholedisk) != 0)
		vd->vdev_wholedisk = -1ULL;

	/*
	 * Look for the 'not present' flag. This will only be set if the device
	 * was not present at the time of import.
	 */
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
	    &vd->vdev_not_present);

	/*
	 * Get the alignment requirement.
	 */
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);

	/*
	 * Retrieve the vdev creation time.
	 */
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
	    &vd->vdev_crtxg);

	/*
	 * If we're a top-level vdev, try to load the allocation parameters.
	 */
	if (parent && !parent->vdev_parent &&
	    (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_SPLIT)) {
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
		    &vd->vdev_ms_array);
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
		    &vd->vdev_ms_shift);
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE,
		    &vd->vdev_asize);
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING,
		    &vd->vdev_removing);
	}

	/* Top-level vdevs get a metaslab group in the log or normal class. */
	if (parent && !parent->vdev_parent) {
		ASSERT(alloctype == VDEV_ALLOC_LOAD ||
		    alloctype == VDEV_ALLOC_ADD ||
		    alloctype == VDEV_ALLOC_SPLIT ||
		    alloctype == VDEV_ALLOC_ROOTPOOL);
		vd->vdev_mg = metaslab_group_create(islog ?
		    spa_log_class(spa) : spa_normal_class(spa), vd);
	}

	/*
	 * If we're a leaf vdev, try to load the DTL object and other state.
	 */
	if (vd->vdev_ops->vdev_op_leaf &&
	    (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE ||
	    alloctype == VDEV_ALLOC_ROOTPOOL)) {
		if (alloctype == VDEV_ALLOC_LOAD) {
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
			    &vd->vdev_dtl_smo.smo_object);
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE,
			    &vd->vdev_unspare);
		}

		if (alloctype == VDEV_ALLOC_ROOTPOOL) {
			uint64_t spare = 0;

			if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare) == 0 && spare)
				spa_spare_add(vd);
		}

		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
		    &vd->vdev_offline);

		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING,
		    &vd->vdev_resilvering);

		/*
		 * When importing a pool, we want to ignore the persistent fault
		 * state, as the diagnosis made on another system may not be
		 * valid in the current context. Local vdevs will
		 * remain in the faulted state.
		 */
		if (spa_load_state(spa) == SPA_LOAD_OPEN) {
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
			    &vd->vdev_faulted);
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED,
			    &vd->vdev_degraded);
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED,
			    &vd->vdev_removed);

			if (vd->vdev_faulted || vd->vdev_degraded) {
				char *aux;

				vd->vdev_label_aux =
				    VDEV_AUX_ERR_EXCEEDED;
				if (nvlist_lookup_string(nv,
				    ZPOOL_CONFIG_AUX_STATE, &aux) == 0 &&
				    strcmp(aux, "external") == 0)
					vd->vdev_label_aux = VDEV_AUX_EXTERNAL;
			}
		}
	}

	/*
	 * Add ourselves to the parent's list of children.
	 */
	vdev_add_child(parent, vd);

	*vdp = vd;

	return (0);
}

/*
 * Close 'vd', recursively free its children, release its allocation
 * state, detach it from its parent and free the vdev_t itself.
 */
void
vdev_free(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;

	/*
	 * vdev_free() implies closing the vdev first. This is simpler than
	 * trying to ensure complicated semantics for all callers.
	 */
	vdev_close(vd);

	ASSERT(!list_link_active(&vd->vdev_config_dirty_node));
	ASSERT(!list_link_active(&vd->vdev_state_dirty_node));

	/*
	 * Free all children.
	 */
	for (int c = 0; c < vd->vdev_children; c++)
		vdev_free(vd->vdev_child[c]);

	ASSERT(vd->vdev_child == NULL);
	ASSERT(vd->vdev_guid_sum == vd->vdev_guid);

	/*
	 * Discard allocation state.
	 */
	if (vd->vdev_mg != NULL) {
		vdev_metaslab_fini(vd);
		metaslab_group_destroy(vd->vdev_mg);
	}

	ASSERT3U(vd->vdev_stat.vs_space, ==, 0);
	ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0);
	ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);

	/*
	 * Remove this vdev from its parent's child list.
	 */
	vdev_remove_child(vd->vdev_parent, vd);

	ASSERT(vd->vdev_parent == NULL);

	/*
	 * Clean up vdev structure.
	 */
	vdev_queue_fini(vd);
	vdev_cache_fini(vd);

	if (vd->vdev_path)
		spa_strfree(vd->vdev_path);
	if (vd->vdev_devid)
		spa_strfree(vd->vdev_devid);
	if (vd->vdev_physpath)
		spa_strfree(vd->vdev_physpath);
	if (vd->vdev_fru)
		spa_strfree(vd->vdev_fru);

	if (vd->vdev_isspare)
		spa_spare_remove(vd);
	if (vd->vdev_isl2cache)
		spa_l2cache_remove(vd);

	txg_list_destroy(&vd->vdev_ms_list);
	txg_list_destroy(&vd->vdev_dtl_list);

	mutex_enter(&vd->vdev_dtl_lock);
	for (int t = 0; t < DTL_TYPES; t++) {
		space_map_unload(&vd->vdev_dtl[t]);
		space_map_destroy(&vd->vdev_dtl[t]);
	}
	mutex_exit(&vd->vdev_dtl_lock);

	mutex_destroy(&vd->vdev_dtl_lock);
	mutex_destroy(&vd->vdev_stat_lock);
	mutex_destroy(&vd->vdev_probe_lock);

	if (vd == spa->spa_root_vdev)
		spa->spa_root_vdev = NULL;

	kmem_free(vd, sizeof (vdev_t));
}

/*
 * Transfer top-level vdev state from svd to tvd.
 */
static void
vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
{
	spa_t *spa = svd->vdev_spa;
	metaslab_t *msp;
	vdev_t *vd;
	int t;

	ASSERT(tvd == tvd->vdev_top);

	/* Each field is moved: copied to tvd, then cleared in svd. */
	tvd->vdev_ms_array = svd->vdev_ms_array;
	tvd->vdev_ms_shift = svd->vdev_ms_shift;
	tvd->vdev_ms_count = svd->vdev_ms_count;

	svd->vdev_ms_array = 0;
	svd->vdev_ms_shift = 0;
	svd->vdev_ms_count = 0;

	tvd->vdev_mg = svd->vdev_mg;
	tvd->vdev_ms = svd->vdev_ms;

	svd->vdev_mg = NULL;
	svd->vdev_ms = NULL;

	if (tvd->vdev_mg != NULL)
		tvd->vdev_mg->mg_vd = tvd;

	tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc;
	tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space;
	tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace;

	svd->vdev_stat.vs_alloc = 0;
	svd->vdev_stat.vs_space = 0;
	svd->vdev_stat.vs_dspace = 0;

	/* Move every per-txg list entry, and svd's own spa-list membership. */
	for (t = 0; t < TXG_SIZE; t++) {
		while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
			(void) txg_list_add(&tvd->vdev_ms_list, msp, t);
		while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL)
			(void) txg_list_add(&tvd->vdev_dtl_list, vd, t);
		if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t))
			(void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t);
	}

	if (list_link_active(&svd->vdev_config_dirty_node)) {
		vdev_config_clean(svd);
		vdev_config_dirty(tvd);
	}

	if (list_link_active(&svd->vdev_state_dirty_node)) {
		vdev_state_clean(svd);
		vdev_state_dirty(tvd);
	}

	tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio;
	svd->vdev_deflate_ratio = 0;

	tvd->vdev_islog = svd->vdev_islog;
	svd->vdev_islog = 0;
}

/*
 * Set vdev_top to 'tvd' for 'vd' and every vdev below it.
 * A NULL 'vd' is ignored.
 */
static void
vdev_top_update(vdev_t *tvd, vdev_t *vd)
{
	if (vd == NULL)
		return;

	vd->vdev_top = tvd;

	for (int c = 0; c < vd->vdev_children; c++)
		vdev_top_update(tvd, vd->vdev_child[c]);
}

/*
 * Add a mirror/replacing vdev above an existing vdev.
 */
vdev_t *
vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
{
	spa_t *spa = cvd->vdev_spa;
	vdev_t *pvd = cvd->vdev_parent;
	vdev_t *mvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/* The new interior vdev takes over cvd's slot (same vdev_id). */
	mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);

	mvd->vdev_asize = cvd->vdev_asize;
	mvd->vdev_min_asize = cvd->vdev_min_asize;
	mvd->vdev_ashift = cvd->vdev_ashift;
	mvd->vdev_state = cvd->vdev_state;
	mvd->vdev_crtxg = cvd->vdev_crtxg;

	vdev_remove_child(pvd, cvd);
	vdev_add_child(pvd, mvd);
	cvd->vdev_id = mvd->vdev_children;
	vdev_add_child(mvd, cvd);
	vdev_top_update(cvd->vdev_top, cvd->vdev_top);

	if (mvd == mvd->vdev_top)
		vdev_top_transfer(cvd, mvd);

	return (mvd);
}

/*
 * Remove a 1-way mirror/replacing vdev from the tree.
 */
void
vdev_remove_parent(vdev_t *cvd)
{
	vdev_t *mvd = cvd->vdev_parent;
	vdev_t *pvd = mvd->vdev_parent;

	ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	ASSERT(mvd->vdev_children == 1);
	ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
	    mvd->vdev_ops == &vdev_replacing_ops ||
	    mvd->vdev_ops == &vdev_spare_ops);
	cvd->vdev_ashift = mvd->vdev_ashift;

	vdev_remove_child(mvd, cvd);
	vdev_remove_child(pvd, mvd);

	/*
	 * If cvd will replace mvd as a top-level vdev, preserve mvd's guid.
	 * Otherwise, we could have detached an offline device, and when we
	 * go to import the pool we'll think we have two top-level vdevs,
	 * instead of a different version of the same top-level vdev.
	 */
	if (mvd->vdev_top == mvd) {
		uint64_t guid_delta = mvd->vdev_guid - cvd->vdev_guid;
		cvd->vdev_orig_guid = cvd->vdev_guid;
		cvd->vdev_guid += guid_delta;
		cvd->vdev_guid_sum += guid_delta;
	}
	cvd->vdev_id = mvd->vdev_id;
	vdev_add_child(pvd, cvd);
	vdev_top_update(cvd->vdev_top, cvd->vdev_top);

	if (cvd == cvd->vdev_top)
		vdev_top_transfer(mvd, cvd);

	ASSERT(mvd->vdev_children == 0);
	vdev_free(mvd);
}

/*
 * Size (or grow) the vdev's metaslab array to cover vdev_asize and
 * initialize each new metaslab.  When 'txg' is 0 the space map object
 * for each metaslab is read from the MOS; otherwise new metaslabs start
 * from an all-zero space_map_obj_t.  Returns 0, or the error from
 * dmu_read()/dmu_bonus_hold().
 */
int
vdev_metaslab_init(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	objset_t *mos = spa->spa_meta_objset;
	uint64_t m;
	uint64_t oldc = vd->vdev_ms_count;
	uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
	metaslab_t **mspp;
	int error;

	ASSERT(txg == 0 || spa_config_held(spa, SCL_ALLOC, RW_WRITER));

	/*
	 * This vdev is not being allocated from yet or is a hole.
	 */
	if (vd->vdev_ms_shift == 0)
		return (0);

	ASSERT(!vd->vdev_ishole);

	/*
	 * Compute the raidz-deflation ratio. Note, we hard-code
	 * in 128k (1 << 17) because it is the current "typical" blocksize.
	 * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
	 * or we will inconsistently account for existing bp's.
	 */
	vd->vdev_deflate_ratio = (1 << 17) /
	    (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);

	ASSERT(oldc <= newc);

	mspp = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP);

	if (oldc != 0) {
		bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp));
		kmem_free(vd->vdev_ms, oldc * sizeof (*mspp));
	}

	vd->vdev_ms = mspp;
	vd->vdev_ms_count = newc;

	for (m = oldc; m < newc; m++) {
		space_map_obj_t smo = { 0, 0, 0 };
		if (txg == 0) {
			uint64_t object = 0;
			error = dmu_read(mos, vd->vdev_ms_array,
			    m * sizeof (uint64_t), sizeof (uint64_t), &object,
			    DMU_READ_PREFETCH);
			if (error)
				return (error);
			if (object != 0) {
				dmu_buf_t *db;
				error = dmu_bonus_hold(mos, object, FTAG, &db);
				if (error)
					return (error);
				ASSERT3U(db->db_size, >=, sizeof (smo));
				bcopy(db->db_data, &smo, sizeof (smo));
				ASSERT3U(smo.smo_object, ==, object);
				dmu_buf_rele(db, FTAG);
			}
		}
		vd->vdev_ms[m] = metaslab_init(vd->vdev_mg, &smo,
		    m << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg);
	}

	/* With txg == 0 SCL_ALLOC is taken here just around the activation. */
	if (txg == 0)
		spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);

	/*
	 * If the vdev is being removed we don't activate
	 * the metaslabs since we want to ensure that no new
	 * allocations are performed on this device.
	 */
	if (oldc == 0 && !vd->vdev_removing)
		metaslab_group_activate(vd->vdev_mg);

	if (txg == 0)
		spa_config_exit(spa, SCL_ALLOC, FTAG);

	return (0);
}

/*
 * Passivate the vdev's metaslab group, destroy every non-NULL metaslab
 * and free the metaslab array.  Does nothing if vdev_ms is NULL.
 */
void
vdev_metaslab_fini(vdev_t *vd)
{
	uint64_t m;
	uint64_t count = vd->vdev_ms_count;

	if (vd->vdev_ms != NULL) {
		metaslab_group_passivate(vd->vdev_mg);
		for (m = 0; m < count; m++)
			if (vd->vdev_ms[m] != NULL)
				metaslab_fini(vd->vdev_ms[m]);
		kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *));
		vd->vdev_ms = NULL;
	}
}

/* Results accumulated across the child I/Os of a single probe. */
typedef struct vdev_probe_stats {
	boolean_t vps_readable;
	boolean_t vps_writeable;
	int vps_flags;
} vdev_probe_stats_t;

/*
 * Completion callback shared by the probe's read, write and parent
 * (null) zios; it dispatches on zio->io_type.
 */
static void
vdev_probe_done(zio_t *zio)
{
	spa_t *spa = zio->io_spa;
	vdev_t *vd = zio->io_vd;
	vdev_probe_stats_t *vps = zio->io_private;

	ASSERT(vd->vdev_probe_zio != NULL);

	if (zio->io_type == ZIO_TYPE_READ) {
		if (zio->io_error == 0)
			vps->vps_readable = 1;
		if (zio->io_error == 0 && spa_writeable(spa)) {
			/*
			 * Write the just-read data back to the same offset;
			 * the buffer is then freed by the write's callback.
			 */
			zio_nowait(zio_write_phys(vd->vdev_probe_zio, vd,
			    zio->io_offset, zio->io_size, zio->io_data,
			    ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
			    ZIO_PRIORITY_SYNC_WRITE, vps->vps_flags, B_TRUE));
		} else {
			zio_buf_free(zio->io_data, zio->io_size);
		}
	} else if (zio->io_type == ZIO_TYPE_WRITE) {
		if (zio->io_error == 0)
			vps->vps_writeable = 1;
		zio_buf_free(zio->io_data, zio->io_size);
	} else if (zio->io_type == ZIO_TYPE_NULL) {
		zio_t *pio;

		vd->vdev_cant_read |= !vps->vps_readable;
		vd->vdev_cant_write |= !vps->vps_writeable;

		if (vdev_readable(vd) &&
		    (vdev_writeable(vd) || !spa_writeable(spa))) {
			zio->io_error = 0;
		} else {
			ASSERT(zio->io_error != 0);
			zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
			    spa, vd, NULL, 0, 0);
			zio->io_error = ENXIO;
		}

		mutex_enter(&vd->vdev_probe_lock);
		ASSERT(vd->vdev_probe_zio == zio);
		vd->vdev_probe_zio = NULL;
		mutex_exit(&vd->vdev_probe_lock);

		/* Fail every parent zio that can no longer use this vdev. */
		while ((pio = zio_walk_parents(zio)) != NULL)
			if (!vdev_accessible(vd, pio))
				pio->io_error = ENXIO;

		kmem_free(vps, sizeof (*vps));
	}
}

/*
 * Determine whether this device is accessible by reading and writing
 * to several known locations: the pad regions of each vdev label
 * but the first (which we leave alone in case it contains a VTOC).
 */
zio_t *
vdev_probe(vdev_t *vd, zio_t *zio)
{
	spa_t *spa = vd->vdev_spa;
	vdev_probe_stats_t *vps = NULL;
	zio_t *pio;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	/*
	 * Don't probe the probe.
	 */
	if (zio && (zio->io_flags & ZIO_FLAG_PROBE))
		return (NULL);

	/*
	 * To prevent 'probe storms' when a device fails, we create
	 * just one probe i/o at a time. All zios that want to probe
	 * this vdev will become parents of the probe io.
	 */
	mutex_enter(&vd->vdev_probe_lock);

	if ((pio = vd->vdev_probe_zio) == NULL) {
		vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);

		vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
		    ZIO_FLAG_TRYHARD;

		if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
			/*
			 * vdev_cant_read and vdev_cant_write can only
			 * transition from TRUE to FALSE when we have the
			 * SCL_ZIO lock as writer; otherwise they can only
			 * transition from FALSE to TRUE. This ensures that
			 * any zio looking at these values can assume that
			 * failures persist for the life of the I/O. That's
			 * important because when a device has intermittent
			 * connectivity problems, we want to ensure that
			 * they're ascribed to the device (ENXIO) and not
			 * the zio (EIO).
			 *
			 * Since we hold SCL_ZIO as writer here, clear both
			 * values so the probe can reevaluate from first
			 * principles.
			 */
			vps->vps_flags |= ZIO_FLAG_CONFIG_WRITER;
			vd->vdev_cant_read = B_FALSE;
			vd->vdev_cant_write = B_FALSE;
		}

		vd->vdev_probe_zio = pio = zio_null(NULL, spa, vd,
		    vdev_probe_done, vps,
		    vps->vps_flags | ZIO_FLAG_DONT_PROPAGATE);

		/*
		 * We can't change the vdev state in this context, so we
		 * kick off an async task to do it on our behalf.
		 */
		if (zio != NULL) {
			vd->vdev_probe_wanted = B_TRUE;
			spa_async_request(spa, SPA_ASYNC_PROBE);
		}
	}

	if (zio != NULL)
		zio_add_child(zio, pio);

	mutex_exit(&vd->vdev_probe_lock);

	/* vps == NULL means a probe was already in flight; we only joined it. */
	if (vps == NULL) {
		ASSERT(zio != NULL);
		return (NULL);
	}

	/* Label 0 is skipped; see the comment above this function. */
	for (int l = 1; l < VDEV_LABELS; l++) {
		zio_nowait(zio_read_phys(pio, vd,
		    vdev_label_offset(vd->vdev_psize, l,
		    offsetof(vdev_label_t, vl_pad2)),
		    VDEV_PAD_SIZE, zio_buf_alloc(VDEV_PAD_SIZE),
		    ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
		    ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE));
	}

	/* With no caller zio, hand the probe zio back without issuing it. */
	if (zio == NULL)
		return (pio);

	zio_nowait(pio);
	return (NULL);
}

/*
 * Taskq callback: open one child vdev and record the result in
 * vdev_open_error.  vdev_open_thread is set for the duration of the open.
 */
static void
vdev_open_child(void *arg)
{
	vdev_t *vd = arg;

	vd->vdev_open_thread = curthread;
	vd->vdev_open_error = vdev_open(vd);
	vd->vdev_open_thread = NULL;
}

/*
 * Return B_TRUE if 'vd' or any vdev below it has a vdev_path that
 * begins with ZVOL_DIR.
 */
boolean_t
vdev_uses_zvols(vdev_t *vd)
{
	if (vd->vdev_path && strncmp(vd->vdev_path, ZVOL_DIR,
	    strlen(ZVOL_DIR)) == 0)
		return (B_TRUE);
	for (int c = 0; c < vd->vdev_children; c++)
		if (vdev_uses_zvols(vd->vdev_child[c]))
			return (B_TRUE);
	return (B_FALSE);
}

/*
 * Open every child of 'vd', storing each result in the child's
 * vdev_open_error.  Opens are dispatched to a taskq with one thread
 * per child, except when zvols are involved (see below).
 */
void
vdev_open_children(vdev_t *vd)
{
	taskq_t *tq;
	int children = vd->vdev_children;

	/*
	 * in order to handle pools on top of zvols, do the opens
	 * in a single thread so that the same thread holds the
	 * spa_namespace_lock
	 */
	if (vdev_uses_zvols(vd)) {
		for (int c = 0; c < children; c++)
			vd->vdev_child[c]->vdev_open_error =
			    vdev_open(vd->vdev_child[c]);
		return;
	}
	tq = taskq_create("vdev_open", children, minclsyspri,
	    children, children, TASKQ_PREPOPULATE);

	for (int c = 0; c < children; c++)
		VERIFY(taskq_dispatch(tq, vdev_open_child, vd->vdev_child[c],
		    TQ_SLEEP) != NULL);

	/* NOTE(review): presumably taskq_destroy() waits for the opens — confirm. */
	taskq_destroy(tq);
}

/*
 * Prepare a virtual device for access.
+ */ +int +vdev_open(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + int error; + uint64_t osize = 0; + uint64_t asize, psize; + uint64_t ashift = 0; + + ASSERT(vd->vdev_open_thread == curthread || + spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || + vd->vdev_state == VDEV_STATE_CANT_OPEN || + vd->vdev_state == VDEV_STATE_OFFLINE); + + vd->vdev_stat.vs_aux = VDEV_AUX_NONE; + vd->vdev_cant_read = B_FALSE; + vd->vdev_cant_write = B_FALSE; + vd->vdev_min_asize = vdev_get_min_asize(vd); + + /* + * If this vdev is not removed, check its fault status. If it's + * faulted, bail out of the open. + */ + if (!vd->vdev_removed && vd->vdev_faulted) { + ASSERT(vd->vdev_children == 0); + ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED || + vd->vdev_label_aux == VDEV_AUX_EXTERNAL); + vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, + vd->vdev_label_aux); + return (ENXIO); + } else if (vd->vdev_offline) { + ASSERT(vd->vdev_children == 0); + vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); + return (ENXIO); + } + + error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); + + /* + * Reset the vdev_reopening flag so that we actually close + * the vdev on error. + */ + vd->vdev_reopening = B_FALSE; + if (zio_injection_enabled && error == 0) + error = zio_handle_device_injection(vd, NULL, ENXIO); + + if (error) { + if (vd->vdev_removed && + vd->vdev_stat.vs_aux != VDEV_AUX_OPEN_FAILED) + vd->vdev_removed = B_FALSE; + + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + vd->vdev_stat.vs_aux); + return (error); + } + + vd->vdev_removed = B_FALSE; + + /* + * Recheck the faulted flag now that we have confirmed that + * the vdev is accessible. If we're faulted, bail. 
+ */ + if (vd->vdev_faulted) { + ASSERT(vd->vdev_children == 0); + ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED || + vd->vdev_label_aux == VDEV_AUX_EXTERNAL); + vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, + vd->vdev_label_aux); + return (ENXIO); + } + + if (vd->vdev_degraded) { + ASSERT(vd->vdev_children == 0); + vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, + VDEV_AUX_ERR_EXCEEDED); + } else { + vdev_set_state(vd, B_TRUE, VDEV_STATE_HEALTHY, 0); + } + + /* + * For hole or missing vdevs we just return success. + */ + if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops) + return (0); + + for (int c = 0; c < vd->vdev_children; c++) { + if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, + VDEV_AUX_NONE); + break; + } + } + + osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); + + if (vd->vdev_children == 0) { + if (osize < SPA_MINDEVSIZE) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_TOO_SMALL); + return (EOVERFLOW); + } + psize = osize; + asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE); + } else { + if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE - + (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_TOO_SMALL); + return (EOVERFLOW); + } + psize = 0; + asize = osize; + } + + vd->vdev_psize = psize; + + /* + * Make sure the allocatable size hasn't shrunk. + */ + if (asize < vd->vdev_min_asize) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (EINVAL); + } + + if (vd->vdev_asize == 0) { + /* + * This is the first-ever open, so use the computed values. + * For testing purposes, a higher ashift can be requested. + */ + vd->vdev_asize = asize; + vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); + } else { + /* + * Make sure the alignment requirement hasn't increased. 
+ */ + if (ashift > vd->vdev_top->vdev_ashift) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (EINVAL); + } + } + + /* + * If all children are healthy and the asize has increased, + * then we've experienced dynamic LUN growth. If automatic + * expansion is enabled then use the additional space. + */ + if (vd->vdev_state == VDEV_STATE_HEALTHY && asize > vd->vdev_asize && + (vd->vdev_expanding || spa->spa_autoexpand)) + vd->vdev_asize = asize; + + vdev_set_min_asize(vd); + + /* + * Ensure we can issue some IO before declaring the + * vdev open for business. + */ + if (vd->vdev_ops->vdev_op_leaf && + (error = zio_wait(vdev_probe(vd, NULL))) != 0) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, + VDEV_AUX_ERR_EXCEEDED); + return (error); + } + + /* + * If a leaf vdev has a DTL, and seems healthy, then kick off a + * resilver. But don't do this if we are doing a reopen for a scrub, + * since this would just restart the scrub we are already doing. + */ + if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && + vdev_resilver_needed(vd, NULL, NULL)) + spa_async_request(spa, SPA_ASYNC_RESILVER); + + return (0); +} + +/* + * Called once the vdevs are all opened, this routine validates the label + * contents. This needs to be done before vdev_load() so that we don't + * inadvertently do repair I/Os to the wrong device. + * + * This function will only return failure if one of the vdevs indicates that it + * has since been destroyed or exported. This is only possible if + * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state + * will be updated but the function will return 0. 
+ */ +int +vdev_validate(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + nvlist_t *label; + uint64_t guid = 0, top_guid; + uint64_t state; + + for (int c = 0; c < vd->vdev_children; c++) + if (vdev_validate(vd->vdev_child[c]) != 0) + return (EBADF); + + /* + * If the device has already failed, or was marked offline, don't do + * any further validation. Otherwise, label I/O will fail and we will + * overwrite the previous state. + */ + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + uint64_t aux_guid = 0; + nvlist_t *nvl; + + if ((label = vdev_label_read_config(vd)) == NULL) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (0); + } + + /* + * Determine if this vdev has been split off into another + * pool. If so, then refuse to open it. + */ + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_SPLIT_GUID, + &aux_guid) == 0 && aux_guid == spa_guid(spa)) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_SPLIT_POOL); + nvlist_free(label); + return (0); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, + &guid) != 0 || guid != spa_guid(spa)) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (0); + } + + if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvl) + != 0 || nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_ORIG_GUID, + &aux_guid) != 0) + aux_guid = 0; + + /* + * If this vdev just became a top-level vdev because its + * sibling was detached, it will have adopted the parent's + * vdev guid -- but the label may or may not be on disk yet. + * Fortunately, either version of the label will have the + * same top guid, so if we're a top-level vdev, we can + * safely compare to that instead. + * + * If we split this vdev off instead, then we also check the + * original pool's guid. We don't want to consider the vdev + * corrupt if it is partway through a split operation. 
+ */ + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, + &guid) != 0 || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_TOP_GUID, + &top_guid) != 0 || + ((vd->vdev_guid != guid && vd->vdev_guid != aux_guid) && + (vd->vdev_guid != top_guid || vd != vd->vdev_top))) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (0); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, + &state) != 0) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (0); + } + + nvlist_free(label); + + /* + * If this is a verbatim import, no need to check the + * state of the pool. + */ + if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) && + spa_load_state(spa) == SPA_LOAD_OPEN && + state != POOL_STATE_ACTIVE) + return (EBADF); + + /* + * If we were able to open and validate a vdev that was + * previously marked permanently unavailable, clear that state + * now. + */ + if (vd->vdev_not_present) + vd->vdev_not_present = 0; + } + + return (0); +} + +/* + * Close a virtual device. + */ +void +vdev_close(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *pvd = vd->vdev_parent; + + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + + /* + * If our parent is reopening, then we are as well, unless we are + * going offline. + */ + if (pvd != NULL && pvd->vdev_reopening) + vd->vdev_reopening = (pvd->vdev_reopening && !vd->vdev_offline); + + vd->vdev_ops->vdev_op_close(vd); + + vdev_cache_purge(vd); + + /* + * We record the previous state before we close it, so that if we are + * doing a reopen(), we don't generate FMA ereports if we notice that + * it's still faulted. 
+ */ + vd->vdev_prevstate = vd->vdev_state; + + if (vd->vdev_offline) + vd->vdev_state = VDEV_STATE_OFFLINE; + else + vd->vdev_state = VDEV_STATE_CLOSED; + vd->vdev_stat.vs_aux = VDEV_AUX_NONE; +} + +void +vdev_hold(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_is_root(spa)); + if (spa->spa_state == POOL_STATE_UNINITIALIZED) + return; + + for (int c = 0; c < vd->vdev_children; c++) + vdev_hold(vd->vdev_child[c]); + + if (vd->vdev_ops->vdev_op_leaf) + vd->vdev_ops->vdev_op_hold(vd); +} + +void +vdev_rele(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_is_root(spa)); + for (int c = 0; c < vd->vdev_children; c++) + vdev_rele(vd->vdev_child[c]); + + if (vd->vdev_ops->vdev_op_leaf) + vd->vdev_ops->vdev_op_rele(vd); +} + +/* + * Reopen all interior vdevs and any unopened leaves. We don't actually + * reopen leaf vdevs which had previously been opened as they might deadlock + * on the spa_config_lock. Instead we only obtain the leaf's physical size. + * If the leaf has never been opened then open it, as usual. + */ +void +vdev_reopen(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + + /* set the reopening flag unless we're taking the vdev offline */ + vd->vdev_reopening = !vd->vdev_offline; + vdev_close(vd); + (void) vdev_open(vd); + + /* + * Call vdev_validate() here to make sure we have the same device. + * Otherwise, a device with an invalid label could be successfully + * opened in response to vdev_reopen(). + */ + if (vd->vdev_aux) { + (void) vdev_validate_aux(vd); + if (vdev_readable(vd) && vdev_writeable(vd) && + vd->vdev_aux == &spa->spa_l2cache && + !l2arc_vdev_present(vd)) + l2arc_add_vdev(spa, vd); + } else { + (void) vdev_validate(vd); + } + + /* + * Reassess parent vdev's health. + */ + vdev_propagate_state(vd); +} + +int +vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing) +{ + int error; + + /* + * Normally, partial opens (e.g. of a mirror) are allowed. 
+ * For a create, however, we want to fail the request if + * there are any components we can't open. + */ + error = vdev_open(vd); + + if (error || vd->vdev_state != VDEV_STATE_HEALTHY) { + vdev_close(vd); + return (error ? error : ENXIO); + } + + /* + * Recursively initialize all labels. + */ + if ((error = vdev_label_init(vd, txg, isreplacing ? + VDEV_LABEL_REPLACE : VDEV_LABEL_CREATE)) != 0) { + vdev_close(vd); + return (error); + } + + return (0); +} + +void +vdev_metaslab_set_size(vdev_t *vd) +{ + /* + * Aim for roughly 200 metaslabs per vdev. + */ + vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); + vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); +} + +void +vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) +{ + ASSERT(vd == vd->vdev_top); + ASSERT(!vd->vdev_ishole); + ASSERT(ISP2(flags)); + ASSERT(spa_writeable(vd->vdev_spa)); + + if (flags & VDD_METASLAB) + (void) txg_list_add(&vd->vdev_ms_list, arg, txg); + + if (flags & VDD_DTL) + (void) txg_list_add(&vd->vdev_dtl_list, arg, txg); + + (void) txg_list_add(&vd->vdev_spa->spa_vdev_txg_list, vd, txg); +} + +/* + * DTLs. + * + * A vdev's DTL (dirty time log) is the set of transaction groups for which + * the vdev has less than perfect replication. There are four kinds of DTL: + * + * DTL_MISSING: txgs for which the vdev has no valid copies of the data + * + * DTL_PARTIAL: txgs for which data is available, but not fully replicated + * + * DTL_SCRUB: the txgs that could not be repaired by the last scrub; upon + * scrub completion, DTL_SCRUB replaces DTL_MISSING in the range of + * txgs that was scrubbed. + * + * DTL_OUTAGE: txgs which cannot currently be read, whether due to + * persistent errors or just some device being offline. + * Unlike the other three, the DTL_OUTAGE map is not generally + * maintained; it's only computed when needed, typically to + * determine whether a device can be detached. 
 *
 * For leaf vdevs, DTL_MISSING and DTL_PARTIAL are identical: the device
 * either has the data or it doesn't.
 *
 * For interior vdevs such as mirror and RAID-Z the picture is more complex.
 * A vdev's DTL_PARTIAL is the union of its children's DTL_PARTIALs, because
 * if any child is less than fully replicated, then so is its parent.
 * A vdev's DTL_MISSING is a modified union of its children's DTL_MISSINGs,
 * comprising only those txgs which appear in more than 'maxfaults' children;
 * those are the txgs we don't have enough replication to read.  For example,
 * double-parity RAID-Z can tolerate up to two missing devices (maxfaults == 2);
 * thus, its DTL_MISSING consists of the set of txgs that appear in more than
 * two child DTL_MISSING maps.
 *
 * It should be clear from the above that to compute the DTLs and outage maps
 * for all vdevs, it suffices to know just the leaf vdevs' DTL_MISSING maps.
 * Therefore, that is all we keep on disk.  When loading the pool, or after
 * a configuration change, we generate all other DTLs from first principles.
+ */ +void +vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) +{ + space_map_t *sm = &vd->vdev_dtl[t]; + + ASSERT(t < DTL_TYPES); + ASSERT(vd != vd->vdev_spa->spa_root_vdev); + ASSERT(spa_writeable(vd->vdev_spa)); + + mutex_enter(sm->sm_lock); + if (!space_map_contains(sm, txg, size)) + space_map_add(sm, txg, size); + mutex_exit(sm->sm_lock); +} + +boolean_t +vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) +{ + space_map_t *sm = &vd->vdev_dtl[t]; + boolean_t dirty = B_FALSE; + + ASSERT(t < DTL_TYPES); + ASSERT(vd != vd->vdev_spa->spa_root_vdev); + + mutex_enter(sm->sm_lock); + if (sm->sm_space != 0) + dirty = space_map_contains(sm, txg, size); + mutex_exit(sm->sm_lock); + + return (dirty); +} + +boolean_t +vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t) +{ + space_map_t *sm = &vd->vdev_dtl[t]; + boolean_t empty; + + mutex_enter(sm->sm_lock); + empty = (sm->sm_space == 0); + mutex_exit(sm->sm_lock); + + return (empty); +} + +/* + * Reassess DTLs after a config change or scrub completion. + */ +void +vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) +{ + spa_t *spa = vd->vdev_spa; + avl_tree_t reftree; + int minref; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); + + for (int c = 0; c < vd->vdev_children; c++) + vdev_dtl_reassess(vd->vdev_child[c], txg, + scrub_txg, scrub_done); + + if (vd == spa->spa_root_vdev || vd->vdev_ishole || vd->vdev_aux) + return; + + if (vd->vdev_ops->vdev_op_leaf) { + dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; + + mutex_enter(&vd->vdev_dtl_lock); + if (scrub_txg != 0 && + (spa->spa_scrub_started || + (scn && scn->scn_phys.scn_errors == 0))) { + /* + * We completed a scrub up to scrub_txg. If we + * did it without rebooting, then the scrub dtl + * will be valid, so excise the old region and + * fold in the scrub dtl. Otherwise, leave the + * dtl as-is if there was an error. 
+ * + * There's little trick here: to excise the beginning + * of the DTL_MISSING map, we put it into a reference + * tree and then add a segment with refcnt -1 that + * covers the range [0, scrub_txg). This means + * that each txg in that range has refcnt -1 or 0. + * We then add DTL_SCRUB with a refcnt of 2, so that + * entries in the range [0, scrub_txg) will have a + * positive refcnt -- either 1 or 2. We then convert + * the reference tree into the new DTL_MISSING map. + */ + space_map_ref_create(&reftree); + space_map_ref_add_map(&reftree, + &vd->vdev_dtl[DTL_MISSING], 1); + space_map_ref_add_seg(&reftree, 0, scrub_txg, -1); + space_map_ref_add_map(&reftree, + &vd->vdev_dtl[DTL_SCRUB], 2); + space_map_ref_generate_map(&reftree, + &vd->vdev_dtl[DTL_MISSING], 1); + space_map_ref_destroy(&reftree); + } + space_map_vacate(&vd->vdev_dtl[DTL_PARTIAL], NULL, NULL); + space_map_walk(&vd->vdev_dtl[DTL_MISSING], + space_map_add, &vd->vdev_dtl[DTL_PARTIAL]); + if (scrub_done) + space_map_vacate(&vd->vdev_dtl[DTL_SCRUB], NULL, NULL); + space_map_vacate(&vd->vdev_dtl[DTL_OUTAGE], NULL, NULL); + if (!vdev_readable(vd)) + space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL); + else + space_map_walk(&vd->vdev_dtl[DTL_MISSING], + space_map_add, &vd->vdev_dtl[DTL_OUTAGE]); + mutex_exit(&vd->vdev_dtl_lock); + + if (txg != 0) + vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); + return; + } + + mutex_enter(&vd->vdev_dtl_lock); + for (int t = 0; t < DTL_TYPES; t++) { + /* account for child's outage in parent's missing map */ + int s = (t == DTL_MISSING) ? DTL_OUTAGE: t; + if (t == DTL_SCRUB) + continue; /* leaf vdevs only */ + if (t == DTL_PARTIAL) + minref = 1; /* i.e. 
non-zero */ + else if (vd->vdev_nparity != 0) + minref = vd->vdev_nparity + 1; /* RAID-Z */ + else + minref = vd->vdev_children; /* any kind of mirror */ + space_map_ref_create(&reftree); + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + mutex_enter(&cvd->vdev_dtl_lock); + space_map_ref_add_map(&reftree, &cvd->vdev_dtl[s], 1); + mutex_exit(&cvd->vdev_dtl_lock); + } + space_map_ref_generate_map(&reftree, &vd->vdev_dtl[t], minref); + space_map_ref_destroy(&reftree); + } + mutex_exit(&vd->vdev_dtl_lock); +} + +static int +vdev_dtl_load(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + space_map_obj_t *smo = &vd->vdev_dtl_smo; + objset_t *mos = spa->spa_meta_objset; + dmu_buf_t *db; + int error; + + ASSERT(vd->vdev_children == 0); + + if (smo->smo_object == 0) + return (0); + + ASSERT(!vd->vdev_ishole); + + if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0) + return (error); + + ASSERT3U(db->db_size, >=, sizeof (*smo)); + bcopy(db->db_data, smo, sizeof (*smo)); + dmu_buf_rele(db, FTAG); + + mutex_enter(&vd->vdev_dtl_lock); + error = space_map_load(&vd->vdev_dtl[DTL_MISSING], + NULL, SM_ALLOC, smo, mos); + mutex_exit(&vd->vdev_dtl_lock); + + return (error); +} + +void +vdev_dtl_sync(vdev_t *vd, uint64_t txg) +{ + spa_t *spa = vd->vdev_spa; + space_map_obj_t *smo = &vd->vdev_dtl_smo; + space_map_t *sm = &vd->vdev_dtl[DTL_MISSING]; + objset_t *mos = spa->spa_meta_objset; + space_map_t smsync; + kmutex_t smlock; + dmu_buf_t *db; + dmu_tx_t *tx; + + ASSERT(!vd->vdev_ishole); + + tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); + + if (vd->vdev_detached) { + if (smo->smo_object != 0) { + int err = dmu_object_free(mos, smo->smo_object, tx); + ASSERT3U(err, ==, 0); + smo->smo_object = 0; + } + dmu_tx_commit(tx); + return; + } + + if (smo->smo_object == 0) { + ASSERT(smo->smo_objsize == 0); + ASSERT(smo->smo_alloc == 0); + smo->smo_object = dmu_object_alloc(mos, + DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, + 
DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); + ASSERT(smo->smo_object != 0); + vdev_config_dirty(vd->vdev_top); + } + + mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL); + + space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift, + &smlock); + + mutex_enter(&smlock); + + mutex_enter(&vd->vdev_dtl_lock); + space_map_walk(sm, space_map_add, &smsync); + mutex_exit(&vd->vdev_dtl_lock); + + space_map_truncate(smo, mos, tx); + space_map_sync(&smsync, SM_ALLOC, smo, mos, tx); + + space_map_destroy(&smsync); + + mutex_exit(&smlock); + mutex_destroy(&smlock); + + VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); + dmu_buf_will_dirty(db, tx); + ASSERT3U(db->db_size, >=, sizeof (*smo)); + bcopy(smo, db->db_data, sizeof (*smo)); + dmu_buf_rele(db, FTAG); + + dmu_tx_commit(tx); +} + +/* + * Determine whether the specified vdev can be offlined/detached/removed + * without losing data. + */ +boolean_t +vdev_dtl_required(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *tvd = vd->vdev_top; + uint8_t cant_read = vd->vdev_cant_read; + boolean_t required; + + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + + if (vd == spa->spa_root_vdev || vd == tvd) + return (B_TRUE); + + /* + * Temporarily mark the device as unreadable, and then determine + * whether this results in any DTL outages in the top-level vdev. + * If not, we can safely offline/detach/remove the device. + */ + vd->vdev_cant_read = B_TRUE; + vdev_dtl_reassess(tvd, 0, 0, B_FALSE); + required = !vdev_dtl_empty(tvd, DTL_OUTAGE); + vd->vdev_cant_read = cant_read; + vdev_dtl_reassess(tvd, 0, 0, B_FALSE); + + if (!required && zio_injection_enabled) + required = !!zio_handle_device_injection(vd, NULL, ECHILD); + + return (required); +} + +/* + * Determine if resilver is needed, and if so the txg range. 
+ */ +boolean_t +vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp) +{ + boolean_t needed = B_FALSE; + uint64_t thismin = UINT64_MAX; + uint64_t thismax = 0; + + if (vd->vdev_children == 0) { + mutex_enter(&vd->vdev_dtl_lock); + if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 && + vdev_writeable(vd)) { + space_seg_t *ss; + + ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); + thismin = ss->ss_start - 1; + ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); + thismax = ss->ss_end; + needed = B_TRUE; + } + mutex_exit(&vd->vdev_dtl_lock); + } else { + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + uint64_t cmin, cmax; + + if (vdev_resilver_needed(cvd, &cmin, &cmax)) { + thismin = MIN(thismin, cmin); + thismax = MAX(thismax, cmax); + needed = B_TRUE; + } + } + } + + if (needed && minp) { + *minp = thismin; + *maxp = thismax; + } + return (needed); +} + +void +vdev_load(vdev_t *vd) +{ + /* + * Recursively load all children. + */ + for (int c = 0; c < vd->vdev_children; c++) + vdev_load(vd->vdev_child[c]); + + /* + * If this is a top-level vdev, initialize its metaslabs. + */ + if (vd == vd->vdev_top && !vd->vdev_ishole && + (vd->vdev_ashift == 0 || vd->vdev_asize == 0 || + vdev_metaslab_init(vd, 0) != 0)) + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + + /* + * If this is a leaf vdev, load its DTL. + */ + if (vd->vdev_ops->vdev_op_leaf && vdev_dtl_load(vd) != 0) + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); +} + +/* + * The special vdev case is used for hot spares and l2cache devices. Its + * sole purpose it to set the vdev state for the associated vdev. To do this, + * we make sure that we can open the underlying device, then try to read the + * label, and make sure that the label is sane and that it hasn't been + * repurposed to another pool. 
+ */ +int +vdev_validate_aux(vdev_t *vd) +{ + nvlist_t *label; + uint64_t guid, version; + uint64_t state; + + if (!vdev_readable(vd)) + return (0); + + if ((label = vdev_label_read_config(vd)) == NULL) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + return (-1); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 || + version > SPA_VERSION || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 || + guid != vd->vdev_guid || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (-1); + } + + /* + * We don't actually check the pool state here. If it's in fact in + * use by another pool, we update this fact on the fly when requested. + */ + nvlist_free(label); + return (0); +} + +void +vdev_remove(vdev_t *vd, uint64_t txg) +{ + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + dmu_tx_t *tx; + + tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); + + if (vd->vdev_dtl_smo.smo_object) { + ASSERT3U(vd->vdev_dtl_smo.smo_alloc, ==, 0); + (void) dmu_object_free(mos, vd->vdev_dtl_smo.smo_object, tx); + vd->vdev_dtl_smo.smo_object = 0; + } + + if (vd->vdev_ms != NULL) { + for (int m = 0; m < vd->vdev_ms_count; m++) { + metaslab_t *msp = vd->vdev_ms[m]; + + if (msp == NULL || msp->ms_smo.smo_object == 0) + continue; + + ASSERT3U(msp->ms_smo.smo_alloc, ==, 0); + (void) dmu_object_free(mos, msp->ms_smo.smo_object, tx); + msp->ms_smo.smo_object = 0; + } + } + + if (vd->vdev_ms_array) { + (void) dmu_object_free(mos, vd->vdev_ms_array, tx); + vd->vdev_ms_array = 0; + vd->vdev_ms_shift = 0; + } + dmu_tx_commit(tx); +} + +void +vdev_sync_done(vdev_t *vd, uint64_t txg) +{ + metaslab_t *msp; + boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg)); + + ASSERT(!vd->vdev_ishole); + + while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) + 
metaslab_sync_done(msp, txg); + + if (reassess) + metaslab_sync_reassess(vd->vdev_mg); +} + +void +vdev_sync(vdev_t *vd, uint64_t txg) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *lvd; + metaslab_t *msp; + dmu_tx_t *tx; + + ASSERT(!vd->vdev_ishole); + + if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) { + ASSERT(vd == vd->vdev_top); + tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); + vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset, + DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx); + ASSERT(vd->vdev_ms_array != 0); + vdev_config_dirty(vd); + dmu_tx_commit(tx); + } + + /* + * Remove the metadata associated with this vdev once it's empty. + */ + if (vd->vdev_stat.vs_alloc == 0 && vd->vdev_removing) + vdev_remove(vd, txg); + + while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) { + metaslab_sync(msp, txg); + (void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg)); + } + + while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL) + vdev_dtl_sync(lvd, txg); + + (void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)); +} + +uint64_t +vdev_psize_to_asize(vdev_t *vd, uint64_t psize) +{ + return (vd->vdev_ops->vdev_op_asize(vd, psize)); +} + +/* + * Mark the given vdev faulted. A faulted vdev behaves as if the device could + * not be opened, and no I/O is attempted. + */ +int +vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux) +{ + vdev_t *vd, *tvd; + + spa_vdev_state_enter(spa, SCL_NONE); + + if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, ENODEV)); + + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); + + tvd = vd->vdev_top; + + /* + * We don't directly use the aux state here, but if we do a + * vdev_reopen(), we need this value to be present to remember why we + * were faulted. + */ + vd->vdev_label_aux = aux; + + /* + * Faulted state takes precedence over degraded. 
+ */ + vd->vdev_delayed_close = B_FALSE; + vd->vdev_faulted = 1ULL; + vd->vdev_degraded = 0ULL; + vdev_set_state(vd, B_FALSE, VDEV_STATE_FAULTED, aux); + + /* + * If this device has the only valid copy of the data, then + * back off and simply mark the vdev as degraded instead. + */ + if (!tvd->vdev_islog && vd->vdev_aux == NULL && vdev_dtl_required(vd)) { + vd->vdev_degraded = 1ULL; + vd->vdev_faulted = 0ULL; + + /* + * If we reopen the device and it's not dead, only then do we + * mark it degraded. + */ + vdev_reopen(tvd); + + if (vdev_readable(vd)) + vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, aux); + } + + return (spa_vdev_state_exit(spa, vd, 0)); +} + +/* + * Mark the given vdev degraded. A degraded vdev is purely an indication to the + * user that something is wrong. The vdev continues to operate as normal as far + * as I/O is concerned. + */ +int +vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux) +{ + vdev_t *vd; + + spa_vdev_state_enter(spa, SCL_NONE); + + if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, ENODEV)); + + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); + + /* + * If the vdev is already faulted, then don't do anything. + */ + if (vd->vdev_faulted || vd->vdev_degraded) + return (spa_vdev_state_exit(spa, NULL, 0)); + + vd->vdev_degraded = 1ULL; + if (!vdev_is_dead(vd)) + vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, + aux); + + return (spa_vdev_state_exit(spa, vd, 0)); +} + +/* + * Online the given vdev. If 'unspare' is set, it implies two things. First, + * any attached spare device should be detached when the device finishes + * resilvering. Second, the online should be treated like a 'test' online case, + * so no FMA events are generated if the device fails to open. 
+ */ +int +vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) +{ + vdev_t *vd, *tvd, *pvd, *rvd = spa->spa_root_vdev; + + spa_vdev_state_enter(spa, SCL_NONE); + + if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, ENODEV)); + + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); + + tvd = vd->vdev_top; + vd->vdev_offline = B_FALSE; + vd->vdev_tmpoffline = B_FALSE; + vd->vdev_checkremove = !!(flags & ZFS_ONLINE_CHECKREMOVE); + vd->vdev_forcefault = !!(flags & ZFS_ONLINE_FORCEFAULT); + + /* XXX - L2ARC 1.0 does not support expansion */ + if (!vd->vdev_aux) { + for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) + pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND); + } + + vdev_reopen(tvd); + vd->vdev_checkremove = vd->vdev_forcefault = B_FALSE; + + if (!vd->vdev_aux) { + for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) + pvd->vdev_expanding = B_FALSE; + } + + if (newstate) + *newstate = vd->vdev_state; + if ((flags & ZFS_ONLINE_UNSPARE) && + !vdev_is_dead(vd) && vd->vdev_parent && + vd->vdev_parent->vdev_ops == &vdev_spare_ops && + vd->vdev_parent->vdev_child[0] == vd) + vd->vdev_unspare = B_TRUE; + + if ((flags & ZFS_ONLINE_EXPAND) || spa->spa_autoexpand) { + + /* XXX - L2ARC 1.0 does not support expansion */ + if (vd->vdev_aux) + return (spa_vdev_state_exit(spa, vd, ENOTSUP)); + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); + } + return (spa_vdev_state_exit(spa, vd, 0)); +} + +static int +vdev_offline_locked(spa_t *spa, uint64_t guid, uint64_t flags) +{ + vdev_t *vd, *tvd; + int error = 0; + uint64_t generation; + metaslab_group_t *mg; + +top: + spa_vdev_state_enter(spa, SCL_ALLOC); + + if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, ENODEV)); + + if (!vd->vdev_ops->vdev_op_leaf) + return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); + + tvd = vd->vdev_top; + mg = tvd->vdev_mg; + generation = 
spa->spa_config_generation + 1; + + /* + * If the device isn't already offline, try to offline it. + */ + if (!vd->vdev_offline) { + /* + * If this device has the only valid copy of some data, + * don't allow it to be offlined. Log devices are always + * expendable. + */ + if (!tvd->vdev_islog && vd->vdev_aux == NULL && + vdev_dtl_required(vd)) + return (spa_vdev_state_exit(spa, NULL, EBUSY)); + + /* + * If the top-level is a slog and it has had allocations + * then proceed. We check that the vdev's metaslab group + * is not NULL since it's possible that we may have just + * added this vdev but not yet initialized its metaslabs. + */ + if (tvd->vdev_islog && mg != NULL) { + /* + * Prevent any future allocations. + */ + metaslab_group_passivate(mg); + (void) spa_vdev_state_exit(spa, vd, 0); + + error = spa_offline_log(spa); + + spa_vdev_state_enter(spa, SCL_ALLOC); + + /* + * Check to see if the config has changed. + */ + if (error || generation != spa->spa_config_generation) { + metaslab_group_activate(mg); + if (error) + return (spa_vdev_state_exit(spa, + vd, error)); + (void) spa_vdev_state_exit(spa, vd, 0); + goto top; + } + ASSERT3U(tvd->vdev_stat.vs_alloc, ==, 0); + } + + /* + * Offline this device and reopen its top-level vdev. + * If the top-level vdev is a log device then just offline + * it. Otherwise, if this action results in the top-level + * vdev becoming unusable, undo it and fail the request. + */ + vd->vdev_offline = B_TRUE; + vdev_reopen(tvd); + + if (!tvd->vdev_islog && vd->vdev_aux == NULL && + vdev_is_dead(tvd)) { + vd->vdev_offline = B_FALSE; + vdev_reopen(tvd); + return (spa_vdev_state_exit(spa, NULL, EBUSY)); + } + + /* + * Add the device back into the metaslab rotor so that + * once we online the device it's open for business. 
+ */ + if (tvd->vdev_islog && mg != NULL) + metaslab_group_activate(mg); + } + + vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY); + + return (spa_vdev_state_exit(spa, vd, 0)); +} + +int +vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags) +{ + int error; + + mutex_enter(&spa->spa_vdev_top_lock); + error = vdev_offline_locked(spa, guid, flags); + mutex_exit(&spa->spa_vdev_top_lock); + + return (error); +} + +/* + * Clear the error counts associated with this vdev. Unlike vdev_online() and + * vdev_offline(), we assume the spa config is locked. We also clear all + * children. If 'vd' is NULL, then the user wants to clear all vdevs. + */ +void +vdev_clear(spa_t *spa, vdev_t *vd) +{ + vdev_t *rvd = spa->spa_root_vdev; + + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + + if (vd == NULL) + vd = rvd; + + vd->vdev_stat.vs_read_errors = 0; + vd->vdev_stat.vs_write_errors = 0; + vd->vdev_stat.vs_checksum_errors = 0; + + for (int c = 0; c < vd->vdev_children; c++) + vdev_clear(spa, vd->vdev_child[c]); + + /* + * If we're in the FAULTED state or have experienced failed I/O, then + * clear the persistent state and attempt to reopen the device. We + * also mark the vdev config dirty, so that the new faulted state is + * written out to disk. + */ + if (vd->vdev_faulted || vd->vdev_degraded || + !vdev_readable(vd) || !vdev_writeable(vd)) { + + /* + * When reopening in response to a clear event, it may be due to + * a fmadm repair request. In this case, if the device is + * still broken, we want to still post the ereport again. + */ + vd->vdev_forcefault = B_TRUE; + + vd->vdev_faulted = vd->vdev_degraded = 0ULL; + vd->vdev_cant_read = B_FALSE; + vd->vdev_cant_write = B_FALSE; + + vdev_reopen(vd == rvd ? 
rvd : vd->vdev_top); + + vd->vdev_forcefault = B_FALSE; + + if (vd != rvd && vdev_writeable(vd->vdev_top)) + vdev_state_dirty(vd->vdev_top); + + if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) + spa_async_request(spa, SPA_ASYNC_RESILVER); + + spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR); + } + + /* + * When clearing a FMA-diagnosed fault, we always want to + * unspare the device, as we assume that the original spare was + * done in response to the FMA fault. + */ + if (!vdev_is_dead(vd) && vd->vdev_parent != NULL && + vd->vdev_parent->vdev_ops == &vdev_spare_ops && + vd->vdev_parent->vdev_child[0] == vd) + vd->vdev_unspare = B_TRUE; +} + +boolean_t +vdev_is_dead(vdev_t *vd) +{ + /* + * Holes and missing devices are always considered "dead". + * This simplifies the code since we don't have to check for + * these types of devices in the various code paths. + * Instead we rely on the fact that we skip over dead devices + * before issuing I/O to them. + */ + return (vd->vdev_state < VDEV_STATE_DEGRADED || vd->vdev_ishole || + vd->vdev_ops == &vdev_missing_ops); +} + +boolean_t +vdev_readable(vdev_t *vd) +{ + return (!vdev_is_dead(vd) && !vd->vdev_cant_read); +} + +boolean_t +vdev_writeable(vdev_t *vd) +{ + return (!vdev_is_dead(vd) && !vd->vdev_cant_write); +} + +boolean_t +vdev_allocatable(vdev_t *vd) +{ + uint64_t state = vd->vdev_state; + + /* + * We currently allow allocations from vdevs which may be in the + * process of reopening (i.e. VDEV_STATE_CLOSED). If the device + * fails to reopen then we'll catch it later when we're holding + * the proper locks. Note that we have to get the vdev state + * in a local variable because although it changes atomically, + * we're asking two separate questions about it. 
+ */ + return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) && + !vd->vdev_cant_write && !vd->vdev_ishole); +} + +boolean_t +vdev_accessible(vdev_t *vd, zio_t *zio) +{ + ASSERT(zio->io_vd == vd); + + if (vdev_is_dead(vd) || vd->vdev_remove_wanted) + return (B_FALSE); + + if (zio->io_type == ZIO_TYPE_READ) + return (!vd->vdev_cant_read); + + if (zio->io_type == ZIO_TYPE_WRITE) + return (!vd->vdev_cant_write); + + return (B_TRUE); +} + +/* + * Get statistics for the given vdev. + */ +void +vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) +{ + vdev_t *rvd = vd->vdev_spa->spa_root_vdev; + + mutex_enter(&vd->vdev_stat_lock); + bcopy(&vd->vdev_stat, vs, sizeof (*vs)); + vs->vs_timestamp = gethrtime() - vs->vs_timestamp; + vs->vs_state = vd->vdev_state; + vs->vs_rsize = vdev_get_min_asize(vd); + if (vd->vdev_ops->vdev_op_leaf) + vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; + mutex_exit(&vd->vdev_stat_lock); + + /* + * If we're getting stats on the root vdev, aggregate the I/O counts + * over all top-level vdevs (i.e. the direct children of the root). 
+ */ + if (vd == rvd) { + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *cvd = rvd->vdev_child[c]; + vdev_stat_t *cvs = &cvd->vdev_stat; + + mutex_enter(&vd->vdev_stat_lock); + for (int t = 0; t < ZIO_TYPES; t++) { + vs->vs_ops[t] += cvs->vs_ops[t]; + vs->vs_bytes[t] += cvs->vs_bytes[t]; + } + cvs->vs_scan_removing = cvd->vdev_removing; + mutex_exit(&vd->vdev_stat_lock); + } + } +} + +void +vdev_clear_stats(vdev_t *vd) +{ + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_space = 0; + vd->vdev_stat.vs_dspace = 0; + vd->vdev_stat.vs_alloc = 0; + mutex_exit(&vd->vdev_stat_lock); +} + +void +vdev_scan_stat_init(vdev_t *vd) +{ + vdev_stat_t *vs = &vd->vdev_stat; + + for (int c = 0; c < vd->vdev_children; c++) + vdev_scan_stat_init(vd->vdev_child[c]); + + mutex_enter(&vd->vdev_stat_lock); + vs->vs_scan_processed = 0; + mutex_exit(&vd->vdev_stat_lock); +} + +void +vdev_stat_update(zio_t *zio, uint64_t psize) +{ + spa_t *spa = zio->io_spa; + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd = zio->io_vd ? zio->io_vd : rvd; + vdev_t *pvd; + uint64_t txg = zio->io_txg; + vdev_stat_t *vs = &vd->vdev_stat; + zio_type_t type = zio->io_type; + int flags = zio->io_flags; + + /* + * If this i/o is a gang leader, it didn't do any actual work. + */ + if (zio->io_gang_tree) + return; + + if (zio->io_error == 0) { + /* + * If this is a root i/o, don't count it -- we've already + * counted the top-level vdevs, and vdev_get_stats() will + * aggregate them when asked. This reduces contention on + * the root vdev_stat_lock and implicitly handles blocks + * that compress away to holes, for which there is no i/o. + * (Holes never create vdev children, so all the counters + * remain zero, which is what we want.) + * + * Note: this only applies to successful i/o (io_error == 0) + * because unlike i/o counts, errors are not additive. + * When reading a ditto block, for example, failure of + * one top-level vdev does not imply a root-level error. 
+ */ + if (vd == rvd) + return; + + ASSERT(vd == zio->io_vd); + + if (flags & ZIO_FLAG_IO_BYPASS) + return; + + mutex_enter(&vd->vdev_stat_lock); + + if (flags & ZIO_FLAG_IO_REPAIR) { + if (flags & ZIO_FLAG_SCAN_THREAD) { + dsl_scan_phys_t *scn_phys = + &spa->spa_dsl_pool->dp_scan->scn_phys; + uint64_t *processed = &scn_phys->scn_processed; + + /* XXX cleanup? */ + if (vd->vdev_ops->vdev_op_leaf) + atomic_add_64(processed, psize); + vs->vs_scan_processed += psize; + } + + if (flags & ZIO_FLAG_SELF_HEAL) + vs->vs_self_healed += psize; + } + + vs->vs_ops[type]++; + vs->vs_bytes[type] += psize; + + mutex_exit(&vd->vdev_stat_lock); + return; + } + + if (flags & ZIO_FLAG_SPECULATIVE) + return; + + /* + * If this is an I/O error that is going to be retried, then ignore the + * error. Otherwise, the user may interpret B_FAILFAST I/O errors as + * hard errors, when in reality they can happen for any number of + * innocuous reasons (bus resets, MPxIO link failure, etc). + */ + if (zio->io_error == EIO && + !(zio->io_flags & ZIO_FLAG_IO_RETRY)) + return; + + /* + * Intent log writes won't propagate their error to the root + * I/O so don't mark these types of failures as pool-level + * errors. + */ + if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) + return; + + mutex_enter(&vd->vdev_stat_lock); + if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) { + if (zio->io_error == ECKSUM) + vs->vs_checksum_errors++; + else + vs->vs_read_errors++; + } + if (type == ZIO_TYPE_WRITE && !vdev_is_dead(vd)) + vs->vs_write_errors++; + mutex_exit(&vd->vdev_stat_lock); + + if (type == ZIO_TYPE_WRITE && txg != 0 && + (!(flags & ZIO_FLAG_IO_REPAIR) || + (flags & ZIO_FLAG_SCAN_THREAD) || + spa->spa_claiming)) { + /* + * This is either a normal write (not a repair), or it's + * a repair induced by the scrub thread, or it's a repair + * made by zil_claim() during spa_load() in the first txg. + * In the normal case, we commit the DTL change in the same + * txg as the block was born. 
In the scrub-induced repair + * case, we know that scrubs run in first-pass syncing context, + * so we commit the DTL change in spa_syncing_txg(spa). + * In the zil_claim() case, we commit in spa_first_txg(spa). + * + * We currently do not make DTL entries for failed spontaneous + * self-healing writes triggered by normal (non-scrubbing) + * reads, because we have no transactional context in which to + * do so -- and it's not clear that it'd be desirable anyway. + */ + if (vd->vdev_ops->vdev_op_leaf) { + uint64_t commit_txg = txg; + if (flags & ZIO_FLAG_SCAN_THREAD) { + ASSERT(flags & ZIO_FLAG_IO_REPAIR); + ASSERT(spa_sync_pass(spa) == 1); + vdev_dtl_dirty(vd, DTL_SCRUB, txg, 1); + commit_txg = spa_syncing_txg(spa); + } else if (spa->spa_claiming) { + ASSERT(flags & ZIO_FLAG_IO_REPAIR); + commit_txg = spa_first_txg(spa); + } + ASSERT(commit_txg >= spa_syncing_txg(spa)); + if (vdev_dtl_contains(vd, DTL_MISSING, txg, 1)) + return; + for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) + vdev_dtl_dirty(pvd, DTL_PARTIAL, txg, 1); + vdev_dirty(vd->vdev_top, VDD_DTL, vd, commit_txg); + } + if (vd != rvd) + vdev_dtl_dirty(vd, DTL_MISSING, txg, 1); + } +} + +/* + * Update the in-core space usage stats for this vdev, its metaslab class, + * and the root vdev. + */ +void +vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, + int64_t space_delta) +{ + int64_t dspace_delta = space_delta; + spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; + metaslab_group_t *mg = vd->vdev_mg; + metaslab_class_t *mc = mg ? mg->mg_class : NULL; + + ASSERT(vd == vd->vdev_top); + + /* + * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion + * factor. We must calculate this here and not at the root vdev + * because the root vdev's psize-to-asize is simply the max of its + * childrens', thus not accurate enough for us. 
+ */ + ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0); + ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache); + dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) * + vd->vdev_deflate_ratio; + + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_alloc += alloc_delta; + vd->vdev_stat.vs_space += space_delta; + vd->vdev_stat.vs_dspace += dspace_delta; + mutex_exit(&vd->vdev_stat_lock); + + if (mc == spa_normal_class(spa)) { + mutex_enter(&rvd->vdev_stat_lock); + rvd->vdev_stat.vs_alloc += alloc_delta; + rvd->vdev_stat.vs_space += space_delta; + rvd->vdev_stat.vs_dspace += dspace_delta; + mutex_exit(&rvd->vdev_stat_lock); + } + + if (mc != NULL) { + ASSERT(rvd == vd->vdev_parent); + ASSERT(vd->vdev_ms_count != 0); + + metaslab_class_space_update(mc, + alloc_delta, defer_delta, space_delta, dspace_delta); + } +} + +/* + * Mark a top-level vdev's config as dirty, placing it on the dirty list + * so that it will be written out next time the vdev configuration is synced. + * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs. + */ +void +vdev_config_dirty(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; + int c; + + ASSERT(spa_writeable(spa)); + + /* + * If this is an aux vdev (as with l2cache and spare devices), then we + * update the vdev config manually and set the sync flag. + */ + if (vd->vdev_aux != NULL) { + spa_aux_vdev_t *sav = vd->vdev_aux; + nvlist_t **aux; + uint_t naux; + + for (c = 0; c < sav->sav_count; c++) { + if (sav->sav_vdevs[c] == vd) + break; + } + + if (c == sav->sav_count) { + /* + * We're being removed. There's nothing more to do. 
+ */ + ASSERT(sav->sav_sync == B_TRUE); + return; + } + + sav->sav_sync = B_TRUE; + + if (nvlist_lookup_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, &aux, &naux) != 0) { + VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_SPARES, &aux, &naux) == 0); + } + + ASSERT(c < naux); + + /* + * Setting the nvlist in the middle of the array is a little + * sketchy, but it will work. + */ + nvlist_free(aux[c]); + aux[c] = vdev_config_generate(spa, vd, B_TRUE, 0); + + return; + } + + /* + * The dirty list is protected by the SCL_CONFIG lock. The caller + * must either hold SCL_CONFIG as writer, or must be the sync thread + * (which holds SCL_CONFIG as reader). There's only one sync thread, + * so this is sufficient to ensure mutual exclusion. + */ + ASSERT(spa_config_held(spa, SCL_CONFIG, RW_WRITER) || + (dsl_pool_sync_context(spa_get_dsl(spa)) && + spa_config_held(spa, SCL_CONFIG, RW_READER))); + + if (vd == rvd) { + for (c = 0; c < rvd->vdev_children; c++) + vdev_config_dirty(rvd->vdev_child[c]); + } else { + ASSERT(vd == vd->vdev_top); + + if (!list_link_active(&vd->vdev_config_dirty_node) && + !vd->vdev_ishole) + list_insert_head(&spa->spa_config_dirty_list, vd); + } +} + +void +vdev_config_clean(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_config_held(spa, SCL_CONFIG, RW_WRITER) || + (dsl_pool_sync_context(spa_get_dsl(spa)) && + spa_config_held(spa, SCL_CONFIG, RW_READER))); + + ASSERT(list_link_active(&vd->vdev_config_dirty_node)); + list_remove(&spa->spa_config_dirty_list, vd); +} + +/* + * Mark a top-level vdev's state as dirty, so that the next pass of + * spa_sync() can convert this into vdev_config_dirty(). We distinguish + * the state changes from larger config changes because they require + * much less locking, and are often needed for administrative actions. 
+ */ +void +vdev_state_dirty(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_writeable(spa)); + ASSERT(vd == vd->vdev_top); + + /* + * The state list is protected by the SCL_STATE lock. The caller + * must either hold SCL_STATE as writer, or must be the sync thread + * (which holds SCL_STATE as reader). There's only one sync thread, + * so this is sufficient to ensure mutual exclusion. + */ + ASSERT(spa_config_held(spa, SCL_STATE, RW_WRITER) || + (dsl_pool_sync_context(spa_get_dsl(spa)) && + spa_config_held(spa, SCL_STATE, RW_READER))); + + if (!list_link_active(&vd->vdev_state_dirty_node) && !vd->vdev_ishole) + list_insert_head(&spa->spa_state_dirty_list, vd); +} + +void +vdev_state_clean(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_config_held(spa, SCL_STATE, RW_WRITER) || + (dsl_pool_sync_context(spa_get_dsl(spa)) && + spa_config_held(spa, SCL_STATE, RW_READER))); + + ASSERT(list_link_active(&vd->vdev_state_dirty_node)); + list_remove(&spa->spa_state_dirty_list, vd); +} + +/* + * Propagate vdev state up from children to parent. + */ +void +vdev_propagate_state(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; + int degraded = 0, faulted = 0; + int corrupted = 0; + vdev_t *child; + + if (vd->vdev_children > 0) { + for (int c = 0; c < vd->vdev_children; c++) { + child = vd->vdev_child[c]; + + /* + * Don't factor holes into the decision. + */ + if (child->vdev_ishole) + continue; + + if (!vdev_readable(child) || + (!vdev_writeable(child) && spa_writeable(spa))) { + /* + * Root special: if there is a top-level log + * device, treat the root vdev as if it were + * degraded. 
+ */ + if (child->vdev_islog && vd == rvd) + degraded++; + else + faulted++; + } else if (child->vdev_state <= VDEV_STATE_DEGRADED) { + degraded++; + } + + if (child->vdev_stat.vs_aux == VDEV_AUX_CORRUPT_DATA) + corrupted++; + } + + vd->vdev_ops->vdev_op_state_change(vd, faulted, degraded); + + /* + * Root special: if there is a top-level vdev that cannot be + * opened due to corrupted metadata, then propagate the root + * vdev's aux state as 'corrupt' rather than 'insufficient + * replicas'. + */ + if (corrupted && vd == rvd && + rvd->vdev_state == VDEV_STATE_CANT_OPEN) + vdev_set_state(rvd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + } + + if (vd->vdev_parent) + vdev_propagate_state(vd->vdev_parent); +} + +/* + * Set a vdev's state. If this is during an open, we don't update the parent + * state, because we're in the process of opening children depth-first. + * Otherwise, we propagate the change to the parent. + * + * If this routine places a device in a faulted state, an appropriate ereport is + * generated. + */ +void +vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) +{ + uint64_t save_state; + spa_t *spa = vd->vdev_spa; + + if (state == vd->vdev_state) { + vd->vdev_stat.vs_aux = aux; + return; + } + + save_state = vd->vdev_state; + + vd->vdev_state = state; + vd->vdev_stat.vs_aux = aux; + + /* + * If we are setting the vdev state to anything but an open state, then + * always close the underlying device unless the device has requested + * a delayed close (i.e. we're about to remove or fault the device). + * Otherwise, we keep accessible but invalid devices open forever. + * We don't call vdev_close() itself, because that implies some extra + * checks (offline, etc) that we don't want here. This is limited to + * leaf devices, because otherwise closing the device will affect other + * children. 
+ */ + if (!vd->vdev_delayed_close && vdev_is_dead(vd) && + vd->vdev_ops->vdev_op_leaf) + vd->vdev_ops->vdev_op_close(vd); + + /* + * If we have brought this vdev back into service, we need + * to notify fmd so that it can gracefully repair any outstanding + * cases due to a missing device. We do this in all cases, even those + * that probably don't correlate to a repaired fault. This is sure to + * catch all cases, and we let the zfs-retire agent sort it out. If + * this is a transient state it's OK, as the retire agent will + * double-check the state of the vdev before repairing it. + */ + if (state == VDEV_STATE_HEALTHY && vd->vdev_ops->vdev_op_leaf && + vd->vdev_prevstate != state) + zfs_post_state_change(spa, vd); + + if (vd->vdev_removed && + state == VDEV_STATE_CANT_OPEN && + (aux == VDEV_AUX_OPEN_FAILED || vd->vdev_checkremove)) { + /* + * If the previous state is set to VDEV_STATE_REMOVED, then this + * device was previously marked removed and someone attempted to + * reopen it. If this failed due to a nonexistent device, then + * keep the device in the REMOVED state. We also let this be if + * it is one of our special test online cases, which is only + * attempting to online the device and shouldn't generate an FMA + * fault. + */ + vd->vdev_state = VDEV_STATE_REMOVED; + vd->vdev_stat.vs_aux = VDEV_AUX_NONE; + } else if (state == VDEV_STATE_REMOVED) { + vd->vdev_removed = B_TRUE; + } else if (state == VDEV_STATE_CANT_OPEN) { + /* + * If we fail to open a vdev during an import or recovery, we + * mark it as "not available", which signifies that it was + * never there to begin with. Failure to open such a device + * is not considered an error. + */ + if ((spa_load_state(spa) == SPA_LOAD_IMPORT || + spa_load_state(spa) == SPA_LOAD_RECOVER) && + vd->vdev_ops->vdev_op_leaf) + vd->vdev_not_present = 1; + + /* + * Post the appropriate ereport. 
If the 'prevstate' field is + * set to something other than VDEV_STATE_UNKNOWN, it indicates + * that this is part of a vdev_reopen(). In this case, we don't + * want to post the ereport if the device was already in the + * CANT_OPEN state beforehand. + * + * If the 'checkremove' flag is set, then this is an attempt to + * online the device in response to an insertion event. If we + * hit this case, then we have detected an insertion event for a + * faulted or offline device that wasn't in the removed state. + * In this scenario, we don't post an ereport because we are + * about to replace the device, or attempt an online with + * vdev_forcefault, which will generate the fault for us. + */ + if ((vd->vdev_prevstate != state || vd->vdev_forcefault) && + !vd->vdev_not_present && !vd->vdev_checkremove && + vd != spa->spa_root_vdev) { + const char *class; + + switch (aux) { + case VDEV_AUX_OPEN_FAILED: + class = FM_EREPORT_ZFS_DEVICE_OPEN_FAILED; + break; + case VDEV_AUX_CORRUPT_DATA: + class = FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA; + break; + case VDEV_AUX_NO_REPLICAS: + class = FM_EREPORT_ZFS_DEVICE_NO_REPLICAS; + break; + case VDEV_AUX_BAD_GUID_SUM: + class = FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM; + break; + case VDEV_AUX_TOO_SMALL: + class = FM_EREPORT_ZFS_DEVICE_TOO_SMALL; + break; + case VDEV_AUX_BAD_LABEL: + class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL; + break; + default: + class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; + } + + zfs_ereport_post(class, spa, vd, NULL, save_state, 0); + } + + /* Erase any notion of persistent removed state */ + vd->vdev_removed = B_FALSE; + } else { + vd->vdev_removed = B_FALSE; + } + + if (!isopen && vd->vdev_parent) + vdev_propagate_state(vd->vdev_parent); +} + +/* + * Check the vdev configuration to ensure that it's capable of supporting + * a root pool. Currently, we do not support RAID-Z or partial configuration. + * In addition, only a single top-level vdev is allowed and none of the leaves + * can be wholedisks. 
+ */ +boolean_t +vdev_is_bootable(vdev_t *vd) +{ + if (!vd->vdev_ops->vdev_op_leaf) { + char *vdev_type = vd->vdev_ops->vdev_op_type; + + if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 && + vd->vdev_children > 1) { + return (B_FALSE); + } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || + strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { + return (B_FALSE); + } + } else if (vd->vdev_wholedisk == 1) { + return (B_FALSE); + } + + for (int c = 0; c < vd->vdev_children; c++) { + if (!vdev_is_bootable(vd->vdev_child[c])) + return (B_FALSE); + } + return (B_TRUE); +} + +/* + * Load the state from the original vdev tree (ovd) which + * we've retrieved from the MOS config object. If the original + * vdev was offline or faulted then we transfer that state to the + * device in the current vdev tree (nvd). + */ +void +vdev_load_log_state(vdev_t *nvd, vdev_t *ovd) +{ + spa_t *spa = nvd->vdev_spa; + + ASSERT(nvd->vdev_top->vdev_islog); + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + ASSERT3U(nvd->vdev_guid, ==, ovd->vdev_guid); + + for (int c = 0; c < nvd->vdev_children; c++) + vdev_load_log_state(nvd->vdev_child[c], ovd->vdev_child[c]); + + if (nvd->vdev_ops->vdev_op_leaf) { + /* + * Restore the persistent vdev state + */ + nvd->vdev_offline = ovd->vdev_offline; + nvd->vdev_faulted = ovd->vdev_faulted; + nvd->vdev_degraded = ovd->vdev_degraded; + nvd->vdev_removed = ovd->vdev_removed; + } +} + +/* + * Determine if a log device has valid content. If the vdev was + * removed or faulted in the MOS config then we know that + * the content on the log device has already been written to the pool. + */ +boolean_t +vdev_log_state_valid(vdev_t *vd) +{ + if (vd->vdev_ops->vdev_op_leaf && !vd->vdev_faulted && + !vd->vdev_removed) + return (B_TRUE); + + for (int c = 0; c < vd->vdev_children; c++) + if (vdev_log_state_valid(vd->vdev_child[c])) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * Expand a vdev if possible. 
+ */ +void +vdev_expand(vdev_t *vd, uint64_t txg) +{ + ASSERT(vd->vdev_top == vd); + ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count) { + VERIFY(vdev_metaslab_init(vd, txg) == 0); + vdev_config_dirty(vd); + } +} + +/* + * Split a vdev. + */ +void +vdev_split(vdev_t *vd) +{ + vdev_t *cvd, *pvd = vd->vdev_parent; + + vdev_remove_child(pvd, vd); + vdev_compact_children(pvd); + + cvd = pvd->vdev_child[0]; + if (pvd->vdev_children == 1) { + vdev_remove_parent(cvd); + cvd->vdev_splitting = B_TRUE; + } + vdev_propagate_state(cvd); +} diff --git a/uts/common/fs/zfs/vdev_cache.c b/uts/common/fs/zfs/vdev_cache.c new file mode 100644 index 000000000000..688d541344cb --- /dev/null +++ b/uts/common/fs/zfs/vdev_cache.c @@ -0,0 +1,416 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/kstat.h> + +/* + * Virtual device read-ahead caching. 
+ * + * This file implements a simple LRU read-ahead cache. When the DMU reads + * a given block, it will often want other, nearby blocks soon thereafter. + * We take advantage of this by reading a larger disk region and caching + * the result. In the best case, this can turn 128 back-to-back 512-byte + * reads into a single 64k read followed by 127 cache hits; this reduces + * latency dramatically. In the worst case, it can turn an isolated 512-byte + * read into a 64k read, which doesn't affect latency all that much but is + * terribly wasteful of bandwidth. A more intelligent version of the cache + * could keep track of access patterns and not do read-ahead unless it sees + * at least two temporally close I/Os to the same region. Currently, only + * metadata I/O is inflated. A further enhancement could take advantage of + * more semantic information about the I/O. And it could use something + * faster than an AVL tree; that was chosen solely for convenience. + * + * There are five cache operations: allocate, fill, read, write, evict. + * + * (1) Allocate. This reserves a cache entry for the specified region. + * We separate the allocate and fill operations so that multiple threads + * don't generate I/O for the same cache miss. + * + * (2) Fill. When the I/O for a cache miss completes, the fill routine + * places the data in the previously allocated cache entry. + * + * (3) Read. Read data from the cache. + * + * (4) Write. Update cache contents after write completion. + * + * (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry + * if the total cache size exceeds zfs_vdev_cache_size. + */ + +/* + * These tunables are for performance analysis. + */ +/* + * All i/os smaller than zfs_vdev_cache_max will be turned into + * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software + * track buffer). At most zfs_vdev_cache_size bytes will be kept in each + * vdev's vdev_cache. 
+ */ +int zfs_vdev_cache_max = 1<<14; /* 16KB */ +int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */ +int zfs_vdev_cache_bshift = 16; + +#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */ + +kstat_t *vdc_ksp = NULL; + +typedef struct vdc_stats { + kstat_named_t vdc_stat_delegations; + kstat_named_t vdc_stat_hits; + kstat_named_t vdc_stat_misses; +} vdc_stats_t; + +static vdc_stats_t vdc_stats = { + { "delegations", KSTAT_DATA_UINT64 }, + { "hits", KSTAT_DATA_UINT64 }, + { "misses", KSTAT_DATA_UINT64 } +}; + +#define VDCSTAT_BUMP(stat) atomic_add_64(&vdc_stats.stat.value.ui64, 1); + +static int +vdev_cache_offset_compare(const void *a1, const void *a2) +{ + const vdev_cache_entry_t *ve1 = a1; + const vdev_cache_entry_t *ve2 = a2; + + if (ve1->ve_offset < ve2->ve_offset) + return (-1); + if (ve1->ve_offset > ve2->ve_offset) + return (1); + return (0); +} + +static int +vdev_cache_lastused_compare(const void *a1, const void *a2) +{ + const vdev_cache_entry_t *ve1 = a1; + const vdev_cache_entry_t *ve2 = a2; + + if (ve1->ve_lastused < ve2->ve_lastused) + return (-1); + if (ve1->ve_lastused > ve2->ve_lastused) + return (1); + + /* + * Among equally old entries, sort by offset to ensure uniqueness. + */ + return (vdev_cache_offset_compare(a1, a2)); +} + +/* + * Evict the specified entry from the cache. + */ +static void +vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve) +{ + ASSERT(MUTEX_HELD(&vc->vc_lock)); + ASSERT(ve->ve_fill_io == NULL); + ASSERT(ve->ve_data != NULL); + + avl_remove(&vc->vc_lastused_tree, ve); + avl_remove(&vc->vc_offset_tree, ve); + zio_buf_free(ve->ve_data, VCBS); + kmem_free(ve, sizeof (vdev_cache_entry_t)); +} + +/* + * Allocate an entry in the cache. At the point we don't have the data, + * we're just creating a placeholder so that multiple threads don't all + * go off and read the same blocks. 
+ */ +static vdev_cache_entry_t * +vdev_cache_allocate(zio_t *zio) +{ + vdev_cache_t *vc = &zio->io_vd->vdev_cache; + uint64_t offset = P2ALIGN(zio->io_offset, VCBS); + vdev_cache_entry_t *ve; + + ASSERT(MUTEX_HELD(&vc->vc_lock)); + + if (zfs_vdev_cache_size == 0) + return (NULL); + + /* + * If adding a new entry would exceed the cache size, + * evict the oldest entry (LRU). + */ + if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) > + zfs_vdev_cache_size) { + ve = avl_first(&vc->vc_lastused_tree); + if (ve->ve_fill_io != NULL) + return (NULL); + ASSERT(ve->ve_hits != 0); + vdev_cache_evict(vc, ve); + } + + ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP); + ve->ve_offset = offset; + ve->ve_lastused = ddi_get_lbolt(); + ve->ve_data = zio_buf_alloc(VCBS); + + avl_add(&vc->vc_offset_tree, ve); + avl_add(&vc->vc_lastused_tree, ve); + + return (ve); +} + +static void +vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) +{ + uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); + + ASSERT(MUTEX_HELD(&vc->vc_lock)); + ASSERT(ve->ve_fill_io == NULL); + + if (ve->ve_lastused != ddi_get_lbolt()) { + avl_remove(&vc->vc_lastused_tree, ve); + ve->ve_lastused = ddi_get_lbolt(); + avl_add(&vc->vc_lastused_tree, ve); + } + + ve->ve_hits++; + bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size); +} + +/* + * Fill a previously allocated cache entry with data. + */ +static void +vdev_cache_fill(zio_t *fio) +{ + vdev_t *vd = fio->io_vd; + vdev_cache_t *vc = &vd->vdev_cache; + vdev_cache_entry_t *ve = fio->io_private; + zio_t *pio; + + ASSERT(fio->io_size == VCBS); + + /* + * Add data to the cache. 
+ */ + mutex_enter(&vc->vc_lock); + + ASSERT(ve->ve_fill_io == fio); + ASSERT(ve->ve_offset == fio->io_offset); + ASSERT(ve->ve_data == fio->io_data); + + ve->ve_fill_io = NULL; + + /* + * Even if this cache line was invalidated by a missed write update, + * any reads that were queued up before the missed update are still + * valid, so we can satisfy them from this line before we evict it. + */ + while ((pio = zio_walk_parents(fio)) != NULL) + vdev_cache_hit(vc, ve, pio); + + if (fio->io_error || ve->ve_missed_update) + vdev_cache_evict(vc, ve); + + mutex_exit(&vc->vc_lock); +} + +/* + * Read data from the cache. Returns 0 on cache hit, errno on a miss. + */ +int +vdev_cache_read(zio_t *zio) +{ + vdev_cache_t *vc = &zio->io_vd->vdev_cache; + vdev_cache_entry_t *ve, ve_search; + uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS); + uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); + zio_t *fio; + + ASSERT(zio->io_type == ZIO_TYPE_READ); + + if (zio->io_flags & ZIO_FLAG_DONT_CACHE) + return (EINVAL); + + if (zio->io_size > zfs_vdev_cache_max) + return (EOVERFLOW); + + /* + * If the I/O straddles two or more cache blocks, don't cache it. 
+ */ + if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS)) + return (EXDEV); + + ASSERT(cache_phase + zio->io_size <= VCBS); + + mutex_enter(&vc->vc_lock); + + ve_search.ve_offset = cache_offset; + ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL); + + if (ve != NULL) { + if (ve->ve_missed_update) { + mutex_exit(&vc->vc_lock); + return (ESTALE); + } + + if ((fio = ve->ve_fill_io) != NULL) { + zio_vdev_io_bypass(zio); + zio_add_child(zio, fio); + mutex_exit(&vc->vc_lock); + VDCSTAT_BUMP(vdc_stat_delegations); + return (0); + } + + vdev_cache_hit(vc, ve, zio); + zio_vdev_io_bypass(zio); + + mutex_exit(&vc->vc_lock); + VDCSTAT_BUMP(vdc_stat_hits); + return (0); + } + + ve = vdev_cache_allocate(zio); + + if (ve == NULL) { + mutex_exit(&vc->vc_lock); + return (ENOMEM); + } + + fio = zio_vdev_delegated_io(zio->io_vd, cache_offset, + ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_CACHE_FILL, + ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve); + + ve->ve_fill_io = fio; + zio_vdev_io_bypass(zio); + zio_add_child(zio, fio); + + mutex_exit(&vc->vc_lock); + zio_nowait(fio); + VDCSTAT_BUMP(vdc_stat_misses); + + return (0); +} + +/* + * Update cache contents upon write completion. 
+ */ +void +vdev_cache_write(zio_t *zio) +{ + vdev_cache_t *vc = &zio->io_vd->vdev_cache; + vdev_cache_entry_t *ve, ve_search; + uint64_t io_start = zio->io_offset; + uint64_t io_end = io_start + zio->io_size; + uint64_t min_offset = P2ALIGN(io_start, VCBS); + uint64_t max_offset = P2ROUNDUP(io_end, VCBS); + avl_index_t where; + + ASSERT(zio->io_type == ZIO_TYPE_WRITE); + + mutex_enter(&vc->vc_lock); + + ve_search.ve_offset = min_offset; + ve = avl_find(&vc->vc_offset_tree, &ve_search, &where); + + if (ve == NULL) + ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER); + + while (ve != NULL && ve->ve_offset < max_offset) { + uint64_t start = MAX(ve->ve_offset, io_start); + uint64_t end = MIN(ve->ve_offset + VCBS, io_end); + + if (ve->ve_fill_io != NULL) { + ve->ve_missed_update = 1; + } else { + bcopy((char *)zio->io_data + start - io_start, + ve->ve_data + start - ve->ve_offset, end - start); + } + ve = AVL_NEXT(&vc->vc_offset_tree, ve); + } + mutex_exit(&vc->vc_lock); +} + +void +vdev_cache_purge(vdev_t *vd) +{ + vdev_cache_t *vc = &vd->vdev_cache; + vdev_cache_entry_t *ve; + + mutex_enter(&vc->vc_lock); + while ((ve = avl_first(&vc->vc_offset_tree)) != NULL) + vdev_cache_evict(vc, ve); + mutex_exit(&vc->vc_lock); +} + +void +vdev_cache_init(vdev_t *vd) +{ + vdev_cache_t *vc = &vd->vdev_cache; + + mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL); + + avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare, + sizeof (vdev_cache_entry_t), + offsetof(struct vdev_cache_entry, ve_offset_node)); + + avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare, + sizeof (vdev_cache_entry_t), + offsetof(struct vdev_cache_entry, ve_lastused_node)); +} + +void +vdev_cache_fini(vdev_t *vd) +{ + vdev_cache_t *vc = &vd->vdev_cache; + + vdev_cache_purge(vd); + + avl_destroy(&vc->vc_offset_tree); + avl_destroy(&vc->vc_lastused_tree); + + mutex_destroy(&vc->vc_lock); +} + +void +vdev_cache_stat_init(void) +{ + vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc", 
+ KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + if (vdc_ksp != NULL) { + vdc_ksp->ks_data = &vdc_stats; + kstat_install(vdc_ksp); + } +} + +void +vdev_cache_stat_fini(void) +{ + if (vdc_ksp != NULL) { + kstat_delete(vdc_ksp); + vdc_ksp = NULL; + } +} diff --git a/uts/common/fs/zfs/vdev_disk.c b/uts/common/fs/zfs/vdev_disk.c new file mode 100644 index 000000000000..d7417736b4ee --- /dev/null +++ b/uts/common/fs/zfs/vdev_disk.c @@ -0,0 +1,610 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/spa_impl.h> +#include <sys/refcount.h> +#include <sys/vdev_disk.h> +#include <sys/vdev_impl.h> +#include <sys/fs/zfs.h> +#include <sys/zio.h> +#include <sys/sunldi.h> +#include <sys/fm/fs/zfs.h> + +/* + * Virtual device vector for disks. 
+ */ + +extern ldi_ident_t zfs_li; + +typedef struct vdev_disk_buf { + buf_t vdb_buf; + zio_t *vdb_io; +} vdev_disk_buf_t; + +static void +vdev_disk_hold(vdev_t *vd) +{ + ddi_devid_t devid; + char *minor; + + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + /* + * We must have a pathname, and it must be absolute. + */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') + return; + + /* + * Only prefetch path and devid info if the device has + * never been opened. + */ + if (vd->vdev_tsd != NULL) + return; + + if (vd->vdev_wholedisk == -1ULL) { + size_t len = strlen(vd->vdev_path) + 3; + char *buf = kmem_alloc(len, KM_SLEEP); + + (void) snprintf(buf, len, "%ss0", vd->vdev_path); + + (void) ldi_vp_from_name(buf, &vd->vdev_name_vp); + kmem_free(buf, len); + } + + if (vd->vdev_name_vp == NULL) + (void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp); + + if (vd->vdev_devid != NULL && + ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) { + (void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp); + ddi_devid_str_free(minor); + ddi_devid_free(devid); + } +} + +static void +vdev_disk_rele(vdev_t *vd) +{ + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + if (vd->vdev_name_vp) { + VN_RELE_ASYNC(vd->vdev_name_vp, + dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool)); + vd->vdev_name_vp = NULL; + } + if (vd->vdev_devid_vp) { + VN_RELE_ASYNC(vd->vdev_devid_vp, + dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool)); + vd->vdev_devid_vp = NULL; + } +} + +static int +vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) +{ + spa_t *spa = vd->vdev_spa; + vdev_disk_t *dvd; + struct dk_minfo_ext dkmext; + int error; + dev_t dev; + int otyp; + + /* + * We must have a pathname, and it must be absolute. + */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (EINVAL); + } + + /* + * Reopen the device if it's not currently open. 
Otherwise, + * just update the physical size of the device. + */ + if (vd->vdev_tsd != NULL) { + ASSERT(vd->vdev_reopening); + dvd = vd->vdev_tsd; + goto skip_open; + } + + dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + + /* + * When opening a disk device, we want to preserve the user's original + * intent. We always want to open the device by the path the user gave + * us, even if it is one of multiple paths to the save device. But we + * also want to be able to survive disks being removed/recabled. + * Therefore the sequence of opening devices is: + * + * 1. Try opening the device by path. For legacy pools without the + * 'whole_disk' property, attempt to fix the path by appending 's0'. + * + * 2. If the devid of the device matches the stored value, return + * success. + * + * 3. Otherwise, the device may have moved. Try opening the device + * by the devid instead. + */ + if (vd->vdev_devid != NULL) { + if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, + &dvd->vd_minor) != 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (EINVAL); + } + } + + error = EINVAL; /* presume failure */ + + if (vd->vdev_path != NULL) { + ddi_devid_t devid; + + if (vd->vdev_wholedisk == -1ULL) { + size_t len = strlen(vd->vdev_path) + 3; + char *buf = kmem_alloc(len, KM_SLEEP); + ldi_handle_t lh; + + (void) snprintf(buf, len, "%ss0", vd->vdev_path); + + if (ldi_open_by_name(buf, spa_mode(spa), kcred, + &lh, zfs_li) == 0) { + spa_strfree(vd->vdev_path); + vd->vdev_path = buf; + vd->vdev_wholedisk = 1ULL; + (void) ldi_close(lh, spa_mode(spa), kcred); + } else { + kmem_free(buf, len); + } + } + + error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, + &dvd->vd_lh, zfs_li); + + /* + * Compare the devid to the stored value. 
+ */ + if (error == 0 && vd->vdev_devid != NULL && + ldi_get_devid(dvd->vd_lh, &devid) == 0) { + if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { + error = EINVAL; + (void) ldi_close(dvd->vd_lh, spa_mode(spa), + kcred); + dvd->vd_lh = NULL; + } + ddi_devid_free(devid); + } + + /* + * If we succeeded in opening the device, but 'vdev_wholedisk' + * is not yet set, then this must be a slice. + */ + if (error == 0 && vd->vdev_wholedisk == -1ULL) + vd->vdev_wholedisk = 0; + } + + /* + * If we were unable to open by path, or the devid check fails, open by + * devid instead. + */ + if (error != 0 && vd->vdev_devid != NULL) + error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, + spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); + + /* + * If all else fails, then try opening by physical path (if available) + * or the logical path (if we failed due to the devid check). While not + * as reliable as the devid, this will give us something, and the higher + * level vdev validation will prevent us from opening the wrong device. + */ + if (error) { + if (vd->vdev_physpath != NULL && + (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) + error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), + kcred, &dvd->vd_lh, zfs_li); + + /* + * Note that we don't support the legacy auto-wholedisk support + * as above. This hasn't been used in a very long time and we + * don't need to propagate its oddities to this edge condition. + */ + if (error && vd->vdev_path != NULL) + error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), + kcred, &dvd->vd_lh, zfs_li); + } + + if (error) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (error); + } + + /* + * Once a device is opened, verify that the physical device path (if + * available) is up to date. 
+ */ + if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && + ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { + char *physpath, *minorname; + + physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); + minorname = NULL; + if (ddi_dev_pathname(dev, otyp, physpath) == 0 && + ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && + (vd->vdev_physpath == NULL || + strcmp(vd->vdev_physpath, physpath) != 0)) { + if (vd->vdev_physpath) + spa_strfree(vd->vdev_physpath); + (void) strlcat(physpath, ":", MAXPATHLEN); + (void) strlcat(physpath, minorname, MAXPATHLEN); + vd->vdev_physpath = spa_strdup(physpath); + } + if (minorname) + kmem_free(minorname, strlen(minorname) + 1); + kmem_free(physpath, MAXPATHLEN); + } + +skip_open: + /* + * Determine the actual size of the device. + */ + if (ldi_get_size(dvd->vd_lh, psize) != 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (EINVAL); + } + + /* + * If we own the whole disk, try to enable disk write caching. + * We ignore errors because it's OK if we can't do it. + */ + if (vd->vdev_wholedisk == 1) { + int wce = 1; + (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, + FKIOCTL, kcred, NULL); + } + + /* + * Determine the device's minimum transfer size. + * If the ioctl isn't supported, assume DEV_BSIZE. + */ + if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext, + FKIOCTL, kcred, NULL) != 0) + dkmext.dki_pbsize = DEV_BSIZE; + + *ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1; + + /* + * Clear the nowritecache bit, so that on a vdev_reopen() we will + * try again. 
+ */ + vd->vdev_nowritecache = B_FALSE; + + return (0); +} + +static void +vdev_disk_close(vdev_t *vd) +{ + vdev_disk_t *dvd = vd->vdev_tsd; + + if (vd->vdev_reopening || dvd == NULL) + return; + + if (dvd->vd_minor != NULL) + ddi_devid_str_free(dvd->vd_minor); + + if (dvd->vd_devid != NULL) + ddi_devid_free(dvd->vd_devid); + + if (dvd->vd_lh != NULL) + (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred); + + vd->vdev_delayed_close = B_FALSE; + kmem_free(dvd, sizeof (vdev_disk_t)); + vd->vdev_tsd = NULL; +} + +int +vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size, + uint64_t offset, int flags) +{ + buf_t *bp; + int error = 0; + + if (vd_lh == NULL) + return (EINVAL); + + ASSERT(flags & B_READ || flags & B_WRITE); + + bp = getrbuf(KM_SLEEP); + bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST; + bp->b_bcount = size; + bp->b_un.b_addr = (void *)data; + bp->b_lblkno = lbtodb(offset); + bp->b_bufsize = size; + + error = ldi_strategy(vd_lh, bp); + ASSERT(error == 0); + if ((error = biowait(bp)) == 0 && bp->b_resid != 0) + error = EIO; + freerbuf(bp); + + return (error); +} + +static void +vdev_disk_io_intr(buf_t *bp) +{ + vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp; + zio_t *zio = vdb->vdb_io; + + /* + * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO. + * Rather than teach the rest of the stack about other error + * possibilities (EFAULT, etc), we normalize the error value here. + */ + zio->io_error = (geterror(bp) != 0 ? 
EIO : 0); + + if (zio->io_error == 0 && bp->b_resid != 0) + zio->io_error = EIO; + + kmem_free(vdb, sizeof (vdev_disk_buf_t)); + + zio_interrupt(zio); +} + +static void +vdev_disk_ioctl_free(zio_t *zio) +{ + kmem_free(zio->io_vsd, sizeof (struct dk_callback)); +} + +static const zio_vsd_ops_t vdev_disk_vsd_ops = { + vdev_disk_ioctl_free, + zio_vsd_default_cksum_report +}; + +static void +vdev_disk_ioctl_done(void *zio_arg, int error) +{ + zio_t *zio = zio_arg; + + zio->io_error = error; + + zio_interrupt(zio); +} + +static int +vdev_disk_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_disk_t *dvd = vd->vdev_tsd; + vdev_disk_buf_t *vdb; + struct dk_callback *dkc; + buf_t *bp; + int error; + + if (zio->io_type == ZIO_TYPE_IOCTL) { + /* XXPOLICY */ + if (!vdev_readable(vd)) { + zio->io_error = ENXIO; + return (ZIO_PIPELINE_CONTINUE); + } + + switch (zio->io_cmd) { + + case DKIOCFLUSHWRITECACHE: + + if (zfs_nocacheflush) + break; + + if (vd->vdev_nowritecache) { + zio->io_error = ENOTSUP; + break; + } + + zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP); + zio->io_vsd_ops = &vdev_disk_vsd_ops; + + dkc->dkc_callback = vdev_disk_ioctl_done; + dkc->dkc_flag = FLUSH_VOLATILE; + dkc->dkc_cookie = zio; + + error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, + (uintptr_t)dkc, FKIOCTL, kcred, NULL); + + if (error == 0) { + /* + * The ioctl will be done asychronously, + * and will call vdev_disk_ioctl_done() + * upon completion. + */ + return (ZIO_PIPELINE_STOP); + } + + if (error == ENOTSUP || error == ENOTTY) { + /* + * If we get ENOTSUP or ENOTTY, we know that + * no future attempts will ever succeed. + * In this case we set a persistent bit so + * that we don't bother with the ioctl in the + * future. 
+ */ + vd->vdev_nowritecache = B_TRUE; + } + zio->io_error = error; + + break; + + default: + zio->io_error = ENOTSUP; + } + + return (ZIO_PIPELINE_CONTINUE); + } + + vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP); + + vdb->vdb_io = zio; + bp = &vdb->vdb_buf; + + bioinit(bp); + bp->b_flags = B_BUSY | B_NOCACHE | + (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); + if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) + bp->b_flags |= B_FAILFAST; + bp->b_bcount = zio->io_size; + bp->b_un.b_addr = zio->io_data; + bp->b_lblkno = lbtodb(zio->io_offset); + bp->b_bufsize = zio->io_size; + bp->b_iodone = (int (*)())vdev_disk_io_intr; + + /* ldi_strategy() will return non-zero only on programming errors */ + VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0); + + return (ZIO_PIPELINE_STOP); +} + +static void +vdev_disk_io_done(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + + /* + * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if + * the device has been removed. If this is the case, then we trigger an + * asynchronous removal of the device. Otherwise, probe the device and + * make sure it's still accessible. + */ + if (zio->io_error == EIO && !vd->vdev_remove_wanted) { + vdev_disk_t *dvd = vd->vdev_tsd; + int state = DKIO_NONE; + + if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state, + FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) { + /* + * We post the resource as soon as possible, instead of + * when the async removal actually happens, because the + * DE is using this information to discard previous I/O + * errors. 
+ */ + zfs_post_remove(zio->io_spa, vd); + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); + } else if (!vd->vdev_delayed_close) { + vd->vdev_delayed_close = B_TRUE; + } + } +} + +vdev_ops_t vdev_disk_ops = { + vdev_disk_open, + vdev_disk_close, + vdev_default_asize, + vdev_disk_io_start, + vdev_disk_io_done, + NULL, + vdev_disk_hold, + vdev_disk_rele, + VDEV_TYPE_DISK, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +/* + * Given the root disk device devid or pathname, read the label from + * the device, and construct a configuration nvlist. + */ +int +vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) +{ + ldi_handle_t vd_lh; + vdev_label_t *label; + uint64_t s, size; + int l; + ddi_devid_t tmpdevid; + int error = -1; + char *minor_name; + + /* + * Read the device label and build the nvlist. + */ + if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid, + &minor_name) == 0) { + error = ldi_open_by_devid(tmpdevid, minor_name, + FREAD, kcred, &vd_lh, zfs_li); + ddi_devid_free(tmpdevid); + ddi_devid_str_free(minor_name); + } + + if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, + zfs_li))) + return (error); + + if (ldi_get_size(vd_lh, &s)) { + (void) ldi_close(vd_lh, FREAD, kcred); + return (EIO); + } + + size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); + label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP); + + *config = NULL; + for (l = 0; l < VDEV_LABELS; l++) { + uint64_t offset, state, txg = 0; + + /* read vdev label */ + offset = vdev_label_offset(size, l, 0); + if (vdev_disk_physio(vd_lh, (caddr_t)label, + VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0) + continue; + + if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, + sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) { + *config = NULL; + continue; + } + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state >= POOL_STATE_DESTROYED) { + nvlist_free(*config); + 
*config = NULL; + continue; + } + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0) { + nvlist_free(*config); + *config = NULL; + continue; + } + + break; + } + + kmem_free(label, sizeof (vdev_label_t)); + (void) ldi_close(vd_lh, FREAD, kcred); + if (*config == NULL) + error = EIDRM; + + return (error); +} diff --git a/uts/common/fs/zfs/vdev_file.c b/uts/common/fs/zfs/vdev_file.c new file mode 100644 index 000000000000..8c22aa5316a1 --- /dev/null +++ b/uts/common/fs/zfs/vdev_file.c @@ -0,0 +1,217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_file.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/fs/zfs.h> +#include <sys/fm/fs/zfs.h> + +/* + * Virtual device vector for files. 
+ */ + +static void +vdev_file_hold(vdev_t *vd) +{ + ASSERT(vd->vdev_path != NULL); +} + +static void +vdev_file_rele(vdev_t *vd) +{ + ASSERT(vd->vdev_path != NULL); +} + +static int +vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) +{ + vdev_file_t *vf; + vnode_t *vp; + vattr_t vattr; + int error; + + /* + * We must have a pathname, and it must be absolute. + */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (EINVAL); + } + + /* + * Reopen the device if it's not currently open. Otherwise, + * just update the physical size of the device. + */ + if (vd->vdev_tsd != NULL) { + ASSERT(vd->vdev_reopening); + vf = vd->vdev_tsd; + goto skip_open; + } + + vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); + + /* + * We always open the files from the root of the global zone, even if + * we're in a local zone. If the user has gotten to this point, the + * administrator has already decided that the pool should be available + * to local zone users, so the underlying devices should be as well. + */ + ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); + error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, + spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); + + if (error) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (error); + } + + vf->vf_vnode = vp; + +#ifdef _KERNEL + /* + * Make sure it's a regular file. + */ + if (vp->v_type != VREG) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (ENODEV); + } +#endif + +skip_open: + /* + * Determine the physical size of the file. 
+ */ + vattr.va_mask = AT_SIZE; + error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); + if (error) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (error); + } + + *psize = vattr.va_size; + *ashift = SPA_MINBLOCKSHIFT; + + return (0); +} + +static void +vdev_file_close(vdev_t *vd) +{ + vdev_file_t *vf = vd->vdev_tsd; + + if (vd->vdev_reopening || vf == NULL) + return; + + if (vf->vf_vnode != NULL) { + (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); + (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, + kcred, NULL); + VN_RELE(vf->vf_vnode); + } + + vd->vdev_delayed_close = B_FALSE; + kmem_free(vf, sizeof (vdev_file_t)); + vd->vdev_tsd = NULL; +} + +static int +vdev_file_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_file_t *vf = vd->vdev_tsd; + ssize_t resid; + + if (zio->io_type == ZIO_TYPE_IOCTL) { + /* XXPOLICY */ + if (!vdev_readable(vd)) { + zio->io_error = ENXIO; + return (ZIO_PIPELINE_CONTINUE); + } + + switch (zio->io_cmd) { + case DKIOCFLUSHWRITECACHE: + zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, + kcred, NULL); + break; + default: + zio->io_error = ENOTSUP; + } + + return (ZIO_PIPELINE_CONTINUE); + } + + zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? + UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, + zio->io_size, zio->io_offset, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, &resid); + + if (resid != 0 && zio->io_error == 0) + zio->io_error = ENOSPC; + + zio_interrupt(zio); + + return (ZIO_PIPELINE_STOP); +} + +/* ARGSUSED */ +static void +vdev_file_io_done(zio_t *zio) +{ +} + +vdev_ops_t vdev_file_ops = { + vdev_file_open, + vdev_file_close, + vdev_default_asize, + vdev_file_io_start, + vdev_file_io_done, + NULL, + vdev_file_hold, + vdev_file_rele, + VDEV_TYPE_FILE, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +/* + * From userland we access disks just like files. 
+ */ +#ifndef _KERNEL + +vdev_ops_t vdev_disk_ops = { + vdev_file_open, + vdev_file_close, + vdev_default_asize, + vdev_file_io_start, + vdev_file_io_done, + NULL, + vdev_file_hold, + vdev_file_rele, + VDEV_TYPE_DISK, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +#endif diff --git a/uts/common/fs/zfs/vdev_label.c b/uts/common/fs/zfs/vdev_label.c new file mode 100644 index 000000000000..c08ed8ba0467 --- /dev/null +++ b/uts/common/fs/zfs/vdev_label.c @@ -0,0 +1,1216 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Virtual Device Labels + * --------------------- + * + * The vdev label serves several distinct purposes: + * + * 1. Uniquely identify this device as part of a ZFS pool and confirm its + * identity within the pool. + * + * 2. Verify that all the devices given in a configuration are present + * within the pool. + * + * 3. Determine the uberblock for the pool. + * + * 4. In case of an import operation, determine the configuration of the + * toplevel vdev of which it is a part. + * + * 5. 
If an import operation cannot find all the devices in the pool, + * provide enough information to the administrator to determine which + * devices are missing. + * + * It is important to note that while the kernel is responsible for writing the + * label, it only consumes the information in the first three cases. The + * latter information is only consumed in userland when determining the + * configuration to import a pool. + * + * + * Label Organization + * ------------------ + * + * Before describing the contents of the label, it's important to understand how + * the labels are written and updated with respect to the uberblock. + * + * When the pool configuration is altered, either because it was newly created + * or a device was added, we want to update all the labels such that we can deal + * with fatal failure at any point. To this end, each disk has two labels which + * are updated before and after the uberblock is synced. Assuming we have + * labels and an uberblock with the following transaction groups: + * + * L1 UB L2 + * +------+ +------+ +------+ + * | | | | | | + * | t10 | | t10 | | t10 | + * | | | | | | + * +------+ +------+ +------+ + * + * In this stable state, the labels and the uberblock were all updated within + * the same transaction group (10). Each label is mirrored and checksummed, so + * that we can detect when we fail partway through writing the label. + * + * In order to identify which labels are valid, the labels are written in the + * following manner: + * + * 1. For each vdev, update 'L1' to the new label + * 2. Update the uberblock + * 3. For each vdev, update 'L2' to the new label + * + * Given arbitrary failure, we can determine the correct label to use based on + * the transaction group. If we fail after updating L1 but before updating the + * UB, we will notice that L1's transaction group is greater than the uberblock, + * so L2 must be valid. 
If we fail after writing the uberblock but before + * writing L2, we will notice that L2's transaction group is less than L1, and + * therefore L1 is valid. + * + * Another added complexity is that not every label is updated when the config + * is synced. If we add a single device, we do not want to have to re-write + * every label for every device in the pool. This means that both L1 and L2 may + * be older than the pool uberblock, because the necessary information is stored + * on another vdev. + * + * + * On-disk Format + * -------------- + * + * The vdev label consists of two distinct parts, and is wrapped within the + * vdev_label_t structure. The label includes 8k of padding to permit legacy + * VTOC disk labels, but is otherwise ignored. + * + * The first half of the label is a packed nvlist which contains pool wide + * properties, per-vdev properties, and configuration information. It is + * described in more detail below. + * + * The latter half of the label consists of a redundant array of uberblocks. + * These uberblocks are updated whenever a transaction group is committed, + * or when the configuration is updated. When a pool is loaded, we scan each + * vdev for the 'best' uberblock. + * + * + * Configuration Information + * ------------------------- + * + * The nvlist describing the pool and vdev contains the following elements: + * + * version ZFS on-disk version + * name Pool name + * state Pool state + * txg Transaction group in which this label was written + * pool_guid Unique identifier for this pool + * vdev_tree An nvlist describing vdev tree. + * + * Each leaf device label also contains the following: + * + * top_guid Unique ID for top-level vdev in which this is contained + * guid Unique ID for the leaf vdev + * + * The 'vs' configuration follows the format described in 'spa_config.c'. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/dmu.h> +#include <sys/zap.h> +#include <sys/vdev.h> +#include <sys/vdev_impl.h> +#include <sys/uberblock_impl.h> +#include <sys/metaslab.h> +#include <sys/zio.h> +#include <sys/dsl_scan.h> +#include <sys/fs/zfs.h> + +/* + * Basic routines to read and write from a vdev label. + * Used throughout the rest of this file. + */ +uint64_t +vdev_label_offset(uint64_t psize, int l, uint64_t offset) +{ + ASSERT(offset < sizeof (vdev_label_t)); + ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0); + + return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? + 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); +} + +/* + * Returns back the vdev label associated with the passed in offset. + */ +int +vdev_label_number(uint64_t psize, uint64_t offset) +{ + int l; + + if (offset >= psize - VDEV_LABEL_END_SIZE) { + offset -= psize - VDEV_LABEL_END_SIZE; + offset += (VDEV_LABELS / 2) * sizeof (vdev_label_t); + } + l = offset / sizeof (vdev_label_t); + return (l < VDEV_LABELS ? 
l : -1); +} + +static void +vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, + uint64_t size, zio_done_func_t *done, void *private, int flags) +{ + ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == + SCL_STATE_ALL); + ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); + + zio_nowait(zio_read_phys(zio, vd, + vdev_label_offset(vd->vdev_psize, l, offset), + size, buf, ZIO_CHECKSUM_LABEL, done, private, + ZIO_PRIORITY_SYNC_READ, flags, B_TRUE)); +} + +static void +vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, + uint64_t size, zio_done_func_t *done, void *private, int flags) +{ + ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || + (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) == + (SCL_CONFIG | SCL_STATE) && + dsl_pool_sync_context(spa_get_dsl(zio->io_spa)))); + ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); + + zio_nowait(zio_write_phys(zio, vd, + vdev_label_offset(vd->vdev_psize, l, offset), + size, buf, ZIO_CHECKSUM_LABEL, done, private, + ZIO_PRIORITY_SYNC_WRITE, flags, B_TRUE)); +} + +/* + * Generate the nvlist representing this vdev's config. 
 */
/*
 * spa      - pool the vdev belongs to; consulted for the on-disk version
 *            and for scan statistics.
 * vd       - vdev to describe; interior vdevs recurse into their children.
 * getstats - when set, vdev statistics and (if available) scan statistics
 *            are attached as uint64 arrays.
 * flags    - VDEV_CONFIG_SPARE / VDEV_CONFIG_L2CACHE suppress the id and the
 *            top-level allocation properties; VDEV_CONFIG_REMOVING limits
 *            the children emitted to those being removed.
 *
 * Returns a newly allocated nvlist.  Every nvlist operation is wrapped in
 * VERIFY, so this function does not return an error.
 */
nvlist_t *
vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
    vdev_config_flag_t flags)
{
	nvlist_t *nv = NULL;

	VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/* Identity: type and guid always, id only for regular pool vdevs. */
	VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
	    vd->vdev_ops->vdev_op_type) == 0);
	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
		    == 0);
	VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);

	/* Optional device naming strings are added only when set. */
	if (vd->vdev_path != NULL)
		VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PATH,
		    vd->vdev_path) == 0);

	if (vd->vdev_devid != NULL)
		VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID,
		    vd->vdev_devid) == 0);

	if (vd->vdev_physpath != NULL)
		VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
		    vd->vdev_physpath) == 0);

	if (vd->vdev_fru != NULL)
		VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_FRU,
		    vd->vdev_fru) == 0);

	if (vd->vdev_nparity != 0) {
		ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
		    VDEV_TYPE_RAIDZ) == 0);

		/*
		 * Make sure someone hasn't managed to sneak a fancy new vdev
		 * into a crufty old storage pool.
		 */
		ASSERT(vd->vdev_nparity == 1 ||
		    (vd->vdev_nparity <= 2 &&
		    spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
		    (vd->vdev_nparity <= 3 &&
		    spa_version(spa) >= SPA_VERSION_RAIDZ3));

		/*
		 * Note that we'll add the nparity tag even on storage pools
		 * that only support a single parity device -- older software
		 * will just ignore it.
		 */
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY,
		    vd->vdev_nparity) == 0);
	}

	/* -1ULL is treated as "whole-disk status not set". */
	if (vd->vdev_wholedisk != -1ULL)
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    vd->vdev_wholedisk) == 0);

	if (vd->vdev_not_present)
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0);

	if (vd->vdev_isspare)
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);

	/*
	 * Allocation properties are recorded only on top-level vdevs, and
	 * never when generating a spare or l2cache config.
	 */
	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
	    vd == vd->vdev_top) {
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
		    vd->vdev_ms_array) == 0);
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
		    vd->vdev_ms_shift) == 0);
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT,
		    vd->vdev_ashift) == 0);
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
		    vd->vdev_asize) == 0);
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG,
		    vd->vdev_islog) == 0);
		if (vd->vdev_removing)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
			    vd->vdev_removing) == 0);
	}

	if (vd->vdev_dtl_smo.smo_object != 0)
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
		    vd->vdev_dtl_smo.smo_object) == 0);

	if (vd->vdev_crtxg)
		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
		    vd->vdev_crtxg) == 0);

	if (getstats) {
		vdev_stat_t vs;
		pool_scan_stat_t ps;

		/* The stat structures are exported as raw uint64_t arrays. */
		vdev_get_stats(vd, &vs);
		VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
		    (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0);

		/* provide either current or previous scan information */
		if (spa_scan_get_stats(spa, &ps) == 0) {
			VERIFY(nvlist_add_uint64_array(nv,
			    ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
			    sizeof (pool_scan_stat_t) / sizeof (uint64_t))
			    == 0);
		}
	}

	if (!vd->vdev_ops->vdev_op_leaf) {
		nvlist_t **child;
		int c, idx;

		ASSERT(!vd->vdev_ishole);

		/* Sized for every child even if some end up being skipped. */
		child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
		    KM_SLEEP);

		for (c = 0, idx = 0; c < vd->vdev_children; c++) {
			vdev_t *cvd = vd->vdev_child[c];

			/*
			 * If we're generating an nvlist of removing
			 * vdevs then skip over any device which is
			 * not being removed.
			 */
			if ((flags & VDEV_CONFIG_REMOVING) &&
			    !cvd->vdev_removing)
				continue;

			child[idx++] = vdev_config_generate(spa, cvd,
			    getstats, flags);
		}

		if (idx) {
			VERIFY(nvlist_add_nvlist_array(nv,
			    ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
		}

		/*
		 * The per-child nvlists are released here whether or not
		 * they were added to 'nv' above.
		 */
		for (c = 0; c < idx; c++)
			nvlist_free(child[c]);

		kmem_free(child, vd->vdev_children * sizeof (nvlist_t *));

	} else {
		const char *aux = NULL;

		/* Leaf-only state flags; each is recorded only when set. */
		if (vd->vdev_offline && !vd->vdev_tmpoffline)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE,
			    B_TRUE) == 0);
		if (vd->vdev_resilvering)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVERING,
			    B_TRUE) == 0);
		if (vd->vdev_faulted)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED,
			    B_TRUE) == 0);
		if (vd->vdev_degraded)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED,
			    B_TRUE) == 0);
		if (vd->vdev_removed)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED,
			    B_TRUE) == 0);
		if (vd->vdev_unspare)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE,
			    B_TRUE) == 0);
		if (vd->vdev_ishole)
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE,
			    B_TRUE) == 0);

		/* Only these two aux states are given a string form. */
		switch (vd->vdev_stat.vs_aux) {
		case VDEV_AUX_ERR_EXCEEDED:
			aux = "err_exceeded";
			break;

		case VDEV_AUX_EXTERNAL:
			aux = "external";
			break;
		}

		if (aux != NULL)
			VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE,
			    aux) == 0);

		if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
			    vd->vdev_orig_guid) == 0);
		}
	}

	return (nv);
}

/*
 * Generate a view of the top-level vdevs. If we currently have holes
 * in the namespace, then generate an array which contains a list of holey
 * vdevs. Additionally, add the number of top-level children that currently
 * exist.
+ */ +void +vdev_top_config_generate(spa_t *spa, nvlist_t *config) +{ + vdev_t *rvd = spa->spa_root_vdev; + uint64_t *array; + uint_t c, idx; + + array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP); + + for (c = 0, idx = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + + if (tvd->vdev_ishole) + array[idx++] = c; + } + + if (idx) { + VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, + array, idx) == 0); + } + + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, + rvd->vdev_children) == 0); + + kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); +} + +nvlist_t * +vdev_label_read_config(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + nvlist_t *config = NULL; + vdev_phys_t *vp; + zio_t *zio; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; + + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + + if (!vdev_readable(vd)) + return (NULL); + + vp = zio_buf_alloc(sizeof (vdev_phys_t)); + +retry: + for (int l = 0; l < VDEV_LABELS; l++) { + + zio = zio_root(spa, NULL, NULL, flags); + + vdev_label_read(zio, vd, l, vp, + offsetof(vdev_label_t, vl_vdev_phys), + sizeof (vdev_phys_t), NULL, NULL, flags); + + if (zio_wait(zio) == 0 && + nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), + &config, 0) == 0) + break; + + if (config != NULL) { + nvlist_free(config); + config = NULL; + } + } + + if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + + zio_buf_free(vp, sizeof (vdev_phys_t)); + + return (config); +} + +/* + * Determine if a device is in use. The 'spare_guid' parameter will be filled + * in with the device guid if this spare is active elsewhere on the system. 
+ */ +static boolean_t +vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, + uint64_t *spare_guid, uint64_t *l2cache_guid) +{ + spa_t *spa = vd->vdev_spa; + uint64_t state, pool_guid, device_guid, txg, spare_pool; + uint64_t vdtxg = 0; + nvlist_t *label; + + if (spare_guid) + *spare_guid = 0ULL; + if (l2cache_guid) + *l2cache_guid = 0ULL; + + /* + * Read the label, if any, and perform some basic sanity checks. + */ + if ((label = vdev_label_read_config(vd)) == NULL) + return (B_FALSE); + + (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, + &vdtxg); + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, + &device_guid) != 0) { + nvlist_free(label); + return (B_FALSE); + } + + if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, + &pool_guid) != 0 || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0)) { + nvlist_free(label); + return (B_FALSE); + } + + nvlist_free(label); + + /* + * Check to see if this device indeed belongs to the pool it claims to + * be a part of. The only way this is allowed is if the device is a hot + * spare (which we check for later on). + */ + if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + !spa_guid_exists(pool_guid, device_guid) && + !spa_spare_exists(device_guid, NULL, NULL) && + !spa_l2cache_exists(device_guid, NULL)) + return (B_FALSE); + + /* + * If the transaction group is zero, then this an initialized (but + * unused) label. This is only an error if the create transaction + * on-disk is the same as the one we're using now, in which case the + * user has attempted to add the same vdev multiple times in the same + * transaction. + */ + if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + txg == 0 && vdtxg == crtxg) + return (B_TRUE); + + /* + * Check to see if this is a spare device. 
We do an explicit check for + * spa_has_spare() here because it may be on our pending list of spares + * to add. We also check if it is an l2cache device. + */ + if (spa_spare_exists(device_guid, &spare_pool, NULL) || + spa_has_spare(spa, device_guid)) { + if (spare_guid) + *spare_guid = device_guid; + + switch (reason) { + case VDEV_LABEL_CREATE: + case VDEV_LABEL_L2CACHE: + return (B_TRUE); + + case VDEV_LABEL_REPLACE: + return (!spa_has_spare(spa, device_guid) || + spare_pool != 0ULL); + + case VDEV_LABEL_SPARE: + return (spa_has_spare(spa, device_guid)); + } + } + + /* + * Check to see if this is an l2cache device. + */ + if (spa_l2cache_exists(device_guid, NULL)) + return (B_TRUE); + + /* + * We can't rely on a pool's state if it's been imported + * read-only. Instead we look to see if the pools is marked + * read-only in the namespace and set the state to active. + */ + if ((spa = spa_by_guid(pool_guid, device_guid)) != NULL && + spa_mode(spa) == FREAD) + state = POOL_STATE_ACTIVE; + + /* + * If the device is marked ACTIVE, then this device is in use by another + * pool on the system. + */ + return (state == POOL_STATE_ACTIVE); +} + +/* + * Initialize a vdev label. We check to make sure each leaf device is not in + * use, and writable. We put down an initial label which we will later + * overwrite with a complete label. Note that it's important to do this + * sequentially, not in parallel, so that we catch cases of multiple use of the + * same leaf vdev in the vdev we're creating -- e.g. mirroring a disk with + * itself. 
+ */ +int +vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) +{ + spa_t *spa = vd->vdev_spa; + nvlist_t *label; + vdev_phys_t *vp; + char *pad2; + uberblock_t *ub; + zio_t *zio; + char *buf; + size_t buflen; + int error; + uint64_t spare_guid, l2cache_guid; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + for (int c = 0; c < vd->vdev_children; c++) + if ((error = vdev_label_init(vd->vdev_child[c], + crtxg, reason)) != 0) + return (error); + + /* Track the creation time for this vdev */ + vd->vdev_crtxg = crtxg; + + if (!vd->vdev_ops->vdev_op_leaf) + return (0); + + /* + * Dead vdevs cannot be initialized. + */ + if (vdev_is_dead(vd)) + return (EIO); + + /* + * Determine if the vdev is in use. + */ + if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT && + vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid)) + return (EBUSY); + + /* + * If this is a request to add or replace a spare or l2cache device + * that is in use elsewhere on the system, then we must update the + * guid (which was initialized to a random value) to reflect the + * actual GUID (which is shared between multiple pools). + */ + if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE && + spare_guid != 0ULL) { + uint64_t guid_delta = spare_guid - vd->vdev_guid; + + vd->vdev_guid += guid_delta; + + for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) + pvd->vdev_guid_sum += guid_delta; + + /* + * If this is a replacement, then we want to fallthrough to the + * rest of the code. If we're adding a spare, then it's already + * labeled appropriately and we can just return. 
+ */ + if (reason == VDEV_LABEL_SPARE) + return (0); + ASSERT(reason == VDEV_LABEL_REPLACE || + reason == VDEV_LABEL_SPLIT); + } + + if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE && + l2cache_guid != 0ULL) { + uint64_t guid_delta = l2cache_guid - vd->vdev_guid; + + vd->vdev_guid += guid_delta; + + for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) + pvd->vdev_guid_sum += guid_delta; + + /* + * If this is a replacement, then we want to fallthrough to the + * rest of the code. If we're adding an l2cache, then it's + * already labeled appropriately and we can just return. + */ + if (reason == VDEV_LABEL_L2CACHE) + return (0); + ASSERT(reason == VDEV_LABEL_REPLACE); + } + + /* + * Initialize its label. + */ + vp = zio_buf_alloc(sizeof (vdev_phys_t)); + bzero(vp, sizeof (vdev_phys_t)); + + /* + * Generate a label describing the pool and our top-level vdev. + * We mark it as being from txg 0 to indicate that it's not + * really part of an active pool just yet. The labels will + * be written again with a meaningful txg by spa_sync(). + */ + if (reason == VDEV_LABEL_SPARE || + (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) { + /* + * For inactive hot spares, we generate a special label that + * identifies as a mutually shared hot spare. We write the + * label if we are adding a hot spare, or if we are removing an + * active hot spare (in which case we want to revert the + * labels). + */ + VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, + spa_version(spa)) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, + POOL_STATE_SPARE) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + } else if (reason == VDEV_LABEL_L2CACHE || + (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) { + /* + * For level 2 ARC devices, add a special label. 
+ */ + VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, + spa_version(spa)) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, + POOL_STATE_L2CACHE) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + } else { + uint64_t txg = 0ULL; + + if (reason == VDEV_LABEL_SPLIT) + txg = spa->spa_uberblock.ub_txg; + label = spa_config_generate(spa, vd, txg, B_FALSE); + + /* + * Add our creation time. This allows us to detect multiple + * vdev uses as described above, and automatically expires if we + * fail. + */ + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, + crtxg) == 0); + } + + buf = vp->vp_nvlist; + buflen = sizeof (vp->vp_nvlist); + + error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); + if (error != 0) { + nvlist_free(label); + zio_buf_free(vp, sizeof (vdev_phys_t)); + /* EFAULT means nvlist_pack ran out of room */ + return (error == EFAULT ? ENAMETOOLONG : EINVAL); + } + + /* + * Initialize uberblock template. + */ + ub = zio_buf_alloc(VDEV_UBERBLOCK_RING); + bzero(ub, VDEV_UBERBLOCK_RING); + *ub = spa->spa_uberblock; + ub->ub_txg = 0; + + /* Initialize the 2nd padding area. */ + pad2 = zio_buf_alloc(VDEV_PAD_SIZE); + bzero(pad2, VDEV_PAD_SIZE); + + /* + * Write everything in parallel. + */ +retry: + zio = zio_root(spa, NULL, NULL, flags); + + for (int l = 0; l < VDEV_LABELS; l++) { + + vdev_label_write(zio, vd, l, vp, + offsetof(vdev_label_t, vl_vdev_phys), + sizeof (vdev_phys_t), NULL, NULL, flags); + + /* + * Skip the 1st padding area. + * Zero out the 2nd padding area where it might have + * left over data from previous filesystem format. 
+ */ + vdev_label_write(zio, vd, l, pad2, + offsetof(vdev_label_t, vl_pad2), + VDEV_PAD_SIZE, NULL, NULL, flags); + + vdev_label_write(zio, vd, l, ub, + offsetof(vdev_label_t, vl_uberblock), + VDEV_UBERBLOCK_RING, NULL, NULL, flags); + } + + error = zio_wait(zio); + + if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + + nvlist_free(label); + zio_buf_free(pad2, VDEV_PAD_SIZE); + zio_buf_free(ub, VDEV_UBERBLOCK_RING); + zio_buf_free(vp, sizeof (vdev_phys_t)); + + /* + * If this vdev hasn't been previously identified as a spare, then we + * mark it as such only if a) we are labeling it as a spare, or b) it + * exists as a spare elsewhere in the system. Do the same for + * level 2 ARC devices. + */ + if (error == 0 && !vd->vdev_isspare && + (reason == VDEV_LABEL_SPARE || + spa_spare_exists(vd->vdev_guid, NULL, NULL))) + spa_spare_add(vd); + + if (error == 0 && !vd->vdev_isl2cache && + (reason == VDEV_LABEL_L2CACHE || + spa_l2cache_exists(vd->vdev_guid, NULL))) + spa_l2cache_add(vd); + + return (error); +} + +/* + * ========================================================================== + * uberblock load/sync + * ========================================================================== + */ + +/* + * Consider the following situation: txg is safely synced to disk. We've + * written the first uberblock for txg + 1, and then we lose power. When we + * come back up, we fail to see the uberblock for txg + 1 because, say, + * it was on a mirrored device and the replica to which we wrote txg + 1 + * is now offline. If we then make some changes and sync txg + 1, and then + * the missing replica comes back, then for a new seconds we'll have two + * conflicting uberblocks on disk with the same txg. The solution is simple: + * among uberblocks with equal txg, choose the one with the latest timestamp. 
+ */ +static int +vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) +{ + if (ub1->ub_txg < ub2->ub_txg) + return (-1); + if (ub1->ub_txg > ub2->ub_txg) + return (1); + + if (ub1->ub_timestamp < ub2->ub_timestamp) + return (-1); + if (ub1->ub_timestamp > ub2->ub_timestamp) + return (1); + + return (0); +} + +static void +vdev_uberblock_load_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + zio_t *rio = zio->io_private; + uberblock_t *ub = zio->io_data; + uberblock_t *ubbest = rio->io_private; + + ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd)); + + if (zio->io_error == 0 && uberblock_verify(ub) == 0) { + mutex_enter(&rio->io_lock); + if (ub->ub_txg <= spa->spa_load_max_txg && + vdev_uberblock_compare(ub, ubbest) > 0) + *ubbest = *ub; + mutex_exit(&rio->io_lock); + } + + zio_buf_free(zio->io_data, zio->io_size); +} + +void +vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; + + if (vd == rvd) { + ASSERT(zio == NULL); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + zio = zio_root(spa, NULL, ubbest, flags); + bzero(ubbest, sizeof (uberblock_t)); + } + + ASSERT(zio != NULL); + + for (int c = 0; c < vd->vdev_children; c++) + vdev_uberblock_load(zio, vd->vdev_child[c], ubbest); + + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + for (int l = 0; l < VDEV_LABELS; l++) { + for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { + vdev_label_read(zio, vd, l, + zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), + VDEV_UBERBLOCK_OFFSET(vd, n), + VDEV_UBERBLOCK_SIZE(vd), + vdev_uberblock_load_done, zio, flags); + } + } + } + + if (vd == rvd) { + (void) zio_wait(zio); + spa_config_exit(spa, SCL_ALL, FTAG); + } +} + +/* + * On success, increment root zio's count of good writes. + * We only get credit for writes to known-visible vdevs; see spa_vdev_add(). 
+ */ +static void +vdev_uberblock_sync_done(zio_t *zio) +{ + uint64_t *good_writes = zio->io_private; + + if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0) + atomic_add_64(good_writes, 1); +} + +/* + * Write the uberblock to all labels of all leaves of the specified vdev. + */ +static void +vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) +{ + uberblock_t *ubbuf; + int n; + + for (int c = 0; c < vd->vdev_children; c++) + vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags); + + if (!vd->vdev_ops->vdev_op_leaf) + return; + + if (!vdev_writeable(vd)) + return; + + n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); + + ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)); + bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); + *ubbuf = *ub; + + for (int l = 0; l < VDEV_LABELS; l++) + vdev_label_write(zio, vd, l, ubbuf, + VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), + vdev_uberblock_sync_done, zio->io_private, + flags | ZIO_FLAG_DONT_PROPAGATE); + + zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); +} + +int +vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) +{ + spa_t *spa = svd[0]->vdev_spa; + zio_t *zio; + uint64_t good_writes = 0; + + zio = zio_root(spa, NULL, &good_writes, flags); + + for (int v = 0; v < svdcount; v++) + vdev_uberblock_sync(zio, ub, svd[v], flags); + + (void) zio_wait(zio); + + /* + * Flush the uberblocks to disk. This ensures that the odd labels + * are no longer needed (because the new uberblocks and the even + * labels are safely on disk), so it is safe to overwrite them. + */ + zio = zio_root(spa, NULL, NULL, flags); + + for (int v = 0; v < svdcount; v++) + zio_flush(zio, svd[v]); + + (void) zio_wait(zio); + + return (good_writes >= 1 ? 0 : EIO); +} + +/* + * On success, increment the count of good writes for our top-level vdev. 
 */
static void
vdev_label_sync_done(zio_t *zio)
{
	/* io_private is the per-vdev counter set up in vdev_label_sync_list(). */
	uint64_t *good_writes = zio->io_private;

	if (zio->io_error == 0)
		atomic_add_64(good_writes, 1);
}

/*
 * If there weren't enough good writes, indicate failure to the parent.
 * Also releases the good-write counter.
 */
static void
vdev_label_sync_top_done(zio_t *zio)
{
	uint64_t *good_writes = zio->io_private;

	/* Not a single label write succeeded for this vdev. */
	if (*good_writes == 0)
		zio->io_error = EIO;

	kmem_free(good_writes, sizeof (uint64_t));
}

/*
 * We ignore errors for log and cache devices, simply free the private data.
 */
static void
vdev_label_sync_ignore_done(zio_t *zio)
{
	kmem_free(zio->io_private, sizeof (uint64_t));
}

/*
 * Write all even or odd labels to all leaves of the specified vdev.
 *
 * 'l' is the first label to write (0 for the even set, 1 for the odd set);
 * every second label from there on is written.  The writes are issued as
 * children of 'zio' and are counted through zio->io_private.
 */
static void
vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags)
{
	nvlist_t *label;
	vdev_phys_t *vp;
	char *buf;
	size_t buflen;

	for (int c = 0; c < vd->vdev_children; c++)
		vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags);

	if (!vd->vdev_ops->vdev_op_leaf)
		return;

	if (!vdev_writeable(vd))
		return;

	/*
	 * Generate a label describing the top-level config to which we belong.
	 */
	label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE);

	vp = zio_buf_alloc(sizeof (vdev_phys_t));
	bzero(vp, sizeof (vdev_phys_t));

	buf = vp->vp_nvlist;
	buflen = sizeof (vp->vp_nvlist);

	/* If the config cannot be packed, no label is written for this leaf. */
	if (nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP) == 0) {
		for (; l < VDEV_LABELS; l += 2) {
			vdev_label_write(zio, vd, l, vp,
			    offsetof(vdev_label_t, vl_vdev_phys),
			    sizeof (vdev_phys_t),
			    vdev_label_sync_done, zio->io_private,
			    flags | ZIO_FLAG_DONT_PROPAGATE);
		}
	}

	zio_buf_free(vp, sizeof (vdev_phys_t));
	nvlist_free(label);
}

/*
 * Write label set 'l' (0 = even, 1 = odd) for every vdev on the pool's
 * config dirty list, then flush those vdevs.  Returns the result of waiting
 * on the label writes; the result of the flush is ignored.
 */
int
vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
{
	list_t *dl = &spa->spa_config_dirty_list;
	vdev_t *vd;
	zio_t *zio;
	int error;

	/*
	 * Write the new labels to disk.
	 */
	zio = zio_root(spa, NULL, NULL, flags);

	for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
		/* Freed by whichever done callback is attached to 'vio'. */
		uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t),
		    KM_SLEEP);

		ASSERT(!vd->vdev_ishole);

		/*
		 * One null zio per dirty vdev collects that vdev's label
		 * writes.  Log and aux vdevs get the callback that ignores
		 * the write count.
		 */
		zio_t *vio = zio_null(zio, spa, NULL,
		    (vd->vdev_islog || vd->vdev_aux != NULL) ?
		    vdev_label_sync_ignore_done : vdev_label_sync_top_done,
		    good_writes, flags);
		vdev_label_sync(vio, vd, l, txg, flags);
		zio_nowait(vio);
	}

	error = zio_wait(zio);

	/*
	 * Flush the new labels to disk.
	 */
	zio = zio_root(spa, NULL, NULL, flags);

	for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd))
		zio_flush(zio, vd);

	(void) zio_wait(zio);

	return (error);
}

/*
 * Sync the uberblock and any changes to the vdev configuration.
 *
 * The order of operations is carefully crafted to ensure that
 * if the system panics or loses power at any time, the state on disk
 * is still transactionally consistent. The in-line comments below
 * describe the failure semantics at each stage.
 *
 * Moreover, vdev_config_sync() is designed to be idempotent: if it fails
 * at any time, you can just call it again, and it will resume its work.
 *
 * svd/svdcount - the vdevs that receive the new uberblock.
 * txg          - transaction group being synced.
 * tryhard      - when set, all I/O is issued with ZIO_FLAG_TRYHARD.
 *
 * Returns 0 on success or when there is nothing to do, otherwise the error
 * from the first stage that failed.
 */
int
vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
{
	spa_t *spa = svd[0]->vdev_spa;
	uberblock_t *ub = &spa->spa_uberblock;
	vdev_t *vd;
	zio_t *zio;
	int error;
	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;

	/*
	 * Normally, we don't want to try too hard to write every label and
	 * uberblock. If there is a flaky disk, we don't want the rest of the
	 * sync process to block while we retry. But if we can't write a
	 * single label out, we should retry with ZIO_FLAG_TRYHARD before
	 * bailing out and declaring the pool faulted.
	 */
	if (tryhard)
		flags |= ZIO_FLAG_TRYHARD;

	ASSERT(ub->ub_txg <= txg);

	/*
	 * If this isn't a resync due to I/O errors,
	 * and nothing changed in this transaction group,
	 * and the vdev configuration hasn't changed,
	 * then there's nothing to do.
	 */
	if (ub->ub_txg < txg &&
	    uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE &&
	    list_is_empty(&spa->spa_config_dirty_list))
		return (0);

	/* Nothing is written for txgs beyond the pool's freeze txg. */
	if (txg > spa_freeze_txg(spa))
		return (0);

	ASSERT(txg <= spa->spa_final_txg);

	/*
	 * Flush the write cache of every disk that's been written to
	 * in this transaction group. This ensures that all blocks
	 * written in this txg will be committed to stable storage
	 * before any uberblock that references them.
	 */
	zio = zio_root(spa, NULL, NULL, flags);

	for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
	    vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
		zio_flush(zio, vd);

	(void) zio_wait(zio);

	/*
	 * Sync out the even labels (L0, L2) for every dirty vdev. If the
	 * system dies in the middle of this process, that's OK: all of the
	 * even labels that made it to disk will be newer than any uberblock,
	 * and will therefore be considered invalid. The odd labels (L1, L3),
	 * which have not yet been touched, will still be valid. We flush
	 * the new labels to disk to ensure that all even-label updates
	 * are committed to stable storage before the uberblock update.
	 */
	if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0)
		return (error);

	/*
	 * Sync the uberblocks to all vdevs in svd[].
	 * If the system dies in the middle of this step, there are two cases
	 * to consider, and the on-disk state is consistent either way:
	 *
	 * (1)	If none of the new uberblocks made it to disk, then the
	 *	previous uberblock will be the newest, and the odd labels
	 *	(which had not yet been touched) will be valid with respect
	 *	to that uberblock.
	 *
	 * (2)	If one or more new uberblocks made it to disk, then they
	 *	will be the newest, and the even labels (which had all
	 *	been successfully committed) will be valid with respect
	 *	to the new uberblocks.
	 */
	if ((error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0)
		return (error);

	/*
	 * Sync out odd labels for every dirty vdev. If the system dies
	 * in the middle of this process, the even labels and the new
	 * uberblocks will suffice to open the pool. The next time
	 * the pool is opened, the first thing we'll do -- before any
	 * user data is modified -- is mark every vdev dirty so that
	 * all labels will be brought up to date. We flush the new labels
	 * to disk to ensure that all odd-label updates are committed to
	 * stable storage before the next transaction group begins.
	 */
	return (vdev_label_sync_list(spa, 1, txg, flags));
}
diff --git a/uts/common/fs/zfs/vdev_mirror.c b/uts/common/fs/zfs/vdev_mirror.c new file mode 100644 index 000000000000..698c0275d34e --- /dev/null +++ b/uts/common/fs/zfs/vdev_mirror.c @@ -0,0 +1,485 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/fs/zfs.h> + +/* + * Virtual device vector for mirroring. + */ + +typedef struct mirror_child { + vdev_t *mc_vd; + uint64_t mc_offset; + int mc_error; + uint8_t mc_tried; + uint8_t mc_skipped; + uint8_t mc_speculative; +} mirror_child_t; + +typedef struct mirror_map { + int mm_children; + int mm_replacing; + int mm_preferred; + int mm_root; + mirror_child_t mm_child[1]; +} mirror_map_t; + +int vdev_mirror_shift = 21; + +static void +vdev_mirror_map_free(zio_t *zio) +{ + mirror_map_t *mm = zio->io_vsd; + + kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children])); +} + +static const zio_vsd_ops_t vdev_mirror_vsd_ops = { + vdev_mirror_map_free, + zio_vsd_default_cksum_report +}; + +static mirror_map_t * +vdev_mirror_map_alloc(zio_t *zio) +{ + mirror_map_t *mm = NULL; + mirror_child_t *mc; + vdev_t *vd = zio->io_vd; + int c, d; + + if (vd == NULL) { + dva_t *dva = zio->io_bp->blk_dva; + spa_t *spa = zio->io_spa; + + c = BP_GET_NDVAS(zio->io_bp); + + mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); + mm->mm_children = c; + mm->mm_replacing = B_FALSE; + mm->mm_preferred = spa_get_random(c); + mm->mm_root = B_TRUE; + + /* + * Check the other, lower-index DVAs to see if they're on + * the same vdev as the child we picked. If they are, use + * them since they are likely to have been allocated from + * the primary metaslab in use at the time, and hence are + * more likely to have locality with single-copy data. 
+ */ + for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) { + if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c])) + mm->mm_preferred = d; + } + + for (c = 0; c < mm->mm_children; c++) { + mc = &mm->mm_child[c]; + + mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); + mc->mc_offset = DVA_GET_OFFSET(&dva[c]); + } + } else { + c = vd->vdev_children; + + mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); + mm->mm_children = c; + mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || + vd->vdev_ops == &vdev_spare_ops); + mm->mm_preferred = mm->mm_replacing ? 0 : + (zio->io_offset >> vdev_mirror_shift) % c; + mm->mm_root = B_FALSE; + + for (c = 0; c < mm->mm_children; c++) { + mc = &mm->mm_child[c]; + mc->mc_vd = vd->vdev_child[c]; + mc->mc_offset = zio->io_offset; + } + } + + zio->io_vsd = mm; + zio->io_vsd_ops = &vdev_mirror_vsd_ops; + return (mm); +} + +static int +vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) +{ + int numerrors = 0; + int lasterror = 0; + + if (vd->vdev_children == 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (EINVAL); + } + + vdev_open_children(vd); + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + if (cvd->vdev_open_error) { + lasterror = cvd->vdev_open_error; + numerrors++; + continue; + } + + *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; + *ashift = MAX(*ashift, cvd->vdev_ashift); + } + + if (numerrors == vd->vdev_children) { + vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; + return (lasterror); + } + + return (0); +} + +static void +vdev_mirror_close(vdev_t *vd) +{ + for (int c = 0; c < vd->vdev_children; c++) + vdev_close(vd->vdev_child[c]); +} + +static void +vdev_mirror_child_done(zio_t *zio) +{ + mirror_child_t *mc = zio->io_private; + + mc->mc_error = zio->io_error; + mc->mc_tried = 1; + mc->mc_skipped = 0; +} + +static void +vdev_mirror_scrub_done(zio_t *zio) +{ + mirror_child_t *mc = zio->io_private; + + if (zio->io_error == 0) { + zio_t 
*pio; + + mutex_enter(&zio->io_lock); + while ((pio = zio_walk_parents(zio)) != NULL) { + mutex_enter(&pio->io_lock); + ASSERT3U(zio->io_size, >=, pio->io_size); + bcopy(zio->io_data, pio->io_data, pio->io_size); + mutex_exit(&pio->io_lock); + } + mutex_exit(&zio->io_lock); + } + + zio_buf_free(zio->io_data, zio->io_size); + + mc->mc_error = zio->io_error; + mc->mc_tried = 1; + mc->mc_skipped = 0; +} + +/* + * Try to find a child whose DTL doesn't contain the block we want to read. + * If we can't, try the read on any vdev we haven't already tried. + */ +static int +vdev_mirror_child_select(zio_t *zio) +{ + mirror_map_t *mm = zio->io_vsd; + mirror_child_t *mc; + uint64_t txg = zio->io_txg; + int i, c; + + ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); + + /* + * Try to find a child whose DTL doesn't contain the block to read. + * If a child is known to be completely inaccessible (indicated by + * vdev_readable() returning B_FALSE), don't even try. + */ + for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) { + if (c >= mm->mm_children) + c = 0; + mc = &mm->mm_child[c]; + if (mc->mc_tried || mc->mc_skipped) + continue; + if (!vdev_readable(mc->mc_vd)) { + mc->mc_error = ENXIO; + mc->mc_tried = 1; /* don't even try */ + mc->mc_skipped = 1; + continue; + } + if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) + return (c); + mc->mc_error = ESTALE; + mc->mc_skipped = 1; + mc->mc_speculative = 1; + } + + /* + * Every device is either missing or has this txg in its DTL. + * Look for any child we haven't already tried before giving up. + */ + for (c = 0; c < mm->mm_children; c++) + if (!mm->mm_child[c].mc_tried) + return (c); + + /* + * Every child failed. There's no place left to look. 
+ */ + return (-1); +} + +static int +vdev_mirror_io_start(zio_t *zio) +{ + mirror_map_t *mm; + mirror_child_t *mc; + int c, children; + + mm = vdev_mirror_map_alloc(zio); + + if (zio->io_type == ZIO_TYPE_READ) { + if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) { + /* + * For scrubbing reads we need to allocate a read + * buffer for each child and issue reads to all + * children. If any child succeeds, it will copy its + * data into zio->io_data in vdev_mirror_scrub_done. + */ + for (c = 0; c < mm->mm_children; c++) { + mc = &mm->mm_child[c]; + zio_nowait(zio_vdev_child_io(zio, zio->io_bp, + mc->mc_vd, mc->mc_offset, + zio_buf_alloc(zio->io_size), zio->io_size, + zio->io_type, zio->io_priority, 0, + vdev_mirror_scrub_done, mc)); + } + return (ZIO_PIPELINE_CONTINUE); + } + /* + * For normal reads just pick one child. + */ + c = vdev_mirror_child_select(zio); + children = (c >= 0); + } else { + ASSERT(zio->io_type == ZIO_TYPE_WRITE); + + /* + * Writes go to all children. + */ + c = 0; + children = mm->mm_children; + } + + while (children--) { + mc = &mm->mm_child[c]; + zio_nowait(zio_vdev_child_io(zio, zio->io_bp, + mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, + zio->io_type, zio->io_priority, 0, + vdev_mirror_child_done, mc)); + c++; + } + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +vdev_mirror_worst_error(mirror_map_t *mm) +{ + int error[2] = { 0, 0 }; + + for (int c = 0; c < mm->mm_children; c++) { + mirror_child_t *mc = &mm->mm_child[c]; + int s = mc->mc_speculative; + error[s] = zio_worst_error(error[s], mc->mc_error); + } + + return (error[0] ? 
error[0] : error[1]); +} + +static void +vdev_mirror_io_done(zio_t *zio) +{ + mirror_map_t *mm = zio->io_vsd; + mirror_child_t *mc; + int c; + int good_copies = 0; + int unexpected_errors = 0; + + for (c = 0; c < mm->mm_children; c++) { + mc = &mm->mm_child[c]; + + if (mc->mc_error) { + if (!mc->mc_skipped) + unexpected_errors++; + } else if (mc->mc_tried) { + good_copies++; + } + } + + if (zio->io_type == ZIO_TYPE_WRITE) { + /* + * XXX -- for now, treat partial writes as success. + * + * Now that we support write reallocation, it would be better + * to treat partial failure as real failure unless there are + * no non-degraded top-level vdevs left, and not update DTLs + * if we intend to reallocate. + */ + /* XXPOLICY */ + if (good_copies != mm->mm_children) { + /* + * Always require at least one good copy. + * + * For ditto blocks (io_vd == NULL), require + * all copies to be good. + * + * XXX -- for replacing vdevs, there's no great answer. + * If the old device is really dead, we may not even + * be able to access it -- so we only want to + * require good writes to the new device. But if + * the new device turns out to be flaky, we want + * to be able to detach it -- which requires all + * writes to the old device to have succeeded. + */ + if (good_copies == 0 || zio->io_vd == NULL) + zio->io_error = vdev_mirror_worst_error(mm); + } + return; + } + + ASSERT(zio->io_type == ZIO_TYPE_READ); + + /* + * If we don't have a good copy yet, keep trying other children. 
+ */ + /* XXPOLICY */ + if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { + ASSERT(c >= 0 && c < mm->mm_children); + mc = &mm->mm_child[c]; + zio_vdev_io_redone(zio); + zio_nowait(zio_vdev_child_io(zio, zio->io_bp, + mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, + ZIO_TYPE_READ, zio->io_priority, 0, + vdev_mirror_child_done, mc)); + return; + } + + /* XXPOLICY */ + if (good_copies == 0) { + zio->io_error = vdev_mirror_worst_error(mm); + ASSERT(zio->io_error != 0); + } + + if (good_copies && spa_writeable(zio->io_spa) && + (unexpected_errors || + (zio->io_flags & ZIO_FLAG_RESILVER) || + ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { + /* + * Use the good data we have in hand to repair damaged children. + */ + for (c = 0; c < mm->mm_children; c++) { + /* + * Don't rewrite known good children. + * Not only is it unnecessary, it could + * actually be harmful: if the system lost + * power while rewriting the only good copy, + * there would be no good copies left! + */ + mc = &mm->mm_child[c]; + + if (mc->mc_error == 0) { + if (mc->mc_tried) + continue; + if (!(zio->io_flags & ZIO_FLAG_SCRUB) && + !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, + zio->io_txg, 1)) + continue; + mc->mc_error = ESTALE; + } + + zio_nowait(zio_vdev_child_io(zio, zio->io_bp, + mc->mc_vd, mc->mc_offset, + zio->io_data, zio->io_size, + ZIO_TYPE_WRITE, zio->io_priority, + ZIO_FLAG_IO_REPAIR | (unexpected_errors ? 
+ ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); + } + } +} + +static void +vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) +{ + if (faulted == vd->vdev_children) + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_NO_REPLICAS); + else if (degraded + faulted != 0) + vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); + else + vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); +} + +vdev_ops_t vdev_mirror_ops = { + vdev_mirror_open, + vdev_mirror_close, + vdev_default_asize, + vdev_mirror_io_start, + vdev_mirror_io_done, + vdev_mirror_state_change, + NULL, + NULL, + VDEV_TYPE_MIRROR, /* name of this vdev type */ + B_FALSE /* not a leaf vdev */ +}; + +vdev_ops_t vdev_replacing_ops = { + vdev_mirror_open, + vdev_mirror_close, + vdev_default_asize, + vdev_mirror_io_start, + vdev_mirror_io_done, + vdev_mirror_state_change, + NULL, + NULL, + VDEV_TYPE_REPLACING, /* name of this vdev type */ + B_FALSE /* not a leaf vdev */ +}; + +vdev_ops_t vdev_spare_ops = { + vdev_mirror_open, + vdev_mirror_close, + vdev_default_asize, + vdev_mirror_io_start, + vdev_mirror_io_done, + vdev_mirror_state_change, + NULL, + NULL, + VDEV_TYPE_SPARE, /* name of this vdev type */ + B_FALSE /* not a leaf vdev */ +}; diff --git a/uts/common/fs/zfs/vdev_missing.c b/uts/common/fs/zfs/vdev_missing.c new file mode 100644 index 000000000000..6a5588d59213 --- /dev/null +++ b/uts/common/fs/zfs/vdev_missing.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The 'missing' vdev is a special vdev type used only during import. It + * signifies a placeholder in the root vdev for some vdev that we know is + * missing. We pass it down to the kernel to allow the rest of the + * configuration to be parsed and an attempt made to open all available devices. + * Because its GUID is always 0, we know that the guid sum will mismatch and we + * won't be able to open the pool anyway. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_impl.h> +#include <sys/fs/zfs.h> +#include <sys/zio.h> + +/* ARGSUSED */ +static int +vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) +{ + /* + * Really this should just fail. But then the root vdev will be in the + * faulted state with VDEV_AUX_NO_REPLICAS, when what we really want is + * VDEV_AUX_BAD_GUID_SUM. So we pretend to succeed, knowing that we + * will fail the GUID sum check before ever trying to open the pool. 
+ */ + *psize = 0; + *ashift = 0; + return (0); +} + +/* ARGSUSED */ +static void +vdev_missing_close(vdev_t *vd) +{ +} + +/* ARGSUSED */ +static int +vdev_missing_io_start(zio_t *zio) +{ + zio->io_error = ENOTSUP; + return (ZIO_PIPELINE_CONTINUE); +} + +/* ARGSUSED */ +static void +vdev_missing_io_done(zio_t *zio) +{ +} + +vdev_ops_t vdev_missing_ops = { + vdev_missing_open, + vdev_missing_close, + vdev_default_asize, + vdev_missing_io_start, + vdev_missing_io_done, + NULL, + NULL, + NULL, + VDEV_TYPE_MISSING, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +vdev_ops_t vdev_hole_ops = { + vdev_missing_open, + vdev_missing_close, + vdev_default_asize, + vdev_missing_io_start, + vdev_missing_io_done, + NULL, + NULL, + NULL, + VDEV_TYPE_HOLE, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; diff --git a/uts/common/fs/zfs/vdev_queue.c b/uts/common/fs/zfs/vdev_queue.c new file mode 100644 index 000000000000..5a0d3ee97029 --- /dev/null +++ b/uts/common/fs/zfs/vdev_queue.c @@ -0,0 +1,406 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/avl.h> + +/* + * These tunables are for performance analysis. + */ +/* + * zfs_vdev_max_pending is the maximum number of i/os concurrently + * pending to each device. zfs_vdev_min_pending is the initial number + * of i/os pending to each device (before it starts ramping up to + * max_pending). + */ +int zfs_vdev_max_pending = 10; +int zfs_vdev_min_pending = 4; + +/* deadline = pri + (ddi_get_lbolt64() >> time_shift) */ +int zfs_vdev_time_shift = 6; + +/* exponential I/O issue ramp-up rate */ +int zfs_vdev_ramp_rate = 2; + +/* + * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O. + * For read I/Os, we also aggregate across small adjacency gaps; for writes + * we include spans of optional I/Os to aid aggregation at the disk even when + * they aren't able to help us aggregate at this level. + */ +int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE; +int zfs_vdev_read_gap_limit = 32 << 10; +int zfs_vdev_write_gap_limit = 4 << 10; + +/* + * Virtual device vector for disk I/O scheduling. 
+ */ +int +vdev_queue_deadline_compare(const void *x1, const void *x2) +{ + const zio_t *z1 = x1; + const zio_t *z2 = x2; + + if (z1->io_deadline < z2->io_deadline) + return (-1); + if (z1->io_deadline > z2->io_deadline) + return (1); + + if (z1->io_offset < z2->io_offset) + return (-1); + if (z1->io_offset > z2->io_offset) + return (1); + + if (z1 < z2) + return (-1); + if (z1 > z2) + return (1); + + return (0); +} + +int +vdev_queue_offset_compare(const void *x1, const void *x2) +{ + const zio_t *z1 = x1; + const zio_t *z2 = x2; + + if (z1->io_offset < z2->io_offset) + return (-1); + if (z1->io_offset > z2->io_offset) + return (1); + + if (z1 < z2) + return (-1); + if (z1 > z2) + return (1); + + return (0); +} + +void +vdev_queue_init(vdev_t *vd) +{ + vdev_queue_t *vq = &vd->vdev_queue; + + mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); + + avl_create(&vq->vq_deadline_tree, vdev_queue_deadline_compare, + sizeof (zio_t), offsetof(struct zio, io_deadline_node)); + + avl_create(&vq->vq_read_tree, vdev_queue_offset_compare, + sizeof (zio_t), offsetof(struct zio, io_offset_node)); + + avl_create(&vq->vq_write_tree, vdev_queue_offset_compare, + sizeof (zio_t), offsetof(struct zio, io_offset_node)); + + avl_create(&vq->vq_pending_tree, vdev_queue_offset_compare, + sizeof (zio_t), offsetof(struct zio, io_offset_node)); +} + +void +vdev_queue_fini(vdev_t *vd) +{ + vdev_queue_t *vq = &vd->vdev_queue; + + avl_destroy(&vq->vq_deadline_tree); + avl_destroy(&vq->vq_read_tree); + avl_destroy(&vq->vq_write_tree); + avl_destroy(&vq->vq_pending_tree); + + mutex_destroy(&vq->vq_lock); +} + +static void +vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) +{ + avl_add(&vq->vq_deadline_tree, zio); + avl_add(zio->io_vdev_tree, zio); +} + +static void +vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) +{ + avl_remove(&vq->vq_deadline_tree, zio); + avl_remove(zio->io_vdev_tree, zio); +} + +static void +vdev_queue_agg_io_done(zio_t *aio) +{ + zio_t *pio; + + while ((pio = 
zio_walk_parents(aio)) != NULL) + if (aio->io_type == ZIO_TYPE_READ) + bcopy((char *)aio->io_data + (pio->io_offset - + aio->io_offset), pio->io_data, pio->io_size); + + zio_buf_free(aio->io_data, aio->io_size); +} + +/* + * Compute the range spanned by two i/os, which is the endpoint of the last + * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset). + * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio); + * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0. + */ +#define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset) +#define IO_GAP(fio, lio) (-IO_SPAN(lio, fio)) + +static zio_t * +vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit) +{ + zio_t *fio, *lio, *aio, *dio, *nio, *mio; + avl_tree_t *t; + int flags; + uint64_t maxspan = zfs_vdev_aggregation_limit; + uint64_t maxgap; + int stretch; + +again: + ASSERT(MUTEX_HELD(&vq->vq_lock)); + + if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit || + avl_numnodes(&vq->vq_deadline_tree) == 0) + return (NULL); + + fio = lio = avl_first(&vq->vq_deadline_tree); + + t = fio->io_vdev_tree; + flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT; + maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0; + + if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) { + /* + * We can aggregate I/Os that are sufficiently adjacent and of + * the same flavor, as expressed by the AGG_INHERIT flags. + * The latter requirement is necessary so that certain + * attributes of the I/O, such as whether it's a normal I/O + * or a scrub/resilver, can be preserved in the aggregate. + * We can include optional I/Os, but don't allow them + * to begin a range as they add no benefit in that situation. + */ + + /* + * We keep track of the last non-optional I/O. + */ + mio = (fio->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : fio; + + /* + * Walk backwards through sufficiently contiguous I/Os + * recording the last non-option I/O. 
+ */ + while ((dio = AVL_PREV(t, fio)) != NULL && + (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && + IO_SPAN(dio, lio) <= maxspan && + IO_GAP(dio, fio) <= maxgap) { + fio = dio; + if (mio == NULL && !(fio->io_flags & ZIO_FLAG_OPTIONAL)) + mio = fio; + } + + /* + * Skip any initial optional I/Os. + */ + while ((fio->io_flags & ZIO_FLAG_OPTIONAL) && fio != lio) { + fio = AVL_NEXT(t, fio); + ASSERT(fio != NULL); + } + + /* + * Walk forward through sufficiently contiguous I/Os. + */ + while ((dio = AVL_NEXT(t, lio)) != NULL && + (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && + IO_SPAN(fio, dio) <= maxspan && + IO_GAP(lio, dio) <= maxgap) { + lio = dio; + if (!(lio->io_flags & ZIO_FLAG_OPTIONAL)) + mio = lio; + } + + /* + * Now that we've established the range of the I/O aggregation + * we must decide what to do with trailing optional I/Os. + * For reads, there's nothing to do. While we are unable to + * aggregate further, it's possible that a trailing optional + * I/O would allow the underlying device to aggregate with + * subsequent I/Os. We must therefore determine if the next + * non-optional I/O is close enough to make aggregation + * worthwhile. + */ + stretch = B_FALSE; + if (t != &vq->vq_read_tree && mio != NULL) { + nio = lio; + while ((dio = AVL_NEXT(t, nio)) != NULL && + IO_GAP(nio, dio) == 0 && + IO_GAP(mio, dio) <= zfs_vdev_write_gap_limit) { + nio = dio; + if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) { + stretch = B_TRUE; + break; + } + } + } + + if (stretch) { + /* This may be a no-op. 
*/ + VERIFY((dio = AVL_NEXT(t, lio)) != NULL); + dio->io_flags &= ~ZIO_FLAG_OPTIONAL; + } else { + while (lio != mio && lio != fio) { + ASSERT(lio->io_flags & ZIO_FLAG_OPTIONAL); + lio = AVL_PREV(t, lio); + ASSERT(lio != NULL); + } + } + } + + if (fio != lio) { + uint64_t size = IO_SPAN(fio, lio); + ASSERT(size <= zfs_vdev_aggregation_limit); + + aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset, + zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG, + flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, + vdev_queue_agg_io_done, NULL); + + nio = fio; + do { + dio = nio; + nio = AVL_NEXT(t, dio); + ASSERT(dio->io_type == aio->io_type); + ASSERT(dio->io_vdev_tree == t); + + if (dio->io_flags & ZIO_FLAG_NODATA) { + ASSERT(dio->io_type == ZIO_TYPE_WRITE); + bzero((char *)aio->io_data + (dio->io_offset - + aio->io_offset), dio->io_size); + } else if (dio->io_type == ZIO_TYPE_WRITE) { + bcopy(dio->io_data, (char *)aio->io_data + + (dio->io_offset - aio->io_offset), + dio->io_size); + } + + zio_add_child(dio, aio); + vdev_queue_io_remove(vq, dio); + zio_vdev_io_bypass(dio); + zio_execute(dio); + } while (dio != lio); + + avl_add(&vq->vq_pending_tree, aio); + + return (aio); + } + + ASSERT(fio->io_vdev_tree == t); + vdev_queue_io_remove(vq, fio); + + /* + * If the I/O is or was optional and therefore has no data, we need to + * simply discard it. We need to drop the vdev queue's lock to avoid a + * deadlock that we could encounter since this I/O will complete + * immediately. 
+ */ + if (fio->io_flags & ZIO_FLAG_NODATA) { + mutex_exit(&vq->vq_lock); + zio_vdev_io_bypass(fio); + zio_execute(fio); + mutex_enter(&vq->vq_lock); + goto again; + } + + avl_add(&vq->vq_pending_tree, fio); + + return (fio); +} + +zio_t * +vdev_queue_io(zio_t *zio) +{ + vdev_queue_t *vq = &zio->io_vd->vdev_queue; + zio_t *nio; + + ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + + if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) + return (zio); + + zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE; + + if (zio->io_type == ZIO_TYPE_READ) + zio->io_vdev_tree = &vq->vq_read_tree; + else + zio->io_vdev_tree = &vq->vq_write_tree; + + mutex_enter(&vq->vq_lock); + + zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) + + zio->io_priority; + + vdev_queue_io_add(vq, zio); + + nio = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending); + + mutex_exit(&vq->vq_lock); + + if (nio == NULL) + return (NULL); + + if (nio->io_done == vdev_queue_agg_io_done) { + zio_nowait(nio); + return (NULL); + } + + return (nio); +} + +void +vdev_queue_io_done(zio_t *zio) +{ + vdev_queue_t *vq = &zio->io_vd->vdev_queue; + + mutex_enter(&vq->vq_lock); + + avl_remove(&vq->vq_pending_tree, zio); + + for (int i = 0; i < zfs_vdev_ramp_rate; i++) { + zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending); + if (nio == NULL) + break; + mutex_exit(&vq->vq_lock); + if (nio->io_done == vdev_queue_agg_io_done) { + zio_nowait(nio); + } else { + zio_vdev_io_reissue(nio); + zio_execute(nio); + } + mutex_enter(&vq->vq_lock); + } + + mutex_exit(&vq->vq_lock); +} diff --git a/uts/common/fs/zfs/vdev_raidz.c b/uts/common/fs/zfs/vdev_raidz.c new file mode 100644 index 000000000000..4b0f5602c1d4 --- /dev/null +++ b/uts/common/fs/zfs/vdev_raidz.c @@ -0,0 +1,2146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/zio_checksum.h> +#include <sys/fs/zfs.h> +#include <sys/fm/fs/zfs.h> + +/* + * Virtual device vector for RAID-Z. + * + * This vdev supports single, double, and triple parity. For single parity, + * we use a simple XOR of all the data columns. For double or triple parity, + * we use a special case of Reed-Solomon coding. This extends the + * technique described in "The mathematics of RAID-6" by H. Peter Anvin by + * drawing on the system described in "A Tutorial on Reed-Solomon Coding for + * Fault-Tolerance in RAID-like Systems" by James S. Plank on which the + * former is also based. The latter is designed to provide higher performance + * for writes. + * + * Note that the Plank paper claimed to support arbitrary N+M, but was then + * amended six years later identifying a critical flaw that invalidates its + * claims. Nevertheless, the technique can be adapted to work for up to + * triple parity. For additional parity, the amendment "Note: Correction to + * the 1997 Tutorial on Reed-Solomon Coding" by James S. 
Plank and Ying Ding + * is viable, but the additional complexity means that write performance will + * suffer. + * + * All of the methods above operate on a Galois field, defined over the + * integers mod 2^N. In our case we choose N=8 for GF(2^8) so that all elements + * can be expressed with a single byte. Briefly, the operations on the + * field are defined as follows: + * + * o addition (+) is represented by a bitwise XOR + * o subtraction (-) is therefore identical to addition: A + B = A - B + * o multiplication of A by 2 is defined by the following bitwise expression: + * (A * 2)_7 = A_6 + * (A * 2)_6 = A_5 + * (A * 2)_5 = A_4 + * (A * 2)_4 = A_3 + A_7 + * (A * 2)_3 = A_2 + A_7 + * (A * 2)_2 = A_1 + A_7 + * (A * 2)_1 = A_0 + * (A * 2)_0 = A_7 + * + * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)). + * As an aside, this multiplication is derived from the error correcting + * primitive polynomial x^8 + x^4 + x^3 + x^2 + 1. + * + * Observe that any number in the field (except for 0) can be expressed as a + * power of 2 -- a generator for the field. We store a table of the powers of + * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can + * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather + * than field addition). The inverse of a field element A (A^-1) is therefore + * A ^ (255 - 1) = A^254. + * + * The up-to-three parity columns, P, Q, R over several data columns, + * D_0, ... D_n-1, can be expressed by field operations: + * + * P = D_0 + D_1 + ... + D_n-2 + D_n-1 + * Q = 2^n-1 * D_0 + 2^n-2 * D_1 + ... + 2^1 * D_n-2 + 2^0 * D_n-1 + * = ((...((D_0) * 2 + D_1) * 2 + ...) * 2 + D_n-2) * 2 + D_n-1 + * R = 4^n-1 * D_0 + 4^n-2 * D_1 + ... + 4^1 * D_n-2 + 4^0 * D_n-1 + * = ((...((D_0) * 4 + D_1) * 4 + ...) 
* 4 + D_n-2) * 4 + D_n-1 + * + * We chose 1, 2, and 4 as our generators because 1 corresponds to the trivial + * XOR operation, and 2 and 4 can be computed quickly and generate linearly- + * independent coefficients. (There are no additional coefficients that have + * this property which is why the uncorrected Plank method breaks down.) + * + * See the reconstruction code below for how P, Q and R can be used individually + * or in concert to recover missing data columns. + */ + +typedef struct raidz_col { + uint64_t rc_devidx; /* child device index for I/O */ + uint64_t rc_offset; /* device offset */ + uint64_t rc_size; /* I/O size */ + void *rc_data; /* I/O data */ + void *rc_gdata; /* used to store the "good" version */ + int rc_error; /* I/O error for this device */ + uint8_t rc_tried; /* Did we attempt this I/O column? */ + uint8_t rc_skipped; /* Did we skip this I/O column? */ +} raidz_col_t; + +typedef struct raidz_map { + uint64_t rm_cols; /* Regular column count */ + uint64_t rm_scols; /* Count including skipped columns */ + uint64_t rm_bigcols; /* Number of oversized columns */ + uint64_t rm_asize; /* Actual total I/O size */ + uint64_t rm_missingdata; /* Count of missing data devices */ + uint64_t rm_missingparity; /* Count of missing parity devices */ + uint64_t rm_firstdatacol; /* First data column/parity count */ + uint64_t rm_nskip; /* Skipped sectors for padding */ + uint64_t rm_skipstart; /* Column index of padding start */ + void *rm_datacopy; /* rm_asize-buffer of copied data */ + uintptr_t rm_reports; /* # of referencing checksum reports */ + uint8_t rm_freed; /* map no longer has referencing ZIO */ + uint8_t rm_ecksuminjected; /* checksum error was injected */ + raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ +} raidz_map_t; + +#define VDEV_RAIDZ_P 0 +#define VDEV_RAIDZ_Q 1 +#define VDEV_RAIDZ_R 2 + +#define VDEV_RAIDZ_MUL_2(x) (((x) << 1) ^ (((x) & 0x80) ? 
0x1d : 0)) +#define VDEV_RAIDZ_MUL_4(x) (VDEV_RAIDZ_MUL_2(VDEV_RAIDZ_MUL_2(x))) + +/* + * We provide a mechanism to perform the field multiplication operation on a + * 64-bit value all at once rather than a byte at a time. This works by + * creating a mask from the top bit in each byte and using that to + * conditionally apply the XOR of 0x1d. + */ +#define VDEV_RAIDZ_64MUL_2(x, mask) \ +{ \ + (mask) = (x) & 0x8080808080808080ULL; \ + (mask) = ((mask) << 1) - ((mask) >> 7); \ + (x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \ + ((mask) & 0x1d1d1d1d1d1d1d1d); \ +} + +#define VDEV_RAIDZ_64MUL_4(x, mask) \ +{ \ + VDEV_RAIDZ_64MUL_2((x), mask); \ + VDEV_RAIDZ_64MUL_2((x), mask); \ +} + +/* + * Force reconstruction to use the general purpose method. + */ +int vdev_raidz_default_to_general; + +/* + * These two tables represent powers and logs of 2 in the Galois field defined + * above. These values were computed by repeatedly multiplying by 2 as above. + */ +static const uint8_t vdev_raidz_pow2[256] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, + 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, + 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, + 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, + 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, + 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, + 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, + 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, + 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, + 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, + 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, + 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, + 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, + 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, + 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, + 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, + 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, + 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, + 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 
0xb7, 0x73, + 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, + 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, + 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, + 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, + 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, + 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, + 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, + 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, + 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, + 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, + 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, + 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 +}; +static const uint8_t vdev_raidz_log2[256] = { + 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, + 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, + 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, + 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, + 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, + 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, + 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, + 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, + 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, + 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, + 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, + 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, + 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, + 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, + 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, + 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, + 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, + 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, + 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, + 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, + 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, + 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, + 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, + 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, + 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, + 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, + 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 
0x55, 0xaa, + 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, + 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, + 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, + 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, + 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, +}; + +static void vdev_raidz_generate_parity(raidz_map_t *rm); + +/* + * Multiply a given number by 2 raised to the given power. + */ +static uint8_t +vdev_raidz_exp2(uint_t a, int exp) +{ + if (a == 0) + return (0); + + ASSERT(exp >= 0); + ASSERT(vdev_raidz_log2[a] > 0 || a == 1); + + exp += vdev_raidz_log2[a]; + if (exp > 255) + exp -= 255; + + return (vdev_raidz_pow2[exp]); +} + +static void +vdev_raidz_map_free(raidz_map_t *rm) +{ + int c; + size_t size; + + for (c = 0; c < rm->rm_firstdatacol; c++) { + zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size); + + if (rm->rm_col[c].rc_gdata != NULL) + zio_buf_free(rm->rm_col[c].rc_gdata, + rm->rm_col[c].rc_size); + } + + size = 0; + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) + size += rm->rm_col[c].rc_size; + + if (rm->rm_datacopy != NULL) + zio_buf_free(rm->rm_datacopy, size); + + kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols])); +} + +static void +vdev_raidz_map_free_vsd(zio_t *zio) +{ + raidz_map_t *rm = zio->io_vsd; + + ASSERT3U(rm->rm_freed, ==, 0); + rm->rm_freed = 1; + + if (rm->rm_reports == 0) + vdev_raidz_map_free(rm); +} + +/*ARGSUSED*/ +static void +vdev_raidz_cksum_free(void *arg, size_t ignored) +{ + raidz_map_t *rm = arg; + + ASSERT3U(rm->rm_reports, >, 0); + + if (--rm->rm_reports == 0 && rm->rm_freed != 0) + vdev_raidz_map_free(rm); +} + +static void +vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) +{ + raidz_map_t *rm = zcr->zcr_cbdata; + size_t c = zcr->zcr_cbinfo; + size_t x; + + const char *good = NULL; + const char *bad = rm->rm_col[c].rc_data; + + if (good_data == NULL) { + zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); + return; + } + + if (c < rm->rm_firstdatacol) { + /* + * 
The first time through, calculate the parity blocks for + * the good data (this relies on the fact that the good + * data never changes for a given logical ZIO) + */ + if (rm->rm_col[0].rc_gdata == NULL) { + char *bad_parity[VDEV_RAIDZ_MAXPARITY]; + char *buf; + + /* + * Set up the rm_col[]s to generate the parity for + * good_data, first saving the parity bufs and + * replacing them with buffers to hold the result. + */ + for (x = 0; x < rm->rm_firstdatacol; x++) { + bad_parity[x] = rm->rm_col[x].rc_data; + rm->rm_col[x].rc_data = rm->rm_col[x].rc_gdata = + zio_buf_alloc(rm->rm_col[x].rc_size); + } + + /* fill in the data columns from good_data */ + buf = (char *)good_data; + for (; x < rm->rm_cols; x++) { + rm->rm_col[x].rc_data = buf; + buf += rm->rm_col[x].rc_size; + } + + /* + * Construct the parity from the good data. + */ + vdev_raidz_generate_parity(rm); + + /* restore everything back to its original state */ + for (x = 0; x < rm->rm_firstdatacol; x++) + rm->rm_col[x].rc_data = bad_parity[x]; + + buf = rm->rm_datacopy; + for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { + rm->rm_col[x].rc_data = buf; + buf += rm->rm_col[x].rc_size; + } + } + + ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL); + good = rm->rm_col[c].rc_gdata; + } else { + /* adjust good_data to point at the start of our column */ + good = good_data; + + for (x = rm->rm_firstdatacol; x < c; x++) + good += rm->rm_col[x].rc_size; + } + + /* we drop the ereport if it ends up that the data was good */ + zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); +} + +/* + * Invoked indirectly by zfs_ereport_start_checksum(), called + * below when our read operation fails completely. The main point + * is to keep a copy of everything we read from disk, so that at + * vdev_raidz_cksum_finish() time we can compare it with the good data. 
+ */ +static void +vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) +{ + size_t c = (size_t)(uintptr_t)arg; + caddr_t buf; + + raidz_map_t *rm = zio->io_vsd; + size_t size; + + /* set up the report and bump the refcount */ + zcr->zcr_cbdata = rm; + zcr->zcr_cbinfo = c; + zcr->zcr_finish = vdev_raidz_cksum_finish; + zcr->zcr_free = vdev_raidz_cksum_free; + + rm->rm_reports++; + ASSERT3U(rm->rm_reports, >, 0); + + if (rm->rm_datacopy != NULL) + return; + + /* + * It's the first time we're called for this raidz_map_t, so we need + * to copy the data aside; there's no guarantee that our zio's buffer + * won't be re-used for something else. + * + * Our parity data is already in separate buffers, so there's no need + * to copy them. + */ + + size = 0; + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) + size += rm->rm_col[c].rc_size; + + buf = rm->rm_datacopy = zio_buf_alloc(size); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + raidz_col_t *col = &rm->rm_col[c]; + + bcopy(col->rc_data, buf, col->rc_size); + col->rc_data = buf; + + buf += col->rc_size; + } + ASSERT3P(buf - (caddr_t)rm->rm_datacopy, ==, size); +} + +static const zio_vsd_ops_t vdev_raidz_vsd_ops = { + vdev_raidz_map_free_vsd, + vdev_raidz_cksum_report +}; + +static raidz_map_t * +vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, + uint64_t nparity) +{ + raidz_map_t *rm; + uint64_t b = zio->io_offset >> unit_shift; + uint64_t s = zio->io_size >> unit_shift; + uint64_t f = b % dcols; + uint64_t o = (b / dcols) << unit_shift; + uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; + + q = s / (dcols - nparity); + r = s - q * (dcols - nparity); + bc = (r == 0 ? 0 : r + nparity); + tot = s + nparity * (q + (r == 0 ? 
0 : 1)); + + if (q == 0) { + acols = bc; + scols = MIN(dcols, roundup(bc, nparity + 1)); + } else { + acols = dcols; + scols = dcols; + } + + ASSERT3U(acols, <=, scols); + + rm = kmem_alloc(offsetof(raidz_map_t, rm_col[scols]), KM_SLEEP); + + rm->rm_cols = acols; + rm->rm_scols = scols; + rm->rm_bigcols = bc; + rm->rm_skipstart = bc; + rm->rm_missingdata = 0; + rm->rm_missingparity = 0; + rm->rm_firstdatacol = nparity; + rm->rm_datacopy = NULL; + rm->rm_reports = 0; + rm->rm_freed = 0; + rm->rm_ecksuminjected = 0; + + asize = 0; + + for (c = 0; c < scols; c++) { + col = f + c; + coff = o; + if (col >= dcols) { + col -= dcols; + coff += 1ULL << unit_shift; + } + rm->rm_col[c].rc_devidx = col; + rm->rm_col[c].rc_offset = coff; + rm->rm_col[c].rc_data = NULL; + rm->rm_col[c].rc_gdata = NULL; + rm->rm_col[c].rc_error = 0; + rm->rm_col[c].rc_tried = 0; + rm->rm_col[c].rc_skipped = 0; + + if (c >= acols) + rm->rm_col[c].rc_size = 0; + else if (c < bc) + rm->rm_col[c].rc_size = (q + 1) << unit_shift; + else + rm->rm_col[c].rc_size = q << unit_shift; + + asize += rm->rm_col[c].rc_size; + } + + ASSERT3U(asize, ==, tot << unit_shift); + rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift); + rm->rm_nskip = roundup(tot, nparity + 1) - tot; + ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift); + ASSERT3U(rm->rm_nskip, <=, nparity); + + for (c = 0; c < rm->rm_firstdatacol; c++) + rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size); + + rm->rm_col[c].rc_data = zio->io_data; + + for (c = c + 1; c < acols; c++) + rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data + + rm->rm_col[c - 1].rc_size; + + /* + * If all data stored spans all columns, there's a danger that parity + * will always be on the same device and, since parity isn't read + * during normal operation, that that device's I/O bandwidth won't be + * used effectively. We therefore switch the parity every 1MB. + * + * ... at least that was, ostensibly, the theory. 
As a practical + * matter unless we juggle the parity between all devices evenly, we + * won't see any benefit. Further, occasional writes that aren't a + * multiple of the LCM of the number of children and the minimum + * stripe width are sufficient to avoid pessimal behavior. + * Unfortunately, this decision created an implicit on-disk format + * requirement that we need to support for all eternity, but only + * for single-parity RAID-Z. + * + * If we intend to skip a sector in the zeroth column for padding + * we must make sure to note this swap. We will never intend to + * skip the first column since at least one data and one parity + * column must appear in each row. + */ + ASSERT(rm->rm_cols >= 2); + ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size); + + if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) { + devidx = rm->rm_col[0].rc_devidx; + o = rm->rm_col[0].rc_offset; + rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx; + rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset; + rm->rm_col[1].rc_devidx = devidx; + rm->rm_col[1].rc_offset = o; + + if (rm->rm_skipstart == 0) + rm->rm_skipstart = 1; + } + + zio->io_vsd = rm; + zio->io_vsd_ops = &vdev_raidz_vsd_ops; + return (rm); +} + +static void +vdev_raidz_generate_parity_p(raidz_map_t *rm) +{ + uint64_t *p, *src, pcount, ccount, i; + int c; + + pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + + if (c == rm->rm_firstdatacol) { + ASSERT(ccount == pcount); + for (i = 0; i < ccount; i++, src++, p++) { + *p = *src; + } + } else { + ASSERT(ccount <= pcount); + for (i = 0; i < ccount; i++, src++, p++) { + *p ^= *src; + } + } + } +} + +static void +vdev_raidz_generate_parity_pq(raidz_map_t *rm) +{ + uint64_t *p, *q, *src, pcnt, ccnt, mask, i; + int c; + + pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof 
(src[0]); + ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == + rm->rm_col[VDEV_RAIDZ_Q].rc_size); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + + ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); + + if (c == rm->rm_firstdatacol) { + ASSERT(ccnt == pcnt || ccnt == 0); + for (i = 0; i < ccnt; i++, src++, p++, q++) { + *p = *src; + *q = *src; + } + for (; i < pcnt; i++, src++, p++, q++) { + *p = 0; + *q = 0; + } + } else { + ASSERT(ccnt <= pcnt); + + /* + * Apply the algorithm described above by multiplying + * the previous result and adding in the new value. + */ + for (i = 0; i < ccnt; i++, src++, p++, q++) { + *p ^= *src; + + VDEV_RAIDZ_64MUL_2(*q, mask); + *q ^= *src; + } + + /* + * Treat short columns as though they are full of 0s. + * Note that there's therefore nothing needed for P. + */ + for (; i < pcnt; i++, q++) { + VDEV_RAIDZ_64MUL_2(*q, mask); + } + } + } +} + +static void +vdev_raidz_generate_parity_pqr(raidz_map_t *rm) +{ + uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i; + int c; + + pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); + ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == + rm->rm_col[VDEV_RAIDZ_Q].rc_size); + ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == + rm->rm_col[VDEV_RAIDZ_R].rc_size); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + r = rm->rm_col[VDEV_RAIDZ_R].rc_data; + + ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); + + if (c == rm->rm_firstdatacol) { + ASSERT(ccnt == pcnt || ccnt == 0); + for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { + *p = *src; + *q = *src; + *r = *src; + } + for (; i < pcnt; i++, src++, p++, q++, r++) { + *p = 0; + *q = 0; + *r = 0; + } + } else { + ASSERT(ccnt <= pcnt); + + /* + * Apply the algorithm described above by multiplying + * the previous result and adding in 
the new value. + */ + for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { + *p ^= *src; + + VDEV_RAIDZ_64MUL_2(*q, mask); + *q ^= *src; + + VDEV_RAIDZ_64MUL_4(*r, mask); + *r ^= *src; + } + + /* + * Treat short columns as though they are full of 0s. + * Note that there's therefore nothing needed for P. + */ + for (; i < pcnt; i++, q++, r++) { + VDEV_RAIDZ_64MUL_2(*q, mask); + VDEV_RAIDZ_64MUL_4(*r, mask); + } + } + } +} + +/* + * Generate RAID parity in the first virtual columns according to the number of + * parity columns available. + */ +static void +vdev_raidz_generate_parity(raidz_map_t *rm) +{ + switch (rm->rm_firstdatacol) { + case 1: + vdev_raidz_generate_parity_p(rm); + break; + case 2: + vdev_raidz_generate_parity_pq(rm); + break; + case 3: + vdev_raidz_generate_parity_pqr(rm); + break; + default: + cmn_err(CE_PANIC, "invalid RAID-Z configuration"); + } +} + +static int +vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) +{ + uint64_t *dst, *src, xcount, ccount, count, i; + int x = tgts[0]; + int c; + + ASSERT(ntgts == 1); + ASSERT(x >= rm->rm_firstdatacol); + ASSERT(x < rm->rm_cols); + + xcount = rm->rm_col[x].rc_size / sizeof (src[0]); + ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0])); + ASSERT(xcount > 0); + + src = rm->rm_col[VDEV_RAIDZ_P].rc_data; + dst = rm->rm_col[x].rc_data; + for (i = 0; i < xcount; i++, dst++, src++) { + *dst = *src; + } + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; + + if (c == x) + continue; + + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + count = MIN(ccount, xcount); + + for (i = 0; i < count; i++, dst++, src++) { + *dst ^= *src; + } + } + + return (1 << VDEV_RAIDZ_P); +} + +static int +vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) +{ + uint64_t *dst, *src, xcount, ccount, count, mask, i; + uint8_t *b; + int x = tgts[0]; + int c, j, exp; + + ASSERT(ntgts == 1); + + xcount = rm->rm_col[x].rc_size 
/ sizeof (src[0]); + ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0])); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; + + if (c == x) + ccount = 0; + else + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + + count = MIN(ccount, xcount); + + if (c == rm->rm_firstdatacol) { + for (i = 0; i < count; i++, dst++, src++) { + *dst = *src; + } + for (; i < xcount; i++, dst++) { + *dst = 0; + } + + } else { + for (i = 0; i < count; i++, dst++, src++) { + VDEV_RAIDZ_64MUL_2(*dst, mask); + *dst ^= *src; + } + + for (; i < xcount; i++, dst++) { + VDEV_RAIDZ_64MUL_2(*dst, mask); + } + } + } + + src = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + dst = rm->rm_col[x].rc_data; + exp = 255 - (rm->rm_cols - 1 - x); + + for (i = 0; i < xcount; i++, dst++, src++) { + *dst ^= *src; + for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { + *b = vdev_raidz_exp2(*b, exp); + } + } + + return (1 << VDEV_RAIDZ_Q); +} + +static int +vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) +{ + uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; + void *pdata, *qdata; + uint64_t xsize, ysize, i; + int x = tgts[0]; + int y = tgts[1]; + + ASSERT(ntgts == 2); + ASSERT(x < y); + ASSERT(x >= rm->rm_firstdatacol); + ASSERT(y < rm->rm_cols); + + ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size); + + /* + * Move the parity data aside -- we're going to compute parity as + * though columns x and y were full of zeros -- Pxy and Qxy. We want to + * reuse the parity generation mechanism without trashing the actual + * parity so we make those columns appear to be full of zeros by + * setting their lengths to zero. 
+ */ + pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xsize = rm->rm_col[x].rc_size; + ysize = rm->rm_col[y].rc_size; + + rm->rm_col[VDEV_RAIDZ_P].rc_data = + zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size); + rm->rm_col[VDEV_RAIDZ_Q].rc_data = + zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size); + rm->rm_col[x].rc_size = 0; + rm->rm_col[y].rc_size = 0; + + vdev_raidz_generate_parity_pq(rm); + + rm->rm_col[x].rc_size = xsize; + rm->rm_col[y].rc_size = ysize; + + p = pdata; + q = qdata; + pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xd = rm->rm_col[x].rc_data; + yd = rm->rm_col[y].rc_data; + + /* + * We now have: + * Pxy = P + D_x + D_y + * Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y + * + * We can then solve for D_x: + * D_x = A * (P + Pxy) + B * (Q + Qxy) + * where + * A = 2^(x - y) * (2^(x - y) + 1)^-1 + * B = 2^(ndevs - 1 - x) * (2^(x - y) + 1)^-1 + * + * With D_x in hand, we can easily solve for D_y: + * D_y = P + Pxy + D_x + */ + + a = vdev_raidz_pow2[255 + x - y]; + b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)]; + tmp = 255 - vdev_raidz_log2[a ^ 1]; + + aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; + bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)]; + + for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) { + *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^ + vdev_raidz_exp2(*q ^ *qxy, bexp); + + if (i < ysize) + *yd = *p ^ *pxy ^ *xd; + } + + zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data, + rm->rm_col[VDEV_RAIDZ_P].rc_size); + zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data, + rm->rm_col[VDEV_RAIDZ_Q].rc_size); + + /* + * Restore the saved parity data. 
+ */ + rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata; + rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata; + + return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q)); +} + +/* BEGIN CSTYLED */ +/* + * In the general case of reconstruction, we must solve the system of linear + * equations defined by the coeffecients used to generate parity as well as + * the contents of the data and parity disks. This can be expressed with + * vectors for the original data (D) and the actual data (d) and parity (p) + * and a matrix composed of the identity matrix (I) and a dispersal matrix (V): + * + * __ __ __ __ + * | | __ __ | p_0 | + * | V | | D_0 | | p_m-1 | + * | | x | : | = | d_0 | + * | I | | D_n-1 | | : | + * | | ~~ ~~ | d_n-1 | + * ~~ ~~ ~~ ~~ + * + * I is simply a square identity matrix of size n, and V is a vandermonde + * matrix defined by the coeffecients we chose for the various parity columns + * (1, 2, 4). Note that these values were chosen both for simplicity, speedy + * computation as well as linear separability. + * + * __ __ __ __ + * | 1 .. 1 1 1 | | p_0 | + * | 2^n-1 .. 4 2 1 | __ __ | : | + * | 4^n-1 .. 16 4 1 | | D_0 | | p_m-1 | + * | 1 .. 0 0 0 | | D_1 | | d_0 | + * | 0 .. 0 0 0 | x | D_2 | = | d_1 | + * | : : : : | | : | | d_2 | + * | 0 .. 1 0 0 | | D_n-1 | | : | + * | 0 .. 0 1 0 | ~~ ~~ | : | + * | 0 .. 0 0 1 | | d_n-1 | + * ~~ ~~ ~~ ~~ + * + * Note that I, V, d, and p are known. To compute D, we must invert the + * matrix and use the known data and parity values to reconstruct the unknown + * data values. We begin by removing the rows in V|I and d|p that correspond + * to failed or missing columns; we then make V|I square (n x n) and d|p + * sized n by removing rows corresponding to unused parity from the bottom up + * to generate (V|I)' and (d|p)'. We can then generate the inverse of (V|I)' + * using Gauss-Jordan elimination. 
In the example below we use m=3 parity + * columns, n=8 data columns, with errors in d_1, d_2, and p_1: + * __ __ + * | 1 1 1 1 1 1 1 1 | + * | 128 64 32 16 8 4 2 1 | <-----+-+-- missing disks + * | 19 205 116 29 64 16 4 1 | / / + * | 1 0 0 0 0 0 0 0 | / / + * | 0 1 0 0 0 0 0 0 | <--' / + * (V|I) = | 0 0 1 0 0 0 0 0 | <---' + * | 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ + * | 1 1 1 1 1 1 1 1 | + * | 128 64 32 16 8 4 2 1 | + * | 19 205 116 29 64 16 4 1 | + * | 1 0 0 0 0 0 0 0 | + * | 0 1 0 0 0 0 0 0 | + * (V|I)' = | 0 0 1 0 0 0 0 0 | + * | 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * + * Here we employ Gauss-Jordan elimination to find the inverse of (V|I)'. We + * have carefully chosen the seed values 1, 2, and 4 to ensure that this + * matrix is not singular. + * __ __ + * | 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 | + * | 19 205 116 29 64 16 4 1 0 1 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | + * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ + * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 | + * | 19 205 116 29 64 16 4 1 0 1 0 0 0 0 0 0 | + * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ + * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | + * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | + * | 0 205 116 0 0 0 0 0 0 1 19 29 64 16 4 1 | + * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ 
+ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | + * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | + * | 0 0 185 0 0 0 0 0 205 1 222 208 141 221 201 204 | + * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ + * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | + * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | + * | 0 0 1 0 0 0 0 0 166 100 4 40 158 168 216 209 | + * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ + * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | + * | 0 1 0 0 0 0 0 0 167 100 5 41 159 169 217 208 | + * | 0 0 1 0 0 0 0 0 166 100 4 40 158 168 216 209 | + * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * __ __ + * | 0 0 1 0 0 0 0 0 | + * | 167 100 5 41 159 169 217 208 | + * | 166 100 4 40 158 168 216 209 | + * (V|I)'^-1 = | 0 0 0 1 0 0 0 0 | + * | 0 0 0 0 1 0 0 0 | + * | 0 0 0 0 0 1 0 0 | + * | 0 0 0 0 0 0 1 0 | + * | 0 0 0 0 0 0 0 1 | + * ~~ ~~ + * + * We can then simply compute D = (V|I)'^-1 x (d|p)' to discover the values + * of the missing data. + * + * As is apparent from the example above, the only non-trivial rows in the + * inverse matrix correspond to the data disks that we're trying to + * reconstruct. Indeed, those are the only rows we need as the others would + * only be useful for reconstructing data known or assumed to be valid. For + * that reason, we only build the coefficients in the rows that correspond to + * targeted columns. 
+ */ +/* END CSTYLED */ + +static void +vdev_raidz_matrix_init(raidz_map_t *rm, int n, int nmap, int *map, + uint8_t **rows) +{ + int i, j; + int pow; + + ASSERT(n == rm->rm_cols - rm->rm_firstdatacol); + + /* + * Fill in the missing rows of interest. + */ + for (i = 0; i < nmap; i++) { + ASSERT3S(0, <=, map[i]); + ASSERT3S(map[i], <=, 2); + + pow = map[i] * n; + if (pow > 255) + pow -= 255; + ASSERT(pow <= 255); + + for (j = 0; j < n; j++) { + pow -= map[i]; + if (pow < 0) + pow += 255; + rows[i][j] = vdev_raidz_pow2[pow]; + } + } +} + +static void +vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, + uint8_t **rows, uint8_t **invrows, const uint8_t *used) +{ + int i, j, ii, jj; + uint8_t log; + + /* + * Assert that the first nmissing entries from the array of used + * columns correspond to parity columns and that subsequent entries + * correspond to data columns. + */ + for (i = 0; i < nmissing; i++) { + ASSERT3S(used[i], <, rm->rm_firstdatacol); + } + for (; i < n; i++) { + ASSERT3S(used[i], >=, rm->rm_firstdatacol); + } + + /* + * First initialize the storage where we'll compute the inverse rows. + */ + for (i = 0; i < nmissing; i++) { + for (j = 0; j < n; j++) { + invrows[i][j] = (i == j) ? 1 : 0; + } + } + + /* + * Subtract all trivial rows from the rows of consequence. + */ + for (i = 0; i < nmissing; i++) { + for (j = nmissing; j < n; j++) { + ASSERT3U(used[j], >=, rm->rm_firstdatacol); + jj = used[j] - rm->rm_firstdatacol; + ASSERT3S(jj, <, n); + invrows[i][j] = rows[i][jj]; + rows[i][jj] = 0; + } + } + + /* + * For each of the rows of interest, we must normalize it and subtract + * a multiple of it from the other rows. + */ + for (i = 0; i < nmissing; i++) { + for (j = 0; j < missing[i]; j++) { + ASSERT3U(rows[i][j], ==, 0); + } + ASSERT3U(rows[i][missing[i]], !=, 0); + + /* + * Compute the inverse of the first element and multiply each + * element in the row by that value. 
+ */ + log = 255 - vdev_raidz_log2[rows[i][missing[i]]]; + + for (j = 0; j < n; j++) { + rows[i][j] = vdev_raidz_exp2(rows[i][j], log); + invrows[i][j] = vdev_raidz_exp2(invrows[i][j], log); + } + + for (ii = 0; ii < nmissing; ii++) { + if (i == ii) + continue; + + ASSERT3U(rows[ii][missing[i]], !=, 0); + + log = vdev_raidz_log2[rows[ii][missing[i]]]; + + for (j = 0; j < n; j++) { + rows[ii][j] ^= + vdev_raidz_exp2(rows[i][j], log); + invrows[ii][j] ^= + vdev_raidz_exp2(invrows[i][j], log); + } + } + } + + /* + * Verify that the data that is left in the rows are properly part of + * an identity matrix. + */ + for (i = 0; i < nmissing; i++) { + for (j = 0; j < n; j++) { + if (j == missing[i]) { + ASSERT3U(rows[i][j], ==, 1); + } else { + ASSERT3U(rows[i][j], ==, 0); + } + } + } +} + +static void +vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, + int *missing, uint8_t **invrows, const uint8_t *used) +{ + int i, j, x, cc, c; + uint8_t *src; + uint64_t ccount; + uint8_t *dst[VDEV_RAIDZ_MAXPARITY]; + uint64_t dcount[VDEV_RAIDZ_MAXPARITY]; + uint8_t log, val; + int ll; + uint8_t *invlog[VDEV_RAIDZ_MAXPARITY]; + uint8_t *p, *pp; + size_t psize; + + psize = sizeof (invlog[0][0]) * n * nmissing; + p = kmem_alloc(psize, KM_SLEEP); + + for (pp = p, i = 0; i < nmissing; i++) { + invlog[i] = pp; + pp += n; + } + + for (i = 0; i < nmissing; i++) { + for (j = 0; j < n; j++) { + ASSERT3U(invrows[i][j], !=, 0); + invlog[i][j] = vdev_raidz_log2[invrows[i][j]]; + } + } + + for (i = 0; i < n; i++) { + c = used[i]; + ASSERT3U(c, <, rm->rm_cols); + + src = rm->rm_col[c].rc_data; + ccount = rm->rm_col[c].rc_size; + for (j = 0; j < nmissing; j++) { + cc = missing[j] + rm->rm_firstdatacol; + ASSERT3U(cc, >=, rm->rm_firstdatacol); + ASSERT3U(cc, <, rm->rm_cols); + ASSERT3U(cc, !=, c); + + dst[j] = rm->rm_col[cc].rc_data; + dcount[j] = rm->rm_col[cc].rc_size; + } + + ASSERT(ccount >= rm->rm_col[missing[0]].rc_size || i > 0); + + for (x = 0; x < ccount; x++, src++) { + if 
(*src != 0) + log = vdev_raidz_log2[*src]; + + for (cc = 0; cc < nmissing; cc++) { + if (x >= dcount[cc]) + continue; + + if (*src == 0) { + val = 0; + } else { + if ((ll = log + invlog[cc][i]) >= 255) + ll -= 255; + val = vdev_raidz_pow2[ll]; + } + + if (i == 0) + dst[cc][x] = val; + else + dst[cc][x] ^= val; + } + } + } + + kmem_free(p, psize); +} + +static int +vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) +{ + int n, i, c, t, tt; + int nmissing_rows; + int missing_rows[VDEV_RAIDZ_MAXPARITY]; + int parity_map[VDEV_RAIDZ_MAXPARITY]; + + uint8_t *p, *pp; + size_t psize; + + uint8_t *rows[VDEV_RAIDZ_MAXPARITY]; + uint8_t *invrows[VDEV_RAIDZ_MAXPARITY]; + uint8_t *used; + + int code = 0; + + + n = rm->rm_cols - rm->rm_firstdatacol; + + /* + * Figure out which data columns are missing. + */ + nmissing_rows = 0; + for (t = 0; t < ntgts; t++) { + if (tgts[t] >= rm->rm_firstdatacol) { + missing_rows[nmissing_rows++] = + tgts[t] - rm->rm_firstdatacol; + } + } + + /* + * Figure out which parity columns to use to help generate the missing + * data columns. + */ + for (tt = 0, c = 0, i = 0; i < nmissing_rows; c++) { + ASSERT(tt < ntgts); + ASSERT(c < rm->rm_firstdatacol); + + /* + * Skip any targeted parity columns. 
+ */ + if (c == tgts[tt]) { + tt++; + continue; + } + + code |= 1 << c; + + parity_map[i] = c; + i++; + } + + ASSERT(code != 0); + ASSERT3U(code, <, 1 << VDEV_RAIDZ_MAXPARITY); + + psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) * + nmissing_rows * n + sizeof (used[0]) * n; + p = kmem_alloc(psize, KM_SLEEP); + + for (pp = p, i = 0; i < nmissing_rows; i++) { + rows[i] = pp; + pp += n; + invrows[i] = pp; + pp += n; + } + used = pp; + + for (i = 0; i < nmissing_rows; i++) { + used[i] = parity_map[i]; + } + + for (tt = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + if (tt < nmissing_rows && + c == missing_rows[tt] + rm->rm_firstdatacol) { + tt++; + continue; + } + + ASSERT3S(i, <, n); + used[i] = c; + i++; + } + + /* + * Initialize the interesting rows of the matrix. + */ + vdev_raidz_matrix_init(rm, n, nmissing_rows, parity_map, rows); + + /* + * Invert the matrix. + */ + vdev_raidz_matrix_invert(rm, n, nmissing_rows, missing_rows, rows, + invrows, used); + + /* + * Reconstruct the missing data using the generated matrix. + */ + vdev_raidz_matrix_reconstruct(rm, n, nmissing_rows, missing_rows, + invrows, used); + + kmem_free(p, psize); + + return (code); +} + +static int +vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt) +{ + int tgts[VDEV_RAIDZ_MAXPARITY], *dt; + int ntgts; + int i, c; + int code; + int nbadparity, nbaddata; + int parity_valid[VDEV_RAIDZ_MAXPARITY]; + + /* + * The tgts list must already be sorted. 
+ */ + for (i = 1; i < nt; i++) { + ASSERT(t[i] > t[i - 1]); + } + + nbadparity = rm->rm_firstdatacol; + nbaddata = rm->rm_cols - nbadparity; + ntgts = 0; + for (i = 0, c = 0; c < rm->rm_cols; c++) { + if (c < rm->rm_firstdatacol) + parity_valid[c] = B_FALSE; + + if (i < nt && c == t[i]) { + tgts[ntgts++] = c; + i++; + } else if (rm->rm_col[c].rc_error != 0) { + tgts[ntgts++] = c; + } else if (c >= rm->rm_firstdatacol) { + nbaddata--; + } else { + parity_valid[c] = B_TRUE; + nbadparity--; + } + } + + ASSERT(ntgts >= nt); + ASSERT(nbaddata >= 0); + ASSERT(nbaddata + nbadparity == ntgts); + + dt = &tgts[nbadparity]; + + /* + * See if we can use any of our optimized reconstruction routines. + */ + if (!vdev_raidz_default_to_general) { + switch (nbaddata) { + case 1: + if (parity_valid[VDEV_RAIDZ_P]) + return (vdev_raidz_reconstruct_p(rm, dt, 1)); + + ASSERT(rm->rm_firstdatacol > 1); + + if (parity_valid[VDEV_RAIDZ_Q]) + return (vdev_raidz_reconstruct_q(rm, dt, 1)); + + ASSERT(rm->rm_firstdatacol > 2); + break; + + case 2: + ASSERT(rm->rm_firstdatacol > 1); + + if (parity_valid[VDEV_RAIDZ_P] && + parity_valid[VDEV_RAIDZ_Q]) + return (vdev_raidz_reconstruct_pq(rm, dt, 2)); + + ASSERT(rm->rm_firstdatacol > 2); + + break; + } + } + + code = vdev_raidz_reconstruct_general(rm, tgts, ntgts); + ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY)); + ASSERT(code > 0); + return (code); +} + +static int +vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) +{ + vdev_t *cvd; + uint64_t nparity = vd->vdev_nparity; + int c; + int lasterror = 0; + int numerrors = 0; + + ASSERT(nparity > 0); + + if (nparity > VDEV_RAIDZ_MAXPARITY || + vd->vdev_children < nparity + 1) { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (EINVAL); + } + + vdev_open_children(vd); + + for (c = 0; c < vd->vdev_children; c++) { + cvd = vd->vdev_child[c]; + + if (cvd->vdev_open_error != 0) { + lasterror = cvd->vdev_open_error; + numerrors++; + continue; + } + + *asize = MIN(*asize - 1, cvd->vdev_asize - 
1) + 1; + *ashift = MAX(*ashift, cvd->vdev_ashift); + } + + *asize *= vd->vdev_children; + + if (numerrors > nparity) { + vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; + return (lasterror); + } + + return (0); +} + +static void +vdev_raidz_close(vdev_t *vd) +{ + int c; + + for (c = 0; c < vd->vdev_children; c++) + vdev_close(vd->vdev_child[c]); +} + +static uint64_t +vdev_raidz_asize(vdev_t *vd, uint64_t psize) +{ + uint64_t asize; + uint64_t ashift = vd->vdev_top->vdev_ashift; + uint64_t cols = vd->vdev_children; + uint64_t nparity = vd->vdev_nparity; + + asize = ((psize - 1) >> ashift) + 1; + asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity)); + asize = roundup(asize, nparity + 1) << ashift; + + return (asize); +} + +static void +vdev_raidz_child_done(zio_t *zio) +{ + raidz_col_t *rc = zio->io_private; + + rc->rc_error = zio->io_error; + rc->rc_tried = 1; + rc->rc_skipped = 0; +} + +static int +vdev_raidz_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_t *tvd = vd->vdev_top; + vdev_t *cvd; + raidz_map_t *rm; + raidz_col_t *rc; + int c, i; + + rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children, + vd->vdev_nparity); + + ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size)); + + if (zio->io_type == ZIO_TYPE_WRITE) { + vdev_raidz_generate_parity(rm); + + for (c = 0; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + cvd = vd->vdev_child[rc->rc_devidx]; + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset, rc->rc_data, rc->rc_size, + zio->io_type, zio->io_priority, 0, + vdev_raidz_child_done, rc)); + } + + /* + * Generate optional I/Os for any skipped sectors to improve + * aggregation contiguity. 
+ */ + for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) { + ASSERT(c <= rm->rm_scols); + if (c == rm->rm_scols) + c = 0; + rc = &rm->rm_col[c]; + cvd = vd->vdev_child[rc->rc_devidx]; + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset + rc->rc_size, NULL, + 1 << tvd->vdev_ashift, + zio->io_type, zio->io_priority, + ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL)); + } + + return (ZIO_PIPELINE_CONTINUE); + } + + ASSERT(zio->io_type == ZIO_TYPE_READ); + + /* + * Iterate over the columns in reverse order so that we hit the parity + * last -- any errors along the way will force us to read the parity. + */ + for (c = rm->rm_cols - 1; c >= 0; c--) { + rc = &rm->rm_col[c]; + cvd = vd->vdev_child[rc->rc_devidx]; + if (!vdev_readable(cvd)) { + if (c >= rm->rm_firstdatacol) + rm->rm_missingdata++; + else + rm->rm_missingparity++; + rc->rc_error = ENXIO; + rc->rc_tried = 1; /* don't even try */ + rc->rc_skipped = 1; + continue; + } + if (vdev_dtl_contains(cvd, DTL_MISSING, zio->io_txg, 1)) { + if (c >= rm->rm_firstdatacol) + rm->rm_missingdata++; + else + rm->rm_missingparity++; + rc->rc_error = ESTALE; + rc->rc_skipped = 1; + continue; + } + if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 || + (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) { + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset, rc->rc_data, rc->rc_size, + zio->io_type, zio->io_priority, 0, + vdev_raidz_child_done, rc)); + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + + +/* + * Report a checksum error for a child of a RAID-Z device. 
+ */ +static void +raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) +{ + vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; + + if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + zio_bad_cksum_t zbc; + raidz_map_t *rm = zio->io_vsd; + + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&vd->vdev_stat_lock); + + zbc.zbc_has_cksum = 0; + zbc.zbc_injected = rm->rm_ecksuminjected; + + zfs_ereport_post_checksum(zio->io_spa, vd, zio, + rc->rc_offset, rc->rc_size, rc->rc_data, bad_data, + &zbc); + } +} + +/* + * Run zio_checksum_error() on the zio and return its result; callers in this + * file treat zero as a valid checksum. + * + * We keep track of whether or not there were any injected errors, so that + * any ereports we generate can note it. + */ +static int +raidz_checksum_verify(zio_t *zio) +{ + zio_bad_cksum_t zbc; + raidz_map_t *rm = zio->io_vsd; + + int ret = zio_checksum_error(zio, &zbc); + if (ret != 0 && zbc.zbc_injected != 0) + rm->rm_ecksuminjected = 1; + + return (ret); +} + +/* + * Generate the parity from the data columns. If we tried and were able to + * read the parity without error, verify that the generated parity matches the + * data we read. If it doesn't, we fire off a checksum error and mark the + * column's rc_error as ECKSUM. Return the number of such failures. + */ +static int +raidz_parity_verify(zio_t *zio, raidz_map_t *rm) +{ + void *orig[VDEV_RAIDZ_MAXPARITY]; + int c, ret = 0; + raidz_col_t *rc; + + for (c = 0; c < rm->rm_firstdatacol; c++) { + rc = &rm->rm_col[c]; + if (!rc->rc_tried || rc->rc_error != 0) + continue; + orig[c] = zio_buf_alloc(rc->rc_size); + bcopy(rc->rc_data, orig[c], rc->rc_size); + } + + vdev_raidz_generate_parity(rm); + + for (c = 0; c < rm->rm_firstdatacol; c++) { + rc = &rm->rm_col[c]; + if (!rc->rc_tried || rc->rc_error != 0) + continue; + if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { + raidz_checksum_error(zio, rc, orig[c]); + rc->rc_error = ECKSUM; + ret++; + } + zio_buf_free(orig[c], rc->rc_size); + } + + return (ret); +} + +/* + * Keep statistics on all the ways that we used parity to correct data.
+ */ +static uint64_t raidz_corrected[1 << VDEV_RAIDZ_MAXPARITY]; + +static int +vdev_raidz_worst_error(raidz_map_t *rm) +{ + int error = 0; + + for (int c = 0; c < rm->rm_cols; c++) + error = zio_worst_error(error, rm->rm_col[c].rc_error); + + return (error); +} + +/* + * Iterate over all combinations of bad data and attempt a reconstruction. + * Note that the algorithm below is non-optimal because it doesn't take into + * account how reconstruction is actually performed. For example, with + * triple-parity RAID-Z the reconstruction procedure is the same if column 4 + * is targeted as invalid as if columns 1 and 4 are targeted since in both + * cases we'd only use parity information in column 0. + */ +static int +vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) +{ + raidz_map_t *rm = zio->io_vsd; + raidz_col_t *rc; + void *orig[VDEV_RAIDZ_MAXPARITY]; + int tstore[VDEV_RAIDZ_MAXPARITY + 2]; + int *tgts = &tstore[1]; + int current, next, i, c, n; + int code, ret = 0; + + ASSERT(total_errors < rm->rm_firstdatacol); + + /* + * This simplifies one edge condition. + */ + tgts[-1] = -1; + + for (n = 1; n <= rm->rm_firstdatacol - total_errors; n++) { + /* + * Initialize the targets array by finding the first n columns + * that contain no error. + * + * If there were no data errors, we need to ensure that we're + * always explicitly attempting to reconstruct at least one + * data column. To do this, we simply push the highest target + * up into the data columns. + */ + for (c = 0, i = 0; i < n; i++) { + if (i == n - 1 && data_errors == 0 && + c < rm->rm_firstdatacol) { + c = rm->rm_firstdatacol; + } + + while (rm->rm_col[c].rc_error != 0) { + c++; + ASSERT3S(c, <, rm->rm_cols); + } + + tgts[i] = c++; + } + + /* + * Setting tgts[n] simplifies the other edge condition. + */ + tgts[n] = rm->rm_cols; + + /* + * These buffers were allocated in previous iterations. 
+ */ + for (i = 0; i < n - 1; i++) { + ASSERT(orig[i] != NULL); + } + + orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size); + + current = 0; + next = tgts[current]; + + while (current != n) { + tgts[current] = next; + current = 0; + + /* + * Save off the original data that we're going to + * attempt to reconstruct. + */ + for (i = 0; i < n; i++) { + ASSERT(orig[i] != NULL); + c = tgts[i]; + ASSERT3S(c, >=, 0); + ASSERT3S(c, <, rm->rm_cols); + rc = &rm->rm_col[c]; + bcopy(rc->rc_data, orig[i], rc->rc_size); + } + + /* + * Attempt a reconstruction and exit the outer loop on + * success. + */ + code = vdev_raidz_reconstruct(rm, tgts, n); + if (raidz_checksum_verify(zio) == 0) { + atomic_inc_64(&raidz_corrected[code]); + + for (i = 0; i < n; i++) { + c = tgts[i]; + rc = &rm->rm_col[c]; + ASSERT(rc->rc_error == 0); + if (rc->rc_tried) + raidz_checksum_error(zio, rc, + orig[i]); + rc->rc_error = ECKSUM; + } + + ret = code; + goto done; + } + + /* + * Restore the original data. + */ + for (i = 0; i < n; i++) { + c = tgts[i]; + rc = &rm->rm_col[c]; + bcopy(orig[i], rc->rc_data, rc->rc_size); + } + + do { + /* + * Find the next valid column after the current + * position.. + */ + for (next = tgts[current] + 1; + next < rm->rm_cols && + rm->rm_col[next].rc_error != 0; next++) + continue; + + ASSERT(next <= tgts[current + 1]); + + /* + * If that spot is available, we're done here. + */ + if (next != tgts[current + 1]) + break; + + /* + * Otherwise, find the next valid column after + * the previous position. 
+ */ + for (c = tgts[current - 1] + 1; + rm->rm_col[c].rc_error != 0; c++) + continue; + + tgts[current] = c; + current++; + + } while (current != n); + } + } + n--; +done: + for (i = 0; i < n; i++) { + zio_buf_free(orig[i], rm->rm_col[0].rc_size); + } + + return (ret); +} + +static void +vdev_raidz_io_done(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_t *cvd; + raidz_map_t *rm = zio->io_vsd; + raidz_col_t *rc; + int unexpected_errors = 0; + int parity_errors = 0; + int parity_untried = 0; + int data_errors = 0; + int total_errors = 0; + int n, c; + int tgts[VDEV_RAIDZ_MAXPARITY]; + int code; + + ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */ + + ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol); + ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol); + + for (c = 0; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + + if (rc->rc_error) { + ASSERT(rc->rc_error != ECKSUM); /* child has no bp */ + + if (c < rm->rm_firstdatacol) + parity_errors++; + else + data_errors++; + + if (!rc->rc_skipped) + unexpected_errors++; + + total_errors++; + } else if (c < rm->rm_firstdatacol && !rc->rc_tried) { + parity_untried++; + } + } + + if (zio->io_type == ZIO_TYPE_WRITE) { + /* + * XXX -- for now, treat partial writes as a success. + * (If we couldn't write enough columns to reconstruct + * the data, the I/O failed. Otherwise, good enough.) + * + * Now that we support write reallocation, it would be better + * to treat partial failure as real failure unless there are + * no non-degraded top-level vdevs left, and not update DTLs + * if we intend to reallocate. + */ + /* XXPOLICY */ + if (total_errors > rm->rm_firstdatacol) + zio->io_error = vdev_raidz_worst_error(rm); + + return; + } + + ASSERT(zio->io_type == ZIO_TYPE_READ); + /* + * There are three potential phases for a read: + * 1. produce valid data from the columns read + * 2. read all disks and try again + * 3. 
perform combinatorial reconstruction + * + * Each phase is progressively both more expensive and less likely to + * occur. If we encounter more errors than we can repair or all phases + * fail, we have no choice but to return an error. + */ + + /* + * If the number of errors we saw was correctable -- less than or equal + * to the number of parity disks read -- attempt to produce data that + * has a valid checksum. Naturally, this case applies in the absence of + * any errors. + */ + if (total_errors <= rm->rm_firstdatacol - parity_untried) { + if (data_errors == 0) { + if (raidz_checksum_verify(zio) == 0) { + /* + * If we read parity information (unnecessarily + * as it happens since no reconstruction was + * needed) regenerate and verify the parity. + * We also regenerate parity when resilvering + * so we can write it out to the failed device + * later. + */ + if (parity_errors + parity_untried < + rm->rm_firstdatacol || + (zio->io_flags & ZIO_FLAG_RESILVER)) { + n = raidz_parity_verify(zio, rm); + unexpected_errors += n; + ASSERT(parity_errors + n <= + rm->rm_firstdatacol); + } + goto done; + } + } else { + /* + * We either attempt to read all the parity columns or + * none of them. If we didn't try to read parity, we + * wouldn't be here in the correctable case. There must + * also have been fewer parity errors than parity + * columns or, again, we wouldn't be in this code path. + */ + ASSERT(parity_untried == 0); + ASSERT(parity_errors < rm->rm_firstdatacol); + + /* + * Identify the data columns that reported an error. 
+ */ + n = 0; + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + if (rc->rc_error != 0) { + ASSERT(n < VDEV_RAIDZ_MAXPARITY); + tgts[n++] = c; + } + } + + ASSERT(rm->rm_firstdatacol >= n); + + code = vdev_raidz_reconstruct(rm, tgts, n); + + if (raidz_checksum_verify(zio) == 0) { + atomic_inc_64(&raidz_corrected[code]); + + /* + * If we read more parity disks than were used + * for reconstruction, confirm that the other + * parity disks produced correct data. This + * routine is suboptimal in that it regenerates + * the parity that we already used in addition + * to the parity that we're attempting to + * verify, but this should be a relatively + * uncommon case, and can be optimized if it + * becomes a problem. Note that we regenerate + * parity when resilvering so we can write it + * out to failed devices later. + */ + if (parity_errors < rm->rm_firstdatacol - n || + (zio->io_flags & ZIO_FLAG_RESILVER)) { + n = raidz_parity_verify(zio, rm); + unexpected_errors += n; + ASSERT(parity_errors + n <= + rm->rm_firstdatacol); + } + + goto done; + } + } + } + + /* + * This isn't a typical situation -- either we got a read error or + * a child silently returned bad data. Read every block so we can + * try again with as much data and parity as we can track down. If + * we've already been through once before, all children will be marked + * as tried so we'll proceed to combinatorial reconstruction. 
+ */ + unexpected_errors = 1; + rm->rm_missingdata = 0; + rm->rm_missingparity = 0; + + for (c = 0; c < rm->rm_cols; c++) { + if (rm->rm_col[c].rc_tried) + continue; + + zio_vdev_io_redone(zio); + do { + rc = &rm->rm_col[c]; + if (rc->rc_tried) + continue; + zio_nowait(zio_vdev_child_io(zio, NULL, + vd->vdev_child[rc->rc_devidx], + rc->rc_offset, rc->rc_data, rc->rc_size, + zio->io_type, zio->io_priority, 0, + vdev_raidz_child_done, rc)); + } while (++c < rm->rm_cols); + + return; + } + + /* + * At this point we've attempted to reconstruct the data given the + * errors we detected, and we've attempted to read all columns. There + * must, therefore, be one or more additional problems -- silent errors + * resulting in invalid data rather than explicit I/O errors resulting + * in absent data. We check if there is enough additional data to + * possibly reconstruct the data and then perform combinatorial + * reconstruction over all possible combinations. If that fails, + * we're cooked. + */ + if (total_errors > rm->rm_firstdatacol) { + zio->io_error = vdev_raidz_worst_error(rm); + + } else if (total_errors < rm->rm_firstdatacol && + (code = vdev_raidz_combrec(zio, total_errors, data_errors)) != 0) { + /* + * If we didn't use all the available parity for the + * combinatorial reconstruction, verify that the remaining + * parity is correct. + */ + if (code != (1 << rm->rm_firstdatacol) - 1) + (void) raidz_parity_verify(zio, rm); + } else { + /* + * We're here because either: + * + * total_errors == rm_firstdatacol, or + * vdev_raidz_combrec() failed + * + * In either case, there is enough bad data to prevent + * reconstruction. + * + * Start checksum ereports for all children which haven't + * failed, provided the I/O wasn't speculative.
+ */ + zio->io_error = ECKSUM; + + if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + for (c = 0; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + if (rc->rc_error == 0) { + zio_bad_cksum_t zbc; + zbc.zbc_has_cksum = 0; + zbc.zbc_injected = + rm->rm_ecksuminjected; + + zfs_ereport_start_checksum( + zio->io_spa, + vd->vdev_child[rc->rc_devidx], + zio, rc->rc_offset, rc->rc_size, + (void *)(uintptr_t)c, &zbc); + } + } + } + } + +done: + zio_checksum_verified(zio); + + if (zio->io_error == 0 && spa_writeable(zio->io_spa) && + (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER))) { + /* + * Use the good data we have in hand to repair damaged children. + */ + for (c = 0; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + cvd = vd->vdev_child[rc->rc_devidx]; + + if (rc->rc_error == 0) + continue; + + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset, rc->rc_data, rc->rc_size, + ZIO_TYPE_WRITE, zio->io_priority, + ZIO_FLAG_IO_REPAIR | (unexpected_errors ? + ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); + } + } +} + +static void +vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded) +{ + if (faulted > vd->vdev_nparity) + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_NO_REPLICAS); + else if (degraded + faulted != 0) + vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); + else + vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); +} + +vdev_ops_t vdev_raidz_ops = { + vdev_raidz_open, + vdev_raidz_close, + vdev_raidz_asize, + vdev_raidz_io_start, + vdev_raidz_io_done, + vdev_raidz_state_change, + NULL, + NULL, + VDEV_TYPE_RAIDZ, /* name of this vdev type */ + B_FALSE /* not a leaf vdev */ +}; diff --git a/uts/common/fs/zfs/vdev_root.c b/uts/common/fs/zfs/vdev_root.c new file mode 100644 index 000000000000..879f78f3a5b3 --- /dev/null +++ b/uts/common/fs/zfs/vdev_root.c @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution 
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/fs/zfs.h> + +/* + * Virtual device vector for the pool's root vdev. + */ + +/* + * We should be able to tolerate one failure with absolutely no damage + * to our metadata. Two failures will take out space maps, a bunch of + * indirect block trees, meta dnodes, dnodes, etc. Probably not a happy + * place to live. When we get smarter, we can liberalize this policy. + * e.g. If we haven't lost two consecutive top-level vdevs, then we are + * probably fine. Adding bean counters during alloc/free can make this + * future guesswork more accurate. 
+ */ +static int +too_many_errors(vdev_t *vd, int numerrors) +{ + ASSERT3U(numerrors, <=, vd->vdev_children); + return (numerrors > 0); +} + +static int +vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) +{ + int lasterror = 0; + int numerrors = 0; + + if (vd->vdev_children == 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (EINVAL); + } + + vdev_open_children(vd); + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + if (cvd->vdev_open_error && !cvd->vdev_islog) { + lasterror = cvd->vdev_open_error; + numerrors++; + } + } + + if (too_many_errors(vd, numerrors)) { + vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; + return (lasterror); + } + + *asize = 0; + *ashift = 0; + + return (0); +} + +static void +vdev_root_close(vdev_t *vd) +{ + for (int c = 0; c < vd->vdev_children; c++) + vdev_close(vd->vdev_child[c]); +} + +static void +vdev_root_state_change(vdev_t *vd, int faulted, int degraded) +{ + if (too_many_errors(vd, faulted)) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_NO_REPLICAS); + } else if (degraded) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); + } else { + vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); + } +} + +vdev_ops_t vdev_root_ops = { + vdev_root_open, + vdev_root_close, + vdev_default_asize, + NULL, /* io_start - not applicable to the root */ + NULL, /* io_done - not applicable to the root */ + vdev_root_state_change, + NULL, + NULL, + VDEV_TYPE_ROOT, /* name of this vdev type */ + B_FALSE /* not a leaf vdev */ +}; diff --git a/uts/common/fs/zfs/zap.c b/uts/common/fs/zfs/zap.c new file mode 100644 index 000000000000..288a4d99ab25 --- /dev/null +++ b/uts/common/fs/zfs/zap.c @@ -0,0 +1,1354 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * This file contains the top half of the zfs directory structure + * implementation. The bottom half is in zap_leaf.c. + * + * The zdir is an extendable hash data structure. There is a table of + * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are + * each a constant size and hold a variable number of directory entries. + * The buckets (aka "leaf nodes") are implemented in zap_leaf.c. + * + * The pointer table holds a power of 2 number of pointers. + * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). 
The bucket pointed to + * by the pointer at index i in the table holds entries whose hash value + * has a zd_prefix_len - bit prefix + */ + +#include <sys/spa.h> +#include <sys/dmu.h> +#include <sys/zfs_context.h> +#include <sys/zfs_znode.h> +#include <sys/fs/zfs.h> +#include <sys/zap.h> +#include <sys/refcount.h> +#include <sys/zap_impl.h> +#include <sys/zap_leaf.h> + +int fzap_default_block_shift = 14; /* 16k blocksize */ + +static void zap_leaf_pageout(dmu_buf_t *db, void *vl); +static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks); + + +void +fzap_byteswap(void *vbuf, size_t size) +{ + uint64_t block_type; + + block_type = *(uint64_t *)vbuf; + + if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF)) + zap_leaf_byteswap(vbuf, size); + else { + /* it's a ptrtbl block */ + byteswap_uint64_array(vbuf, size); + } +} + +void +fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags) +{ + dmu_buf_t *db; + zap_leaf_t *l; + int i; + zap_phys_t *zp; + + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + zap->zap_ismicro = FALSE; + + (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, + &zap->zap_f.zap_phys, zap_evict); + + mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); + zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1; + + zp = zap->zap_f.zap_phys; + /* + * explicitly zero it since it might be coming from an + * initialized microzap + */ + bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size); + zp->zap_block_type = ZBT_HEADER; + zp->zap_magic = ZAP_MAGIC; + + zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap); + + zp->zap_freeblk = 2; /* block 1 will be the first leaf */ + zp->zap_num_leafs = 1; + zp->zap_num_entries = 0; + zp->zap_salt = zap->zap_salt; + zp->zap_normflags = zap->zap_normflags; + zp->zap_flags = flags; + + /* block 1 will be the first leaf */ + for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++) + ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1; + + /* + * set up block 1 - the first leaf + */ + VERIFY(0 == 
dmu_buf_hold(zap->zap_objset, zap->zap_object, + 1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH)); + dmu_buf_will_dirty(db, tx); + + l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); + l->l_dbuf = db; + l->l_phys = db->db_data; + + zap_leaf_init(l, zp->zap_normflags != 0); + + kmem_free(l, sizeof (zap_leaf_t)); + dmu_buf_rele(db, FTAG); +} + +static int +zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx) +{ + if (RW_WRITE_HELD(&zap->zap_rwlock)) + return (1); + if (rw_tryupgrade(&zap->zap_rwlock)) { + dmu_buf_will_dirty(zap->zap_dbuf, tx); + return (1); + } + return (0); +} + +/* + * Generic routines for dealing with the pointer & cookie tables. + */ + +static int +zap_table_grow(zap_t *zap, zap_table_phys_t *tbl, + void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n), + dmu_tx_t *tx) +{ + uint64_t b, newblk; + dmu_buf_t *db_old, *db_new; + int err; + int bs = FZAP_BLOCK_SHIFT(zap); + int hepb = 1<<(bs-4); + /* hepb = half the number of entries in a block */ + + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + ASSERT(tbl->zt_blk != 0); + ASSERT(tbl->zt_numblks > 0); + + if (tbl->zt_nextblk != 0) { + newblk = tbl->zt_nextblk; + } else { + newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2); + tbl->zt_nextblk = newblk; + ASSERT3U(tbl->zt_blks_copied, ==, 0); + dmu_prefetch(zap->zap_objset, zap->zap_object, + tbl->zt_blk << bs, tbl->zt_numblks << bs); + } + + /* + * Copy the ptrtbl from the old to new location. 
+ */ + + b = tbl->zt_blks_copied; + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH); + if (err) + return (err); + + /* first half of entries in old[b] go to new[2*b+0] */ + VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, + (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH)); + dmu_buf_will_dirty(db_new, tx); + transfer_func(db_old->db_data, db_new->db_data, hepb); + dmu_buf_rele(db_new, FTAG); + + /* second half of entries in old[b] go to new[2*b+1] */ + VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, + (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH)); + dmu_buf_will_dirty(db_new, tx); + transfer_func((uint64_t *)db_old->db_data + hepb, + db_new->db_data, hepb); + dmu_buf_rele(db_new, FTAG); + + dmu_buf_rele(db_old, FTAG); + + tbl->zt_blks_copied++; + + dprintf("copied block %llu of %llu\n", + tbl->zt_blks_copied, tbl->zt_numblks); + + if (tbl->zt_blks_copied == tbl->zt_numblks) { + (void) dmu_free_range(zap->zap_objset, zap->zap_object, + tbl->zt_blk << bs, tbl->zt_numblks << bs, tx); + + tbl->zt_blk = newblk; + tbl->zt_numblks *= 2; + tbl->zt_shift++; + tbl->zt_nextblk = 0; + tbl->zt_blks_copied = 0; + + dprintf("finished; numblocks now %llu (%lluk entries)\n", + tbl->zt_numblks, 1<<(tbl->zt_shift-10)); + } + + return (0); +} + +static int +zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val, + dmu_tx_t *tx) +{ + int err; + uint64_t blk, off; + int bs = FZAP_BLOCK_SHIFT(zap); + dmu_buf_t *db; + + ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + ASSERT(tbl->zt_blk != 0); + + dprintf("storing %llx at index %llx\n", val, idx); + + blk = idx >> (bs-3); + off = idx & ((1<<(bs-3))-1); + + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); + if (err) + return (err); + dmu_buf_will_dirty(db, tx); + + if (tbl->zt_nextblk != 0) { + uint64_t idx2 = idx * 2; + uint64_t blk2 = idx2
>> (bs-3); + uint64_t off2 = idx2 & ((1<<(bs-3))-1); + dmu_buf_t *db2; + + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + (tbl->zt_nextblk + blk2) << bs, FTAG, &db2, + DMU_READ_NO_PREFETCH); + if (err) { + dmu_buf_rele(db, FTAG); + return (err); + } + dmu_buf_will_dirty(db2, tx); + ((uint64_t *)db2->db_data)[off2] = val; + ((uint64_t *)db2->db_data)[off2+1] = val; + dmu_buf_rele(db2, FTAG); + } + + ((uint64_t *)db->db_data)[off] = val; + dmu_buf_rele(db, FTAG); + + return (0); +} + +/* + * Read entry idx of the table tbl into *valp. The entry is the idx'th + * 64-bit word counted from block tbl->zt_blk. If tbl->zt_nextblk is set + * (zap_table_grow() has allocated a new table that is not yet fully + * copied), the matching block of the new table is held as well so that + * I/O errors on it are reported here. Returns 0 on success or the error + * from dmu_buf_hold(); *valp is only written if the first hold succeeds. + */ +static int +zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp) +{ + uint64_t blk, off; + int err; + dmu_buf_t *db; + int bs = FZAP_BLOCK_SHIFT(zap); + + ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + + blk = idx >> (bs-3); + off = idx & ((1<<(bs-3))-1); + + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); + if (err) + return (err); + *valp = ((uint64_t *)db->db_data)[off]; + dmu_buf_rele(db, FTAG); + + if (tbl->zt_nextblk != 0) { + /* + * read the nextblk for the sake of i/o error checking, + * so that zap_table_load() will catch errors for + * zap_table_store. + */ + blk = (idx*2) >> (bs-3); + + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + (tbl->zt_nextblk + blk) << bs, FTAG, &db, + DMU_READ_NO_PREFETCH); + /* only drop the hold if dmu_buf_hold() actually took one */ + if (err == 0) + dmu_buf_rele(db, FTAG); + } + return (err); +} + +/* + * Routines for growing the ptrtbl. + */ + +static void +zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n) +{ + int i; + for (i = 0; i < n; i++) { + uint64_t lb = src[i]; + dst[2*i+0] = lb; + dst[2*i+1] = lb; + } +} + +static int +zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) +{ + /* + * The pointer table should never use more hash bits than we + * have (otherwise we'd be using useless zero bits to index it). + * If we are within 2 bits of running out, stop growing, since + * this is already an aberrant condition.
+ */ + if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2) + return (ENOSPC); + + if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { + /* + * We are outgrowing the "embedded" ptrtbl (the one + * stored in the header block). Give it its own entire + * block, which will double the size of the ptrtbl. + */ + uint64_t newblk; + dmu_buf_t *db_new; + int err; + + ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, + ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); + ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0); + + newblk = zap_allocate_blocks(zap, 1); + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new, + DMU_READ_NO_PREFETCH); + if (err) + return (err); + dmu_buf_will_dirty(db_new, tx); + zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), + db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); + dmu_buf_rele(db_new, FTAG); + + zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; + zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; + zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; + + ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, + zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << + (FZAP_BLOCK_SHIFT(zap)-3)); + + return (0); + } else { + return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, + zap_ptrtbl_transfer, tx)); + } +} + +static void +zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx) +{ + dmu_buf_will_dirty(zap->zap_dbuf, tx); + mutex_enter(&zap->zap_f.zap_num_entries_mtx); + ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); + zap->zap_f.zap_phys->zap_num_entries += delta; + mutex_exit(&zap->zap_f.zap_num_entries_mtx); +} + +static uint64_t +zap_allocate_blocks(zap_t *zap, int nblocks) +{ + uint64_t newblk; + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + newblk = zap->zap_f.zap_phys->zap_freeblk; + zap->zap_f.zap_phys->zap_freeblk += nblocks; + return (newblk); +} + +static zap_leaf_t * +zap_create_leaf(zap_t *zap, dmu_tx_t *tx) +{ + void *winner; 
+ zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); + + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + + rw_init(&l->l_rwlock, 0, 0, 0); + rw_enter(&l->l_rwlock, RW_WRITER); + l->l_blkid = zap_allocate_blocks(zap, 1); + l->l_dbuf = NULL; + l->l_phys = NULL; + + VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, + l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, + DMU_READ_NO_PREFETCH)); + winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); + ASSERT(winner == NULL); + dmu_buf_will_dirty(l->l_dbuf, tx); + + zap_leaf_init(l, zap->zap_normflags != 0); + + zap->zap_f.zap_phys->zap_num_leafs++; + + return (l); +} + +int +fzap_count(zap_t *zap, uint64_t *count) +{ + ASSERT(!zap->zap_ismicro); + mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ + *count = zap->zap_f.zap_phys->zap_num_entries; + mutex_exit(&zap->zap_f.zap_num_entries_mtx); + return (0); +} + +/* + * Routines for obtaining zap_leaf_t's + */ + +void +zap_put_leaf(zap_leaf_t *l) +{ + rw_exit(&l->l_rwlock); + dmu_buf_rele(l->l_dbuf, NULL); +} + +_NOTE(ARGSUSED(0)) +static void +zap_leaf_pageout(dmu_buf_t *db, void *vl) +{ + zap_leaf_t *l = vl; + + rw_destroy(&l->l_rwlock); + kmem_free(l, sizeof (zap_leaf_t)); +} + +static zap_leaf_t * +zap_open_leaf(uint64_t blkid, dmu_buf_t *db) +{ + zap_leaf_t *l, *winner; + + ASSERT(blkid != 0); + + l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); + rw_init(&l->l_rwlock, 0, 0, 0); + rw_enter(&l->l_rwlock, RW_WRITER); + l->l_blkid = blkid; + l->l_bs = highbit(db->db_size)-1; + l->l_dbuf = db; + l->l_phys = NULL; + + winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); + + rw_exit(&l->l_rwlock); + if (winner != NULL) { + /* someone else set it first */ + zap_leaf_pageout(NULL, l); + l = winner; + } + + /* + * lhr_pad was previously used for the next leaf in the leaf + * chain. There should be no chained leafs (as we have removed + * support for them). 
+ */ + ASSERT3U(l->l_phys->l_hdr.lh_pad1, ==, 0); + + /* + * There should be more hash entries than there can be + * chunks to put in the hash table + */ + ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3); + + /* The chunks should begin at the end of the hash table */ + ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, + &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]); + + /* The chunks should end at the end of the block */ + ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) - + (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size); + + return (l); +} + +static int +zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt, + zap_leaf_t **lp) +{ + dmu_buf_t *db; + zap_leaf_t *l; + int bs = FZAP_BLOCK_SHIFT(zap); + int err; + + ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + + err = dmu_buf_hold(zap->zap_objset, zap->zap_object, + blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH); + if (err) + return (err); + + ASSERT3U(db->db_object, ==, zap->zap_object); + ASSERT3U(db->db_offset, ==, blkid << bs); + ASSERT3U(db->db_size, ==, 1 << bs); + ASSERT(blkid != 0); + + l = dmu_buf_get_user(db); + + if (l == NULL) + l = zap_open_leaf(blkid, db); + + rw_enter(&l->l_rwlock, lt); + /* + * Must lock before dirtying, otherwise l->l_phys could change, + * causing ASSERT below to fail. 
+ */ + if (lt == RW_WRITER) + dmu_buf_will_dirty(db, tx); + ASSERT3U(l->l_blkid, ==, blkid); + ASSERT3P(l->l_dbuf, ==, db); + ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); + ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF); + ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); + + *lp = l; + return (0); +} + +static int +zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp) +{ + ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + + if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { + ASSERT3U(idx, <, + (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); + *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx); + return (0); + } else { + return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, + idx, valp)); + } +} + +static int +zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx) +{ + ASSERT(tx != NULL); + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + + if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { + ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk; + return (0); + } else { + return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, + idx, blk, tx)); + } +} + +static int +zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp) +{ + uint64_t idx, blk; + int err; + + ASSERT(zap->zap_dbuf == NULL || + zap->zap_f.zap_phys == zap->zap_dbuf->db_data); + ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); + idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + err = zap_idx_to_blk(zap, idx, &blk); + if (err != 0) + return (err); + err = zap_get_leaf_byblk(zap, blk, tx, lt, lp); + + ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) == + (*lp)->l_phys->l_hdr.lh_prefix); + return (err); +} + +static int +zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp) +{ + zap_t *zap = zn->zn_zap; + uint64_t hash = zn->zn_hash; + zap_leaf_t *nl; + int prefix_diff, i, err; + uint64_t sibling; + int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len; + + ASSERT3U(old_prefix_len, <=, 
zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + + ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, + l->l_phys->l_hdr.lh_prefix); + + if (zap_tryupgradedir(zap, tx) == 0 || + old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { + /* We failed to upgrade, or need to grow the pointer table */ + objset_t *os = zap->zap_objset; + uint64_t object = zap->zap_object; + + zap_put_leaf(l); + zap_unlockdir(zap); + err = zap_lockdir(os, object, tx, RW_WRITER, + FALSE, FALSE, &zn->zn_zap); + zap = zn->zn_zap; + if (err) + return (err); + ASSERT(!zap->zap_ismicro); + + while (old_prefix_len == + zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { + err = zap_grow_ptrtbl(zap, tx); + if (err) + return (err); + } + + err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l); + if (err) + return (err); + + if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) { + /* it split while our locks were down */ + *lp = l; + return (0); + } + } + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, + l->l_phys->l_hdr.lh_prefix); + + prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - + (old_prefix_len + 1); + sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff; + + /* check for i/o errors before doing zap_leaf_split */ + for (i = 0; i < (1ULL<<prefix_diff); i++) { + uint64_t blk; + err = zap_idx_to_blk(zap, sibling+i, &blk); + if (err) + return (err); + ASSERT3U(blk, ==, l->l_blkid); + } + + nl = zap_create_leaf(zap, tx); + zap_leaf_split(l, nl, zap->zap_normflags != 0); + + /* set sibling pointers */ + for (i = 0; i < (1ULL<<prefix_diff); i++) { + err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx); + ASSERT3U(err, ==, 0); /* we checked for i/o errors above */ + } + + if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) { + /* we want the sibling */ + zap_put_leaf(l); + *lp = nl; + } else { + zap_put_leaf(nl); + 
*lp = l; + } + + return (0); +} + +static void +zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) +{ + zap_t *zap = zn->zn_zap; + int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; + int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift && + l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER); + + zap_put_leaf(l); + + if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) { + int err; + + /* + * We are in the middle of growing the pointer table, or + * this leaf will soon make us grow it. + */ + if (zap_tryupgradedir(zap, tx) == 0) { + objset_t *os = zap->zap_objset; + uint64_t zapobj = zap->zap_object; + + zap_unlockdir(zap); + err = zap_lockdir(os, zapobj, tx, + RW_WRITER, FALSE, FALSE, &zn->zn_zap); + zap = zn->zn_zap; + if (err) + return; + } + + /* could have finished growing while our locks were down */ + if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift) + (void) zap_grow_ptrtbl(zap, tx); + } +} + +static int +fzap_checkname(zap_name_t *zn) +{ + if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN) + return (ENAMETOOLONG); + return (0); +} + +static int +fzap_checksize(uint64_t integer_size, uint64_t num_integers) +{ + /* Only integer sizes supported by C */ + switch (integer_size) { + case 1: + case 2: + case 4: + case 8: + break; + default: + return (EINVAL); + } + + if (integer_size * num_integers > ZAP_MAXVALUELEN) + return (E2BIG); + + return (0); +} + +static int +fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers) +{ + int err; + + if ((err = fzap_checkname(zn)) != 0) + return (err); + return (fzap_checksize(integer_size, num_integers)); +} + +/* + * Routines for manipulating attributes. 
+ */
+
+/*
+ * Look up the entry named by zn in a fat zap.
+ *
+ * On a hit the value is read into buf as num_integers integers of
+ * integer_size bytes, the stored name is copied into realname (at most
+ * rn_len bytes; the result of that copy is ignored), and *ncp, when ncp
+ * is non-NULL, receives the normalization-conflict flag.
+ *
+ * Returns 0 or an errno: the fzap_checkname() result, an error from
+ * zap_deref_leaf() or zap_leaf_lookup(), the fzap_checksize() result,
+ * or the zap_entry_read() result.
+ */
+int
+fzap_lookup(zap_name_t *zn,
+    uint64_t integer_size, uint64_t num_integers, void *buf,
+    char *realname, int rn_len, boolean_t *ncp)
+{
+	zap_leaf_t *l;
+	int err;
+	zap_entry_handle_t zeh;
+
+	if ((err = fzap_checkname(zn)) != 0)
+		return (err);
+
+	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
+	if (err != 0)
+		return (err);
+	err = zap_leaf_lookup(l, zn, &zeh);
+	if (err == 0) {
+		/* the size check is only applied once the entry exists */
+		if ((err = fzap_checksize(integer_size, num_integers)) != 0) {
+			zap_put_leaf(l);
+			return (err);
+		}
+
+		err = zap_entry_read(&zeh, integer_size, num_integers, buf);
+		(void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);
+		if (ncp) {
+			*ncp = zap_entry_normalization_conflict(&zeh,
+			    zn, NULL, zn->zn_zap);
+		}
+	}
+
+	zap_put_leaf(l);
+	return (err);
+}
+
+/*
+ * Add a new entry with the collision differentiator cd.
+ *
+ * Returns EEXIST if the name is already present.  When the leaf has no
+ * room (zap_entry_create() returns EAGAIN) the leaf is expanded with
+ * zap_expand_leaf() and the insert is retried.
+ *
+ * NOTE(review): zap_expand_leaf() can return an error after it has
+ * already called zap_put_leaf() on l (for example when zap_grow_ptrtbl()
+ * fails) while zn->zn_zap is still non-NULL.  In that case the "out"
+ * path below releases l a second time -- confirm and fix together with
+ * zap_expand_leaf().
+ */
+int
+fzap_add_cd(zap_name_t *zn,
+    uint64_t integer_size, uint64_t num_integers,
+    const void *val, uint32_t cd, dmu_tx_t *tx)
+{
+	zap_leaf_t *l;
+	int err;
+	zap_entry_handle_t zeh;
+	zap_t *zap = zn->zn_zap;
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+	ASSERT(!zap->zap_ismicro);
+	ASSERT(fzap_check(zn, integer_size, num_integers) == 0);
+
+	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
+	if (err != 0)
+		return (err);
+retry:
+	err = zap_leaf_lookup(l, zn, &zeh);
+	if (err == 0) {
+		err = EEXIST;
+		goto out;
+	}
+	if (err != ENOENT)
+		goto out;
+
+	err = zap_entry_create(l, zn, cd,
+	    integer_size, num_integers, val, &zeh);
+
+	if (err == 0) {
+		zap_increment_num_entries(zap, 1, tx);
+	} else if (err == EAGAIN) {
+		err = zap_expand_leaf(zn, l, tx, &l);
+		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
+		if (err == 0)
+			goto retry;
+	}
+
+out:
+	if (zap != NULL)
+		zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
+	return (err);
+}
+
+/*
+ * Validate the name and value size, then add the entry via
+ * fzap_add_cd() with ZAP_NEED_CD as the collision differentiator.
+ */
+int
+fzap_add(zap_name_t *zn,
+    uint64_t integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	int err = fzap_check(zn, integer_size, num_integers);
+	if (err != 0)
+		return (err);
+
+	return (fzap_add_cd(zn, integer_size, num_integers,
+	    val, ZAP_NEED_CD, tx));
+}
+
+/*
+ * Set the value of the entry named by zn, creating the entry if it does
+ * not exist.  An EAGAIN from the create or update step expands the leaf
+ * and retries.
+ *
+ * NOTE(review): same concern as fzap_add_cd() -- if zap_expand_leaf()
+ * fails after releasing l, l is released again below.
+ */
+int
+fzap_update(zap_name_t *zn,
+    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
+{
+	zap_leaf_t *l;
+	int err, create;
+	zap_entry_handle_t zeh;
+	zap_t *zap = zn->zn_zap;
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+	err = fzap_check(zn, integer_size, num_integers);
+	if (err != 0)
+		return (err);
+
+	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
+	if (err != 0)
+		return (err);
+retry:
+	err = zap_leaf_lookup(l, zn, &zeh);
+	create = (err == ENOENT);
+	ASSERT(err == 0 || err == ENOENT);
+
+	if (create) {
+		err = zap_entry_create(l, zn, ZAP_NEED_CD,
+		    integer_size, num_integers, val, &zeh);
+		if (err == 0)
+			zap_increment_num_entries(zap, 1, tx);
+	} else {
+		err = zap_entry_update(&zeh, integer_size, num_integers, val);
+	}
+
+	if (err == EAGAIN) {
+		err = zap_expand_leaf(zn, l, tx, &l);
+		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
+		if (err == 0)
+			goto retry;
+	}
+
+	if (zap != NULL)
+		zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
+	return (err);
+}
+
+/*
+ * Report the integer size and integer count of the entry named by zn.
+ * Either output pointer may be NULL.  Returns the zap_deref_leaf() or
+ * zap_leaf_lookup() error if the entry cannot be found.
+ */
+int
+fzap_length(zap_name_t *zn,
+    uint64_t *integer_size, uint64_t *num_integers)
+{
+	zap_leaf_t *l;
+	int err;
+	zap_entry_handle_t zeh;
+
+	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
+	if (err != 0)
+		return (err);
+	err = zap_leaf_lookup(l, zn, &zeh);
+	if (err != 0)
+		goto out;
+
+	if (integer_size)
+		*integer_size = zeh.zeh_integer_size;
+	if (num_integers)
+		*num_integers = zeh.zeh_num_integers;
+out:
+	zap_put_leaf(l);
+	return (err);
+}
+
+/*
+ * Remove the entry named by zn and decrement the entry count.
+ * Returns the lookup error (and changes nothing) if the entry is absent.
+ */
+int
+fzap_remove(zap_name_t *zn, dmu_tx_t *tx)
+{
+	zap_leaf_t *l;
+	int err;
+	zap_entry_handle_t zeh;
+
+	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &l);
+	if (err != 0)
+		return (err);
+	err = zap_leaf_lookup(l, zn, &zeh);
+	if (err == 0) {
+		zap_entry_remove(&zeh);
+		zap_increment_num_entries(zn->zn_zap, -1, tx);
+	}
+	zap_put_leaf(l);
+	return (err);
+}
+
+/*
+ * Issue a prefetch for the leaf block that zn's hash maps to.
+ * Best effort: a failed pointer-table lookup is silently ignored.
+ */
+void
+fzap_prefetch(zap_name_t *zn)
+{
+	uint64_t idx, blk;
+	zap_t *zap = zn->zn_zap;
+	int bs;
+
+	idx = ZAP_HASH_IDX(zn->zn_hash,
+	    zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+	if (zap_idx_to_blk(zap, idx, &blk) != 0)
+		return;
+	bs = FZAP_BLOCK_SHIFT(zap);
+	dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
+}
+
+/*
+ * Helper functions for consumers.
+ */
+
+/*
+ * Walk zapobj and copy into name the name of the first attribute whose
+ * first integer equals value under mask (mask == 0 compares all bits).
+ *
+ * Returns 0 on a match, otherwise the zap_cursor_retrieve() error that
+ * ended the walk.  The name is copied with strcpy(), so the caller must
+ * supply a buffer large enough for any za_name.
+ */
+int
+zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
+    char *name)
+{
+	zap_cursor_t zc;
+	zap_attribute_t *za;
+	int err;
+
+	if (mask == 0)
+		mask = -1ULL;
+
+	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+	for (zap_cursor_init(&zc, os, zapobj);
+	    (err = zap_cursor_retrieve(&zc, za)) == 0;
+	    zap_cursor_advance(&zc)) {
+		if ((za->za_first_integer & mask) == (value & mask)) {
+			(void) strcpy(name, za->za_name);
+			break;
+		}
+	}
+	zap_cursor_fini(&zc);
+	kmem_free(za, sizeof (zap_attribute_t));
+	return (err);
+}
+
+/*
+ * Add every entry of fromobj to intoobj, keeping each entry's value.
+ * Every source entry must be a single 8-byte integer, else EINVAL.
+ * Returns the first zap_add() error, or 0.
+ *
+ * All exits go through zap_cursor_fini() so that an error in the middle
+ * of the walk does not leak the cursor.
+ */
+int
+zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int err = 0;
+
+	for (zap_cursor_init(&zc, os, fromobj);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (void) zap_cursor_advance(&zc)) {
+		if (za.za_integer_length != 8 || za.za_num_integers != 1) {
+			err = EINVAL;
+			break;
+		}
+		err = zap_add(os, intoobj, za.za_name,
+		    8, 1, &za.za_first_integer, tx);
+		if (err)
+			break;
+	}
+	zap_cursor_fini(&zc);
+	return (err);
+}
+
+/*
+ * Like zap_join(), but every entry added to intoobj gets the caller's
+ * value instead of the source entry's value.
+ *
+ * All exits go through zap_cursor_fini().
+ */
+int
+zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+    uint64_t value, dmu_tx_t *tx)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int err = 0;
+
+	for (zap_cursor_init(&zc, os, fromobj);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (void) zap_cursor_advance(&zc)) {
+		if (za.za_integer_length != 8 || za.za_num_integers != 1) {
+			err = EINVAL;
+			break;
+		}
+		err = zap_add(os, intoobj, za.za_name,
+		    8, 1, &value, tx);
+		if (err)
+			break;
+	}
+	zap_cursor_fini(&zc);
+	return (err);
+}
+
+/*
+ * For every entry of fromobj, add its value to the same-named entry of
+ * intoobj, treating a missing destination entry as 0.
+ * Every source entry must be a single 8-byte integer, else EINVAL.
+ *
+ * All exits go through zap_cursor_fini().
+ */
+int
+zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+    dmu_tx_t *tx)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int err = 0;
+
+	for (zap_cursor_init(&zc, os, fromobj);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (void) zap_cursor_advance(&zc)) {
+		uint64_t delta = 0;
+
+		if (za.za_integer_length != 8 || za.za_num_integers != 1) {
+			err = EINVAL;
+			break;
+		}
+
+		err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta);
+		if (err != 0 && err != ENOENT)
+			break;
+		delta += za.za_first_integer;
+		/* overwrites a possible ENOENT from the lookup above */
+		err = zap_update(os, intoobj, za.za_name, 8, 1, &delta, tx);
+		if (err)
+			break;
+	}
+	zap_cursor_fini(&zc);
+	return (err);
+}
+
+/*
+ * Add an entry whose name is value printed in hex ("%llx") and whose
+ * content is value itself.
+ */
+int
+zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
+	return (zap_add(os, obj, name, 8, 1, &value, tx));
+}
+
+/*
+ * Remove the entry whose name is value printed in hex.
+ */
+int
+zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
+	return (zap_remove(os, obj, name, tx));
+}
+
+/*
+ * Test for the entry whose name is value printed in hex.  Only the
+ * zap_lookup() result is returned; the value read back lands in the
+ * local copy and is discarded.
+ */
+int
+zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
+	return (zap_lookup(os, obj, name, 8, 1, &value));
+}
+
+/*
+ * Add an entry named by key printed in hex, holding value.
+ */
+int
+zap_add_int_key(objset_t *os, uint64_t obj,
+    uint64_t key, uint64_t value, dmu_tx_t *tx)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	return (zap_add(os, obj, name, 8, 1, &value, tx));
+}
+
+/*
+ * Look up the entry named by key printed in hex and store its value in
+ * *valuep.
+ */
+int
+zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	return (zap_lookup(os, obj, name, 8, 1, valuep));
+}
+
+/*
+ * Add delta to the 8-byte value stored under name, treating a missing
+ * entry as 0.  The entry is removed when the result is 0.  A delta of 0
+ * is a no-op that returns 0 without touching the object.
+ */
+int
+zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
+    dmu_tx_t *tx)
+{
+	uint64_t value = 0;
+	int err;
+
+	if (delta == 0)
+		return (0);
+
+	err = zap_lookup(os, obj, name, 8, 1, &value);
+	if (err != 0 && err != ENOENT)
+		return (err);
+	value += delta;
+	if (value == 0)
+		err = zap_remove(os, obj, name, tx);
+	else
+		err = zap_update(os, obj, name, 8, 1, &value, tx);
+	return (err);
+}
+
+/*
+ * zap_increment() on the entry named by key printed in hex.
+ */
+int
+zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+    dmu_tx_t *tx)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	return (zap_increment(os, obj, name, delta, tx));
+}
+
+/*
+ * Routines for iterating over the attributes.
+ */
+
+/*
+ * Retrieve into za the next entry at or after the cursor position
+ * (zc_hash, zc_cd).  Returns ENOENT when there is no such entry, in
+ * which case zc_hash is set to -1ULL.
+ *
+ * The cursor caches its current leaf in zc_leaf.  On return the leaf's
+ * lock has been dropped with rw_exit(); the leaf itself is not released
+ * here.
+ */
+int
+fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
+{
+	int err = ENOENT;
+	zap_entry_handle_t zeh;
+	zap_leaf_t *l;
+
+	/* retrieve the next entry at or after zc_hash/zc_cd */
+	/* if no entry, return ENOENT */
+
+	/* drop the cached leaf if zc_hash no longer falls under its prefix */
+	if (zc->zc_leaf &&
+	    (ZAP_HASH_IDX(zc->zc_hash,
+	    zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) !=
+	    zc->zc_leaf->l_phys->l_hdr.lh_prefix)) {
+		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
+		zap_put_leaf(zc->zc_leaf);
+		zc->zc_leaf = NULL;
+	}
+
+again:
+	if (zc->zc_leaf == NULL) {
+		err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
+		    &zc->zc_leaf);
+		if (err != 0)
+			return (err);
+	} else {
+		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
+	}
+	l = zc->zc_leaf;
+
+	err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);
+
+	if (err == ENOENT) {
+		int prefix_len = l->l_phys->l_hdr.lh_prefix_len;
+
+		if (prefix_len == 0) {
+			/*
+			 * There is no next leaf to move to.  This case is
+			 * tested first because computing the mask below
+			 * would shift a 64-bit value by 64, which is
+			 * undefined.
+			 */
+			zc->zc_hash = -1ULL;
+			zc->zc_cd = 0;
+		} else {
+			/* advance to the first hash past this leaf's range */
+			uint64_t nocare = (1ULL << (64 - prefix_len)) - 1;
+
+			zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
+			zc->zc_cd = 0;
+			if (zc->zc_hash == 0) {
+				/* wrapped around: this was the last leaf */
+				zc->zc_hash = -1ULL;
+			} else {
+				zap_put_leaf(zc->zc_leaf);
+				zc->zc_leaf = NULL;
+				goto again;
+			}
+		}
+	}
+
+	if (err == 0) {
+		zc->zc_hash = zeh.zeh_hash;
+		zc->zc_cd = zeh.zeh_cd;
+		za->za_integer_length = zeh.zeh_integer_size;
+		za->za_num_integers = zeh.zeh_num_integers;
+		if (zeh.zeh_num_integers == 0) {
+			za->za_first_integer = 0;
+		} else {
+			err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer);
+			ASSERT(err == 0 || err == EOVERFLOW);
+		}
+		err = zap_entry_read_name(zap, &zeh,
+		    sizeof (za->za_name), za->za_name);
+		ASSERT(err == 0);
+
+		za->za_normalization_conflict =
+		    zap_entry_normalization_conflict(&zeh,
+		    NULL, za->za_name, zap);
+	}
+	rw_exit(&zc->zc_leaf->l_rwlock);
+	return (err);
+}
+
+/*
+ * Accumulate leaf statistics into zs for the len pointer-table entries
+ * in tbl.  Consecutive entries naming the same block are visited once.
+ * Leaves that cannot be read are skipped.
+ */
+static void
+zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
+{
+	int i, err;
+	uint64_t lastblk = 0;
+
+	/*
+	 * NB: if a leaf has more pointers than an entire ptrtbl block
+	 * can hold, then it'll be accounted for more than once, since
+	 * we won't have lastblk.
+	 */
+	for (i = 0; i < len; i++) {
+		zap_leaf_t *l;
+
+		if (tbl[i] == lastblk)
+			continue;
+		lastblk = tbl[i];
+
+		err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l);
+		if (err == 0) {
+			zap_leaf_stats(zap, l, zs);
+			zap_put_leaf(l);
+		}
+	}
+}
+
+/*
+ * Position the cursor zc on the entry named by zn.
+ *
+ * Returns ENAMETOOLONG if the key exceeds ZAP_MAXNAMELEN, or the error
+ * from zap_deref_leaf() or zap_leaf_lookup().  On failure the cursor is
+ * left untouched and the leaf is released.
+ */
+int
+fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn)
+{
+	int err;
+	zap_leaf_t *l;
+	zap_entry_handle_t zeh;
+
+	if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
+		return (ENAMETOOLONG);
+
+	err = zap_deref_leaf(zc->zc_zap, zn->zn_hash, NULL, RW_READER, &l);
+	if (err != 0)
+		return (err);
+
+	err = zap_leaf_lookup(l, zn, &zeh);
+	if (err != 0) {
+		/* the leaf was not handed to the cursor, so release it */
+		zap_put_leaf(l);
+		return (err);
+	}
+
+	/*
+	 * NOTE(review): the leaf is handed to the cursor still read-locked,
+	 * whereas fzap_cursor_retrieve() takes l_rwlock itself before using
+	 * a cached zc_leaf -- confirm against the caller whether the lock
+	 * should be dropped here.
+	 */
+	zc->zc_leaf = l;
+	zc->zc_hash = zeh.zeh_hash;
+	zc->zc_cd = zeh.zeh_cd;
+
+	return (err);
+}
+
+/*
+ * Fill zs with statistics for a fat zap: the header fields, the
+ * pointer-table fields, and per-leaf statistics gathered by walking the
+ * pointer table.  Pointer-table blocks that cannot be read are skipped.
+ */
+void
+fzap_get_stats(zap_t *zap, zap_stats_t *zs)
+{
+	int bs = FZAP_BLOCK_SHIFT(zap);
+	zs->zs_blocksize = 1ULL << bs;
+
+	/*
+	 * Set zap_phys_t fields
+	 */
+	zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs;
+	zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries;
+	zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk;
+	zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type;
+	zs->zs_magic = zap->zap_f.zap_phys->zap_magic;
+	zs->zs_salt = zap->zap_f.zap_phys->zap_salt;
+
+	/*
+	 * Set zap_ptrtbl fields
+	 */
+	zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
+	zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk;
+	zs->zs_ptrtbl_blks_copied =
+	    zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied;
+	zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk;
+	zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
+	zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
+
+	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
+		/* the ptrtbl is entirely in the header block. */
+		zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
+		    1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
+	} else {
+		int b;
+
+		dmu_prefetch(zap->zap_objset, zap->zap_object,
+		    zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs,
+		    zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs);
+
+		for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
+		    b++) {
+			dmu_buf_t *db;
+			int err;
+
+			err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+			    (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs,
+			    FTAG, &db, DMU_READ_NO_PREFETCH);
+			if (err == 0) {
+				/* 1<<(bs-3) 8-byte entries per block */
+				zap_stats_ptrtbl(zap, db->db_data,
+				    1<<(bs-3), zs);
+				dmu_buf_rele(db, FTAG);
+			}
+		}
+	}
+}
+
+/*
+ * Add to *towrite and *tooverwrite the number of bytes an operation on
+ * the entry named by zn may write.  add is non-zero when the operation
+ * adds an entry.  Returns 0, or the zap_deref_leaf() error.
+ */
+int
+fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
+    uint64_t *tooverwrite)
+{
+	zap_t *zap = zn->zn_zap;
+	zap_leaf_t *l;
+	int err;
+
+	/*
+	 * Account for the header block of the fatzap.
+	 */
+	if (!add && dmu_buf_freeable(zap->zap_dbuf)) {
+		*tooverwrite += zap->zap_dbuf->db_size;
+	} else {
+		*towrite += zap->zap_dbuf->db_size;
+	}
+
+	/*
+	 * Account for the pointer table blocks.
+	 * If we are adding we need to account for the following cases :
+	 * - If the pointer table is embedded, this operation could force an
+	 *   external pointer table.
+	 * - If this already has an external pointer table this operation
+	 *   could extend the table.
+	 */
+	if (add) {
+		if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0)
+			*towrite += zap->zap_dbuf->db_size;
+		else
+			*towrite += (zap->zap_dbuf->db_size * 3);
+	}
+
+	/*
+	 * Now, check if the block containing leaf is freeable
+	 * and account accordingly.
+	 */
+	err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l);
+	if (err != 0) {
+		return (err);
+	}
+
+	if (!add && dmu_buf_freeable(l->l_dbuf)) {
+		*tooverwrite += l->l_dbuf->db_size;
+	} else {
+		/*
+		 * If this is an add operation, the leaf block could split.
+		 * Hence, we need to account for an additional leaf block.
+		 */
+		*towrite += (add ? 2 : 1) * l->l_dbuf->db_size;
+	}
+
+	zap_put_leaf(l);
+	return (0);
+}
diff --git a/uts/common/fs/zfs/zap_leaf.c b/uts/common/fs/zfs/zap_leaf.c
new file mode 100644
index 000000000000..19a795db825b
--- /dev/null
+++ b/uts/common/fs/zfs/zap_leaf.c
@@ -0,0 +1,872 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * The 512-byte leaf is broken into 32 16-byte chunks.
+ * chunk number n means l_chunk[n], even though the header precedes it.
+ * the names are stored null-terminated.
+ */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/dmu.h>
+#include <sys/zfs_context.h>
+#include <sys/fs/zfs.h>
+#include <sys/zap.h>
+#include <sys/zap_impl.h>
+#include <sys/zap_leaf.h>
+#include <sys/arc.h>
+
+/* forward declaration; the definition appears later in this file */
+static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
+
+#define	CHAIN_END 0xffff /* end of the chunk chain */
+
+/* half the (current) minimum block size */
+#define	MAX_ARRAY_BYTES (8<<10)
+
+/*
+ * Index into the leaf's hash table for hash h: the hash is shifted right
+ * by 64 minus the leaf's hash shift minus its prefix length, then masked
+ * to the table size.
+ */
+#define	LEAF_HASH(l, h) \
+	((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
+	((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))
+
+/* address of the hash-table slot (chain head) for hash h */
+#define	LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
+
+
+/*
+ * Fill the n bytes starting at a with the byte value c, one byte at a
+ * time.
+ */
+static void
+zap_memset(void *a, int c, size_t n)
+{
+	char *cp = a;
+	char *cpend = cp + n;
+
+	while (cp < cpend)
+		*cp++ = c;
+}
+
+/*
+ * Store value at addr as an unsigned integer of len bytes.
+ * len must be 1, 2, 4 or 8; any other length trips the ASSERT.
+ */
+static void
+stv(int len, void *addr, uint64_t value)
+{
+	switch (len) {
+	case 1:
+		*(uint8_t *)addr = value;
+		return;
+	case 2:
+		*(uint16_t *)addr = value;
+		return;
+	case 4:
+		*(uint32_t *)addr = value;
+		return;
+	case 8:
+		*(uint64_t *)addr = value;
+		return;
+	}
+	ASSERT(!"bad int len");
+}
+
+/*
+ * Load an unsigned integer of len bytes from addr and return it widened
+ * to 64 bits.  len must be 1, 2, 4 or 8; any other length trips the
+ * ASSERT and, if assertions are compiled out, yields a recognizable
+ * garbage value.
+ */
+static uint64_t
+ldv(int len, const void *addr)
+{
+	switch (len) {
+	case 1:
+		return (*(uint8_t *)addr);
+	case 2:
+		return (*(uint16_t *)addr);
+	case 4:
+		return (*(uint32_t *)addr);
+	case 8:
+		return (*(uint64_t *)addr);
+	}
+	ASSERT(!"bad int len");
+	return (0xFEEDFACEDEADBEEFULL);
+}
+
+/*
+ * Byte-swap a leaf block of size bytes in place: the header fields, the
+ * hash table, and then each chunk according to its type.
+ */
+void
+zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
+{
+	int i;
+	/* temporary leaf so the ZAP_LEAF_* macros can be applied to buf */
+	zap_leaf_t l;
+	l.l_bs = highbit(size)-1;
+	l.l_phys = buf;
+
+	buf->l_hdr.lh_block_type = 	BSWAP_64(buf->l_hdr.lh_block_type);
+	buf->l_hdr.lh_prefix = 		BSWAP_64(buf->l_hdr.lh_prefix);
+	buf->l_hdr.lh_magic = 		BSWAP_32(buf->l_hdr.lh_magic);
+	buf->l_hdr.lh_nfree = 		BSWAP_16(buf->l_hdr.lh_nfree);
+	buf->l_hdr.lh_nentries = 	BSWAP_16(buf->l_hdr.lh_nentries);
+	buf->l_hdr.lh_prefix_len = 	BSWAP_16(buf->l_hdr.lh_prefix_len);
+	buf->l_hdr.lh_freelist = 	BSWAP_16(buf->l_hdr.lh_freelist);
+
+	for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
+		buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
+
+	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
+		zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
+		struct zap_leaf_entry *le;
+
+		/* the chunk type is read before the chunk is swapped */
+		switch (lc->l_free.lf_type) {
+		case ZAP_CHUNK_ENTRY:
+			le = &lc->l_entry;
+
+			le->le_type =		BSWAP_8(le->le_type);
+			le->le_value_intlen =	BSWAP_8(le->le_value_intlen);
+			le->le_next =		BSWAP_16(le->le_next);
+			le->le_name_chunk =	BSWAP_16(le->le_name_chunk);
+			le->le_name_numints =	BSWAP_16(le->le_name_numints);
+			le->le_value_chunk =	BSWAP_16(le->le_value_chunk);
+			le->le_value_numints =	BSWAP_16(le->le_value_numints);
+			le->le_cd =		BSWAP_32(le->le_cd);
+			le->le_hash =		BSWAP_64(le->le_hash);
+			break;
+		case ZAP_CHUNK_FREE:
+			lc->l_free.lf_type =	BSWAP_8(lc->l_free.lf_type);
+			lc->l_free.lf_next =	BSWAP_16(lc->l_free.lf_next);
+			break;
+		case ZAP_CHUNK_ARRAY:
+			lc->l_array.la_type =	BSWAP_8(lc->l_array.la_type);
+			lc->l_array.la_next =	BSWAP_16(lc->l_array.la_next);
+			/* la_array doesn't need swapping */
+			break;
+		default:
+			ASSERT(!"bad leaf type");
+		}
+	}
+}
+
+/*
+ * Initialize a leaf as empty: zero the header, mark every hash chain
+ * empty, thread all chunks onto the free list, and stamp the block type
+ * and magic.  When sort is set the leaf is flagged ZLF_ENTRIES_CDSORTED.
+ */
+void
+zap_leaf_init(zap_leaf_t *l, boolean_t sort)
+{
+	int i;
+
+	l->l_bs = highbit(l->l_dbuf->db_size)-1;
+	zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
+	/* byte-fills the 2-byte hash slots; only the low byte of c is used */
+	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
+	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
+		ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
+		ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
+	}
+	/* terminate the free list at the last chunk */
+	ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
+	l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
+	l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
+	l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
+	if (sort)
+		l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
+}
+
+/*
+ * Routines which manipulate leaf chunks (l_chunk[]).
+ */
+
+/*
+ * Take the chunk at the head of the leaf's free list and return its
+ * index.  The caller must ensure at least one chunk is free
+ * (lh_nfree > 0); the returned chunk still carries type ZAP_CHUNK_FREE
+ * until the caller retypes it.
+ */
+static uint16_t
+zap_leaf_chunk_alloc(zap_leaf_t *l)
+{
+	int chunk;
+
+	ASSERT(l->l_phys->l_hdr.lh_nfree > 0);
+
+	chunk = l->l_phys->l_hdr.lh_freelist;
+	ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
+	ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
+
+	/* unlink the head of the free list */
+	l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
+
+	l->l_phys->l_hdr.lh_nfree--;
+
+	return (chunk);
+}
+
+/*
+ * Return chunk to the leaf's free list.  The chunk must currently be in
+ * use (not already ZAP_CHUNK_FREE).  It is pushed onto the head of the
+ * free list and its padding is zeroed.
+ */
+static void
+zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
+{
+	struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
+	ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
+	ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
+	ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
+
+	zlf->lf_type = ZAP_CHUNK_FREE;
+	zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
+	bzero(zlf->lf_pad, sizeof (zlf->lf_pad));	/* help it to compress */
+	l->l_phys->l_hdr.lh_freelist = chunk;
+
+	l->l_phys->l_hdr.lh_nfree++;
+}
+
+/*
+ * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
+ */ + +static uint16_t +zap_leaf_array_create(zap_leaf_t *l, const char *buf, + int integer_size, int num_integers) +{ + uint16_t chunk_head; + uint16_t *chunkp = &chunk_head; + int byten = 0; + uint64_t value; + int shift = (integer_size-1)*8; + int len = num_integers; + + ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); + + while (len > 0) { + uint16_t chunk = zap_leaf_chunk_alloc(l); + struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; + int i; + + la->la_type = ZAP_CHUNK_ARRAY; + for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { + if (byten == 0) + value = ldv(integer_size, buf); + la->la_array[i] = value >> shift; + value <<= 8; + if (++byten == integer_size) { + byten = 0; + buf += integer_size; + if (--len == 0) + break; + } + } + + *chunkp = chunk; + chunkp = &la->la_next; + } + *chunkp = CHAIN_END; + + return (chunk_head); +} + +static void +zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp) +{ + uint16_t chunk = *chunkp; + + *chunkp = CHAIN_END; + + while (chunk != CHAIN_END) { + int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; + ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, + ZAP_CHUNK_ARRAY); + zap_leaf_chunk_free(l, chunk); + chunk = nextchunk; + } +} + +/* array_len and buf_len are in integers, not bytes */ +static void +zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, + int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, + void *buf) +{ + int len = MIN(array_len, buf_len); + int byten = 0; + uint64_t value = 0; + char *p = buf; + + ASSERT3U(array_int_len, <=, buf_int_len); + + /* Fast path for one 8-byte integer */ + if (array_int_len == 8 && buf_int_len == 8 && len == 1) { + struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; + uint8_t *ip = la->la_array; + uint64_t *buf64 = buf; + + *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | + (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | + (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | + (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; + 
return; + } + + /* Fast path for an array of 1-byte integers (eg. the entry name) */ + if (array_int_len == 1 && buf_int_len == 1 && + buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { + while (chunk != CHAIN_END) { + struct zap_leaf_array *la = + &ZAP_LEAF_CHUNK(l, chunk).l_array; + bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES); + p += ZAP_LEAF_ARRAY_BYTES; + chunk = la->la_next; + } + return; + } + + while (len > 0) { + struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; + int i; + + ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); + for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { + value = (value << 8) | la->la_array[i]; + byten++; + if (byten == array_int_len) { + stv(buf_int_len, p, value); + byten = 0; + len--; + if (len == 0) + return; + p += buf_int_len; + } + } + chunk = la->la_next; + } +} + +static boolean_t +zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, + int chunk, int array_numints) +{ + int bseen = 0; + + if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { + uint64_t *thiskey; + boolean_t match; + + ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); + thiskey = kmem_alloc(array_numints * sizeof (*thiskey), + KM_SLEEP); + + zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, + sizeof (*thiskey), array_numints, thiskey); + match = bcmp(thiskey, zn->zn_key_orig, + array_numints * sizeof (*thiskey)) == 0; + kmem_free(thiskey, array_numints * sizeof (*thiskey)); + return (match); + } + + ASSERT(zn->zn_key_intlen == 1); + if (zn->zn_matchtype == MT_FIRST) { + char *thisname = kmem_alloc(array_numints, KM_SLEEP); + boolean_t match; + + zap_leaf_array_read(l, chunk, sizeof (char), array_numints, + sizeof (char), array_numints, thisname); + match = zap_match(zn, thisname); + kmem_free(thisname, array_numints); + return (match); + } + + /* + * Fast path for exact matching. + * First check that the lengths match, so that we don't read + * past the end of the zn_key_orig array. 
+ */ + if (array_numints != zn->zn_key_orig_numints) + return (B_FALSE); + while (bseen < array_numints) { + struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; + int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES); + ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); + if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread)) + break; + chunk = la->la_next; + bseen += toread; + } + return (bseen == array_numints); +} + +/* + * Routines which manipulate leaf entries. + */ + +int +zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) +{ + uint16_t *chunkp; + struct zap_leaf_entry *le; + + ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); + +again: + for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); + *chunkp != CHAIN_END; chunkp = &le->le_next) { + uint16_t chunk = *chunkp; + le = ZAP_LEAF_ENTRY(l, chunk); + + ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); + ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); + + if (le->le_hash != zn->zn_hash) + continue; + + /* + * NB: the entry chain is always sorted by cd on + * normalized zap objects, so this will find the + * lowest-cd match for MT_FIRST. + */ + ASSERT(zn->zn_matchtype == MT_EXACT || + (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); + if (zap_leaf_array_match(l, zn, le->le_name_chunk, + le->le_name_numints)) { + zeh->zeh_num_integers = le->le_value_numints; + zeh->zeh_integer_size = le->le_value_intlen; + zeh->zeh_cd = le->le_cd; + zeh->zeh_hash = le->le_hash; + zeh->zeh_chunkp = chunkp; + zeh->zeh_leaf = l; + return (0); + } + } + + /* + * NB: we could of course do this in one pass, but that would be + * a pain. We'll see if MT_BEST is even used much. + */ + if (zn->zn_matchtype == MT_BEST) { + zn->zn_matchtype = MT_FIRST; + goto again; + } + + return (ENOENT); +} + +/* Return (h1,cd1 >= h2,cd2) */ +#define HCD_GTEQ(h1, cd1, h2, cd2) \ + ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? 
TRUE : FALSE)) + +int +zap_leaf_lookup_closest(zap_leaf_t *l, + uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) +{ + uint16_t chunk; + uint64_t besth = -1ULL; + uint32_t bestcd = -1U; + uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; + uint16_t lh; + struct zap_leaf_entry *le; + + ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); + + for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { + for (chunk = l->l_phys->l_hash[lh]; + chunk != CHAIN_END; chunk = le->le_next) { + le = ZAP_LEAF_ENTRY(l, chunk); + + ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); + ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); + + if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && + HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { + ASSERT3U(bestlh, >=, lh); + bestlh = lh; + besth = le->le_hash; + bestcd = le->le_cd; + + zeh->zeh_num_integers = le->le_value_numints; + zeh->zeh_integer_size = le->le_value_intlen; + zeh->zeh_cd = le->le_cd; + zeh->zeh_hash = le->le_hash; + zeh->zeh_fakechunk = chunk; + zeh->zeh_chunkp = &zeh->zeh_fakechunk; + zeh->zeh_leaf = l; + } + } + } + + return (bestcd == -1U ? 
ENOENT : 0); +} + +int +zap_entry_read(const zap_entry_handle_t *zeh, + uint8_t integer_size, uint64_t num_integers, void *buf) +{ + struct zap_leaf_entry *le = + ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); + ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); + + if (le->le_value_intlen > integer_size) + return (EINVAL); + + zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, + le->le_value_intlen, le->le_value_numints, + integer_size, num_integers, buf); + + if (zeh->zeh_num_integers > num_integers) + return (EOVERFLOW); + return (0); + +} + +int +zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, + char *buf) +{ + struct zap_leaf_entry *le = + ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); + ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); + + if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { + zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8, + le->le_name_numints, 8, buflen / 8, buf); + } else { + zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1, + le->le_name_numints, 1, buflen, buf); + } + if (le->le_name_numints > buflen) + return (EOVERFLOW); + return (0); +} + +int +zap_entry_update(zap_entry_handle_t *zeh, + uint8_t integer_size, uint64_t num_integers, const void *buf) +{ + int delta_chunks; + zap_leaf_t *l = zeh->zeh_leaf; + struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp); + + delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - + ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); + + if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) + return (EAGAIN); + + zap_leaf_array_free(l, &le->le_value_chunk); + le->le_value_chunk = + zap_leaf_array_create(l, buf, integer_size, num_integers); + le->le_value_numints = num_integers; + le->le_value_intlen = integer_size; + return (0); +} + +void +zap_entry_remove(zap_entry_handle_t *zeh) +{ + uint16_t entry_chunk; + struct zap_leaf_entry *le; + zap_leaf_t *l = zeh->zeh_leaf; + + ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 
+ + entry_chunk = *zeh->zeh_chunkp; + le = ZAP_LEAF_ENTRY(l, entry_chunk); + ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); + + zap_leaf_array_free(l, &le->le_name_chunk); + zap_leaf_array_free(l, &le->le_value_chunk); + + *zeh->zeh_chunkp = le->le_next; + zap_leaf_chunk_free(l, entry_chunk); + + l->l_phys->l_hdr.lh_nentries--; +} + +int +zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, + uint8_t integer_size, uint64_t num_integers, const void *buf, + zap_entry_handle_t *zeh) +{ + uint16_t chunk; + uint16_t *chunkp; + struct zap_leaf_entry *le; + uint64_t valuelen; + int numchunks; + uint64_t h = zn->zn_hash; + + valuelen = integer_size * num_integers; + + numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * + zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); + if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) + return (E2BIG); + + if (cd == ZAP_NEED_CD) { + /* find the lowest unused cd */ + if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { + cd = 0; + + for (chunk = *LEAF_HASH_ENTPTR(l, h); + chunk != CHAIN_END; chunk = le->le_next) { + le = ZAP_LEAF_ENTRY(l, chunk); + if (le->le_cd > cd) + break; + if (le->le_hash == h) { + ASSERT3U(cd, ==, le->le_cd); + cd++; + } + } + } else { + /* old unsorted format; do it the O(n^2) way */ + for (cd = 0; ; cd++) { + for (chunk = *LEAF_HASH_ENTPTR(l, h); + chunk != CHAIN_END; chunk = le->le_next) { + le = ZAP_LEAF_ENTRY(l, chunk); + if (le->le_hash == h && + le->le_cd == cd) { + break; + } + } + /* If this cd is not in use, we are good. */ + if (chunk == CHAIN_END) + break; + } + } + /* + * We would run out of space in a block before we could + * store enough entries to run out of CD values. 
+ */ + ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); + } + + if (l->l_phys->l_hdr.lh_nfree < numchunks) + return (EAGAIN); + + /* make the entry */ + chunk = zap_leaf_chunk_alloc(l); + le = ZAP_LEAF_ENTRY(l, chunk); + le->le_type = ZAP_CHUNK_ENTRY; + le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig, + zn->zn_key_intlen, zn->zn_key_orig_numints); + le->le_name_numints = zn->zn_key_orig_numints; + le->le_value_chunk = + zap_leaf_array_create(l, buf, integer_size, num_integers); + le->le_value_numints = num_integers; + le->le_value_intlen = integer_size; + le->le_hash = h; + le->le_cd = cd; + + /* link it into the hash chain */ + /* XXX if we did the search above, we could just use that */ + chunkp = zap_leaf_rehash_entry(l, chunk); + + l->l_phys->l_hdr.lh_nentries++; + + zeh->zeh_leaf = l; + zeh->zeh_num_integers = num_integers; + zeh->zeh_integer_size = le->le_value_intlen; + zeh->zeh_cd = le->le_cd; + zeh->zeh_hash = le->le_hash; + zeh->zeh_chunkp = chunkp; + + return (0); +} + +/* + * Determine if there is another entry with the same normalized form. + * For performance purposes, either zn or name must be provided (the + * other can be NULL). Note, there usually won't be any hash + * conflicts, in which case we don't need the concatenated/normalized + * form of the name. But all callers have one of these on hand anyway, + * so might as well take advantage. A cleaner but slower interface + * would accept neither argument, and compute the normalized name as + * needed (using zap_name_alloc(zap_entry_read_name(zeh))). 
+ */ +boolean_t +zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, + const char *name, zap_t *zap) +{ + uint64_t chunk; + struct zap_leaf_entry *le; + boolean_t allocdzn = B_FALSE; + + if (zap->zap_normflags == 0) + return (B_FALSE); + + for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); + chunk != CHAIN_END; chunk = le->le_next) { + le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); + if (le->le_hash != zeh->zeh_hash) + continue; + if (le->le_cd == zeh->zeh_cd) + continue; + + if (zn == NULL) { + zn = zap_name_alloc(zap, name, MT_FIRST); + allocdzn = B_TRUE; + } + if (zap_leaf_array_match(zeh->zeh_leaf, zn, + le->le_name_chunk, le->le_name_numints)) { + if (allocdzn) + zap_name_free(zn); + return (B_TRUE); + } + } + if (allocdzn) + zap_name_free(zn); + return (B_FALSE); +} + +/* + * Routines for transferring entries between leafs. + */ + +static uint16_t * +zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) +{ + struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); + struct zap_leaf_entry *le2; + uint16_t *chunkp; + + /* + * keep the entry chain sorted by cd + * NB: this will not cause problems for unsorted leafs, though + * it is unnecessary there. 
+ */ + for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); + *chunkp != CHAIN_END; chunkp = &le2->le_next) { + le2 = ZAP_LEAF_ENTRY(l, *chunkp); + if (le2->le_cd > le->le_cd) + break; + } + + le->le_next = *chunkp; + *chunkp = entry; + return (chunkp); +} + +static uint16_t +zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) +{ + uint16_t new_chunk; + uint16_t *nchunkp = &new_chunk; + + while (chunk != CHAIN_END) { + uint16_t nchunk = zap_leaf_chunk_alloc(nl); + struct zap_leaf_array *nla = + &ZAP_LEAF_CHUNK(nl, nchunk).l_array; + struct zap_leaf_array *la = + &ZAP_LEAF_CHUNK(l, chunk).l_array; + int nextchunk = la->la_next; + + ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); + ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); + + *nla = *la; /* structure assignment */ + + zap_leaf_chunk_free(l, chunk); + chunk = nextchunk; + *nchunkp = nchunk; + nchunkp = &nla->la_next; + } + *nchunkp = CHAIN_END; + return (new_chunk); +} + +static void +zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) +{ + struct zap_leaf_entry *le, *nle; + uint16_t chunk; + + le = ZAP_LEAF_ENTRY(l, entry); + ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); + + chunk = zap_leaf_chunk_alloc(nl); + nle = ZAP_LEAF_ENTRY(nl, chunk); + *nle = *le; /* structure assignment */ + + (void) zap_leaf_rehash_entry(nl, chunk); + + nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); + nle->le_value_chunk = + zap_leaf_transfer_array(l, le->le_value_chunk, nl); + + zap_leaf_chunk_free(l, entry); + + l->l_phys->l_hdr.lh_nentries--; + nl->l_phys->l_hdr.lh_nentries++; +} + +/* + * Transfer the entries whose hash prefix ends in 1 to the new leaf. 
+ */ +void +zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) +{ + int i; + int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; + + /* set new prefix and prefix_len */ + l->l_phys->l_hdr.lh_prefix <<= 1; + l->l_phys->l_hdr.lh_prefix_len++; + nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1; + nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len; + + /* break existing hash chains */ + zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); + + if (sort) + l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; + + /* + * Transfer entries whose hash bit 'bit' is set to nl; rehash + * the remaining entries + * + * NB: We could find entries via the hashtable instead. That + * would be O(hashents+numents) rather than O(numblks+numents), + * but this accesses memory more sequentially, and when we're + * called, the block is usually pretty full. + */ + for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { + struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); + if (le->le_type != ZAP_CHUNK_ENTRY) + continue; + + if (le->le_hash & (1ULL << bit)) + zap_leaf_transfer_entry(l, i, nl); + else + (void) zap_leaf_rehash_entry(l, i); + } +} + +void +zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) +{ + int i, n; + + n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - + l->l_phys->l_hdr.lh_prefix_len; + n = MIN(n, ZAP_HISTOGRAM_SIZE-1); + zs->zs_leafs_with_2n_pointers[n]++; + + + n = l->l_phys->l_hdr.lh_nentries/5; + n = MIN(n, ZAP_HISTOGRAM_SIZE-1); + zs->zs_blocks_with_n5_entries[n]++; + + n = ((1<<FZAP_BLOCK_SHIFT(zap)) - + l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / + (1<<FZAP_BLOCK_SHIFT(zap)); + n = MIN(n, ZAP_HISTOGRAM_SIZE-1); + zs->zs_blocks_n_tenths_full[n]++; + + for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { + int nentries = 0; + int chunk = l->l_phys->l_hash[i]; + + while (chunk != CHAIN_END) { + struct zap_leaf_entry *le = + ZAP_LEAF_ENTRY(l, chunk); + + n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) + 
+ ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * + le->le_value_intlen); + n = MIN(n, ZAP_HISTOGRAM_SIZE-1); + zs->zs_entries_using_n_chunks[n]++; + + chunk = le->le_next; + nentries++; + } + + n = nentries; + n = MIN(n, ZAP_HISTOGRAM_SIZE-1); + zs->zs_buckets_with_n_entries[n]++; + } +} diff --git a/uts/common/fs/zfs/zap_micro.c b/uts/common/fs/zfs/zap_micro.c new file mode 100644 index 000000000000..2d89c20c47d7 --- /dev/null +++ b/uts/common/fs/zfs/zap_micro.c @@ -0,0 +1,1455 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/dmu.h> +#include <sys/zfs_context.h> +#include <sys/zap.h> +#include <sys/refcount.h> +#include <sys/zap_impl.h> +#include <sys/zap_leaf.h> +#include <sys/avl.h> +#include <sys/arc.h> + +#ifdef _KERNEL +#include <sys/sunddi.h> +#endif + +static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); + +uint64_t +zap_getflags(zap_t *zap) +{ + if (zap->zap_ismicro) + return (0); + return (zap->zap_u.zap_fat.zap_phys->zap_flags); +} + +int +zap_hashbits(zap_t *zap) +{ + if (zap_getflags(zap) & ZAP_FLAG_HASH64) + return (48); + else + return (28); +} + +uint32_t +zap_maxcd(zap_t *zap) +{ + if (zap_getflags(zap) & ZAP_FLAG_HASH64) + return ((1<<16)-1); + else + return (-1U); +} + +static uint64_t +zap_hash(zap_name_t *zn) +{ + zap_t *zap = zn->zn_zap; + uint64_t h = 0; + + if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { + ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); + h = *(uint64_t *)zn->zn_key_orig; + } else { + h = zap->zap_salt; + ASSERT(h != 0); + ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); + + if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { + int i; + const uint64_t *wp = zn->zn_key_norm; + + ASSERT(zn->zn_key_intlen == 8); + for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { + int j; + uint64_t word = *wp; + + for (j = 0; j < zn->zn_key_intlen; j++) { + h = (h >> 8) ^ + zfs_crc64_table[(h ^ word) & 0xFF]; + word >>= NBBY; + } + } + } else { + int i, len; + const uint8_t *cp = zn->zn_key_norm; + + /* + * We previously stored the terminating null on + * disk, but didn't hash it, so we need to + * continue to not hash it. (The + * zn_key_*_numints includes the terminating + * null for non-binary keys.) + */ + len = zn->zn_key_norm_numints - 1; + + ASSERT(zn->zn_key_intlen == 1); + for (i = 0; i < len; cp++, i++) { + h = (h >> 8) ^ + zfs_crc64_table[(h ^ *cp) & 0xFF]; + } + } + } + /* + * Don't use all 64 bits, since we need some in the cookie for + * the collision differentiator. 
We MUST use the high bits, + * since those are the ones that we first pay attention to when + * chosing the bucket. + */ + h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); + + return (h); +} + +static int +zap_normalize(zap_t *zap, const char *name, char *namenorm) +{ + size_t inlen, outlen; + int err; + + ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); + + inlen = strlen(name) + 1; + outlen = ZAP_MAXNAMELEN; + + err = 0; + (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, + zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | + U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); + + return (err); +} + +boolean_t +zap_match(zap_name_t *zn, const char *matchname) +{ + ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); + + if (zn->zn_matchtype == MT_FIRST) { + char norm[ZAP_MAXNAMELEN]; + + if (zap_normalize(zn->zn_zap, matchname, norm) != 0) + return (B_FALSE); + + return (strcmp(zn->zn_key_norm, norm) == 0); + } else { + /* MT_BEST or MT_EXACT */ + return (strcmp(zn->zn_key_orig, matchname) == 0); + } +} + +void +zap_name_free(zap_name_t *zn) +{ + kmem_free(zn, sizeof (zap_name_t)); +} + +zap_name_t * +zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) +{ + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + + zn->zn_zap = zap; + zn->zn_key_intlen = sizeof (*key); + zn->zn_key_orig = key; + zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; + zn->zn_matchtype = mt; + if (zap->zap_normflags) { + if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { + zap_name_free(zn); + return (NULL); + } + zn->zn_key_norm = zn->zn_normbuf; + zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; + } else { + if (mt != MT_EXACT) { + zap_name_free(zn); + return (NULL); + } + zn->zn_key_norm = zn->zn_key_orig; + zn->zn_key_norm_numints = zn->zn_key_orig_numints; + } + + zn->zn_hash = zap_hash(zn); + return (zn); +} + +zap_name_t * +zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) +{ + zap_name_t *zn = kmem_alloc(sizeof 
(zap_name_t), KM_SLEEP); + + ASSERT(zap->zap_normflags == 0); + zn->zn_zap = zap; + zn->zn_key_intlen = sizeof (*key); + zn->zn_key_orig = zn->zn_key_norm = key; + zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; + zn->zn_matchtype = MT_EXACT; + + zn->zn_hash = zap_hash(zn); + return (zn); +} + +static void +mzap_byteswap(mzap_phys_t *buf, size_t size) +{ + int i, max; + buf->mz_block_type = BSWAP_64(buf->mz_block_type); + buf->mz_salt = BSWAP_64(buf->mz_salt); + buf->mz_normflags = BSWAP_64(buf->mz_normflags); + max = (size / MZAP_ENT_LEN) - 1; + for (i = 0; i < max; i++) { + buf->mz_chunk[i].mze_value = + BSWAP_64(buf->mz_chunk[i].mze_value); + buf->mz_chunk[i].mze_cd = + BSWAP_32(buf->mz_chunk[i].mze_cd); + } +} + +void +zap_byteswap(void *buf, size_t size) +{ + uint64_t block_type; + + block_type = *(uint64_t *)buf; + + if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { + /* ASSERT(magic == ZAP_LEAF_MAGIC); */ + mzap_byteswap(buf, size); + } else { + fzap_byteswap(buf, size); + } +} + +static int +mze_compare(const void *arg1, const void *arg2) +{ + const mzap_ent_t *mze1 = arg1; + const mzap_ent_t *mze2 = arg2; + + if (mze1->mze_hash > mze2->mze_hash) + return (+1); + if (mze1->mze_hash < mze2->mze_hash) + return (-1); + if (mze1->mze_cd > mze2->mze_cd) + return (+1); + if (mze1->mze_cd < mze2->mze_cd) + return (-1); + return (0); +} + +static void +mze_insert(zap_t *zap, int chunkid, uint64_t hash) +{ + mzap_ent_t *mze; + + ASSERT(zap->zap_ismicro); + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + + mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); + mze->mze_chunkid = chunkid; + mze->mze_hash = hash; + mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; + ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); + avl_add(&zap->zap_m.zap_avl, mze); +} + +static mzap_ent_t * +mze_find(zap_name_t *zn) +{ + mzap_ent_t mze_tofind; + mzap_ent_t *mze; + avl_index_t idx; + avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; + + ASSERT(zn->zn_zap->zap_ismicro); + 
ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); + + mze_tofind.mze_hash = zn->zn_hash; + mze_tofind.mze_cd = 0; + +again: + mze = avl_find(avl, &mze_tofind, &idx); + if (mze == NULL) + mze = avl_nearest(avl, idx, AVL_AFTER); + for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { + ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); + if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) + return (mze); + } + if (zn->zn_matchtype == MT_BEST) { + zn->zn_matchtype = MT_FIRST; + goto again; + } + return (NULL); +} + +static uint32_t +mze_find_unused_cd(zap_t *zap, uint64_t hash) +{ + mzap_ent_t mze_tofind; + mzap_ent_t *mze; + avl_index_t idx; + avl_tree_t *avl = &zap->zap_m.zap_avl; + uint32_t cd; + + ASSERT(zap->zap_ismicro); + ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + + mze_tofind.mze_hash = hash; + mze_tofind.mze_cd = 0; + + cd = 0; + for (mze = avl_find(avl, &mze_tofind, &idx); + mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { + if (mze->mze_cd != cd) + break; + cd++; + } + + return (cd); +} + +static void +mze_remove(zap_t *zap, mzap_ent_t *mze) +{ + ASSERT(zap->zap_ismicro); + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + + avl_remove(&zap->zap_m.zap_avl, mze); + kmem_free(mze, sizeof (mzap_ent_t)); +} + +static void +mze_destroy(zap_t *zap) +{ + mzap_ent_t *mze; + void *avlcookie = NULL; + + while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) + kmem_free(mze, sizeof (mzap_ent_t)); + avl_destroy(&zap->zap_m.zap_avl); +} + +static zap_t * +mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) +{ + zap_t *winner; + zap_t *zap; + int i; + + ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); + + zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); + rw_init(&zap->zap_rwlock, 0, 0, 0); + rw_enter(&zap->zap_rwlock, RW_WRITER); + zap->zap_objset = os; + zap->zap_object = obj; + zap->zap_dbuf = db; + + if (*(uint64_t *)db->db_data != ZBT_MICRO) { + mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); + 
zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; + } else { + zap->zap_ismicro = TRUE; + } + + /* + * Make sure that zap_ismicro is set before we let others see + * it, because zap_lockdir() checks zap_ismicro without the lock + * held. + */ + winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); + + if (winner != NULL) { + rw_exit(&zap->zap_rwlock); + rw_destroy(&zap->zap_rwlock); + if (!zap->zap_ismicro) + mutex_destroy(&zap->zap_f.zap_num_entries_mtx); + kmem_free(zap, sizeof (zap_t)); + return (winner); + } + + if (zap->zap_ismicro) { + zap->zap_salt = zap->zap_m.zap_phys->mz_salt; + zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; + zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; + avl_create(&zap->zap_m.zap_avl, mze_compare, + sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); + + for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { + mzap_ent_phys_t *mze = + &zap->zap_m.zap_phys->mz_chunk[i]; + if (mze->mze_name[0]) { + zap_name_t *zn; + + zap->zap_m.zap_num_entries++; + zn = zap_name_alloc(zap, mze->mze_name, + MT_EXACT); + mze_insert(zap, i, zn->zn_hash); + zap_name_free(zn); + } + } + } else { + zap->zap_salt = zap->zap_f.zap_phys->zap_salt; + zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; + + ASSERT3U(sizeof (struct zap_leaf_header), ==, + 2*ZAP_LEAF_CHUNKSIZE); + + /* + * The embedded pointer table should not overlap the + * other members. 
+ */ + ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, + &zap->zap_f.zap_phys->zap_salt); + + /* + * The embedded pointer table should end at the end of + * the block + */ + ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, + 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - + (uintptr_t)zap->zap_f.zap_phys, ==, + zap->zap_dbuf->db_size); + } + rw_exit(&zap->zap_rwlock); + return (zap); +} + +int +zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, + krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) +{ + zap_t *zap; + dmu_buf_t *db; + krw_t lt; + int err; + + *zapp = NULL; + + err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); + if (err) + return (err); + +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(db, &doi); + ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); + } +#endif + + zap = dmu_buf_get_user(db); + if (zap == NULL) + zap = mzap_open(os, obj, db); + + /* + * We're checking zap_ismicro without the lock held, in order to + * tell what type of lock we want. Once we have some sort of + * lock, see if it really is the right type. In practice this + * can only be different if it was upgraded from micro to fat, + * and micro wanted WRITER but fat only needs READER. + */ + lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; + rw_enter(&zap->zap_rwlock, lt); + if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { + /* it was upgraded, now we only need reader */ + ASSERT(lt == RW_WRITER); + ASSERT(RW_READER == + (!zap->zap_ismicro && fatreader) ? 
RW_READER : lti); + rw_downgrade(&zap->zap_rwlock); + lt = RW_READER; + } + + zap->zap_objset = os; + + if (lt == RW_WRITER) + dmu_buf_will_dirty(db, tx); + + ASSERT3P(zap->zap_dbuf, ==, db); + + ASSERT(!zap->zap_ismicro || + zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); + if (zap->zap_ismicro && tx && adding && + zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { + uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; + if (newsz > MZAP_MAX_BLKSZ) { + dprintf("upgrading obj %llu: num_entries=%u\n", + obj, zap->zap_m.zap_num_entries); + *zapp = zap; + return (mzap_upgrade(zapp, tx, 0)); + } + err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); + ASSERT3U(err, ==, 0); + zap->zap_m.zap_num_chunks = + db->db_size / MZAP_ENT_LEN - 1; + } + + *zapp = zap; + return (0); +} + +void +zap_unlockdir(zap_t *zap) +{ + rw_exit(&zap->zap_rwlock); + dmu_buf_rele(zap->zap_dbuf, NULL); +} + +static int +mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) +{ + mzap_phys_t *mzp; + int i, sz, nchunks; + int err = 0; + zap_t *zap = *zapp; + + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + + sz = zap->zap_dbuf->db_size; + mzp = kmem_alloc(sz, KM_SLEEP); + bcopy(zap->zap_dbuf->db_data, mzp, sz); + nchunks = zap->zap_m.zap_num_chunks; + + if (!flags) { + err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, + 1ULL << fzap_default_block_shift, 0, tx); + if (err) { + kmem_free(mzp, sz); + return (err); + } + } + + dprintf("upgrading obj=%llu with %u chunks\n", + zap->zap_object, nchunks); + /* XXX destroy the avl later, so we can use the stored hash value */ + mze_destroy(zap); + + fzap_upgrade(zap, tx, flags); + + for (i = 0; i < nchunks; i++) { + mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; + zap_name_t *zn; + if (mze->mze_name[0] == 0) + continue; + dprintf("adding %s=%llu\n", + mze->mze_name, mze->mze_value); + zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); + err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); + zap = zn->zn_zap; /* 
fzap_add_cd() may change zap */ + zap_name_free(zn); + if (err) + break; + } + kmem_free(mzp, sz); + *zapp = zap; + return (err); +} + +static void +mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, + dmu_tx_t *tx) +{ + dmu_buf_t *db; + mzap_phys_t *zp; + + VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); + +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(db, &doi); + ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); + } +#endif + + dmu_buf_will_dirty(db, tx); + zp = db->db_data; + zp->mz_block_type = ZBT_MICRO; + zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; + zp->mz_normflags = normflags; + dmu_buf_rele(db, FTAG); + + if (flags != 0) { + zap_t *zap; + /* Only fat zap supports flags; upgrade immediately. */ + VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, + B_FALSE, B_FALSE, &zap)); + VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); + zap_unlockdir(zap); + } +} + +int +zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + return (zap_create_claim_norm(os, obj, + 0, ot, bonustype, bonuslen, tx)); +} + +int +zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, + dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + int err; + + err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); + if (err != 0) + return (err); + mzap_create_impl(os, obj, normflags, 0, tx); + return (0); +} + +uint64_t +zap_create(objset_t *os, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); +} + +uint64_t +zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); + + mzap_create_impl(os, obj, normflags, 0, tx); + return (obj); 
+} + +uint64_t +zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); + + ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && + leaf_blockshift <= SPA_MAXBLOCKSHIFT && + indirect_blockshift >= SPA_MINBLOCKSHIFT && + indirect_blockshift <= SPA_MAXBLOCKSHIFT); + + VERIFY(dmu_object_set_blocksize(os, obj, + 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); + + mzap_create_impl(os, obj, normflags, flags, tx); + return (obj); +} + +int +zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) +{ + /* + * dmu_object_free will free the object number and free the + * data. Freeing the data will cause our pageout function to be + * called, which will destroy our data (zap_leaf_t's and zap_t). + */ + + return (dmu_object_free(os, zapobj, tx)); +} + +_NOTE(ARGSUSED(0)) +void +zap_evict(dmu_buf_t *db, void *vzap) +{ + zap_t *zap = vzap; + + rw_destroy(&zap->zap_rwlock); + + if (zap->zap_ismicro) + mze_destroy(zap); + else + mutex_destroy(&zap->zap_f.zap_num_entries_mtx); + + kmem_free(zap, sizeof (zap_t)); +} + +int +zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) +{ + zap_t *zap; + int err; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + if (!zap->zap_ismicro) { + err = fzap_count(zap, count); + } else { + *count = zap->zap_m.zap_num_entries; + } + zap_unlockdir(zap); + return (err); +} + +/* + * zn may be NULL; if not specified, it will be computed if needed. + * See also the comment above zap_entry_normalization_conflict(). 
+ */ +static boolean_t +mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) +{ + mzap_ent_t *other; + int direction = AVL_BEFORE; + boolean_t allocdzn = B_FALSE; + + if (zap->zap_normflags == 0) + return (B_FALSE); + +again: + for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); + other && other->mze_hash == mze->mze_hash; + other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { + + if (zn == NULL) { + zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, + MT_FIRST); + allocdzn = B_TRUE; + } + if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { + if (allocdzn) + zap_name_free(zn); + return (B_TRUE); + } + } + + if (direction == AVL_BEFORE) { + direction = AVL_AFTER; + goto again; + } + + if (allocdzn) + zap_name_free(zn); + return (B_FALSE); +} + +/* + * Routines for manipulating attributes. + */ + +int +zap_lookup(objset_t *os, uint64_t zapobj, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf) +{ + return (zap_lookup_norm(os, zapobj, name, integer_size, + num_integers, buf, MT_EXACT, NULL, 0, NULL)); +} + +int +zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf, + matchtype_t mt, char *realname, int rn_len, + boolean_t *ncp) +{ + zap_t *zap; + int err; + mzap_ent_t *mze; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc(zap, name, mt); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + + if (!zap->zap_ismicro) { + err = fzap_lookup(zn, integer_size, num_integers, buf, + realname, rn_len, ncp); + } else { + mze = mze_find(zn); + if (mze == NULL) { + err = ENOENT; + } else { + if (num_integers < 1) { + err = EOVERFLOW; + } else if (integer_size != 8) { + err = EINVAL; + } else { + *(uint64_t *)buf = + MZE_PHYS(zap, mze)->mze_value; + (void) strlcpy(realname, + MZE_PHYS(zap, mze)->mze_name, rn_len); + if (ncp) { + *ncp = 
mzap_normalization_conflict(zap, + zn, mze); + } + } + } + } + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + + fzap_prefetch(zn); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + + err = fzap_lookup(zn, integer_size, num_integers, buf, + NULL, 0, NULL); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_contains(objset_t *os, uint64_t zapobj, const char *name) +{ + int err = (zap_lookup_norm(os, zapobj, name, 0, + 0, NULL, MT_EXACT, NULL, 0, NULL)); + if (err == EOVERFLOW || err == EINVAL) + err = 0; /* found, but skipped reading the value */ + return (err); +} + +int +zap_length(objset_t *os, uint64_t zapobj, const char *name, + uint64_t *integer_size, uint64_t *num_integers) +{ + zap_t *zap; + int err; + mzap_ent_t *mze; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc(zap, name, MT_EXACT); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + if (!zap->zap_ismicro) { + err = fzap_length(zn, integer_size, num_integers); + } else { + mze = mze_find(zn); + if (mze == NULL) { + err = ENOENT; + } else { + if (integer_size) + *integer_size = 8; + if 
(num_integers) + *num_integers = 1; + } + } + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, uint64_t *integer_size, uint64_t *num_integers) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_length(zn, integer_size, num_integers); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +static void +mzap_addent(zap_name_t *zn, uint64_t value) +{ + int i; + zap_t *zap = zn->zn_zap; + int start = zap->zap_m.zap_alloc_next; + uint32_t cd; + + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + +#ifdef ZFS_DEBUG + for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { + mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; + ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); + } +#endif + + cd = mze_find_unused_cd(zap, zn->zn_hash); + /* given the limited size of the microzap, this can't happen */ + ASSERT(cd < zap_maxcd(zap)); + +again: + for (i = start; i < zap->zap_m.zap_num_chunks; i++) { + mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; + if (mze->mze_name[0] == 0) { + mze->mze_value = value; + mze->mze_cd = cd; + (void) strcpy(mze->mze_name, zn->zn_key_orig); + zap->zap_m.zap_num_entries++; + zap->zap_m.zap_alloc_next = i+1; + if (zap->zap_m.zap_alloc_next == + zap->zap_m.zap_num_chunks) + zap->zap_m.zap_alloc_next = 0; + mze_insert(zap, i, zn->zn_hash); + return; + } + } + if (start != 0) { + start = 0; + goto again; + } + ASSERT(!"out of entries!"); +} + +int +zap_add(objset_t *os, uint64_t zapobj, const char *key, + int integer_size, uint64_t num_integers, + const void *val, dmu_tx_t *tx) +{ + zap_t *zap; + int err; + mzap_ent_t *mze; + const uint64_t *intval = val; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, tx, 
RW_WRITER, TRUE, TRUE, &zap); + if (err) + return (err); + zn = zap_name_alloc(zap, key, MT_EXACT); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + if (!zap->zap_ismicro) { + err = fzap_add(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + } else if (integer_size != 8 || num_integers != 1 || + strlen(key) >= MZAP_NAME_LEN) { + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_add(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + } else { + mze = mze_find(zn); + if (mze != NULL) { + err = EEXIST; + } else { + mzap_addent(zn, *intval); + } + } + ASSERT(zap == zn->zn_zap); + zap_name_free(zn); + if (zap != NULL) /* may be NULL if fzap_add() failed */ + zap_unlockdir(zap); + return (err); +} + +int +zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, int integer_size, uint64_t num_integers, + const void *val, dmu_tx_t *tx) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_add(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + zap_name_free(zn); + if (zap != NULL) /* may be NULL if fzap_add() failed */ + zap_unlockdir(zap); + return (err); +} + +int +zap_update(objset_t *os, uint64_t zapobj, const char *name, + int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) +{ + zap_t *zap; + mzap_ent_t *mze; + uint64_t oldval; + const uint64_t *intval = val; + zap_name_t *zn; + int err; + +#ifdef ZFS_DEBUG + /* + * If there is an old value, it shouldn't change across the + * lockdir (eg, due to bprewrite's xlation). 
+ */ + if (integer_size == 8 && num_integers == 1) + (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); +#endif + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); + if (err) + return (err); + zn = zap_name_alloc(zap, name, MT_EXACT); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + if (!zap->zap_ismicro) { + err = fzap_update(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_update() may change zap */ + } else if (integer_size != 8 || num_integers != 1 || + strlen(name) >= MZAP_NAME_LEN) { + dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", + zapobj, integer_size, num_integers, name); + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_update(zn, integer_size, num_integers, + val, tx); + zap = zn->zn_zap; /* fzap_update() may change zap */ + } else { + mze = mze_find(zn); + if (mze != NULL) { + ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); + MZE_PHYS(zap, mze)->mze_value = *intval; + } else { + mzap_addent(zn, *intval); + } + } + ASSERT(zap == zn->zn_zap); + zap_name_free(zn); + if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ + zap_unlockdir(zap); + return (err); +} + +int +zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, + int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) +{ + zap_t *zap; + zap_name_t *zn; + int err; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_update(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_update() may change zap */ + zap_name_free(zn); + if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ + zap_unlockdir(zap); + return (err); +} + +int +zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) +{ + return (zap_remove_norm(os, zapobj, name, 
MT_EXACT, tx)); +} + +int +zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, + matchtype_t mt, dmu_tx_t *tx) +{ + zap_t *zap; + int err; + mzap_ent_t *mze; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc(zap, name, mt); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + if (!zap->zap_ismicro) { + err = fzap_remove(zn, tx); + } else { + mze = mze_find(zn); + if (mze == NULL) { + err = ENOENT; + } else { + zap->zap_m.zap_num_entries--; + bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], + sizeof (mzap_ent_phys_t)); + mze_remove(zap, mze); + } + } + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, dmu_tx_t *tx) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_remove(zn, tx); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +/* + * Routines for iterating over the attributes. 
+ */ + +void +zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized) +{ + zc->zc_objset = os; + zc->zc_zap = NULL; + zc->zc_leaf = NULL; + zc->zc_zapobj = zapobj; + zc->zc_serialized = serialized; + zc->zc_hash = 0; + zc->zc_cd = 0; +} + +void +zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) +{ + zap_cursor_init_serialized(zc, os, zapobj, 0); +} + +void +zap_cursor_fini(zap_cursor_t *zc) +{ + if (zc->zc_zap) { + rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); + zap_unlockdir(zc->zc_zap); + zc->zc_zap = NULL; + } + if (zc->zc_leaf) { + rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); + zap_put_leaf(zc->zc_leaf); + zc->zc_leaf = NULL; + } + zc->zc_objset = NULL; +} + +uint64_t +zap_cursor_serialize(zap_cursor_t *zc) +{ + if (zc->zc_hash == -1ULL) + return (-1ULL); + if (zc->zc_zap == NULL) + return (zc->zc_serialized); + ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); + ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); + + /* + * We want to keep the high 32 bits of the cursor zero if we can, so + * that 32-bit programs can access this. So usually use a small + * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits + * of the cursor. + * + * [ collision differentiator | zap_hashbits()-bit hash value ] + */ + return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | + ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); +} + +int +zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) +{ + int err; + avl_index_t idx; + mzap_ent_t mze_tofind; + mzap_ent_t *mze; + + if (zc->zc_hash == -1ULL) + return (ENOENT); + + if (zc->zc_zap == NULL) { + int hb; + err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, + RW_READER, TRUE, FALSE, &zc->zc_zap); + if (err) + return (err); + + /* + * To support zap_cursor_init_serialized, advance, retrieve, + * we must add to the existing zc_cd, which may already + * be 1 due to the zap_cursor_advance. 
+ */ + ASSERT(zc->zc_hash == 0); + hb = zap_hashbits(zc->zc_zap); + zc->zc_hash = zc->zc_serialized << (64 - hb); + zc->zc_cd += zc->zc_serialized >> hb; + if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ + zc->zc_cd = 0; + } else { + rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); + } + if (!zc->zc_zap->zap_ismicro) { + err = fzap_cursor_retrieve(zc->zc_zap, zc, za); + } else { + err = ENOENT; + + mze_tofind.mze_hash = zc->zc_hash; + mze_tofind.mze_cd = zc->zc_cd; + + mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); + if (mze == NULL) { + mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, + idx, AVL_AFTER); + } + if (mze) { + mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); + ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); + za->za_normalization_conflict = + mzap_normalization_conflict(zc->zc_zap, NULL, mze); + za->za_integer_length = 8; + za->za_num_integers = 1; + za->za_first_integer = mzep->mze_value; + (void) strcpy(za->za_name, mzep->mze_name); + zc->zc_hash = mze->mze_hash; + zc->zc_cd = mze->mze_cd; + err = 0; + } else { + zc->zc_hash = -1ULL; + } + } + rw_exit(&zc->zc_zap->zap_rwlock); + return (err); +} + +void +zap_cursor_advance(zap_cursor_t *zc) +{ + if (zc->zc_hash == -1ULL) + return; + zc->zc_cd++; +} + +int +zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) +{ + int err = 0; + mzap_ent_t *mze; + zap_name_t *zn; + + if (zc->zc_zap == NULL) { + err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, + RW_READER, TRUE, FALSE, &zc->zc_zap); + if (err) + return (err); + } else { + rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); + } + + zn = zap_name_alloc(zc->zc_zap, name, mt); + if (zn == NULL) { + rw_exit(&zc->zc_zap->zap_rwlock); + return (ENOTSUP); + } + + if (!zc->zc_zap->zap_ismicro) { + err = fzap_cursor_move_to_key(zc, zn); + } else { + mze = mze_find(zn); + if (mze == NULL) { + err = ENOENT; + goto out; + } + zc->zc_hash = mze->mze_hash; + zc->zc_cd = mze->mze_cd; + } + +out: + zap_name_free(zn); + 
rw_exit(&zc->zc_zap->zap_rwlock); + return (err); +} + +int +zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) +{ + int err; + zap_t *zap; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + + bzero(zs, sizeof (zap_stats_t)); + + if (zap->zap_ismicro) { + zs->zs_blocksize = zap->zap_dbuf->db_size; + zs->zs_num_entries = zap->zap_m.zap_num_entries; + zs->zs_num_blocks = 1; + } else { + fzap_get_stats(zap, zs); + } + zap_unlockdir(zap); + return (0); +} + +int +zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, + uint64_t *towrite, uint64_t *tooverwrite) +{ + zap_t *zap; + int err = 0; + + + /* + * Since, we don't have a name, we cannot figure out which blocks will + * be affected in this operation. So, account for the worst case : + * - 3 blocks overwritten: target leaf, ptrtbl block, header block + * - 4 new blocks written if adding: + * - 2 blocks for possibly split leaves, + * - 2 grown ptrtbl blocks + * + * This also accomodates the case where an add operation to a fairly + * large microzap results in a promotion to fatzap. + */ + if (name == NULL) { + *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; + return (err); + } + + /* + * We lock the zap with adding == FALSE. Because, if we pass + * the actual value of add, it could trigger a mzap_upgrade(). + * At present we are just evaluating the possibility of this operation + * and hence we donot want to trigger an upgrade. + */ + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + + if (!zap->zap_ismicro) { + zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); + if (zn) { + err = fzap_count_write(zn, add, towrite, + tooverwrite); + zap_name_free(zn); + } else { + /* + * We treat this case as similar to (name == NULL) + */ + *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; + } + } else { + /* + * We are here if (name != NULL) and this is a micro-zap. 
+ * We account for the header block depending on whether it + * is freeable. + * + * Incase of an add-operation it is hard to find out + * if this add will promote this microzap to fatzap. + * Hence, we consider the worst case and account for the + * blocks assuming this microzap would be promoted to a + * fatzap. + * + * 1 block overwritten : header block + * 4 new blocks written : 2 new split leaf, 2 grown + * ptrtbl blocks + */ + if (dmu_buf_freeable(zap->zap_dbuf)) + *tooverwrite += SPA_MAXBLOCKSIZE; + else + *towrite += SPA_MAXBLOCKSIZE; + + if (add) { + *towrite += 4 * SPA_MAXBLOCKSIZE; + } + } + + zap_unlockdir(zap); + return (err); +} diff --git a/uts/common/fs/zfs/zfs.conf b/uts/common/fs/zfs/zfs.conf new file mode 100644 index 000000000000..09881909b804 --- /dev/null +++ b/uts/common/fs/zfs/zfs.conf @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +name="zfs" parent="pseudo"; diff --git a/uts/common/fs/zfs/zfs_acl.c b/uts/common/fs/zfs/zfs_acl.c new file mode 100644 index 000000000000..843b5ff06ef4 --- /dev/null +++ b/uts/common/fs/zfs/zfs_acl.c @@ -0,0 +1,2748 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/resource.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/sid.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/errno.h> +#include <sys/unistd.h> +#include <sys/sdt.h> +#include <sys/fs/zfs.h> +#include <sys/mode.h> +#include <sys/policy.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_fuid.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_dir.h> +#include <sys/zfs_vfsops.h> +#include <sys/dmu.h> +#include <sys/dnode.h> +#include <sys/zap.h> +#include <sys/sa.h> +#include "fs/fs_subr.h" +#include <acl/acl_common.h> + +#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE +#define DENY ACE_ACCESS_DENIED_ACE_TYPE +#define MAX_ACE_TYPE ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE +#define MIN_ACE_TYPE ALLOW + +#define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP) +#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \ + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE) +#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \ + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) +#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \ + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) + +#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \ + ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \ + ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \ + ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE) + +#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS) +#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \ + ACE_DELETE|ACE_DELETE_CHILD) +#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS) + +#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) + +#define OKAY_MASK_BITS 
(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) + +#define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE) + +#define RESTRICTED_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER) + +#define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\ + ZFS_ACL_PROTECTED) + +#define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\ + ZFS_ACL_OBJ_ACE) + +#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH) + +static uint16_t +zfs_ace_v0_get_type(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_type); +} + +static uint16_t +zfs_ace_v0_get_flags(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_flags); +} + +static uint32_t +zfs_ace_v0_get_mask(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_access_mask); +} + +static uint64_t +zfs_ace_v0_get_who(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_fuid); +} + +static void +zfs_ace_v0_set_type(void *acep, uint16_t type) +{ + ((zfs_oldace_t *)acep)->z_type = type; +} + +static void +zfs_ace_v0_set_flags(void *acep, uint16_t flags) +{ + ((zfs_oldace_t *)acep)->z_flags = flags; +} + +static void +zfs_ace_v0_set_mask(void *acep, uint32_t mask) +{ + ((zfs_oldace_t *)acep)->z_access_mask = mask; +} + +static void +zfs_ace_v0_set_who(void *acep, uint64_t who) +{ + ((zfs_oldace_t *)acep)->z_fuid = who; +} + +/*ARGSUSED*/ +static size_t +zfs_ace_v0_size(void *acep) +{ + return (sizeof (zfs_oldace_t)); +} + +static size_t +zfs_ace_v0_abstract_size(void) +{ + return (sizeof (zfs_oldace_t)); +} + +static int +zfs_ace_v0_mask_off(void) +{ + return (offsetof(zfs_oldace_t, z_access_mask)); +} + +/*ARGSUSED*/ +static int +zfs_ace_v0_data(void *acep, void **datap) +{ + *datap = NULL; + return (0); +} + +static acl_ops_t zfs_acl_v0_ops = { + zfs_ace_v0_get_mask, + zfs_ace_v0_set_mask, + zfs_ace_v0_get_flags, + zfs_ace_v0_set_flags, + zfs_ace_v0_get_type, + 
zfs_ace_v0_set_type, + zfs_ace_v0_get_who, + zfs_ace_v0_set_who, + zfs_ace_v0_size, + zfs_ace_v0_abstract_size, + zfs_ace_v0_mask_off, + zfs_ace_v0_data +}; + +static uint16_t +zfs_ace_fuid_get_type(void *acep) +{ + return (((zfs_ace_hdr_t *)acep)->z_type); +} + +static uint16_t +zfs_ace_fuid_get_flags(void *acep) +{ + return (((zfs_ace_hdr_t *)acep)->z_flags); +} + +static uint32_t +zfs_ace_fuid_get_mask(void *acep) +{ + return (((zfs_ace_hdr_t *)acep)->z_access_mask); +} + +static uint64_t +zfs_ace_fuid_get_who(void *args) +{ + uint16_t entry_type; + zfs_ace_t *acep = args; + + entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; + + if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE) + return (-1); + return (((zfs_ace_t *)acep)->z_fuid); +} + +static void +zfs_ace_fuid_set_type(void *acep, uint16_t type) +{ + ((zfs_ace_hdr_t *)acep)->z_type = type; +} + +static void +zfs_ace_fuid_set_flags(void *acep, uint16_t flags) +{ + ((zfs_ace_hdr_t *)acep)->z_flags = flags; +} + +static void +zfs_ace_fuid_set_mask(void *acep, uint32_t mask) +{ + ((zfs_ace_hdr_t *)acep)->z_access_mask = mask; +} + +static void +zfs_ace_fuid_set_who(void *arg, uint64_t who) +{ + zfs_ace_t *acep = arg; + + uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; + + if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE) + return; + acep->z_fuid = who; +} + +static size_t +zfs_ace_fuid_size(void *acep) +{ + zfs_ace_hdr_t *zacep = acep; + uint16_t entry_type; + + switch (zacep->z_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + return (sizeof (zfs_object_ace_t)); + case ALLOW: + case DENY: + entry_type = + (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS); + if (entry_type == ACE_OWNER || + entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE) + return (sizeof (zfs_ace_hdr_t)); + 
/*FALLTHROUGH*/ + default: + return (sizeof (zfs_ace_t)); + } +} + +static size_t +zfs_ace_fuid_abstract_size(void) +{ + return (sizeof (zfs_ace_hdr_t)); +} + +static int +zfs_ace_fuid_mask_off(void) +{ + return (offsetof(zfs_ace_hdr_t, z_access_mask)); +} + +static int +zfs_ace_fuid_data(void *acep, void **datap) +{ + zfs_ace_t *zacep = acep; + zfs_object_ace_t *zobjp; + + switch (zacep->z_hdr.z_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + zobjp = acep; + *datap = (caddr_t)zobjp + sizeof (zfs_ace_t); + return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t)); + default: + *datap = NULL; + return (0); + } +} + +static acl_ops_t zfs_acl_fuid_ops = { + zfs_ace_fuid_get_mask, + zfs_ace_fuid_set_mask, + zfs_ace_fuid_get_flags, + zfs_ace_fuid_set_flags, + zfs_ace_fuid_get_type, + zfs_ace_fuid_set_type, + zfs_ace_fuid_get_who, + zfs_ace_fuid_set_who, + zfs_ace_fuid_size, + zfs_ace_fuid_abstract_size, + zfs_ace_fuid_mask_off, + zfs_ace_fuid_data +}; + +/* + * The following three functions are provided for compatibility with + * older ZPL version in order to determine if the file use to have + * an external ACL and what version of ACL previously existed on the + * file. Would really be nice to not need this, sigh. + */ +uint64_t +zfs_external_acl(znode_t *zp) +{ + zfs_acl_phys_t acl_phys; + int error; + + if (zp->z_is_sa) + return (0); + + /* + * Need to deal with a potential + * race where zfs_sa_upgrade could cause + * z_isa_sa to change. + * + * If the lookup fails then the state of z_is_sa should have + * changed. 
+ */ + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + &acl_phys, sizeof (acl_phys))) == 0) + return (acl_phys.z_acl_extern_obj); + else { + /* + * after upgrade the SA_ZPL_ZNODE_ACL should have been + * removed + */ + VERIFY(zp->z_is_sa && error == ENOENT); + return (0); + } +} + +/* + * Determine size of ACL in bytes + * + * This is more complicated than it should be since we have to deal + * with old external ACLs. + */ +static int +zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount, + zfs_acl_phys_t *aclphys) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + uint64_t acl_count; + int size; + int error; + + ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + if (zp->z_is_sa) { + if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), + &size)) != 0) + return (error); + *aclsize = size; + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs), + &acl_count, sizeof (acl_count))) != 0) + return (error); + *aclcount = acl_count; + } else { + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), + aclphys, sizeof (*aclphys))) != 0) + return (error); + + if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) { + *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size); + *aclcount = aclphys->z_acl_size; + } else { + *aclsize = aclphys->z_acl_size; + *aclcount = aclphys->z_acl_count; + } + } + return (0); +} + +int +zfs_znode_acl_version(znode_t *zp) +{ + zfs_acl_phys_t acl_phys; + + if (zp->z_is_sa) + return (ZFS_ACL_VERSION_FUID); + else { + int error; + + /* + * Need to deal with a potential + * race where zfs_sa_upgrade could cause + * z_isa_sa to change. + * + * If the lookup fails then the state of z_is_sa should have + * changed. + */ + if ((error = sa_lookup(zp->z_sa_hdl, + SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + &acl_phys, sizeof (acl_phys))) == 0) + return (acl_phys.z_acl_version); + else { + /* + * After upgrade SA_ZPL_ZNODE_ACL should have + * been removed. 
+ */ + VERIFY(zp->z_is_sa && error == ENOENT); + return (ZFS_ACL_VERSION_FUID); + } + } +} + +static int +zfs_acl_version(int version) +{ + if (version < ZPL_VERSION_FUID) + return (ZFS_ACL_VERSION_INITIAL); + else + return (ZFS_ACL_VERSION_FUID); +} + +static int +zfs_acl_version_zp(znode_t *zp) +{ + return (zfs_acl_version(zp->z_zfsvfs->z_version)); +} + +zfs_acl_t * +zfs_acl_alloc(int vers) +{ + zfs_acl_t *aclp; + + aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP); + list_create(&aclp->z_acl, sizeof (zfs_acl_node_t), + offsetof(zfs_acl_node_t, z_next)); + aclp->z_version = vers; + if (vers == ZFS_ACL_VERSION_FUID) + aclp->z_ops = zfs_acl_fuid_ops; + else + aclp->z_ops = zfs_acl_v0_ops; + return (aclp); +} + +zfs_acl_node_t * +zfs_acl_node_alloc(size_t bytes) +{ + zfs_acl_node_t *aclnode; + + aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); + if (bytes) { + aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); + aclnode->z_allocdata = aclnode->z_acldata; + aclnode->z_allocsize = bytes; + aclnode->z_size = bytes; + } + + return (aclnode); +} + +static void +zfs_acl_node_free(zfs_acl_node_t *aclnode) +{ + if (aclnode->z_allocsize) + kmem_free(aclnode->z_allocdata, aclnode->z_allocsize); + kmem_free(aclnode, sizeof (zfs_acl_node_t)); +} + +static void +zfs_acl_release_nodes(zfs_acl_t *aclp) +{ + zfs_acl_node_t *aclnode; + + while (aclnode = list_head(&aclp->z_acl)) { + list_remove(&aclp->z_acl, aclnode); + zfs_acl_node_free(aclnode); + } + aclp->z_acl_count = 0; + aclp->z_acl_bytes = 0; +} + +void +zfs_acl_free(zfs_acl_t *aclp) +{ + zfs_acl_release_nodes(aclp); + list_destroy(&aclp->z_acl); + kmem_free(aclp, sizeof (zfs_acl_t)); +} + +static boolean_t +zfs_acl_valid_ace_type(uint_t type, uint_t flags) +{ + uint16_t entry_type; + + switch (type) { + case ALLOW: + case DENY: + case ACE_SYSTEM_AUDIT_ACE_TYPE: + case ACE_SYSTEM_ALARM_ACE_TYPE: + entry_type = flags & ACE_TYPE_FLAGS; + return (entry_type == ACE_OWNER || + entry_type == OWNING_GROUP || + entry_type == 
ACE_EVERYONE || entry_type == 0 || + entry_type == ACE_IDENTIFIER_GROUP); + default: + if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE) + return (B_TRUE); + } + return (B_FALSE); +} + +static boolean_t +zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) +{ + /* + * first check type of entry + */ + + if (!zfs_acl_valid_ace_type(type, iflags)) + return (B_FALSE); + + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + if (aclp->z_version < ZFS_ACL_VERSION_FUID) + return (B_FALSE); + aclp->z_hints |= ZFS_ACL_OBJ_ACE; + } + + /* + * next check inheritance level flags + */ + + if (obj_type == VDIR && + (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) + aclp->z_hints |= ZFS_INHERIT_ACE; + + if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) { + if ((iflags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE)) == 0) { + return (B_FALSE); + } + } + + return (B_TRUE); +} + +static void * +zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who, + uint32_t *access_mask, uint16_t *iflags, uint16_t *type) +{ + zfs_acl_node_t *aclnode; + + ASSERT(aclp); + + if (start == NULL) { + aclnode = list_head(&aclp->z_acl); + if (aclnode == NULL) + return (NULL); + + aclp->z_next_ace = aclnode->z_acldata; + aclp->z_curr_node = aclnode; + aclnode->z_ace_idx = 0; + } + + aclnode = aclp->z_curr_node; + + if (aclnode == NULL) + return (NULL); + + if (aclnode->z_ace_idx >= aclnode->z_ace_count) { + aclnode = list_next(&aclp->z_acl, aclnode); + if (aclnode == NULL) + return (NULL); + else { + aclp->z_curr_node = aclnode; + aclnode->z_ace_idx = 0; + aclp->z_next_ace = aclnode->z_acldata; + } + } + + if (aclnode->z_ace_idx < aclnode->z_ace_count) { + void *acep = aclp->z_next_ace; + size_t ace_size; + + /* + * Make sure we don't overstep our bounds + */ + ace_size = aclp->z_ops.ace_size(acep); + + if 
(((caddr_t)acep + ace_size) > + ((caddr_t)aclnode->z_acldata + aclnode->z_size)) { + return (NULL); + } + + *iflags = aclp->z_ops.ace_flags_get(acep); + *type = aclp->z_ops.ace_type_get(acep); + *access_mask = aclp->z_ops.ace_mask_get(acep); + *who = aclp->z_ops.ace_who_get(acep); + aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size; + aclnode->z_ace_idx++; + + return ((void *)acep); + } + return (NULL); +} + +/*ARGSUSED*/ +static uint64_t +zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt, + uint16_t *flags, uint16_t *type, uint32_t *mask) +{ + zfs_acl_t *aclp = datap; + zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie; + uint64_t who; + + acep = zfs_acl_next_ace(aclp, acep, &who, mask, + flags, type); + return ((uint64_t)(uintptr_t)acep); +} + +static zfs_acl_node_t * +zfs_acl_curr_node(zfs_acl_t *aclp) +{ + ASSERT(aclp->z_curr_node); + return (aclp->z_curr_node); +} + +/* + * Copy ACE to internal ZFS format. + * While processing the ACL each ACE will be validated for correctness. + * ACE FUIDs will be created later. + */ +int +zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, + void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size, + zfs_fuid_info_t **fuidp, cred_t *cr) +{ + int i; + uint16_t entry_type; + zfs_ace_t *aceptr = z_acl; + ace_t *acep = datap; + zfs_object_ace_t *zobjacep; + ace_object_t *aceobjp; + + for (i = 0; i != aclcnt; i++) { + aceptr->z_hdr.z_access_mask = acep->a_access_mask; + aceptr->z_hdr.z_flags = acep->a_flags; + aceptr->z_hdr.z_type = acep->a_type; + entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS; + if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP && + entry_type != ACE_EVERYONE) { + aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who, + cr, (entry_type == 0) ? 
+ ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp); + } + + /* + * Make sure ACE is valid + */ + if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type, + aceptr->z_hdr.z_flags) != B_TRUE) + return (EINVAL); + + switch (acep->a_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + zobjacep = (zfs_object_ace_t *)aceptr; + aceobjp = (ace_object_t *)acep; + + bcopy(aceobjp->a_obj_type, zobjacep->z_object_type, + sizeof (aceobjp->a_obj_type)); + bcopy(aceobjp->a_inherit_obj_type, + zobjacep->z_inherit_type, + sizeof (aceobjp->a_inherit_obj_type)); + acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t)); + break; + default: + acep = (ace_t *)((caddr_t)acep + sizeof (ace_t)); + } + + aceptr = (zfs_ace_t *)((caddr_t)aceptr + + aclp->z_ops.ace_size(aceptr)); + } + + *size = (caddr_t)aceptr - (caddr_t)z_acl; + + return (0); +} + +/* + * Copy ZFS ACEs to fixed size ace_t layout + */ +static void +zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, + void *datap, int filter) +{ + uint64_t who; + uint32_t access_mask; + uint16_t iflags, type; + zfs_ace_hdr_t *zacep = NULL; + ace_t *acep = datap; + ace_object_t *objacep; + zfs_object_ace_t *zobjacep; + size_t ace_size; + uint16_t entry_type; + + while (zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &iflags, &type)) { + + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + if (filter) { + continue; + } + zobjacep = (zfs_object_ace_t *)zacep; + objacep = (ace_object_t *)acep; + bcopy(zobjacep->z_object_type, + objacep->a_obj_type, + sizeof (zobjacep->z_object_type)); + bcopy(zobjacep->z_inherit_type, + objacep->a_inherit_obj_type, + sizeof (zobjacep->z_inherit_type)); + ace_size = sizeof (ace_object_t); + break; + default: + ace_size = sizeof (ace_t); + break; 
+ } + + entry_type = (iflags & ACE_TYPE_FLAGS); + if ((entry_type != ACE_OWNER && + entry_type != OWNING_GROUP && + entry_type != ACE_EVERYONE)) { + acep->a_who = zfs_fuid_map_id(zfsvfs, who, + cr, (entry_type & ACE_IDENTIFIER_GROUP) ? + ZFS_ACE_GROUP : ZFS_ACE_USER); + } else { + acep->a_who = (uid_t)(int64_t)who; + } + acep->a_access_mask = access_mask; + acep->a_flags = iflags; + acep->a_type = type; + acep = (ace_t *)((caddr_t)acep + ace_size); + } +} + +static int +zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep, + zfs_oldace_t *z_acl, int aclcnt, size_t *size) +{ + int i; + zfs_oldace_t *aceptr = z_acl; + + for (i = 0; i != aclcnt; i++, aceptr++) { + aceptr->z_access_mask = acep[i].a_access_mask; + aceptr->z_type = acep[i].a_type; + aceptr->z_flags = acep[i].a_flags; + aceptr->z_fuid = acep[i].a_who; + /* + * Make sure ACE is valid + */ + if (zfs_ace_valid(obj_type, aclp, aceptr->z_type, + aceptr->z_flags) != B_TRUE) + return (EINVAL); + } + *size = (caddr_t)aceptr - (caddr_t)z_acl; + return (0); +} + +/* + * convert old ACL format to new + */ +void +zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) +{ + zfs_oldace_t *oldaclp; + int i; + uint16_t type, iflags; + uint32_t access_mask; + uint64_t who; + void *cookie = NULL; + zfs_acl_node_t *newaclnode; + + ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL); + /* + * First create the ACE in a contiguous piece of memory + * for zfs_copy_ace_2_fuid(). + * + * We only convert an ACL once, so this won't happen + * everytime. 
+ */ + oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, + KM_SLEEP); + i = 0; + while (cookie = zfs_acl_next_ace(aclp, cookie, &who, + &access_mask, &iflags, &type)) { + oldaclp[i].z_flags = iflags; + oldaclp[i].z_type = type; + oldaclp[i].z_fuid = who; + oldaclp[i++].z_access_mask = access_mask; + } + + newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * + sizeof (zfs_object_ace_t)); + aclp->z_ops = zfs_acl_fuid_ops; + VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp, + oldaclp, newaclnode->z_acldata, aclp->z_acl_count, + &newaclnode->z_size, NULL, cr) == 0); + newaclnode->z_ace_count = aclp->z_acl_count; + aclp->z_version = ZFS_ACL_VERSION; + kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t)); + + /* + * Release all previous ACL nodes + */ + + zfs_acl_release_nodes(aclp); + + list_insert_head(&aclp->z_acl, newaclnode); + + aclp->z_acl_bytes = newaclnode->z_size; + aclp->z_acl_count = newaclnode->z_ace_count; + +} + +/* + * Convert unix access mask to v4 access mask + */ +static uint32_t +zfs_unix_to_v4(uint32_t access_mask) +{ + uint32_t new_mask = 0; + + if (access_mask & S_IXOTH) + new_mask |= ACE_EXECUTE; + if (access_mask & S_IWOTH) + new_mask |= ACE_WRITE_DATA; + if (access_mask & S_IROTH) + new_mask |= ACE_READ_DATA; + return (new_mask); +} + +static void +zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask, + uint16_t access_type, uint64_t fuid, uint16_t entry_type) +{ + uint16_t type = entry_type & ACE_TYPE_FLAGS; + + aclp->z_ops.ace_mask_set(acep, access_mask); + aclp->z_ops.ace_type_set(acep, access_type); + aclp->z_ops.ace_flags_set(acep, entry_type); + if ((type != ACE_OWNER && type != OWNING_GROUP && + type != ACE_EVERYONE)) + aclp->z_ops.ace_who_set(acep, fuid); +} + +/* + * Determine mode of file based on ACL. 
+ * Also, create FUIDs for any User/Group ACEs + */ +uint64_t +zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, + uint64_t *pflags, uint64_t fuid, uint64_t fgid) +{ + int entry_type; + mode_t mode; + mode_t seen = 0; + zfs_ace_hdr_t *acep = NULL; + uint64_t who; + uint16_t iflags, type; + uint32_t access_mask; + boolean_t an_exec_denied = B_FALSE; + + mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); + + while (acep = zfs_acl_next_ace(aclp, acep, &who, + &access_mask, &iflags, &type)) { + + if (!zfs_acl_valid_ace_type(type, iflags)) + continue; + + entry_type = (iflags & ACE_TYPE_FLAGS); + + /* + * Skip over owner@, group@ or everyone@ inherit only ACEs + */ + if ((iflags & ACE_INHERIT_ONLY_ACE) && + (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || + entry_type == OWNING_GROUP)) + continue; + + if (entry_type == ACE_OWNER || (entry_type == 0 && + who == fuid)) { + if ((access_mask & ACE_READ_DATA) && + (!(seen & S_IRUSR))) { + seen |= S_IRUSR; + if (type == ALLOW) { + mode |= S_IRUSR; + } + } + if ((access_mask & ACE_WRITE_DATA) && + (!(seen & S_IWUSR))) { + seen |= S_IWUSR; + if (type == ALLOW) { + mode |= S_IWUSR; + } + } + if ((access_mask & ACE_EXECUTE) && + (!(seen & S_IXUSR))) { + seen |= S_IXUSR; + if (type == ALLOW) { + mode |= S_IXUSR; + } + } + } else if (entry_type == OWNING_GROUP || + (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) { + if ((access_mask & ACE_READ_DATA) && + (!(seen & S_IRGRP))) { + seen |= S_IRGRP; + if (type == ALLOW) { + mode |= S_IRGRP; + } + } + if ((access_mask & ACE_WRITE_DATA) && + (!(seen & S_IWGRP))) { + seen |= S_IWGRP; + if (type == ALLOW) { + mode |= S_IWGRP; + } + } + if ((access_mask & ACE_EXECUTE) && + (!(seen & S_IXGRP))) { + seen |= S_IXGRP; + if (type == ALLOW) { + mode |= S_IXGRP; + } + } + } else if (entry_type == ACE_EVERYONE) { + if ((access_mask & ACE_READ_DATA)) { + if (!(seen & S_IRUSR)) { + seen |= S_IRUSR; + if (type == ALLOW) { + mode |= S_IRUSR; + } + } + if (!(seen & S_IRGRP)) { + seen 
|= S_IRGRP; + if (type == ALLOW) { + mode |= S_IRGRP; + } + } + if (!(seen & S_IROTH)) { + seen |= S_IROTH; + if (type == ALLOW) { + mode |= S_IROTH; + } + } + } + if ((access_mask & ACE_WRITE_DATA)) { + if (!(seen & S_IWUSR)) { + seen |= S_IWUSR; + if (type == ALLOW) { + mode |= S_IWUSR; + } + } + if (!(seen & S_IWGRP)) { + seen |= S_IWGRP; + if (type == ALLOW) { + mode |= S_IWGRP; + } + } + if (!(seen & S_IWOTH)) { + seen |= S_IWOTH; + if (type == ALLOW) { + mode |= S_IWOTH; + } + } + } + if ((access_mask & ACE_EXECUTE)) { + if (!(seen & S_IXUSR)) { + seen |= S_IXUSR; + if (type == ALLOW) { + mode |= S_IXUSR; + } + } + if (!(seen & S_IXGRP)) { + seen |= S_IXGRP; + if (type == ALLOW) { + mode |= S_IXGRP; + } + } + if (!(seen & S_IXOTH)) { + seen |= S_IXOTH; + if (type == ALLOW) { + mode |= S_IXOTH; + } + } + } + } else { + /* + * Only care if this IDENTIFIER_GROUP or + * USER ACE denies execute access to someone, + * mode is not affected + */ + if ((access_mask & ACE_EXECUTE) && type == DENY) + an_exec_denied = B_TRUE; + } + } + + /* + * Failure to allow is effectively a deny, so execute permission + * is denied if it was never mentioned or if we explicitly + * weren't allowed it. + */ + if (!an_exec_denied && + ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS || + (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS)) + an_exec_denied = B_TRUE; + + if (an_exec_denied) + *pflags &= ~ZFS_NO_EXECS_DENIED; + else + *pflags |= ZFS_NO_EXECS_DENIED; + + return (mode); +} + +/* + * Read an external acl object. If the intent is to modify, always + * create a new acl and leave any cached acl in place. 
+ */ +static int +zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, + boolean_t will_modify) +{ + zfs_acl_t *aclp; + int aclsize; + int acl_count; + zfs_acl_node_t *aclnode; + zfs_acl_phys_t znode_acl; + int version; + int error; + boolean_t drop_lock = B_FALSE; + + ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + + if (zp->z_acl_cached && !will_modify) { + *aclpp = zp->z_acl_cached; + return (0); + } + + /* + * close race where znode could be upgrade while trying to + * read the znode attributes. + * + * But this could only happen if the file isn't already an SA + * znode + */ + if (!zp->z_is_sa && !have_lock) { + mutex_enter(&zp->z_lock); + drop_lock = B_TRUE; + } + version = zfs_znode_acl_version(zp); + + if ((error = zfs_acl_znode_info(zp, &aclsize, + &acl_count, &znode_acl)) != 0) { + goto done; + } + + aclp = zfs_acl_alloc(version); + + aclp->z_acl_count = acl_count; + aclp->z_acl_bytes = aclsize; + + aclnode = zfs_acl_node_alloc(aclsize); + aclnode->z_ace_count = aclp->z_acl_count; + aclnode->z_size = aclsize; + + if (!zp->z_is_sa) { + if (znode_acl.z_acl_extern_obj) { + error = dmu_read(zp->z_zfsvfs->z_os, + znode_acl.z_acl_extern_obj, 0, aclnode->z_size, + aclnode->z_acldata, DMU_READ_PREFETCH); + } else { + bcopy(znode_acl.z_ace_data, aclnode->z_acldata, + aclnode->z_size); + } + } else { + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs), + aclnode->z_acldata, aclnode->z_size); + } + + if (error != 0) { + zfs_acl_free(aclp); + zfs_acl_node_free(aclnode); + /* convert checksum errors into IO errors */ + if (error == ECKSUM) + error = EIO; + goto done; + } + + list_insert_head(&aclp->z_acl, aclnode); + + *aclpp = aclp; + if (!will_modify) + zp->z_acl_cached = aclp; +done: + if (drop_lock) + mutex_exit(&zp->z_lock); + return (error); +} + +/*ARGSUSED*/ +void +zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen, + boolean_t start, void *userdata) +{ + zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata; + + 
if (start) { + cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl); + } else { + cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl, + cb->cb_acl_node); + } + *dataptr = cb->cb_acl_node->z_acldata; + *length = cb->cb_acl_node->z_size; +} + +int +zfs_acl_chown_setattr(znode_t *zp) +{ + int error; + zfs_acl_t *aclp; + + ASSERT(MUTEX_HELD(&zp->z_lock)); + ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + + if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0) + zp->z_mode = zfs_mode_compute(zp->z_mode, aclp, + &zp->z_pflags, zp->z_uid, zp->z_gid); + return (error); +} + +/* + * common code for setting ACLs. + * + * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl. + * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's + * already checked the acl and knows whether to inherit. + */ +int +zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) +{ + int error; + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_object_type_t otype; + zfs_acl_locator_cb_t locate = { 0 }; + uint64_t mode; + sa_bulk_attr_t bulk[5]; + uint64_t ctime[2]; + int count = 0; + + mode = zp->z_mode; + + mode = zfs_mode_compute(mode, aclp, &zp->z_pflags, + zp->z_uid, zp->z_gid); + + zp->z_mode = mode; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + &mode, sizeof (mode)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, sizeof (ctime)); + + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + /* + * Upgrade needed? 
+ */ + if (!zfsvfs->z_use_fuids) { + otype = DMU_OT_OLDACL; + } else { + if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) && + (zfsvfs->z_version >= ZPL_VERSION_FUID)) + zfs_acl_xform(zp, aclp, cr); + ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID); + otype = DMU_OT_ACL; + } + + /* + * Arrgh, we have to handle old on disk format + * as well as newer (preferred) SA format. + */ + + if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */ + locate.cb_aclp = aclp; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs), + zfs_acl_data_locator, &locate, aclp->z_acl_bytes); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs), + NULL, &aclp->z_acl_count, sizeof (uint64_t)); + } else { /* Painful legacy way */ + zfs_acl_node_t *aclnode; + uint64_t off = 0; + zfs_acl_phys_t acl_phys; + uint64_t aoid; + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), + &acl_phys, sizeof (acl_phys))) != 0) + return (error); + + aoid = acl_phys.z_acl_extern_obj; + + if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { + /* + * If ACL was previously external and we are now + * converting to new ACL format then release old + * ACL object and create a new one. + */ + if (aoid && + aclp->z_version != acl_phys.z_acl_version) { + error = dmu_object_free(zfsvfs->z_os, aoid, tx); + if (error) + return (error); + aoid = 0; + } + if (aoid == 0) { + aoid = dmu_object_alloc(zfsvfs->z_os, + otype, aclp->z_acl_bytes, + otype == DMU_OT_ACL ? + DMU_OT_SYSACL : DMU_OT_NONE, + otype == DMU_OT_ACL ? 
+ DN_MAX_BONUSLEN : 0, tx); + } else { + (void) dmu_object_set_blocksize(zfsvfs->z_os, + aoid, aclp->z_acl_bytes, 0, tx); + } + acl_phys.z_acl_extern_obj = aoid; + for (aclnode = list_head(&aclp->z_acl); aclnode; + aclnode = list_next(&aclp->z_acl, aclnode)) { + if (aclnode->z_ace_count == 0) + continue; + dmu_write(zfsvfs->z_os, aoid, off, + aclnode->z_size, aclnode->z_acldata, tx); + off += aclnode->z_size; + } + } else { + void *start = acl_phys.z_ace_data; + /* + * Migrating back embedded? + */ + if (acl_phys.z_acl_extern_obj) { + error = dmu_object_free(zfsvfs->z_os, + acl_phys.z_acl_extern_obj, tx); + if (error) + return (error); + acl_phys.z_acl_extern_obj = 0; + } + + for (aclnode = list_head(&aclp->z_acl); aclnode; + aclnode = list_next(&aclp->z_acl, aclnode)) { + if (aclnode->z_ace_count == 0) + continue; + bcopy(aclnode->z_acldata, start, + aclnode->z_size); + start = (caddr_t)start + aclnode->z_size; + } + } + /* + * If Old version then swap count/bytes to match old + * layout of znode_acl_phys_t. + */ + if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { + acl_phys.z_acl_size = aclp->z_acl_count; + acl_phys.z_acl_count = aclp->z_acl_bytes; + } else { + acl_phys.z_acl_size = aclp->z_acl_bytes; + acl_phys.z_acl_count = aclp->z_acl_count; + } + acl_phys.z_acl_version = aclp->z_version; + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + &acl_phys, sizeof (acl_phys)); + } + + /* + * Replace ACL wide bits, but first clear them. + */ + zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS; + + zp->z_pflags |= aclp->z_hints; + + if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0) + zp->z_pflags |= ZFS_ACL_TRIVIAL; + + zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE); + return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); +} + +/* + * Update access mask for prepended ACE + * + * This applies the "groupmask" value for aclmode property. 
+ */ +static void +zfs_acl_prepend_fixup(zfs_acl_t *aclp, void *acep, void *origacep, + mode_t mode, uint64_t owner) +{ + int rmask, wmask, xmask; + int user_ace; + uint16_t aceflags; + uint32_t origmask, acepmask; + uint64_t fuid; + + aceflags = aclp->z_ops.ace_flags_get(acep); + fuid = aclp->z_ops.ace_who_get(acep); + origmask = aclp->z_ops.ace_mask_get(origacep); + acepmask = aclp->z_ops.ace_mask_get(acep); + + user_ace = (!(aceflags & + (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP))); + + if (user_ace && (fuid == owner)) { + rmask = S_IRUSR; + wmask = S_IWUSR; + xmask = S_IXUSR; + } else { + rmask = S_IRGRP; + wmask = S_IWGRP; + xmask = S_IXGRP; + } + + if (origmask & ACE_READ_DATA) { + if (mode & rmask) { + acepmask &= ~ACE_READ_DATA; + } else { + acepmask |= ACE_READ_DATA; + } + } + + if (origmask & ACE_WRITE_DATA) { + if (mode & wmask) { + acepmask &= ~ACE_WRITE_DATA; + } else { + acepmask |= ACE_WRITE_DATA; + } + } + + if (origmask & ACE_APPEND_DATA) { + if (mode & wmask) { + acepmask &= ~ACE_APPEND_DATA; + } else { + acepmask |= ACE_APPEND_DATA; + } + } + + if (origmask & ACE_EXECUTE) { + if (mode & xmask) { + acepmask &= ~ACE_EXECUTE; + } else { + acepmask |= ACE_EXECUTE; + } + } + aclp->z_ops.ace_mask_set(acep, acepmask); +} + +static void +zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp) +{ + void *acep = NULL; + uint64_t who; + int new_count, new_bytes; + int ace_size; + int entry_type; + uint16_t iflags, type; + uint32_t access_mask; + zfs_acl_node_t *newnode; + size_t abstract_size = aclp->z_ops.ace_abstract_size(); + void *zacep; + uint32_t owner, group, everyone; + uint32_t deny1, deny2, allow0; + + new_count = new_bytes = 0; + + acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2, + &owner, &group, &everyone); + + newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes); + + zacep = newnode->z_acldata; + if (allow0) { + zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER); + zacep = (void *)((uintptr_t)zacep + 
abstract_size); + new_count++; + new_bytes += abstract_size; + } if (deny1) { + zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER); + zacep = (void *)((uintptr_t)zacep + abstract_size); + new_count++; + new_bytes += abstract_size; + } + if (deny2) { + zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP); + zacep = (void *)((uintptr_t)zacep + abstract_size); + new_count++; + new_bytes += abstract_size; + } + + while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, + &iflags, &type)) { + uint16_t inherit_flags; + + entry_type = (iflags & ACE_TYPE_FLAGS); + inherit_flags = (iflags & ALL_INHERIT); + + if ((entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || + (entry_type == OWNING_GROUP)) && + ((inherit_flags & ACE_INHERIT_ONLY_ACE) == 0)) { + continue; + } + + if ((type != ALLOW && type != DENY) || + (inherit_flags & ACE_INHERIT_ONLY_ACE)) { + if (inherit_flags) + aclp->z_hints |= ZFS_INHERIT_ACE; + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + aclp->z_hints |= ZFS_ACL_OBJ_ACE; + break; + } + } else { + + /* + * Limit permissions to be no greater than + * group permissions + */ + if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) { + if (!(mode & S_IRGRP)) + access_mask &= ~ACE_READ_DATA; + if (!(mode & S_IWGRP)) + access_mask &= + ~(ACE_WRITE_DATA|ACE_APPEND_DATA); + if (!(mode & S_IXGRP)) + access_mask &= ~ACE_EXECUTE; + access_mask &= + ~(ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS); + } + } + zfs_set_ace(aclp, zacep, access_mask, type, who, iflags); + ace_size = aclp->z_ops.ace_size(acep); + zacep = (void *)((uintptr_t)zacep + ace_size); + new_count++; + new_bytes += ace_size; + } + zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER); + zacep = (void *)((uintptr_t)zacep + abstract_size); + zfs_set_ace(aclp, zacep, group, 0, -1, OWNING_GROUP); + zacep = (void *)((uintptr_t)zacep + 
abstract_size); + zfs_set_ace(aclp, zacep, everyone, 0, -1, ACE_EVERYONE); + + new_count += 3; + new_bytes += abstract_size * 3; + zfs_acl_release_nodes(aclp); + aclp->z_acl_count = new_count; + aclp->z_acl_bytes = new_bytes; + newnode->z_ace_count = new_count; + newnode->z_size = new_bytes; + list_insert_tail(&aclp->z_acl, newnode); +} + +void +zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) +{ + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); + (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; + zfs_acl_chmod(zp->z_zfsvfs, mode, *aclp); + mutex_exit(&zp->z_lock); + mutex_exit(&zp->z_acl_lock); + ASSERT(*aclp); +} + +/* + * strip off write_owner and write_acl + */ +static void +zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep) +{ + uint32_t mask = aclp->z_ops.ace_mask_get(acep); + + if ((zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) && + (aclp->z_ops.ace_type_get(acep) == ALLOW)) { + mask &= ~RESTRICTED_CLEAR; + aclp->z_ops.ace_mask_set(acep, mask); + } +} + +/* + * Should ACE be inherited? 
+ */ +static int +zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags) +{ + int iflags = (acep_flags & 0xf); + + if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE)) + return (1); + else if (iflags & ACE_FILE_INHERIT_ACE) + return (!((vtype == VDIR) && + (iflags & ACE_NO_PROPAGATE_INHERIT_ACE))); + return (0); +} + +/* + * inherit inheritable ACEs from parent + */ +static zfs_acl_t * +zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp, + uint64_t mode, boolean_t *need_chmod) +{ + void *pacep; + void *acep; + zfs_acl_node_t *aclnode; + zfs_acl_t *aclp = NULL; + uint64_t who; + uint32_t access_mask; + uint16_t iflags, newflags, type; + size_t ace_size; + void *data1, *data2; + size_t data1sz, data2sz; + boolean_t vdir = vtype == VDIR; + boolean_t vreg = vtype == VREG; + boolean_t passthrough, passthrough_x, noallow; + + passthrough_x = + zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X; + passthrough = passthrough_x || + zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH; + noallow = + zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW; + + *need_chmod = B_TRUE; + pacep = NULL; + aclp = zfs_acl_alloc(paclp->z_version); + if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || vtype == VLNK) + return (aclp); + while (pacep = zfs_acl_next_ace(paclp, pacep, &who, + &access_mask, &iflags, &type)) { + + /* + * don't inherit bogus ACEs + */ + if (!zfs_acl_valid_ace_type(type, iflags)) + continue; + + if (noallow && type == ALLOW) + continue; + + ace_size = aclp->z_ops.ace_size(pacep); + + if (!zfs_ace_can_use(vtype, iflags)) + continue; + + /* + * If owner@, group@, or everyone@ inheritable + * then zfs_acl_chmod() isn't needed. 
+ */ + if (passthrough && + ((iflags & (ACE_OWNER|ACE_EVERYONE)) || + ((iflags & OWNING_GROUP) == + OWNING_GROUP)) && (vreg || (vdir && (iflags & + ACE_DIRECTORY_INHERIT_ACE)))) { + *need_chmod = B_FALSE; + } + + if (!vdir && passthrough_x && + ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) { + access_mask &= ~ACE_EXECUTE; + } + + aclnode = zfs_acl_node_alloc(ace_size); + list_insert_tail(&aclp->z_acl, aclnode); + acep = aclnode->z_acldata; + + zfs_set_ace(aclp, acep, access_mask, type, + who, iflags|ACE_INHERITED_ACE); + + /* + * Copy special opaque data if any + */ + if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) { + VERIFY((data2sz = aclp->z_ops.ace_data(acep, + &data2)) == data1sz); + bcopy(data1, data2, data2sz); + } + + aclp->z_acl_count++; + aclnode->z_ace_count++; + aclp->z_acl_bytes += aclnode->z_size; + newflags = aclp->z_ops.ace_flags_get(acep); + + if (vdir) + aclp->z_hints |= ZFS_INHERIT_ACE; + + if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) { + newflags &= ~ALL_INHERIT; + aclp->z_ops.ace_flags_set(acep, + newflags|ACE_INHERITED_ACE); + zfs_restricted_update(zfsvfs, aclp, acep); + continue; + } + + ASSERT(vdir); + + /* + * If only FILE_INHERIT is set then turn on + * inherit_only + */ + if ((iflags & (ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) { + newflags |= ACE_INHERIT_ONLY_ACE; + aclp->z_ops.ace_flags_set(acep, + newflags|ACE_INHERITED_ACE); + } else { + newflags &= ~ACE_INHERIT_ONLY_ACE; + aclp->z_ops.ace_flags_set(acep, + newflags|ACE_INHERITED_ACE); + } + } + return (aclp); +} + +/* + * Create file system object initial permissions + * including inheritable ACEs. 
+ */ +int +zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, + vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids) +{ + int error; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_acl_t *paclp; + gid_t gid; + boolean_t need_chmod = B_TRUE; + boolean_t inherited = B_FALSE; + + bzero(acl_ids, sizeof (zfs_acl_ids_t)); + acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); + + if (vsecp) + if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr, + &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) + return (error); + /* + * Determine uid and gid. + */ + if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay || + ((flag & IS_XATTR) && (vap->va_type == VDIR))) { + acl_ids->z_fuid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_uid, cr, + ZFS_OWNER, &acl_ids->z_fuidp); + acl_ids->z_fgid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_gid, cr, + ZFS_GROUP, &acl_ids->z_fuidp); + gid = vap->va_gid; + } else { + acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER, + cr, &acl_ids->z_fuidp); + acl_ids->z_fgid = 0; + if (vap->va_mask & AT_GID) { + acl_ids->z_fgid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_gid, + cr, ZFS_GROUP, &acl_ids->z_fuidp); + gid = vap->va_gid; + if (acl_ids->z_fgid != dzp->z_gid && + !groupmember(vap->va_gid, cr) && + secpolicy_vnode_create_gid(cr) != 0) + acl_ids->z_fgid = 0; + } + if (acl_ids->z_fgid == 0) { + if (dzp->z_mode & S_ISGID) { + char *domain; + uint32_t rid; + + acl_ids->z_fgid = dzp->z_gid; + gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid, + cr, ZFS_GROUP); + + if (zfsvfs->z_use_fuids && + IS_EPHEMERAL(acl_ids->z_fgid)) { + domain = zfs_fuid_idx_domain( + &zfsvfs->z_fuid_idx, + FUID_INDEX(acl_ids->z_fgid)); + rid = FUID_RID(acl_ids->z_fgid); + zfs_fuid_node_add(&acl_ids->z_fuidp, + domain, rid, + FUID_INDEX(acl_ids->z_fgid), + acl_ids->z_fgid, ZFS_GROUP); + } + } else { + acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs, + ZFS_GROUP, cr, &acl_ids->z_fuidp); + gid = crgetgid(cr); + } + } + } + + /* + * If we're creating a directory, and the 
parent directory has the + * set-GID bit set, set in on the new directory. + * Otherwise, if the user is neither privileged nor a member of the + * file's new group, clear the file's set-GID bit. + */ + + if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) && + (vap->va_type == VDIR)) { + acl_ids->z_mode |= S_ISGID; + } else { + if ((acl_ids->z_mode & S_ISGID) && + secpolicy_vnode_setids_setgids(cr, gid) != 0) + acl_ids->z_mode &= ~S_ISGID; + } + + if (acl_ids->z_aclp == NULL) { + mutex_enter(&dzp->z_acl_lock); + mutex_enter(&dzp->z_lock); + if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR && + (dzp->z_pflags & ZFS_INHERIT_ACE)) && + !(dzp->z_pflags & ZFS_XATTR)) { + VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, + &paclp, B_FALSE)); + acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, + vap->va_type, paclp, acl_ids->z_mode, &need_chmod); + inherited = B_TRUE; + } else { + acl_ids->z_aclp = + zfs_acl_alloc(zfs_acl_version_zp(dzp)); + acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; + } + mutex_exit(&dzp->z_lock); + mutex_exit(&dzp->z_acl_lock); + if (need_chmod) { + acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ? 
+ ZFS_ACL_AUTO_INHERIT : 0; + zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp); + } + } + + if (inherited || vsecp) { + acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode, + acl_ids->z_aclp, &acl_ids->z_aclp->z_hints, + acl_ids->z_fuid, acl_ids->z_fgid); + if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0) + acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; + } + + return (0); +} + +/* + * Free ACL and fuid_infop, but not the acl_ids structure + */ +void +zfs_acl_ids_free(zfs_acl_ids_t *acl_ids) +{ + if (acl_ids->z_aclp) + zfs_acl_free(acl_ids->z_aclp); + if (acl_ids->z_fuidp) + zfs_fuid_info_free(acl_ids->z_fuidp); + acl_ids->z_aclp = NULL; + acl_ids->z_fuidp = NULL; +} + +boolean_t +zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids) +{ + return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) || + zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid)); +} + +/* + * Retrieve a files ACL + */ +int +zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +{ + zfs_acl_t *aclp; + ulong_t mask; + int error; + int count = 0; + int largeace = 0; + + mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | + VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); + + if (mask == 0) + return (ENOSYS); + + if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)) + return (error); + + mutex_enter(&zp->z_acl_lock); + + error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); + if (error != 0) { + mutex_exit(&zp->z_acl_lock); + return (error); + } + + /* + * Scan ACL to determine number of ACEs + */ + if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) { + void *zacep = NULL; + uint64_t who; + uint32_t access_mask; + uint16_t type, iflags; + + while (zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &iflags, &type)) { + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + largeace++; + 
continue; + default: + count++; + } + } + vsecp->vsa_aclcnt = count; + } else + count = (int)aclp->z_acl_count; + + if (mask & VSA_ACECNT) { + vsecp->vsa_aclcnt = count; + } + + if (mask & VSA_ACE) { + size_t aclsz; + + aclsz = count * sizeof (ace_t) + + sizeof (ace_object_t) * largeace; + + vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP); + vsecp->vsa_aclentsz = aclsz; + + if (aclp->z_version == ZFS_ACL_VERSION_FUID) + zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr, + vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES)); + else { + zfs_acl_node_t *aclnode; + void *start = vsecp->vsa_aclentp; + + for (aclnode = list_head(&aclp->z_acl); aclnode; + aclnode = list_next(&aclp->z_acl, aclnode)) { + bcopy(aclnode->z_acldata, start, + aclnode->z_size); + start = (caddr_t)start + aclnode->z_size; + } + ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp == + aclp->z_acl_bytes); + } + } + if (mask & VSA_ACE_ACLFLAGS) { + vsecp->vsa_aclflags = 0; + if (zp->z_pflags & ZFS_ACL_DEFAULTED) + vsecp->vsa_aclflags |= ACL_DEFAULTED; + if (zp->z_pflags & ZFS_ACL_PROTECTED) + vsecp->vsa_aclflags |= ACL_PROTECTED; + if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT) + vsecp->vsa_aclflags |= ACL_AUTO_INHERIT; + } + + mutex_exit(&zp->z_acl_lock); + + return (0); +} + +int +zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, + vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp) +{ + zfs_acl_t *aclp; + zfs_acl_node_t *aclnode; + int aclcnt = vsecp->vsa_aclcnt; + int error; + + if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) + return (EINVAL); + + aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version)); + + aclp->z_hints = 0; + aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t)); + if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { + if ((error = zfs_copy_ace_2_oldace(obj_type, aclp, + (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata, + aclcnt, &aclnode->z_size)) != 0) { + zfs_acl_free(aclp); + zfs_acl_node_free(aclnode); + return (error); + } + } else { 
+ if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp, + vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, + &aclnode->z_size, fuidp, cr)) != 0) { + zfs_acl_free(aclp); + zfs_acl_node_free(aclnode); + return (error); + } + } + aclp->z_acl_bytes = aclnode->z_size; + aclnode->z_ace_count = aclcnt; + aclp->z_acl_count = aclcnt; + list_insert_head(&aclp->z_acl, aclnode); + + /* + * If flags are being set then add them to z_hints + */ + if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) { + if (vsecp->vsa_aclflags & ACL_PROTECTED) + aclp->z_hints |= ZFS_ACL_PROTECTED; + if (vsecp->vsa_aclflags & ACL_DEFAULTED) + aclp->z_hints |= ZFS_ACL_DEFAULTED; + if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT) + aclp->z_hints |= ZFS_ACL_AUTO_INHERIT; + } + + *zaclp = aclp; + + return (0); +} + +/* + * Set a files ACL + */ +int +zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zilog_t *zilog = zfsvfs->z_log; + ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); + dmu_tx_t *tx; + int error; + zfs_acl_t *aclp; + zfs_fuid_info_t *fuidp = NULL; + boolean_t fuid_dirtied; + uint64_t acl_obj; + + if (mask == 0) + return (ENOSYS); + + if (zp->z_pflags & ZFS_IMMUTABLE) + return (EPERM); + + if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) + return (error); + + error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp, + &aclp); + if (error) + return (error); + + /* + * If ACL wide flags aren't being set then preserve any + * existing flags. 
+ */ + if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) { + aclp->z_hints |= + (zp->z_pflags & V4_ACL_WIDE_FLAGS); + } +top: + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + + tx = dmu_tx_create(zfsvfs->z_os); + + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + + /* + * If old version and ACL won't fit in bonus and we aren't + * upgrading then take out necessary DMU holds + */ + + if ((acl_obj = zfs_external_acl(zp)) != 0) { + if (zfsvfs->z_version >= ZPL_VERSION_FUID && + zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) { + dmu_tx_hold_free(tx, acl_obj, 0, + DMU_OBJECT_END); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, + aclp->z_acl_bytes); + } else { + dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); + } + } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); + } + + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + mutex_exit(&zp->z_acl_lock); + mutex_exit(&zp->z_lock); + + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + zfs_acl_free(aclp); + return (error); + } + + error = zfs_aclset_common(zp, aclp, cr, tx); + ASSERT(error == 0); + ASSERT(zp->z_acl_cached == NULL); + zp->z_acl_cached = aclp; + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + zfs_log_acl(zilog, tx, zp, vsecp, fuidp); + + if (fuidp) + zfs_fuid_info_free(fuidp); + dmu_tx_commit(tx); +done: + mutex_exit(&zp->z_lock); + mutex_exit(&zp->z_acl_lock); + + return (error); +} + +/* + * Check accesses of interest (AoI) against attributes of the dataset + * such as read-only. Returns zero if no AoI conflict with dataset + * attributes, otherwise an appropriate errno is returned. 
+ */ +static int +zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) +{ + if ((v4_mode & WRITE_MASK) && + (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && + (!IS_DEVVP(ZTOV(zp)) || + (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) { + return (EROFS); + } + + /* + * Only check for READONLY on non-directories. + */ + if ((v4_mode & WRITE_MASK_DATA) && + (((ZTOV(zp)->v_type != VDIR) && + (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) || + (ZTOV(zp)->v_type == VDIR && + (zp->z_pflags & ZFS_IMMUTABLE)))) { + return (EPERM); + } + + if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && + (zp->z_pflags & ZFS_NOUNLINK)) { + return (EPERM); + } + + if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && + (zp->z_pflags & ZFS_AV_QUARANTINED))) { + return (EACCES); + } + + return (0); +} + +/* + * The primary usage of this function is to loop through all of the + * ACEs in the znode, determining what accesses of interest (AoI) to + * the caller are allowed or denied. The AoI are expressed as bits in + * the working_mode parameter. As each ACE is processed, bits covered + * by that ACE are removed from the working_mode. This removal + * facilitates two things. The first is that when the working mode is + * empty (= 0), we know we've looked at all the AoI. The second is + * that the ACE interpretation rules don't allow a later ACE to undo + * something granted or denied by an earlier ACE. Removing the + * discovered access or denial enforces this rule. At the end of + * processing the ACEs, all AoI that were found to be denied are + * placed into the working_mode, giving the caller a mask of denied + * accesses. Returns: + * 0 if all AoI granted + * EACCESS if the denied mask is non-zero + * other error if abnormal failure (e.g., IO error) + * + * A secondary usage of the function is to determine if any of the + * AoI are granted. If an ACE grants any access in + * the working_mode, we immediately short circuit out of the function. 
+ * This mode is chosen by setting anyaccess to B_TRUE. The + * working_mode is not a denied access mask upon exit if the function + * is used in this manner. + */ +static int +zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, + boolean_t anyaccess, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_acl_t *aclp; + int error; + uid_t uid = crgetuid(cr); + uint64_t who; + uint16_t type, iflags; + uint16_t entry_type; + uint32_t access_mask; + uint32_t deny_mask = 0; + zfs_ace_hdr_t *acep = NULL; + boolean_t checkit; + uid_t gowner; + uid_t fowner; + + zfs_fuid_map_ids(zp, cr, &fowner, &gowner); + + mutex_enter(&zp->z_acl_lock); + + error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); + if (error != 0) { + mutex_exit(&zp->z_acl_lock); + return (error); + } + + ASSERT(zp->z_acl_cached); + + while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, + &iflags, &type)) { + uint32_t mask_matched; + + if (!zfs_acl_valid_ace_type(type, iflags)) + continue; + + if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE)) + continue; + + /* Skip ACE if it does not affect any AoI */ + mask_matched = (access_mask & *working_mode); + if (!mask_matched) + continue; + + entry_type = (iflags & ACE_TYPE_FLAGS); + + checkit = B_FALSE; + + switch (entry_type) { + case ACE_OWNER: + if (uid == fowner) + checkit = B_TRUE; + break; + case OWNING_GROUP: + who = gowner; + /*FALLTHROUGH*/ + case ACE_IDENTIFIER_GROUP: + checkit = zfs_groupmember(zfsvfs, who, cr); + break; + case ACE_EVERYONE: + checkit = B_TRUE; + break; + + /* USER Entry */ + default: + if (entry_type == 0) { + uid_t newid; + + newid = zfs_fuid_map_id(zfsvfs, who, cr, + ZFS_ACE_USER); + if (newid != IDMAP_WK_CREATOR_OWNER_UID && + uid == newid) + checkit = B_TRUE; + break; + } else { + mutex_exit(&zp->z_acl_lock); + return (EIO); + } + } + + if (checkit) { + if (type == DENY) { + DTRACE_PROBE3(zfs__ace__denies, + znode_t *, zp, + zfs_ace_hdr_t *, acep, + uint32_t, mask_matched); + deny_mask |= 
mask_matched; + } else { + DTRACE_PROBE3(zfs__ace__allows, + znode_t *, zp, + zfs_ace_hdr_t *, acep, + uint32_t, mask_matched); + if (anyaccess) { + mutex_exit(&zp->z_acl_lock); + return (0); + } + } + *working_mode &= ~mask_matched; + } + + /* Are we done? */ + if (*working_mode == 0) + break; + } + + mutex_exit(&zp->z_acl_lock); + + /* Put the found 'denies' back on the working mode */ + if (deny_mask) { + *working_mode |= deny_mask; + return (EACCES); + } else if (*working_mode) { + return (-1); + } + + return (0); +} + +/* + * Return true if any access whatsoever granted, we don't actually + * care what access is granted. + */ +boolean_t +zfs_has_access(znode_t *zp, cred_t *cr) +{ + uint32_t have = ACE_ALL_PERMS; + + if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) { + uid_t owner; + + owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0); + } + return (B_TRUE); +} + +static int +zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, + boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int err; + + *working_mode = v4_mode; + *check_privs = B_TRUE; + + /* + * Short circuit empty requests + */ + if (v4_mode == 0 || zfsvfs->z_replay) { + *working_mode = 0; + return (0); + } + + if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) { + *check_privs = B_FALSE; + return (err); + } + + /* + * The caller requested that the ACL check be skipped. This + * would only happen if the caller checked VOP_ACCESS() with a + * 32 bit ACE mask and already had the appropriate permissions. 
+ */ + if (skipaclchk) { + *working_mode = 0; + return (0); + } + + return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr)); +} + +static int +zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs, + cred_t *cr) +{ + if (*working_mode != ACE_WRITE_DATA) + return (EACCES); + + return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode, + check_privs, B_FALSE, cr)); +} + +int +zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) +{ + boolean_t owner = B_FALSE; + boolean_t groupmbr = B_FALSE; + boolean_t is_attr; + uid_t uid = crgetuid(cr); + int error; + + if (zdp->z_pflags & ZFS_AV_QUARANTINED) + return (EACCES); + + is_attr = ((zdp->z_pflags & ZFS_XATTR) && + (ZTOV(zdp)->v_type == VDIR)); + if (is_attr) + goto slow; + + + mutex_enter(&zdp->z_acl_lock); + + if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + + if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + + if (uid == zdp->z_uid) { + owner = B_TRUE; + if (zdp->z_mode & S_IXUSR) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } else { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + } + if (groupmember(zdp->z_gid, cr)) { + groupmbr = B_TRUE; + if (zdp->z_mode & S_IXGRP) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } else { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + } + if (!owner && !groupmbr) { + if (zdp->z_mode & S_IXOTH) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + } + + mutex_exit(&zdp->z_acl_lock); + +slow: + DTRACE_PROBE(zfs__fastpath__execute__access__miss); + ZFS_ENTER(zdp->z_zfsvfs); + error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); + ZFS_EXIT(zdp->z_zfsvfs); + return (error); +} + +/* + * Determine whether Access should be granted/denied. + * The least priv subsytem is always consulted as a basic privilege + * can define any form of access. 
+ */ +int +zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) +{ + uint32_t working_mode; + int error; + int is_attr; + boolean_t check_privs; + znode_t *xzp; + znode_t *check_zp = zp; + mode_t needed_bits; + uid_t owner; + + is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR)); + + /* + * If attribute then validate against base file + */ + if (is_attr) { + uint64_t parent; + + if ((error = sa_lookup(zp->z_sa_hdl, + SA_ZPL_PARENT(zp->z_zfsvfs), &parent, + sizeof (parent))) != 0) + return (error); + + if ((error = zfs_zget(zp->z_zfsvfs, + parent, &xzp)) != 0) { + return (error); + } + + check_zp = xzp; + + /* + * fixup mode to map to xattr perms + */ + + if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) { + mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA); + mode |= ACE_WRITE_NAMED_ATTRS; + } + + if (mode & (ACE_READ_DATA|ACE_EXECUTE)) { + mode &= ~(ACE_READ_DATA|ACE_EXECUTE); + mode |= ACE_READ_NAMED_ATTRS; + } + } + + owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + /* + * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC + * in needed_bits. Map the bits mapped by working_mode (currently + * missing) in missing_bits. + * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode), + * needed_bits. 
+ */ + needed_bits = 0; + + working_mode = mode; + if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + owner == crgetuid(cr)) + working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); + + if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| + ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) + needed_bits |= VREAD; + if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| + ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) + needed_bits |= VWRITE; + if (working_mode & ACE_EXECUTE) + needed_bits |= VEXEC; + + if ((error = zfs_zaccess_common(check_zp, mode, &working_mode, + &check_privs, skipaclchk, cr)) == 0) { + if (is_attr) + VN_RELE(ZTOV(xzp)); + return (secpolicy_vnode_access2(cr, ZTOV(zp), owner, + needed_bits, needed_bits)); + } + + if (error && !check_privs) { + if (is_attr) + VN_RELE(ZTOV(xzp)); + return (error); + } + + if (error && (flags & V_APPEND)) { + error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr); + } + + if (error && check_privs) { + mode_t checkmode = 0; + + /* + * First check for implicit owner permission on + * read_acl/read_attributes + */ + + error = 0; + ASSERT(working_mode != 0); + + if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && + owner == crgetuid(cr))) + working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); + + if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| + ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) + checkmode |= VREAD; + if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| + ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) + checkmode |= VWRITE; + if (working_mode & ACE_EXECUTE) + checkmode |= VEXEC; + + error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner, + needed_bits & ~checkmode, needed_bits); + + if (error == 0 && (working_mode & ACE_WRITE_OWNER)) + error = secpolicy_vnode_chown(cr, owner); + if (error == 0 && (working_mode & ACE_WRITE_ACL)) + error = secpolicy_vnode_setdac(cr, owner); + + if (error == 0 && (working_mode & + 
(ACE_DELETE|ACE_DELETE_CHILD))) + error = secpolicy_vnode_remove(cr); + + if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) { + error = secpolicy_vnode_chown(cr, owner); + } + if (error == 0) { + /* + * See if any bits other than those already checked + * for are still present. If so then return EACCES + */ + if (working_mode & ~(ZFS_CHECKED_MASKS)) { + error = EACCES; + } + } + } else if (error == 0) { + error = secpolicy_vnode_access2(cr, ZTOV(zp), owner, + needed_bits, needed_bits); + } + + + if (is_attr) + VN_RELE(ZTOV(xzp)); + + return (error); +} + +/* + * Translate traditional unix VREAD/VWRITE/VEXEC mode into + * native ACL format and call zfs_zaccess() + */ +int +zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr) +{ + return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr)); +} + +/* + * Access function for secpolicy_vnode_setattr + */ +int +zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr) +{ + int v4_mode = zfs_unix_to_v4(mode >> 6); + + return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr)); +} + +static int +zfs_delete_final_check(znode_t *zp, znode_t *dzp, + mode_t available_perms, cred_t *cr) +{ + int error; + uid_t downer; + + downer = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER); + + error = secpolicy_vnode_access2(cr, ZTOV(dzp), + downer, available_perms, VWRITE|VEXEC); + + if (error == 0) + error = zfs_sticky_remove_access(dzp, zp, cr); + + return (error); +} + +/* + * Determine whether Access should be granted/deny, without + * consulting least priv subsystem. + * + * + * The following chart is the recommended NFSv4 enforcement for + * ability to delete an object. 
+ * + * ------------------------------------------------------- + * | Parent Dir | Target Object Permissions | + * | permissions | | + * ------------------------------------------------------- + * | | ACL Allows | ACL Denies| Delete | + * | | Delete | Delete | unspecified| + * ------------------------------------------------------- + * | ACL Allows | Permit | Permit | Permit | + * | DELETE_CHILD | | + * ------------------------------------------------------- + * | ACL Denies | Permit | Deny | Deny | + * | DELETE_CHILD | | | | + * ------------------------------------------------------- + * | ACL specifies | | | | + * | only allow | Permit | Permit | Permit | + * | write and | | | | + * | execute | | | | + * ------------------------------------------------------- + * | ACL denies | | | | + * | write and | Permit | Deny | Deny | + * | execute | | | | + * ------------------------------------------------------- + * ^ + * | + * No search privilege, can't even look up file? + * + */ +int +zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) +{ + uint32_t dzp_working_mode = 0; + uint32_t zp_working_mode = 0; + int dzp_error, zp_error; + mode_t available_perms; + boolean_t dzpcheck_privs = B_TRUE; + boolean_t zpcheck_privs = B_TRUE; + + /* + * We want specific DELETE permissions to + * take precedence over WRITE/EXECUTE. We don't + * want an ACL such as this to mess us up. + * user:joe:write_data:deny,user:joe:delete:allow + * + * However, deny permissions may ultimately be overridden + * by secpolicy_vnode_access(). + * + * We will ask for all of the necessary permissions and then + * look at the working modes from the directory and target object + * to determine what was found. + */ + + if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK)) + return (EPERM); + + /* + * First row + * If the directory permissions allow the delete, we are done. 
+ */ + if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD, + &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0) + return (0); + + /* + * If target object has delete permission then we are done + */ + if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, + &zpcheck_privs, B_FALSE, cr)) == 0) + return (0); + + ASSERT(dzp_error && zp_error); + + if (!dzpcheck_privs) + return (dzp_error); + if (!zpcheck_privs) + return (zp_error); + + /* + * Second row + * + * If directory returns EACCES then delete_child was denied + * due to deny delete_child. In this case send the request through + * secpolicy_vnode_remove(). We don't use zfs_delete_final_check() + * since that *could* allow the delete based on write/execute permission + * and we want delete permissions to override write/execute. + */ + + if (dzp_error == EACCES) + return (secpolicy_vnode_remove(cr)); + + /* + * Third Row + * only need to see if we have write/execute on directory. + */ + + dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA, + &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr); + + if (dzp_error != 0 && !dzpcheck_privs) + return (dzp_error); + + /* + * Fourth row + */ + + available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE; + available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC; + + return (zfs_delete_final_check(zp, dzp, available_perms, cr)); + +} + +int +zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, + znode_t *tzp, cred_t *cr) +{ + int add_perm; + int error; + + if (szp->z_pflags & ZFS_AV_QUARANTINED) + return (EACCES); + + add_perm = (ZTOV(szp)->v_type == VDIR) ? + ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; + + /* + * Rename permissions are combination of delete permission + + * add file/subdir permission. + */ + + /* + * first make sure we do the delete portion. 
+ * + * If that succeeds then check for add_file/add_subdir permissions + */ + + if (error = zfs_zaccess_delete(sdzp, szp, cr)) + return (error); + + /* + * If we have a tzp, see if we can delete it? + */ + if (tzp) { + if (error = zfs_zaccess_delete(tdzp, tzp, cr)) + return (error); + } + + /* + * Now check for add permissions + */ + error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr); + + return (error); +} diff --git a/uts/common/fs/zfs/zfs_byteswap.c b/uts/common/fs/zfs/zfs_byteswap.c new file mode 100644 index 000000000000..acf632bdbeff --- /dev/null +++ b/uts/common/fs/zfs/zfs_byteswap.c @@ -0,0 +1,199 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/vfs.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_sa.h> +#include <sys/zfs_acl.h> + +void +zfs_oldace_byteswap(ace_t *ace, int ace_cnt) +{ + int i; + + for (i = 0; i != ace_cnt; i++, ace++) { + ace->a_who = BSWAP_32(ace->a_who); + ace->a_access_mask = BSWAP_32(ace->a_access_mask); + ace->a_flags = BSWAP_16(ace->a_flags); + ace->a_type = BSWAP_16(ace->a_type); + } +} + +/* + * swap ace_t and ace_oject_t + */ +void +zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) +{ + caddr_t end; + caddr_t ptr; + zfs_ace_t *zacep; + ace_t *acep; + uint16_t entry_type; + size_t entry_size; + int ace_type; + + end = (caddr_t)buf + size; + ptr = buf; + + while (ptr < end) { + if (zfs_layout) { + /* + * Avoid overrun. Embedded aces can have one + * of several sizes. We don't know exactly + * how many our present, only the size of the + * buffer containing them. That size may be + * larger than needed to hold the aces + * present. As long as we do not do any + * swapping beyond the end of our block we are + * okay. It it safe to swap any non-ace data + * within the block since it is just zeros. 
+ */ + if (ptr + sizeof (zfs_ace_hdr_t) > end) { + break; + } + zacep = (zfs_ace_t *)ptr; + zacep->z_hdr.z_access_mask = + BSWAP_32(zacep->z_hdr.z_access_mask); + zacep->z_hdr.z_flags = BSWAP_16(zacep->z_hdr.z_flags); + ace_type = zacep->z_hdr.z_type = + BSWAP_16(zacep->z_hdr.z_type); + entry_type = zacep->z_hdr.z_flags & ACE_TYPE_FLAGS; + } else { + /* Overrun avoidance */ + if (ptr + sizeof (ace_t) > end) { + break; + } + acep = (ace_t *)ptr; + acep->a_access_mask = BSWAP_32(acep->a_access_mask); + acep->a_flags = BSWAP_16(acep->a_flags); + ace_type = acep->a_type = BSWAP_16(acep->a_type); + acep->a_who = BSWAP_32(acep->a_who); + entry_type = acep->a_flags & ACE_TYPE_FLAGS; + } + switch (entry_type) { + case ACE_OWNER: + case ACE_EVERYONE: + case (ACE_IDENTIFIER_GROUP | ACE_GROUP): + entry_size = zfs_layout ? + sizeof (zfs_ace_hdr_t) : sizeof (ace_t); + break; + case ACE_IDENTIFIER_GROUP: + default: + /* Overrun avoidance */ + if (zfs_layout) { + if (ptr + sizeof (zfs_ace_t) <= end) { + zacep->z_fuid = BSWAP_64(zacep->z_fuid); + } else { + entry_size = sizeof (zfs_ace_t); + break; + } + } + switch (ace_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + entry_size = zfs_layout ? + sizeof (zfs_object_ace_t) : + sizeof (ace_object_t); + break; + default: + entry_size = zfs_layout ? 
sizeof (zfs_ace_t) : + sizeof (ace_t); + break; + } + } + ptr = ptr + entry_size; + } +} + +/* ARGSUSED */ +void +zfs_oldacl_byteswap(void *buf, size_t size) +{ + int cnt; + + /* + * Arggh, since we don't know how many ACEs are in + * the array, we have to swap the entire block + */ + + cnt = size / sizeof (ace_t); + + zfs_oldace_byteswap((ace_t *)buf, cnt); +} + +/* ARGSUSED */ +void +zfs_acl_byteswap(void *buf, size_t size) +{ + zfs_ace_byteswap(buf, size, B_TRUE); +} + +void +zfs_znode_byteswap(void *buf, size_t size) +{ + znode_phys_t *zp = buf; + + ASSERT(size >= sizeof (znode_phys_t)); + + zp->zp_crtime[0] = BSWAP_64(zp->zp_crtime[0]); + zp->zp_crtime[1] = BSWAP_64(zp->zp_crtime[1]); + zp->zp_atime[0] = BSWAP_64(zp->zp_atime[0]); + zp->zp_atime[1] = BSWAP_64(zp->zp_atime[1]); + zp->zp_mtime[0] = BSWAP_64(zp->zp_mtime[0]); + zp->zp_mtime[1] = BSWAP_64(zp->zp_mtime[1]); + zp->zp_ctime[0] = BSWAP_64(zp->zp_ctime[0]); + zp->zp_ctime[1] = BSWAP_64(zp->zp_ctime[1]); + zp->zp_gen = BSWAP_64(zp->zp_gen); + zp->zp_mode = BSWAP_64(zp->zp_mode); + zp->zp_size = BSWAP_64(zp->zp_size); + zp->zp_parent = BSWAP_64(zp->zp_parent); + zp->zp_links = BSWAP_64(zp->zp_links); + zp->zp_xattr = BSWAP_64(zp->zp_xattr); + zp->zp_rdev = BSWAP_64(zp->zp_rdev); + zp->zp_flags = BSWAP_64(zp->zp_flags); + zp->zp_uid = BSWAP_64(zp->zp_uid); + zp->zp_gid = BSWAP_64(zp->zp_gid); + zp->zp_zap = BSWAP_64(zp->zp_zap); + zp->zp_pad[0] = BSWAP_64(zp->zp_pad[0]); + zp->zp_pad[1] = BSWAP_64(zp->zp_pad[1]); + zp->zp_pad[2] = BSWAP_64(zp->zp_pad[2]); + + zp->zp_acl.z_acl_extern_obj = BSWAP_64(zp->zp_acl.z_acl_extern_obj); + zp->zp_acl.z_acl_size = BSWAP_32(zp->zp_acl.z_acl_size); + zp->zp_acl.z_acl_version = BSWAP_16(zp->zp_acl.z_acl_version); + zp->zp_acl.z_acl_count = BSWAP_16(zp->zp_acl.z_acl_count); + if (zp->zp_acl.z_acl_version == ZFS_ACL_VERSION) { + zfs_acl_byteswap((void *)&zp->zp_acl.z_ace_data[0], + ZFS_ACE_SPACE); + } else { + zfs_oldace_byteswap((ace_t *)&zp->zp_acl.z_ace_data[0], + 
ACE_SLOT_CNT); + } +} diff --git a/uts/common/fs/zfs/zfs_ctldir.c b/uts/common/fs/zfs/zfs_ctldir.c new file mode 100644 index 000000000000..815f8895e702 --- /dev/null +++ b/uts/common/fs/zfs/zfs_ctldir.c @@ -0,0 +1,1349 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * ZFS control directory (a.k.a. ".zfs") + * + * This directory provides a common location for all ZFS meta-objects. + * Currently, this is only the 'snapshot' directory, but this may expand in the + * future. The elements are built using the GFS primitives, as the hierarchy + * does not actually exist on disk. + * + * For 'snapshot', we don't want to have all snapshots always mounted, because + * this would take up a huge amount of space in /etc/mnttab. We have three + * types of objects: + * + * ctldir ------> snapshotdir -------> snapshot + * | + * | + * V + * mounted fs + * + * The 'snapshot' node contains just enough information to lookup '..' and act + * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we + * perform an automount of the underlying filesystem and return the + * corresponding vnode. 
+ * + * All mounts are handled automatically by the kernel, but unmounts are + * (currently) handled from user land. The main reason is that there is no + * reliable way to auto-unmount the filesystem when it's "no longer in use". + * When the user unmounts a filesystem, we call zfsctl_unmount(), which + * unmounts any snapshots within the snapshot directory. + * + * The '.zfs', '.zfs/snapshot', and all directories created under + * '.zfs/snapshot' (i.e. '.zfs/snapshot/<snapname>') are all GFS nodes and + * share the same vfs_t as the head filesystem (what '.zfs' lives under). + * + * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>' + * (i.e. snapshots) are ZFS nodes and have their own unique vfs_t. + * However, vnodes within these mounted-on file systems have their v_vfsp + * fields set to the head filesystem to make NFS happy (see + * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t + * so that it cannot be freed until all snapshots have been unmounted.
+ */ + +#include <fs/fs_subr.h> +#include <sys/zfs_ctldir.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_vfsops.h> +#include <sys/vfs_opreg.h> +#include <sys/gfs.h> +#include <sys/stat.h> +#include <sys/dmu.h> +#include <sys/dsl_deleg.h> +#include <sys/mount.h> +#include <sys/sunddi.h> + +#include "zfs_namecheck.h" + +typedef struct zfsctl_node { + gfs_dir_t zc_gfs_private; + uint64_t zc_id; + timestruc_t zc_cmtime; /* ctime and mtime, always the same */ +} zfsctl_node_t; + +typedef struct zfsctl_snapdir { + zfsctl_node_t sd_node; + kmutex_t sd_lock; + avl_tree_t sd_snaps; +} zfsctl_snapdir_t; + +typedef struct { + char *se_name; + vnode_t *se_root; + avl_node_t se_node; +} zfs_snapentry_t; + +static int +snapentry_compare(const void *a, const void *b) +{ + const zfs_snapentry_t *sa = a; + const zfs_snapentry_t *sb = b; + int ret = strcmp(sa->se_name, sb->se_name); + + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +vnodeops_t *zfsctl_ops_root; +vnodeops_t *zfsctl_ops_snapdir; +vnodeops_t *zfsctl_ops_snapshot; +vnodeops_t *zfsctl_ops_shares; +vnodeops_t *zfsctl_ops_shares_dir; + +static const fs_operation_def_t zfsctl_tops_root[]; +static const fs_operation_def_t zfsctl_tops_snapdir[]; +static const fs_operation_def_t zfsctl_tops_snapshot[]; +static const fs_operation_def_t zfsctl_tops_shares[]; + +static vnode_t *zfsctl_mknode_snapdir(vnode_t *); +static vnode_t *zfsctl_mknode_shares(vnode_t *); +static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset); +static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *); + +static gfs_opsvec_t zfsctl_opsvec[] = { + { ".zfs", zfsctl_tops_root, &zfsctl_ops_root }, + { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir }, + { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot }, + { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir }, + { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares }, + { NULL } +}; + +/* + * Root 
directory elements. We only have two entries + * snapshot and shares. + */ +static gfs_dirent_t zfsctl_root_entries[] = { + { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE }, + { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE }, + { NULL } +}; + +/* include . and .. in the calculation */ +#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \ + sizeof (gfs_dirent_t)) + 1) + + +/* + * Initialize the various GFS pieces we'll need to create and manipulate .zfs + * directories. This is called from the ZFS init routine, and initializes the + * vnode ops vectors that we'll be using. + */ +void +zfsctl_init(void) +{ + VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0); +} + +void +zfsctl_fini(void) +{ + /* + * Remove vfsctl vnode ops + */ + if (zfsctl_ops_root) + vn_freevnodeops(zfsctl_ops_root); + if (zfsctl_ops_snapdir) + vn_freevnodeops(zfsctl_ops_snapdir); + if (zfsctl_ops_snapshot) + vn_freevnodeops(zfsctl_ops_snapshot); + if (zfsctl_ops_shares) + vn_freevnodeops(zfsctl_ops_shares); + if (zfsctl_ops_shares_dir) + vn_freevnodeops(zfsctl_ops_shares_dir); + + zfsctl_ops_root = NULL; + zfsctl_ops_snapdir = NULL; + zfsctl_ops_snapshot = NULL; + zfsctl_ops_shares = NULL; + zfsctl_ops_shares_dir = NULL; +} + +boolean_t +zfsctl_is_node(vnode_t *vp) +{ + return (vn_matchops(vp, zfsctl_ops_root) || + vn_matchops(vp, zfsctl_ops_snapdir) || + vn_matchops(vp, zfsctl_ops_snapshot) || + vn_matchops(vp, zfsctl_ops_shares) || + vn_matchops(vp, zfsctl_ops_shares_dir)); + +} + +/* + * Return the inode number associated with the 'snapshot' or + * 'shares' directory. + */ +/* ARGSUSED */ +static ino64_t +zfsctl_root_inode_cb(vnode_t *vp, int index) +{ + zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; + + ASSERT(index <= 2); + + if (index == 0) + return (ZFSCTL_INO_SNAPDIR); + + return (zfsvfs->z_shares_dir); +} + +/* + * Create the '.zfs' directory. This directory is cached as part of the VFS + * structure. This results in a hold on the vfs_t. 
The code in zfs_umount() + * therefore checks against a vfs_count of 2 instead of 1. This reference + * is removed when the ctldir is destroyed in the unmount. + */ +void +zfsctl_create(zfsvfs_t *zfsvfs) +{ + vnode_t *vp, *rvp; + zfsctl_node_t *zcp; + uint64_t crtime[2]; + + ASSERT(zfsvfs->z_ctldir == NULL); + + vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs, + zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries, + zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL); + zcp = vp->v_data; + zcp->zc_id = ZFSCTL_INO_ROOT; + + VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0); + VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), + &crtime, sizeof (crtime))); + ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime); + VN_RELE(rvp); + + /* + * We're only faking the fact that we have a root of a filesystem for + * the sake of the GFS interfaces. Undo the flag manipulation it did + * for us. + */ + vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT); + + zfsvfs->z_ctldir = vp; +} + +/* + * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. + * There might still be more references if we were force unmounted, but only + * new zfs_inactive() calls can occur and they don't reference .zfs + */ +void +zfsctl_destroy(zfsvfs_t *zfsvfs) +{ + VN_RELE(zfsvfs->z_ctldir); + zfsvfs->z_ctldir = NULL; +} + +/* + * Given a root znode, retrieve the associated .zfs directory. + * Add a hold to the vnode and return it. + */ +vnode_t * +zfsctl_root(znode_t *zp) +{ + ASSERT(zfs_has_ctldir(zp)); + VN_HOLD(zp->z_zfsvfs->z_ctldir); + return (zp->z_zfsvfs->z_ctldir); +} + +/* + * Common open routine. Disallow any write access. + */ +/* ARGSUSED */ +static int +zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct) +{ + if (flags & FWRITE) + return (EACCES); + + return (0); +} + +/* + * Common close routine. Nothing to do here. 
+ */
+/* ARGSUSED */
+static int
+zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
+    cred_t *cr, caller_context_t *ct)
+{
+	return (0);
+}
+
+/*
+ * Access check shared by all .zfs nodes.  Any request that includes a
+ * write permission is refused with EACCES; everything else is allowed.
+ */
+/* ARGSUSED */
+static int
+zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	/* The set of "write" bits depends on which mask flavour is in use. */
+	int write_bits = (flags & V_ACE_MASK) ? ACE_ALL_WRITE_PERMS : VWRITE;
+
+	return ((mode & write_bits) ? EACCES : 0);
+}
+
+/*
+ * Fill in the attributes that are the same for every .zfs node.  The
+ * node-specific fields are left to the individual getattr routines.
+ */
+static void
+zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
+{
+	timestruc_t now;
+
+	/* A directory owned by uid/gid 0, readable and searchable by all. */
+	vap->va_type = VDIR;
+	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
+	    S_IROTH | S_IXOTH;
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_rdev = 0;
+	vap->va_seq = 0;
+	vap->va_fsid = vp->v_vfsp->vfs_dev;
+
+	/*
+	 * Purely virtual objects have neither a blocksize nor any
+	 * allocated blocks.
+	 */
+	vap->va_blksize = 0;
+	vap->va_nblocks = 0;
+
+	/*
+	 * The access time is always the current time.
+	 */
+	gethrestime(&now);
+	vap->va_atime = now;
+}
+
+/*
+ * Build a short FID for a .zfs node from its zc_id.  If the caller's
+ * buffer is smaller than SHORT_FID_LEN, the required length is stored in
+ * fid_len and ENOSPC is returned.
+ */
+/*ARGSUSED*/
+static int
+zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	zfsctl_node_t *zcp = vp->v_data;
+	uint64_t object = zcp->zc_id;
+	zfid_short_t *zfid;
+	int i;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (fidp->fid_len < SHORT_FID_LEN) {
+		fidp->fid_len = SHORT_FID_LEN;
+		ZFS_EXIT(zfsvfs);
+		return (ENOSPC);
+	}
+
+	zfid = (zfid_short_t *)fidp;
+	zfid->zf_len = SHORT_FID_LEN;
+
+	/* Store the object number one byte at a time, low byte first. */
+	for (i = 0; i < sizeof (zfid->zf_object); i++) {
+		zfid->zf_object[i] = (uint8_t)object;
+		object >>= 8;
+	}
+
+	/* .zfs znodes always have a generation number of 0 */
+	for (i = 0; i < sizeof (zfid->zf_gen); i++)
+		zfid->zf_gen[i] = 0;
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+
+/*
+ * FID routine for '.zfs/shares': hand the request to the znode recorded
+ * in z_shares_dir, or fail with ENOTSUP when there is none.
+ */
+/*ARGSUSED*/
+static int
+zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (zfsvfs->z_shares_dir == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (ENOTSUP);
+	}
+
+	error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
+	if (error != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	error = VOP_FID(ZTOV(dzp), fidp, ct);
+	VN_RELE(ZTOV(dzp));
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+/*
+ * .zfs inode namespace
+ *
+ * We need to generate unique inode numbers for all files and directories
+ * within the .zfs pseudo-filesystem.  We use the following scheme:
+ *
+ * 	ENTRY			ZFSCTL_INODE
+ * 	.zfs			1
+ * 	.zfs/snapshot		2
+ * 	.zfs/snapshot/<snap>	objectid(snap)
+ */
+
+#define	ZFSCTL_INO_SNAP(id)	(id)
+
+/*
+ * Get root directory attributes.
+ */
+/* ARGSUSED */
+static int
+zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	zfsctl_node_t *zcp = vp->v_data;
+
+	ZFS_ENTER(zfsvfs);
+	vap->va_nodeid = ZFSCTL_INO_ROOT;
+	vap->va_nlink = vap->va_size = NROOT_ENTRIES;
+	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
+
+	zfsctl_common_getattr(vp, vap);
+	ZFS_EXIT(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * Lookup in the '.zfs' directory.  ".." is special-cased to return the
+ * root of the filesystem; every other name is handed to GFS.
+ */
+/* ARGSUSED */
+int
+zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
+	int err;
+
+	/*
+	 * No extended attributes allowed under .zfs
+	 */
+	if (flags & LOOKUP_XATTR)
+		return (EINVAL);
+
+	ZFS_ENTER(zfsvfs);
+
+	if (strcmp(nm, "..") == 0) {
+		err = VFS_ROOT(dvp->v_vfsp, vpp);
+	} else {
+		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
+		    cr, ct, direntflags, realpnp);
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	return (err);
+}
+
+/*
+ * pathconf for .zfs nodes.  _PC_ACL_ENABLED is answered here; every other
+ * query is passed on to fs_pathconf().
+ */
+static int
+zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
+    caller_context_t *ct)
+{
+	/*
+	 * We only care about ACL_ENABLED so that libsec can
+	 * display ACL correctly and not default to POSIX draft.
+	 */
+	if (cmd == _PC_ACL_ENABLED) {
+		*valp = _ACL_ACE_ENABLED;
+		return (0);
+	}
+
+	return (fs_pathconf(vp, cmd, valp, cr, ct));
+}
+
+static const fs_operation_def_t zfsctl_tops_root[] = {
+	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
+	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
+	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
+	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_root_getattr }	},
+	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
+	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } 	},
+	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_root_lookup }	},
+	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
+	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
+	{ VOPNAME_PATHCONF,	{ .vop_pathconf = zfsctl_pathconf }	},
+	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid	}	},
+	{ NULL }
+};
+
+/*
+ * Build the full snapshot name "<objset name>@<name>" in zname, a buffer
+ * of len bytes.  Returns EILSEQ if name fails snapshot_namecheck(),
+ * ENAMETOOLONG if the result would not fit, and 0 on success.
+ */
+static int
+zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
+{
+	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
+
+	if (snapshot_namecheck(name, NULL, NULL) != 0)
+		return (EILSEQ);
+	dmu_objset_name(os, zname);
+	if (strlen(zname) + 1 + strlen(name) >= len)
+		return (ENAMETOOLONG);
+	(void) strcat(zname, "@");
+	(void) strcat(zname, name);
+	return (0);
+}
+
+/*
+ * Unmount the snapshot mounted on sep->se_root and, on success, dispose
+ * of the mount point vnode and free the entry.  On failure the entry is
+ * left untouched and the error is returned.  The callers in this file
+ * remove sep from the AVL tree first and re-add it when this fails.
+ */
+static int
+zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
+{
+	vnode_t *svp = sep->se_root;
+	int error;
+
+	ASSERT(vn_ismntpt(svp));
+
+	/* this will be dropped by dounmount() */
+	if ((error = vn_vfswlock(svp)) != 0)
+		return (error);
+
+	VN_HOLD(svp);
+	error = dounmount(vn_mountedvfs(svp), fflags, cr);
+	if (error) {
+		VN_RELE(svp);
+		return (error);
+	}
+
+	/*
+	 * We can't use VN_RELE(), as that will try to invoke
+	 * zfsctl_snapdir_inactive(), which would cause us to destroy
+	 * the sd_lock mutex held by our caller.
+	 */
+	ASSERT(svp->v_count == 1);
+	gfs_vop_inactive(svp, cr, NULL);
+
+	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
+	kmem_free(sep, sizeof (zfs_snapentry_t));
+
+	return (0);
+}
+
+/*
+ * Replace everything after the last occurrence of sep in path (a
+ * NUL-terminated string held in a buffer of pathlen bytes) with nm.  The
+ * write is bounded by the buffer size, so a name that does not fit is
+ * truncated rather than written past the end of the buffer.
+ */
+static void
+zfsctl_replace_tail(char *path, size_t pathlen, char sep, const char *nm)
+{
+	char *tail;
+
+	VERIFY((tail = strrchr(path, sep)) != NULL);
+	tail++;
+	ASSERT3U((tail - path) + strlen(nm), <, pathlen);
+	(void) strlcpy(tail, nm, pathlen - (tail - path));
+}
+
+/*
+ * Rename the in-core state of a mounted snapshot: re-key its entry in the
+ * snapdir AVL tree and rewrite the last component of the mount point and
+ * resource strings of the mounted vfs.  The caller must hold sd_lock.
+ */
+static void
+zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
+{
+	avl_index_t where;
+	vfs_t *vfsp;
+	refstr_t *pathref;
+	char newpath[MAXNAMELEN];
+
+	ASSERT(MUTEX_HELD(&sdp->sd_lock));
+	ASSERT(sep != NULL);
+
+	vfsp = vn_mountedvfs(sep->se_root);
+	ASSERT(vfsp != NULL);
+
+	vfs_lock_wait(vfsp);
+
+	/*
+	 * Change the name in the AVL tree.
+	 */
+	avl_remove(&sdp->sd_snaps, sep);
+	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
+	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
+	(void) strcpy(sep->se_name, nm);
+	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
+	avl_insert(&sdp->sd_snaps, sep, where);
+
+	/*
+	 * Change the current mountpoint info:
+	 * 	- update the tail of the mntpoint path
+	 *	- update the tail of the resource path
+	 *
+	 * strlcpy() is used for the copies because strncpy() leaves the
+	 * buffer unterminated when the source fills it, and the buffer is
+	 * searched and appended to immediately afterwards.
+	 */
+	pathref = vfs_getmntpoint(vfsp);
+	(void) strlcpy(newpath, refstr_value(pathref), sizeof (newpath));
+	refstr_rele(pathref);
+	zfsctl_replace_tail(newpath, sizeof (newpath), '/', nm);
+	vfs_setmntpoint(vfsp, newpath, 0);
+
+	pathref = vfs_getresource(vfsp);
+	(void) strlcpy(newpath, refstr_value(pathref), sizeof (newpath));
+	refstr_rele(pathref);
+	zfsctl_replace_tail(newpath, sizeof (newpath), '@', nm);
+	vfs_setresource(vfsp, newpath, 0);
+
+	vfs_unlock(vfsp);
+}
+
+/*
+ * Rename a snapshot through '.zfs/snapshot'.  Both names must live in the
+ * same snapdir (EINVAL otherwise) and the source must currently have an
+ * entry in the snapdir's AVL tree (ENOENT otherwise).
+ */
+/*ARGSUSED*/
+static int
+zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
+    cred_t *cr, caller_context_t *ct, int flags)
+{
+	zfsctl_snapdir_t *sdp = sdvp->v_data;
+	zfs_snapentry_t search, *sep;
+	zfsvfs_t *zfsvfs;
+	avl_index_t where;
+	char from[MAXNAMELEN], to[MAXNAMELEN];
+	char real[MAXNAMELEN];
+	int err;
+
+	zfsvfs = sdvp->v_vfsp->vfs_data;
+	ZFS_ENTER(zfsvfs);
+
+	/* For case-insensitive requests, look up the name as stored. */
+	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+		err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
+		    MAXNAMELEN, NULL);
+		if (err == 0) {
+			snm = real;
+		} else if (err != ENOTSUP) {
+			ZFS_EXIT(zfsvfs);
+			return (err);
+		}
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
+	if (!err)
+		err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
+	if (!err)
+		err = zfs_secpolicy_rename_perms(from, to, cr);
+	if (err)
+		return (err);
+
+	/*
+	 * Cannot move snapshots out of the snapdir.
+	 */
+	if (sdvp != tdvp)
+		return (EINVAL);
+
+	if (strcmp(snm, tnm) == 0)
+		return (0);
+
+	mutex_enter(&sdp->sd_lock);
+
+	search.se_name = (char *)snm;
+	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
+		mutex_exit(&sdp->sd_lock);
+		return (ENOENT);
+	}
+
+	err = dmu_objset_rename(from, to, B_FALSE);
+	if (err == 0)
+		zfsctl_rename_snap(sdp, sep, tnm);
+
+	mutex_exit(&sdp->sd_lock);
+
+	return (err);
+}
+
+/*
+ * Destroy a snapshot through rmdir in '.zfs/snapshot'.  The snapshot must
+ * currently have an entry in the snapdir's AVL tree (ENOENT otherwise);
+ * it is force-unmounted and then destroyed.
+ */
+/* ARGSUSED */
+static int
+zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
+    caller_context_t *ct, int flags)
+{
+	zfsctl_snapdir_t *sdp = dvp->v_data;
+	zfs_snapentry_t *sep;
+	zfs_snapentry_t search;
+	zfsvfs_t *zfsvfs;
+	char snapname[MAXNAMELEN];
+	char real[MAXNAMELEN];
+	int err;
+
+	zfsvfs = dvp->v_vfsp->vfs_data;
+	ZFS_ENTER(zfsvfs);
+
+	/* For case-insensitive requests, look up the name as stored. */
+	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+
+		err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
+		    MAXNAMELEN, NULL);
+		if (err == 0) {
+			name = real;
+		} else if (err != ENOTSUP) {
+			ZFS_EXIT(zfsvfs);
+			return (err);
+		}
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
+	if (!err)
+		err = zfs_secpolicy_destroy_perms(snapname, cr);
+	if (err)
+		return (err);
+
+	mutex_enter(&sdp->sd_lock);
+
+	search.se_name = name;
+	sep = avl_find(&sdp->sd_snaps, &search, NULL);
+	if (sep) {
+		/* Put the entry back if the unmount fails. */
+		avl_remove(&sdp->sd_snaps, sep);
+		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
+		if (err)
+			avl_add(&sdp->sd_snaps, sep);
+		else
+			err = dmu_objset_destroy(snapname, B_FALSE);
+	} else {
+		err = ENOENT;
+	}
+
+	mutex_exit(&sdp->sd_lock);
+
+	return (err);
+}
+
+/*
+ * This creates a snapshot under '.zfs/snapshot'.  On success *vpp is the
+ * result of looking the new name up relative to dvp; on failure *vpp is
+ * NULL.
+ */
+/* ARGSUSED */
+static int
+zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t  **vpp,
+    cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
+{
+	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
+	char name[MAXNAMELEN];
+	int err;
+	static enum symfollow follow = NO_FOLLOW;
+	static enum uio_seg seg = UIO_SYSSPACE;
+
+	if (snapshot_namecheck(dirname, NULL, NULL) != 0)
+		return (EILSEQ);
+
+	dmu_objset_name(zfsvfs->z_os, name);
+
+	*vpp = NULL;
+
+	err = zfs_secpolicy_snapshot_perms(name, cr);
+	if (err)
+		return (err);
+
+	err = dmu_objset_snapshot(name, dirname, NULL, NULL,
+	    B_FALSE, B_FALSE, -1);
+	if (err)
+		return (err);
+
+	return (lookupnameat(dirname, seg, follow, NULL, vpp, dvp));
+}
+
+/*
+ * Lookup entry point for the 'snapshot' directory.  Try to open the
+ * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
+ * Perform a mount of the associated dataset on top of the vnode.
+ */
+/* ARGSUSED */
+static int
+zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	zfsctl_snapdir_t *sdp = dvp->v_data;
+	objset_t *snap;
+	char snapname[MAXNAMELEN];
+	char real[MAXNAMELEN];
+	char *mountpoint;
+	zfs_snapentry_t *sep, search;
+	struct mounta margs;
+	vfs_t *vfsp;
+	size_t mountpoint_len;
+	avl_index_t where;
+	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
+	int err;
+
+	/*
+	 * No extended attributes allowed under .zfs
+	 */
+	if (flags & LOOKUP_XATTR)
+		return (EINVAL);
+
+	ASSERT(dvp->v_type == VDIR);
+
+	/*
+	 * If we get a recursive call, that means we got called
+	 * from the domount() code while it was trying to look up the
+	 * spec (which looks like a local path for zfs).  We need to
+	 * add some flag to domount() to tell it not to do this lookup.
+	 */
+	if (MUTEX_HELD(&sdp->sd_lock))
+		return (ENOENT);
+
+	ZFS_ENTER(zfsvfs);
+
+	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	if (flags & FIGNORECASE) {
+		boolean_t conflict = B_FALSE;
+
+		err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
+		    MAXNAMELEN, &conflict);
+		if (err == 0) {
+			nm = real;
+		} else if (err != ENOTSUP) {
+			ZFS_EXIT(zfsvfs);
+			return (err);
+		}
+		if (realpnp)
+			(void) strlcpy(realpnp->pn_buf, nm,
+			    realpnp->pn_bufsize);
+		if (conflict && direntflags)
+			*direntflags = ED_CASE_CONFLICT;
+	}
+
+	mutex_enter(&sdp->sd_lock);
+	search.se_name = (char *)nm;
+	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
+		*vpp = sep->se_root;
+		VN_HOLD(*vpp);
+		err = traverse(vpp);
+		if (err) {
+			VN_RELE(*vpp);
+			*vpp = NULL;
+		} else if (*vpp == sep->se_root) {
+			/*
+			 * The snapshot was unmounted behind our backs,
+			 * try to remount it.  The mount spec has to be
+			 * built here, because the code below that fills in
+			 * snapname is skipped by the jump to domount.
+			 */
+			err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN,
+			    snapname);
+			if (err == 0)
+				goto domount;
+
+			/*
+			 * Drop the locks before releasing the vnode, in
+			 * the same order as the error exit at the end of
+			 * this function.
+			 */
+			mutex_exit(&sdp->sd_lock);
+			ZFS_EXIT(zfsvfs);
+			VN_RELE(*vpp);
+			*vpp = NULL;
+			return (err);
+		} else {
+			/*
+			 * VROOT was set during the traverse call.  We need
+			 * to clear it since we're pretending to be part
+			 * of our parent's vfs.
+			 */
+			(*vpp)->v_flag &= ~VROOT;
+		}
+		mutex_exit(&sdp->sd_lock);
+		ZFS_EXIT(zfsvfs);
+		return (err);
+	}
+
+	/*
+	 * The requested snapshot is not currently mounted, look it up.
+	 */
+	err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
+	if (err) {
+		mutex_exit(&sdp->sd_lock);
+		ZFS_EXIT(zfsvfs);
+		/*
+		 * handle "ls *" or "?" in a graceful manner,
+		 * forcing EILSEQ to ENOENT.
+		 * Since shell ultimately passes "*" or "?" as name to lookup
+		 */
+		return (err == EILSEQ ? ENOENT : err);
+	}
+	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
+		mutex_exit(&sdp->sd_lock);
+		ZFS_EXIT(zfsvfs);
+		return (ENOENT);
+	}
+
+	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
+	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
+	(void) strcpy(sep->se_name, nm);
+	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
+	avl_insert(&sdp->sd_snaps, sep, where);
+
+	dmu_objset_rele(snap, FTAG);
+domount:
+	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
+	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
+	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
+	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
+	    refstr_value(dvp->v_vfsp->vfs_mntpt), nm);
+
+	margs.spec = snapname;
+	margs.dir = mountpoint;
+	margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
+	margs.fstype = "zfs";
+	margs.dataptr = NULL;
+	margs.datalen = 0;
+	margs.optptr = NULL;
+	margs.optlen = 0;
+
+	err = domount("zfs", &margs, *vpp, kcred, &vfsp);
+	kmem_free(mountpoint, mountpoint_len);
+
+	if (err == 0) {
+		/*
+		 * Return the mounted root rather than the covered mount point.
+		 * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
+		 * the ZFS vnode mounted on top of the GFS node.  This ZFS
+		 * vnode is the root of the newly created vfsp.
+		 */
+		VFS_RELE(vfsp);
+		err = traverse(vpp);
+	}
+
+	if (err == 0) {
+		/*
+		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
+		 *
+		 * This is where we lie about our v_vfsp in order to
+		 * make .zfs/snapshot/<snapname> accessible over NFS
+		 * without requiring manual mounts of <snapname>.
+		 */
+		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
+		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
+		(*vpp)->v_vfsp = zfsvfs->z_vfs;
+		(*vpp)->v_flag &= ~VROOT;
+	}
+	mutex_exit(&sdp->sd_lock);
+	ZFS_EXIT(zfsvfs);
+
+	/*
+	 * If we had an error, drop our hold on the vnode and
+	 * zfsctl_snapshot_inactive() will clean up.
+	 */
+	if (err) {
+		VN_RELE(*vpp);
+		*vpp = NULL;
+	}
+	return (err);
+}
+
+/*
+ * Lookup in '.zfs/shares'.  "." and ".." are resolved by GFS; any other
+ * name is looked up in the znode recorded in z_shares_dir.  Returns
+ * ENOTSUP when the filesystem has no shares directory.
+ */
+/* ARGSUSED */
+static int
+zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	if (zfsvfs->z_shares_dir == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (ENOTSUP);
+	}
+
+	/*
+	 * Only release dzp when zfs_zget() actually returned one; on
+	 * failure dzp is not initialized.
+	 */
+	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
+		error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
+		    flags, rdir, cr, ct, direntflags, realpnp);
+		VN_RELE(ZTOV(dzp));
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * GFS readdir callback for '.zfs/snapshot'.  Fetches the snapshot that
+ * follows the cookie in *offp and fills in the directory entry dp; the
+ * entry is an edirent_t when V_RDDIR_ENTFLAGS is set and a dirent64
+ * otherwise.  *nextp receives the cookie for the following call, and
+ * *eofp is set once there are no more snapshots.
+ */
+/* ARGSUSED */
+static int
+zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
+    offset_t *offp, offset_t *nextp, void *data, int flags)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	char snapname[MAXNAMELEN];
+	uint64_t id, cookie;
+	boolean_t case_conflict;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	cookie = *offp;
+	error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
+	    &cookie, &case_conflict);
+	if (error) {
+		ZFS_EXIT(zfsvfs);
+		if (error == ENOENT) {
+			*eofp = 1;
+			return (0);
+		}
+		return (error);
+	}
+
+	if (flags & V_RDDIR_ENTFLAGS) {
+		edirent_t *eodp = dp;
+
+		(void) strcpy(eodp->ed_name, snapname);
+		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
+		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
+	} else {
+		struct dirent64 *odp = dp;
+
+		(void) strcpy(odp->d_name, snapname);
+		odp->d_ino = ZFSCTL_INO_SNAP(id);
+	}
+	*nextp = cookie;
+
+	ZFS_EXIT(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * readdir for '.zfs/shares': forwarded to the znode recorded in
+ * z_shares_dir.  Returns ENOTSUP when the filesystem has no shares
+ * directory, and ENOENT (with *eofp set) when the znode cannot be
+ * obtained.
+ */
+/* ARGSUSED */
+static int
+zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
+    caller_context_t *ct, int flags)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (zfsvfs->z_shares_dir == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (ENOTSUP);
+	}
+	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
+		error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
+		VN_RELE(ZTOV(dzp));
+	} else {
+		*eofp = 1;
+		error = ENOENT;
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * pvp is the '.zfs' directory (zfsctl_node_t).
+ * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
+ *
+ * This function is the callback to create a GFS vnode for '.zfs/snapshot'
+ * when a lookup is performed on .zfs for "snapshot".
+ */
+vnode_t *
+zfsctl_mknode_snapdir(vnode_t *pvp)
+{
+	zfsctl_node_t *parent = pvp->v_data;
+	zfsctl_snapdir_t *snapdir;
+	vnode_t *vp;
+
+	vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
+	    zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
+	    zfsctl_snapdir_readdir_cb, NULL);
+	snapdir = vp->v_data;
+	snapdir->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
+	snapdir->sd_node.zc_cmtime = parent->zc_cmtime;
+
+	/* The lock and the tree of snapshot entries start out empty. */
+	mutex_init(&snapdir->sd_lock, NULL, MUTEX_DEFAULT, NULL);
+	avl_create(&snapdir->sd_snaps, snapentry_compare,
+	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
+	return (vp);
+}
+
+/*
+ * Create the GFS vnode for '.zfs/shares' under pvp, the '.zfs' directory.
+ * The new node inherits the timestamp of its parent.
+ */
+vnode_t *
+zfsctl_mknode_shares(vnode_t *pvp)
+{
+	zfsctl_node_t *parent = pvp->v_data;
+	zfsctl_node_t *node;
+	vnode_t *vp;
+
+	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
+	    zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
+	    NULL, NULL);
+	node = vp->v_data;
+	node->zc_cmtime = parent->zc_cmtime;
+	return (vp);
+
+}
+
+/*
+ * getattr for '.zfs/shares': forwarded to the znode recorded in
+ * z_shares_dir, or ENOTSUP when there is none.
+ */
+/* ARGSUSED */
+static int
+zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+	if (zfsvfs->z_shares_dir == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (ENOTSUP);
+	}
+
+	error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
+	if (error != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
+	VN_RELE(ZTOV(dzp));
+	ZFS_EXIT(zfsvfs);
+	return (error);
+
+
+}
+
+/*
+ * getattr for '.zfs/snapshot'.  The link count and size are the number
+ * of entries in the snapdir's AVL tree plus two; the timestamps come
+ * from dmu_objset_snap_cmtime().
+ */
+/* ARGSUSED */
+static int
+zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	zfsctl_snapdir_t *snapdir = vp->v_data;
+
+	ZFS_ENTER(zfsvfs);
+	zfsctl_common_getattr(vp, vap);
+	vap->va_nodeid = gfs_file_inode(vp);
+	vap->va_nlink = vap->va_size = avl_numnodes(&snapdir->sd_snaps) + 2;
+	vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
+	ZFS_EXIT(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * inactive for '.zfs/snapshot'.  When gfs_dir_inactive() hands back the
+ * node's private data, tear down the lock and the (by then empty) tree
+ * and free the structure.
+ */
+/* ARGSUSED */
+static void
+zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+	zfsctl_snapdir_t *snapdir = vp->v_data;
+	void *data;
+
+	data = gfs_dir_inactive(vp);
+	if (data == NULL)
+		return;
+
+	ASSERT(avl_numnodes(&snapdir->sd_snaps) == 0);
+	mutex_destroy(&snapdir->sd_lock);
+	avl_destroy(&snapdir->sd_snaps);
+	kmem_free(data, sizeof (zfsctl_snapdir_t));
+}
+
+static const fs_operation_def_t zfsctl_tops_snapdir[] = {
+	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
+	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
+	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
+	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_snapdir_getattr } },
+	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
+	{ VOPNAME_RENAME,	{ .vop_rename = zfsctl_snapdir_rename }	},
+	{ VOPNAME_RMDIR,	{ .vop_rmdir = zfsctl_snapdir_remove }	},
+	{ VOPNAME_MKDIR,	{ .vop_mkdir = zfsctl_snapdir_mkdir }	},
+	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir }	},
+	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_snapdir_lookup }	},
+	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
+	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapdir_inactive } },
+	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid }	},
+	{ NULL }
+};
+
+static const fs_operation_def_t zfsctl_tops_shares[] = {
+	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
+	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
+	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
+	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_shares_getattr } },
+	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
+	{ VOPNAME_READDIR,	{ .vop_readdir = zfsctl_shares_readdir } },
+	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_shares_lookup }	},
+	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
+	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
+	{ VOPNAME_FID,		{ .vop_fid = zfsctl_shares_fid } },
+	{ NULL }
+};
+
+/*
+ * pvp is the GFS vnode '.zfs/snapshot'.
+ *
+ * This creates a GFS node under '.zfs/snapshot' representing each
+ * snapshot.  This newly created GFS node is what we mount snapshot
+ * vfs_t's ontop of.
+ */
+static vnode_t *
+zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
+{
+	vnode_t *vp;
+	zfsctl_node_t *zcp;
+
+	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
+	    zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
+	zcp = vp->v_data;
+	zcp->zc_id = objset;
+
+	return (vp);
+}
+
+/*
+ * inactive routine for a snapshot mount point node under '.zfs/snapshot'.
+ *
+ * If the node has picked up another hold in the meantime there is nothing
+ * to tear down.  Otherwise its entry is removed from the parent snapdir's
+ * AVL tree and freed, and the vnode itself is handed to
+ * gfs_vop_inactive() for disposal.
+ */
+static void
+zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+	zfsctl_snapdir_t *sdp;
+	zfs_snapentry_t *sep, *next;
+	vnode_t *dvp;
+
+	/* The lookup of ".." returns dvp held; every exit must release it. */
+	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
+	sdp = dvp->v_data;
+
+	mutex_enter(&sdp->sd_lock);
+
+	/*
+	 * NOTE(review): this assumes the usual VOP_INACTIVE contract, where
+	 * the caller's hold is still counted in v_count on entry and it is
+	 * up to us to drop it -- confirm against vn_rele().  If somebody
+	 * else has taken a hold in the meantime, give ours back and leave
+	 * the node alone.
+	 */
+	mutex_enter(&vp->v_lock);
+	if (vp->v_count > 1) {
+		vp->v_count--;
+		mutex_exit(&vp->v_lock);
+		mutex_exit(&sdp->sd_lock);
+		VN_RELE(dvp);
+		return;
+	}
+	mutex_exit(&vp->v_lock);
+	ASSERT(!vn_ismntpt(vp));
+
+	/* Find and free the snapdir entry that refers to this vnode. */
+	sep = avl_first(&sdp->sd_snaps);
+	while (sep != NULL) {
+		next = AVL_NEXT(&sdp->sd_snaps, sep);
+
+		if (sep->se_root == vp) {
+			avl_remove(&sdp->sd_snaps, sep);
+			kmem_free(sep->se_name, strlen(sep->se_name) + 1);
+			kmem_free(sep, sizeof (zfs_snapentry_t));
+			break;
+		}
+		sep = next;
+	}
+	ASSERT(sep != NULL);
+
+	mutex_exit(&sdp->sd_lock);
+	VN_RELE(dvp);
+
+	/*
+	 * Dispose of the vnode for the snapshot mount point.
+	 * This is safe to do because once this entry has been removed
+	 * from the AVL tree, it can't be found again, so cannot become
+	 * "active".  If we lookup the same name again we will end up
+	 * creating a new vnode.
+	 */
+	gfs_vop_inactive(vp, cr, ct);
+}
+
+
+/*
+ * These VP's should never see the light of day.  They should always
+ * be covered.
+ */
+static const fs_operation_def_t zfsctl_tops_snapshot[] = {
+	{ VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapshot_inactive } },
+	{ NULL }
+};
+
+/*
+ * Find the zfsvfs_t of the snapshot with object set id objsetid among
+ * the entries currently in the '.zfs/snapshot' AVL tree of vfsp.  Returns
+ * EINVAL when no entry has that id or when nothing is mounted on the
+ * matching entry's node.
+ */
+int
+zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	vnode_t *dvp, *vp;
+	zfsctl_snapdir_t *sdp;
+	zfsctl_node_t *zcp;
+	zfs_snapentry_t *sep;
+	int error;
+
+	ASSERT(zfsvfs->z_ctldir != NULL);
+	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
+	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
+	if (error != 0)
+		return (error);
+	sdp = dvp->v_data;
+
+	mutex_enter(&sdp->sd_lock);
+	for (sep = avl_first(&sdp->sd_snaps); sep != NULL;
+	    sep = AVL_NEXT(&sdp->sd_snaps, sep)) {
+		vp = sep->se_root;
+		zcp = vp->v_data;
+		if (zcp->zc_id == objsetid)
+			break;
+	}
+
+	if (sep == NULL) {
+		mutex_exit(&sdp->sd_lock);
+		VN_RELE(dvp);
+		return (EINVAL);
+	}
+
+	VN_HOLD(vp);
+	/*
+	 * Return the mounted root rather than the covered mount point.
+	 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
+	 * and returns the ZFS vnode mounted on top of the GFS node.
+	 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
+	 */
+	error = traverse(&vp);
+	if (error == 0) {
+		/* Getting the same vnode back means nothing is mounted. */
+		if (vp == sep->se_root)
+			error = EINVAL;
+		else
+			*zfsvfsp = VTOZ(vp)->z_zfsvfs;
+	}
+	mutex_exit(&sdp->sd_lock);
+	VN_RELE(vp);
+
+	VN_RELE(dvp);
+
+	return (error);
+}
+
+/*
+ * Unmount any snapshots for the given filesystem.  This is called from
+ * zfs_umount() - if we have a ctldir, then go through and unmount all the
+ * snapshots.
+ */
+int
+zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	vnode_t *dvp;
+	zfsctl_snapdir_t *sdp;
+	zfs_snapentry_t *sep, *next;
+	int error;
+
+	ASSERT(zfsvfs->z_ctldir != NULL);
+	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
+	    NULL, 0, NULL, cr, NULL, NULL, NULL);
+	if (error != 0)
+		return (error);
+	sdp = dvp->v_data;
+
+	mutex_enter(&sdp->sd_lock);
+
+	/*
+	 * The successor is fetched before each unmount because a
+	 * successful unmount frees the current entry.
+	 */
+	for (sep = avl_first(&sdp->sd_snaps); sep != NULL; sep = next) {
+		next = AVL_NEXT(&sdp->sd_snaps, sep);
+
+		/*
+		 * If this snapshot is not mounted, then it must
+		 * have just been unmounted by somebody else; leave its
+		 * entry alone, it is cleaned up elsewhere.
+		 */
+		if (!vn_ismntpt(sep->se_root))
+			continue;
+
+		/* Put the entry back and stop if the unmount fails. */
+		avl_remove(&sdp->sd_snaps, sep);
+		error = zfsctl_unmount_snap(sep, fflags, cr);
+		if (error) {
+			avl_add(&sdp->sd_snaps, sep);
+			break;
+		}
+	}
+
+	mutex_exit(&sdp->sd_lock);
+	VN_RELE(dvp);
+
+	return (error);
+}
diff --git a/uts/common/fs/zfs/zfs_debug.c b/uts/common/fs/zfs/zfs_debug.c
new file mode 100644
index 000000000000..d0f411a99350
--- /dev/null
+++ b/uts/common/fs/zfs/zfs_debug.c
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+
+list_t zfs_dbgmsgs;
+int zfs_dbgmsg_size;
+kmutex_t zfs_dbgmsgs_lock;
+int zfs_dbgmsg_maxsize = 1<<20; /* 1MB */
+
+/*
+ * Set up the (initially empty) message list and the lock guarding it.
+ */
+void
+zfs_dbgmsg_init(void)
+{
+	list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
+	    offsetof(zfs_dbgmsg_t, zdm_node));
+	mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * Free every message still on the list and destroy the lock.  Once the
+ * list is empty the running size total must be back at zero.
+ */
+void
+zfs_dbgmsg_fini(void)
+{
+	zfs_dbgmsg_t *msg;
+
+	for (;;) {
+		int msgsize;
+
+		msg = list_remove_head(&zfs_dbgmsgs);
+		if (msg == NULL)
+			break;
+
+		msgsize = sizeof (zfs_dbgmsg_t) + strlen(msg->zdm_msg);
+		kmem_free(msg, msgsize);
+		zfs_dbgmsg_size -= msgsize;
+	}
+	mutex_destroy(&zfs_dbgmsgs_lock);
+	ASSERT3U(zfs_dbgmsg_size, ==, 0);
+}
+
+/*
+ * Format a message and append it to the in-core debug message list.  The
+ * oldest messages are discarded while the total size of the list exceeds
+ * zfs_dbgmsg_maxsize.
+ *
+ * Print these messages by running:
+ *	echo ::zfs_dbgmsg | mdb -k
+ *
+ * Monitor these messages by running:
+ * 	dtrace -q -n 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
+ */
+void
+zfs_dbgmsg(const char *fmt, ...)
+{
+	int msglen, entsize;
+	va_list adx;
+	zfs_dbgmsg_t *msg;
+
+	/* First pass: measure the formatted message. */
+	va_start(adx, fmt);
+	msglen = vsnprintf(NULL, 0, fmt, adx);
+	va_end(adx);
+
+	/*
+	 * sizeof (zfs_dbgmsg_t) already includes one byte of string,
+	 * which is used for the terminating null.
+	 */
+	entsize = sizeof (zfs_dbgmsg_t) + msglen;
+	msg = kmem_alloc(entsize, KM_SLEEP);
+	msg->zdm_timestamp = gethrestime_sec();
+
+	/* Second pass: format into the freshly allocated entry. */
+	va_start(adx, fmt);
+	(void) vsnprintf(msg->zdm_msg, msglen + 1, fmt, adx);
+	va_end(adx);
+
+	DTRACE_PROBE1(zfs__dbgmsg, char *, msg->zdm_msg);
+
+	mutex_enter(&zfs_dbgmsgs_lock);
+	list_insert_tail(&zfs_dbgmsgs, msg);
+	zfs_dbgmsg_size += entsize;
+	while (zfs_dbgmsg_size > zfs_dbgmsg_maxsize) {
+		zfs_dbgmsg_t *oldest = list_remove_head(&zfs_dbgmsgs);
+		int oldsize = sizeof (zfs_dbgmsg_t) + strlen(oldest->zdm_msg);
+
+		kmem_free(oldest, oldsize);
+		zfs_dbgmsg_size -= oldsize;
+	}
+	mutex_exit(&zfs_dbgmsgs_lock);
+}
diff --git a/uts/common/fs/zfs/zfs_dir.c b/uts/common/fs/zfs/zfs_dir.c
new file mode 100644
index 000000000000..b06d29ab33e1
--- /dev/null
+++ b/uts/common/fs/zfs/zfs_dir.c
@@ -0,0 +1,1089 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/resource.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/mode.h>
+#include <sys/kmem.h>
+#include <sys/uio.h>
+#include <sys/pathname.h>
+#include <sys/cmn_err.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/unistd.h>
+#include <sys/sunddi.h>
+#include <sys/random.h>
+#include <sys/policy.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_acl.h>
+#include <sys/fs/zfs.h>
+#include "fs/fs_subr.h"
+#include <sys/zap.h>
+#include <sys/dmu.h>
+#include <sys/atomic.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
+#include <sys/dnlc.h>
+#include <sys/extdirent.h>
+
+/*
+ * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
+ * of names after deciding which is the appropriate lookup interface.
+ *
+ * When the file system normalizes names (z_norm is set) the normalizing
+ * lookup is used, requesting an exact match if 'exact' is set; the matched
+ * name is copied into rpnp (when supplied) and *deflags (when supplied)
+ * reports whether a case conflict was seen.  Otherwise a plain zap lookup
+ * is done.
+ *
+ * On success *zoid holds the object number of the entry.  On failure
+ * *zoid is left as the lookup left it, and if the error is ENOENT and
+ * 'update' is set, a negative entry for the name is added to the DNLC.
+ *
+ * Returns 0 on success or the errno from the zap lookup.
+ */
+static int
+zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
+    boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
+{
+	int error;
+
+	if (zfsvfs->z_norm) {
+		matchtype_t mt = MT_FIRST;
+		boolean_t conflict = B_FALSE;
+		size_t bufsz = 0;
+		char *buf = NULL;
+
+		if (rpnp) {
+			buf = rpnp->pn_buf;
+			bufsz = rpnp->pn_bufsize;
+		}
+		if (exact)
+			mt = MT_EXACT;
+		/*
+		 * In the non-mixed case we only expect there would ever
+		 * be one match, but we need to use the normalizing lookup.
+		 */
+		error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
+		    zoid, mt, buf, bufsz, &conflict);
+		if (!error && deflags)
+			*deflags = conflict ? ED_CASE_CONFLICT : 0;
+	} else {
+		error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
+	}
+
+	/*
+	 * Only post-process *zoid when the lookup produced a value; on
+	 * failure the caller's variable may never have been written.
+	 */
+	if (error == 0)
+		*zoid = ZFS_DIRENT_OBJ(*zoid);
+	else if (error == ENOENT && update)
+		dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
+
+	return (error);
+}
+
+/*
+ * Lock a directory entry.
A dirlock on <dzp, name> protects that name + * in dzp's directory zap object. As long as you hold a dirlock, you can + * assume two things: (1) dzp cannot be reaped, and (2) no other thread + * can change the zap entry for (i.e. link or unlink) this name. + * + * Input arguments: + * dzp - znode for directory + * name - name of entry to lock + * flag - ZNEW: if the entry already exists, fail with EEXIST. + * ZEXISTS: if the entry does not exist, fail with ENOENT. + * ZSHARED: allow concurrent access with other ZSHARED callers. + * ZXATTR: we want dzp's xattr directory + * ZCILOOK: On a mixed sensitivity file system, + * this lookup should be case-insensitive. + * ZCIEXACT: On a purely case-insensitive file system, + * this lookup should be case-sensitive. + * ZRENAMING: we are locking for renaming, force narrow locks + * ZHAVELOCK: Don't grab the z_name_lock for this call. The + * current thread already holds it. + * + * Output arguments: + * zpp - pointer to the znode for the entry (NULL if there isn't one) + * dlpp - pointer to the dirlock for this entry (NULL on error) + * direntflags - (case-insensitive lookup only) + * flags if multiple case-sensitive matches exist in directory + * realpnp - (case-insensitive lookup only) + * actual name matched within the directory + * + * Return value: 0 on success or errno on failure. + * + * NOTE: Always checks for, and rejects, '.' and '..'. + * NOTE: For case-insensitive file systems we take wide locks (see below), + * but return znode pointers to a single match. + */ +int +zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, + int flag, int *direntflags, pathname_t *realpnp) +{ + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_dirlock_t *dl; + boolean_t update; + boolean_t exact; + uint64_t zoid; + vnode_t *vp = NULL; + int error = 0; + int cmpflags; + + *zpp = NULL; + *dlpp = NULL; + + /* + * Verify that we are not trying to lock '.', '..', or '.zfs' + */ + if (name[0] == '.'
&& + (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || + zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) + return (EEXIST); + + /* + * Case sensitivity and normalization preferences are set when + * the file system is created. These are stored in the + * zfsvfs->z_case and zfsvfs->z_norm fields. These choices + * affect what vnodes can be cached in the DNLC, how we + * perform zap lookups, and the "width" of our dirlocks. + * + * A normal dirlock locks a single name. Note that with + * normalization a name can be composed multiple ways, but + * when normalized, these names all compare equal. A wide + * dirlock locks multiple names. We need these when the file + * system is supporting mixed-mode access. It is sometimes + * necessary to lock all case permutations of file name at + * once so that simultaneous case-insensitive/case-sensitive + * access behaves as rationally as possible. + */ + + /* + * Decide if exact matches should be requested when performing + * a zap lookup on file systems supporting case-insensitive + * access. + */ + exact = + ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || + ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); + + /* + * Only look in or update the DNLC if we are looking for the + * name on a file system that does not require normalization + * or case folding. We can also look there if we happen to be + * on a non-normalizing, mixed sensitivity file system IF we + * are looking for the exact name. + * + * Maybe can add TO-UPPERed version of name to dnlc in ci-only + * case for performance improvement? + */ + update = !zfsvfs->z_norm || + ((zfsvfs->z_case == ZFS_CASE_MIXED) && + !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); + + /* + * ZRENAMING indicates we are in a situation where we should + * take narrow locks regardless of the file system's + * preferences for normalizing and case folding.
This will + * prevent us deadlocking trying to grab the same wide lock + * twice if the two names happen to be case-insensitive + * matches. + */ + if (flag & ZRENAMING) + cmpflags = 0; + else + cmpflags = zfsvfs->z_norm; + + /* + * Wait until there are no locks on this name. + * + * Do not grab the z_name_lock if the caller already holds it + * (ZHAVELOCK). However, ZSHARED and ZHAVELOCK cannot be used + * together. + */ + ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); + if (!(flag & ZHAVELOCK)) + rw_enter(&dzp->z_name_lock, RW_READER); + + mutex_enter(&dzp->z_lock); + for (;;) { + if (dzp->z_unlinked) { + mutex_exit(&dzp->z_lock); + if (!(flag & ZHAVELOCK)) + rw_exit(&dzp->z_name_lock); + return (ENOENT); + } + for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { + if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, + U8_UNICODE_LATEST, &error) == 0) || error != 0) + break; + } + if (error != 0) { /* u8_strcmp() reported an error */ + mutex_exit(&dzp->z_lock); + if (!(flag & ZHAVELOCK)) + rw_exit(&dzp->z_name_lock); + return (ENOENT); + } + if (dl == NULL) { + /* + * Allocate a new dirlock and add it to the list. + */ + dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); + cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); + dl->dl_name = name; + dl->dl_sharecnt = 0; + dl->dl_namelock = 0; + dl->dl_namesize = 0; + dl->dl_dzp = dzp; + dl->dl_next = dzp->z_dirlocks; + dzp->z_dirlocks = dl; + break; + } + if ((flag & ZSHARED) && dl->dl_sharecnt != 0) /* share the existing dirlock */ + break; + cv_wait(&dl->dl_cv, &dzp->z_lock); /* woken by zfs_dirent_unlock() */ + } + + /* + * If the caller already held the z_name_lock (ZHAVELOCK), record + * that in the dirlock so that zfs_dirent_unlock() will not drop it. + */ + if (flag & ZHAVELOCK) + dl->dl_namelock = 1; + + if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { + /* + * We're the second shared reference to dl. Make a copy of + * dl_name in case the first thread goes away before we do. + * Note that we initialize the new name before storing its + * pointer into dl_name, because the first thread may load + * dl->dl_name at any time.
He'll either see the old value, + * which is his, or the new shared copy; either is OK. + */ + dl->dl_namesize = strlen(dl->dl_name) + 1; + name = kmem_alloc(dl->dl_namesize, KM_SLEEP); + bcopy(dl->dl_name, name, dl->dl_namesize); + dl->dl_name = name; + } + + mutex_exit(&dzp->z_lock); + + /* + * We have a dirlock on the name. (Note that it is the dirlock, + * not the dzp's z_lock, that protects the name in the zap object.) + * See if there's an object by this name; if so, put a hold on it. + */ + if (flag & ZXATTR) { + error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, + sizeof (zoid)); + if (error == 0) + error = (zoid == 0 ? ENOENT : 0); + } else { + if (update) + vp = dnlc_lookup(ZTOV(dzp), name); + if (vp == DNLC_NO_VNODE) { /* DNLC holds a negative entry for name */ + VN_RELE(vp); + error = ENOENT; + } else if (vp) { /* DNLC hit: no zap lookup needed */ + if (flag & ZNEW) { + zfs_dirent_unlock(dl); + VN_RELE(vp); + return (EEXIST); + } + *dlpp = dl; + *zpp = VTOZ(vp); + return (0); + } else { + error = zfs_match_find(zfsvfs, dzp, name, exact, + update, direntflags, realpnp, &zoid); + } + } + if (error) { + if (error != ENOENT || (flag & ZEXISTS)) { /* a missing entry is only OK without ZEXISTS */ + zfs_dirent_unlock(dl); + return (error); + } + } else { + if (flag & ZNEW) { + zfs_dirent_unlock(dl); + return (EEXIST); + } + error = zfs_zget(zfsvfs, zoid, zpp); + if (error) { + zfs_dirent_unlock(dl); + return (error); + } + if (!(flag & ZXATTR) && update) + dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); + } + + *dlpp = dl; + + return (0); +} + +/* + * Unlock this directory entry and wake anyone who was waiting for it.
+ */ +void +zfs_dirent_unlock(zfs_dirlock_t *dl) +{ + znode_t *dzp = dl->dl_dzp; + zfs_dirlock_t **prev_dl, *cur_dl; + + mutex_enter(&dzp->z_lock); + + if (!dl->dl_namelock) /* z_name_lock was taken by zfs_dirent_lock() */ + rw_exit(&dzp->z_name_lock); + + if (dl->dl_sharecnt > 1) { /* other shared holders remain */ + dl->dl_sharecnt--; + mutex_exit(&dzp->z_lock); + return; + } + prev_dl = &dzp->z_dirlocks; /* unlink dl from the list of dirlocks on dzp */ + while ((cur_dl = *prev_dl) != dl) + prev_dl = &cur_dl->dl_next; + *prev_dl = dl->dl_next; + cv_broadcast(&dl->dl_cv); + mutex_exit(&dzp->z_lock); + + if (dl->dl_namesize != 0) /* dl_name is a copy made for sharing */ + kmem_free(dl->dl_name, dl->dl_namesize); + cv_destroy(&dl->dl_cv); + kmem_free(dl, sizeof (*dl)); +} + +/* + * Look up an entry in a directory. + * + * NOTE: '.' and '..' are handled as special cases because + * no directory entries are actually stored for them. If this is + * the root of a filesystem, then '.zfs' is also treated as a + * special pseudo-directory. + * + * dzp - znode of the directory to search + * name - name of the entry to look up + * vpp - set to the vnode of the entry on success + * flags - FIGNORECASE requests a case-insensitive lookup (ZCILOOK) + * deflg - passed to zfs_dirent_lock() as direntflags + * rpnp - passed to zfs_dirent_lock() as realpnp; for the special + * names handled here, name itself is copied into it when + * FIGNORECASE is set + * + * Returns 0 on success or errno on failure. + */ +int +zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, + int *deflg, pathname_t *rpnp) +{ + zfs_dirlock_t *dl; + znode_t *zp; + int error = 0; + uint64_t parent; + + if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { + *vpp = ZTOV(dzp); + VN_HOLD(*vpp); + } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + + /* + * If we are a snapshot mounted under .zfs, return + * the vp for the snapshot directory.
+ */ + if ((error = sa_lookup(dzp->z_sa_hdl, + SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) + return (error); + if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { + error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, + "snapshot", vpp, NULL, 0, NULL, kcred, + NULL, NULL, NULL); + return (error); + } + rw_enter(&dzp->z_parent_lock, RW_READER); + error = zfs_zget(zfsvfs, parent, &zp); + if (error == 0) + *vpp = ZTOV(zp); + rw_exit(&dzp->z_parent_lock); + } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { + *vpp = zfsctl_root(dzp); + } else { + int zf; + + zf = ZEXISTS | ZSHARED; + if (flags & FIGNORECASE) + zf |= ZCILOOK; + + error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); + if (error == 0) { + *vpp = ZTOV(zp); + zfs_dirent_unlock(dl); + dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ + } + rpnp = NULL; /* zfs_dirent_lock() was given rpnp; skip the copy below */ + } + + if ((flags & FIGNORECASE) && rpnp && !error) + (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); + + return (error); +} + +/* + * unlinked Set (formerly known as the "delete queue") Error Handling + * + * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we + * don't specify the name of the entry that we will be manipulating. We + * also fib and say that we won't be adding any new entries to the + * unlinked set, even though we might (this is to lower the minimum file + * size that can be deleted in a full filesystem). So on the small + * chance that the nlink list is using a fat zap (ie. has more than + * 2000 entries), we *may* not pre-read a block that's needed. + * Therefore it is remotely possible for some of the assertions + * regarding the unlinked set below to fail due to i/o error. On a + * nondebug system, this will result in the space being leaked.
+ */ +void +zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + ASSERT(zp->z_unlinked); + ASSERT(zp->z_links == 0); + /* Record the object number of zp in the unlinked set zap object. */ + VERIFY3U(0, ==, + zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); +} + +/* + * Clean up any znodes that had no links when we either crashed or + * (force) umounted the file system. + * + * Every entry of the unlinked set whose object can be looked up is + * brought in core with zfs_zget(), marked z_unlinked and released again; + * entries that cannot be looked up are skipped. + */ +void +zfs_unlinked_drain(zfsvfs_t *zfsvfs) +{ + zap_cursor_t zc; + zap_attribute_t zap; + dmu_object_info_t doi; + znode_t *zp; + int error; + + /* + * Iterate over the contents of the unlinked set. + */ + for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); + zap_cursor_retrieve(&zc, &zap) == 0; + zap_cursor_advance(&zc)) { + + /* + * See what kind of object we have in the list. + */ + + error = dmu_object_info(zfsvfs->z_os, + zap.za_first_integer, &doi); + if (error != 0) + continue; + + ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || + (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); + /* + * We need to re-mark these list entries for deletion, + * so we pull them back into core and set zp->z_unlinked. + */ + error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); + + /* + * We may pick up znodes that are already marked for deletion. + * This could happen during the purge of an extended attribute + * directory. All we need to do is skip over them, since they + * are already in the system marked z_unlinked. + */ + if (error != 0) + continue; + + zp->z_unlinked = B_TRUE; + VN_RELE(ZTOV(zp)); + } + zap_cursor_fini(&zc); +} + +/* + * Delete the entire contents of a directory. Return a count + * of the number of entries that could not be deleted. If we encounter + * an error, return a count of at least one so that the directory stays + * in the unlinked set. + * + * NOTE: this function assumes that the directory is inactive, + * so there is no need to lock its entries before deletion. + * Also, it assumes the directory contents is *only* regular + * files.
+ */ +static int +zfs_purgedir(znode_t *dzp) +{ + zap_cursor_t zc; + zap_attribute_t zap; + znode_t *xzp; + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_dirlock_t dl; + int skipped = 0; + int error; + + for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); + (error = zap_cursor_retrieve(&zc, &zap)) == 0; + zap_cursor_advance(&zc)) { + error = zfs_zget(zfsvfs, + ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); + if (error) { + skipped += 1; + continue; + } + + ASSERT((ZTOV(xzp)->v_type == VREG) || + (ZTOV(xzp)->v_type == VLNK)); + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); + dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + /* Is this really needed ? */ + zfs_sa_upgrade_txholds(tx, xzp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + VN_RELE(ZTOV(xzp)); + skipped += 1; + continue; + } + bzero(&dl, sizeof (dl)); /* stack dirlock that only names the entry */ + dl.dl_dzp = dzp; + dl.dl_name = zap.za_name; + + error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); + if (error) + skipped += 1; + dmu_tx_commit(tx); + + VN_RELE(ZTOV(xzp)); + } + zap_cursor_fini(&zc); + if (error != ENOENT) /* cursor stopped on something other than ENOENT */ + skipped += 1; + return (skipped); +} +/* + * Destroy znode zp, which must have no links and no vnode references + * (both are asserted). If zp is an extended attribute directory its + * entries are purged first; then the file data is freed and, in a + * single transaction, the xattr directory (if any) is marked unlinked + * and added to the unlinked set and zp is removed from the unlinked set + * and deleted. If any step fails, zfs_znode_dmu_fini() and + * zfs_znode_free() are called on zp and its object stays in the + * unlinked set. + */ +void +zfs_rmnode(znode_t *zp) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + objset_t *os = zfsvfs->z_os; + znode_t *xzp = NULL; + dmu_tx_t *tx; + uint64_t acl_obj; + uint64_t xattr_obj; + int error; + + ASSERT(zp->z_links == 0); + ASSERT(ZTOV(zp)->v_count == 0); + + /* + * If this is an attribute directory, purge its contents. + */ + if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) { + if (zfs_purgedir(zp) != 0) { + /* + * Not enough space to delete some xattrs. + * Leave it in the unlinked set. + */ + zfs_znode_dmu_fini(zp); + zfs_znode_free(zp); + return; + } + } + + /* + * Free up all the data in the file.
+ */ + error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); + if (error) { + /* + * Not enough space. Leave the file in the unlinked set. + */ + zfs_znode_dmu_fini(zp); + zfs_znode_free(zp); + return; + } + + /* + * If the file has extended attributes, we're going to unlink + * the xattr dir. + */ + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr_obj, sizeof (xattr_obj)); + if (error == 0 && xattr_obj) { + error = zfs_zget(zfsvfs, xattr_obj, &xzp); + ASSERT(error == 0); + } + + acl_obj = zfs_external_acl(zp); + + /* + * Set up the final transaction. + */ + tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + if (xzp) { + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); + dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); + } + if (acl_obj) + dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); + + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + /* + * Not enough space to delete the file. Leave it in the + * unlinked set, leaking it until the fs is remounted (at + * which point we'll call zfs_unlinked_drain() to process it).
+ */ + dmu_tx_abort(tx); + zfs_znode_dmu_fini(zp); + zfs_znode_free(zp); + goto out; + } + + if (xzp) { + ASSERT(error == 0); + mutex_enter(&xzp->z_lock); + xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ + xzp->z_links = 0; /* no more links to it */ + VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + &xzp->z_links, sizeof (xzp->z_links), tx)); + mutex_exit(&xzp->z_lock); + zfs_unlinked_add(xzp, tx); + } + + /* Remove this znode from the unlinked set */ + VERIFY3U(0, ==, + zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); + + zfs_znode_delete(zp, tx); + + dmu_tx_commit(tx); +out: + if (xzp) + VN_RELE(ZTOV(xzp)); +} +/* + * Build the value stored in a directory entry for zp: its object id and, + * if the file system version is at least ZPL_VERSION_DIRENT_TYPE, + * IFTODT(mode) shifted into the bits starting at bit 60. + */ +static uint64_t +zfs_dirent(znode_t *zp, uint64_t mode) +{ + uint64_t de = zp->z_id; + + if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) + de |= IFTODT(mode) << 60; + return (de); +} + +/* + * Link zp into dl. Can only fail if zp has been unlinked. + * + * With ZRENAMING the unlinked check and the link count increment are + * skipped; with ZNEW the ctime of zp is not updated. The size, link + * count (if zp is a directory) and times of the parent directory are + * updated, the name is added to the zap object of the directory and the + * DNLC is updated. + * + * NOTE(review): errors from sa_bulk_update() and zap_add() are only + * checked with ASSERT() and are never returned to the caller. + * + * Returns ENOENT if zp is unlinked (and ZRENAMING is not set), else 0. + */ +int +zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) +{ + znode_t *dzp = dl->dl_dzp; + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + vnode_t *vp = ZTOV(zp); + uint64_t value; + int zp_is_dir = (vp->v_type == VDIR); + sa_bulk_attr_t bulk[5]; + uint64_t mtime[2], ctime[2]; + int count = 0; + int error; + + mutex_enter(&zp->z_lock); + + if (!(flag & ZRENAMING)) { + if (zp->z_unlinked) { /* no new links to unlinked zp */ + ASSERT(!(flag & (ZNEW | ZEXISTS))); + mutex_exit(&zp->z_lock); + return (ENOENT); + } + zp->z_links++; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &zp->z_links, sizeof (zp->z_links)); + + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, + &dzp->z_id, sizeof (dzp->z_id)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + + if (!(flag & ZNEW)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + ctime, sizeof (ctime)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, + ctime, B_TRUE); + } + error = sa_bulk_update(zp->z_sa_hdl, bulk,
count, tx); + ASSERT(error == 0); + + mutex_exit(&zp->z_lock); + + mutex_enter(&dzp->z_lock); + dzp->z_size++; + dzp->z_links += zp_is_dir; + count = 0; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &dzp->z_size, sizeof (dzp->z_size)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &dzp->z_links, sizeof (dzp->z_links)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + mtime, sizeof (mtime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &dzp->z_pflags, sizeof (dzp->z_pflags)); + zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); + error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + mutex_exit(&dzp->z_lock); + + value = zfs_dirent(zp, zp->z_mode); + error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, + 8, 1, &value, tx); + ASSERT(error == 0); + + dnlc_update(ZTOV(dzp), dl->dl_name, vp); + + return (0); +} +/* + * Remove the name held by dl from the zap object of dzp. When the file + * system normalizes names (z_norm), zap_remove_norm() is used, with + * MT_EXACT for ZCIEXACT on a case-insensitive file system or for a call + * without ZCILOOK on a mixed one, and MT_FIRST otherwise. Returns the + * error from the zap removal. + */ +static int +zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, + int flag) +{ + int error; + + if (zp->z_zfsvfs->z_norm) { + if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && + (flag & ZCIEXACT)) || + ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) && + !(flag & ZCILOOK))) + error = zap_remove_norm(zp->z_zfsvfs->z_os, + dzp->z_id, dl->dl_name, MT_EXACT, tx); + else + error = zap_remove_norm(zp->z_zfsvfs->z_os, + dzp->z_id, dl->dl_name, MT_FIRST, tx); + } else { + error = zap_remove(zp->z_zfsvfs->z_os, + dzp->z_id, dl->dl_name, tx); + } + + return (error); +} + +/* + * Unlink zp from dl, and mark zp for deletion if this was the last link. + * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). + * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. + * If it's non-NULL, we use it to indicate whether the znode needs deletion, + * and it's the caller's job to do it.
+ */ +int +zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, + boolean_t *unlinkedp) +{ + znode_t *dzp = dl->dl_dzp; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + vnode_t *vp = ZTOV(zp); + int zp_is_dir = (vp->v_type == VDIR); + boolean_t unlinked = B_FALSE; + sa_bulk_attr_t bulk[5]; + uint64_t mtime[2], ctime[2]; + int count = 0; + int error; + + dnlc_remove(ZTOV(dzp), dl->dl_name); /* drop the cached name first */ + + if (!(flag & ZRENAMING)) { + if (vn_vfswlock(vp)) /* prevent new mounts on zp */ + return (EBUSY); + + if (vn_ismntpt(vp)) { /* don't remove mount point */ + vn_vfsunlock(vp); + return (EBUSY); + } + + mutex_enter(&zp->z_lock); + + if (zp_is_dir && !zfs_dirempty(zp)) { + mutex_exit(&zp->z_lock); + vn_vfsunlock(vp); + return (EEXIST); + } + + /* + * If we get here, we are going to try to remove the object. + * First try removing the name from the directory; if that + * fails, return the error. + */ + error = zfs_dropname(dl, zp, dzp, tx, flag); + if (error != 0) { + mutex_exit(&zp->z_lock); + vn_vfsunlock(vp); + return (error); + } + + if (zp->z_links <= zp_is_dir) { /* link count too low: report it and repair */ + zfs_panic_recover("zfs: link count on %s is %u, " + "should be at least %u", + zp->z_vnode->v_path ?
zp->z_vnode->v_path : + "<unknown>", (int)zp->z_links, + zp_is_dir + 1); + zp->z_links = zp_is_dir + 1; + } + if (--zp->z_links == zp_is_dir) { /* that was the last link */ + zp->z_unlinked = B_TRUE; + zp->z_links = 0; + unlinked = B_TRUE; + } else { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, sizeof (zp->z_pflags)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, + B_TRUE); + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &zp->z_links, sizeof (zp->z_links)); + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + count = 0; /* bulk[] is reused for dzp below */ + ASSERT(error == 0); + mutex_exit(&zp->z_lock); + vn_vfsunlock(vp); + } else { /* ZRENAMING: only the name is removed */ + error = zfs_dropname(dl, zp, dzp, tx, flag); + if (error != 0) + return (error); + } + + mutex_enter(&dzp->z_lock); + dzp->z_size--; /* one dirent removed */ + dzp->z_links -= zp_is_dir; /* ".." link from zp */ + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &dzp->z_links, sizeof (dzp->z_links)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + NULL, &dzp->z_size, sizeof (dzp->z_size)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + NULL, ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), + NULL, mtime, sizeof (mtime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); + zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); + error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + mutex_exit(&dzp->z_lock); + + if (unlinkedp != NULL) + *unlinkedp = unlinked; + else if (unlinked) + zfs_unlinked_add(zp, tx); + + return (0); +} + +/* + * Indicate whether the directory is empty. Works with or without z_lock + * held, but can only be consider a hint in the latter case. Returns true + * if only "." and ".." remain and there's no work in progress.
+ */ +boolean_t +zfs_dirempty(znode_t *dzp) +{ + return (dzp->z_size == 2 && dzp->z_dirlocks == 0); +} + +int +zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *xzp; + dmu_tx_t *tx; + int error; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + uint64_t parent; + + *xvpp = NULL; + + if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)) + return (error); + + if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, + &acl_ids)) != 0) + return (error); + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { + zfs_acl_ids_free(&acl_ids); + return (EDQUOT); + } + +top: + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + return (error); + } + zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + +#ifdef DEBUG + error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parent, sizeof (parent)); + ASSERT(error == 0 && parent == zp->z_id); +#endif + + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, + sizeof (xzp->z_id), tx)); + + (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, + xzp, "", NULL, acl_ids.z_fuidp, vap); + + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); + + *xvpp = ZTOV(xzp); + + return (0); +} + +/* + * Return a znode for the extended attribute directory for zp. 
+ * ** If the directory does not already exist, it is created ** + * + * IN: zp - znode to obtain attribute directory from + * cr - credentials of caller + * flags - flags from the VOP_LOOKUP call + * + * OUT: xzpp - pointer to extended attribute znode + * + * RETURN: 0 on success + * error number on failure + */ +int +zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *xzp; + zfs_dirlock_t *dl; + vattr_t va; + int error; +top: + error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); + if (error) + return (error); + + if (xzp != NULL) { + *xvpp = ZTOV(xzp); + zfs_dirent_unlock(dl); + return (0); + } + + + if (!(flags & CREATE_XATTR_DIR)) { + zfs_dirent_unlock(dl); + return (ENOENT); + } + + if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { + zfs_dirent_unlock(dl); + return (EROFS); + } + + /* + * The ability to 'create' files in an attribute + * directory comes from the write_xattr permission on the base file. + * + * The ability to 'search' an attribute directory requires + * read_xattr permission on the base file. + * + * Once in a directory the ability to read/write attributes + * is controlled by the permissions on the attribute file. + */ + va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; + va.va_type = VDIR; + va.va_mode = S_IFDIR | S_ISVTX | 0777; + zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); + + error = zfs_make_xattrdir(zp, &va, xvpp, cr); + zfs_dirent_unlock(dl); + + if (error == ERESTART) { + /* NB: we already did dmu_tx_wait() if necessary */ + goto top; + } + + return (error); +} + +/* + * Decide whether it is okay to remove within a sticky directory. + * + * In sticky directories, write access is not sufficient; + * you can remove entries from a directory only if: + * + * you own the directory, + * you own the entry, + * the entry is a plain file and you have write access, + * or you are privileged (checked in secpolicy...). 
+ * + * The function returns 0 if remove access is granted. + */ +int +zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) +{ + uid_t uid; + uid_t downer; + uid_t fowner; + zfsvfs_t *zfsvfs = zdp->z_zfsvfs; + + if (zdp->z_zfsvfs->z_replay) /* z_replay is set: grant without further checks */ + return (0); + + if ((zdp->z_mode & S_ISVTX) == 0) /* directory is not sticky */ + return (0); + + downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); /* directory owner */ + fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); /* entry owner */ + + if ((uid = crgetuid(cr)) == downer || uid == fowner || + (ZTOV(zp)->v_type == VREG && + zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) + return (0); + else /* otherwise the caller must be privileged */ + return (secpolicy_vnode_remove(cr)); +} diff --git a/uts/common/fs/zfs/zfs_fm.c b/uts/common/fs/zfs/zfs_fm.c new file mode 100644 index 000000000000..0b4812666442 --- /dev/null +++ b/uts/common/fs/zfs/zfs_fm.c @@ -0,0 +1,863 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/vdev.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/zio_checksum.h> + +#include <sys/fm/fs/zfs.h> +#include <sys/fm/protocol.h> +#include <sys/fm/util.h> +#include <sys/sysevent.h> + +/* + * This general routine is responsible for generating all the different ZFS + * ereports. The payload is dependent on the class, and which arguments are + * supplied to the function: + * + * EREPORT POOL VDEV IO + * block X X X + * data X X + * device X X + * pool X + * + * If we are in a loading state, all errors are chained together by the same + * SPA-wide ENA (Error Numeric Association). + * + * For isolated I/O requests, we get the ENA from the zio_t. The propagation + * gets very complicated due to RAID-Z, gang blocks, and vdev caching. We want + * to chain together all ereports associated with a logical piece of data. For + * read I/Os, there are basically three 'types' of I/O, which form a roughly + * layered diagram: + * + * +---------------+ + * | Aggregate I/O | No associated logical data or device + * +---------------+ + * | + * V + * +---------------+ Reads associated with a piece of logical data. + * | Read I/O | This includes reads on behalf of RAID-Z, + * +---------------+ mirrors, gang blocks, retries, etc. + * | + * V + * +---------------+ Reads associated with a particular device, but + * | Physical I/O | no logical data. Issued as part of vdev caching + * +---------------+ and I/O aggregation. + * + * Note that 'physical I/O' here is not the same terminology as used in the rest + * of ZIO. Typically, 'physical I/O' simply means that there is no attached + * blockpointer. But I/O with no associated block pointer can still be related + * to a logical piece of data (i.e. RAID-Z requests). + * + * Purely physical I/O always have unique ENAs. They are not related to a + * particular piece of logical data, and therefore cannot be chained together. 
+ * We still generate an ereport, but the DE doesn't correlate it with any + * logical piece of data. When such an I/O fails, the delegated I/O requests + * will issue a retry, which will trigger the 'real' ereport with the correct + * ENA. + * + * We keep track of the ENA for a ZIO chain through the 'io_logical' member. + * When a new logical I/O is issued, we set this to point to itself. Child I/Os + * then inherit this pointer, so that when it is first set subsequent failures + * will use the same ENA. For vdev cache fill and queue aggregation I/O, + * this pointer is set to NULL, and no ereport will be generated (since it + * doesn't actually correspond to any particular device or piece of data, + * and the caller will always retry without caching or queueing anyway). + * + * For checksum errors, we want to include more information about the actual + * error which occurs. Accordingly, we build an ereport when the error is + * noticed, but instead of sending it in immediately, we hang it off of the + * io_cksum_report field of the logical IO. When the logical IO completes + * (successfully or not), zfs_ereport_finish_checksum() is called with the + * good and bad versions of the buffer (if available), and we annotate the + * ereport with information about the differences. + */ +#ifdef _KERNEL +static void +zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, + const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, + uint64_t stateoroffset, uint64_t size) +{ + nvlist_t *ereport, *detector; + + uint64_t ena; + char class[64]; + + /* + * If we are doing a spa_tryimport() or in recovery mode, + * ignore errors. + */ + if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT || + spa_load_state(spa) == SPA_LOAD_RECOVER) + return; + + /* + * If we are in the middle of opening a pool, and the previous attempt + * failed, don't bother logging any new ereports - we're just going to + * get the same diagnosis anyway. 
+ */ + if (spa_load_state(spa) != SPA_LOAD_NONE && + spa->spa_last_open_failed) + return; + + if (zio != NULL) { + /* + * If this is not a read or write zio, ignore the error. This + * can occur if the DKIOCFLUSHWRITECACHE ioctl fails. + */ + if (zio->io_type != ZIO_TYPE_READ && + zio->io_type != ZIO_TYPE_WRITE) + return; + + /* + * Ignore any errors from speculative I/Os, as failure is an + * expected result. + */ + if (zio->io_flags & ZIO_FLAG_SPECULATIVE) + return; + + /* + * If this I/O is not a retry I/O, don't post an ereport. + * Otherwise, we risk making bad diagnoses based on B_FAILFAST + * I/Os. + */ + if (zio->io_error == EIO && + !(zio->io_flags & ZIO_FLAG_IO_RETRY)) + return; + + if (vd != NULL) { + /* + * If the vdev has already been marked as failing due + * to a failed probe, then ignore any subsequent I/O + * errors, as the DE will automatically fault the vdev + * on the first such failure. This also catches cases + * where vdev_remove_wanted is set and the device has + * not yet been asynchronously placed into the REMOVED + * state. + */ + if (zio->io_vd == vd && !vdev_accessible(vd, zio)) + return; + + /* + * Ignore checksum errors for reads from DTL regions of + * leaf vdevs. + */ + if (zio->io_type == ZIO_TYPE_READ && + zio->io_error == ECKSUM && + vd->vdev_ops->vdev_op_leaf && + vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1)) + return; + } + } + + /* + * For probe failure, we want to avoid posting ereports if we've + * already removed the device in the meantime. + */ + if (vd != NULL && + strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) == 0 && + (vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED)) + return; + + if ((ereport = fm_nvlist_create(NULL)) == NULL) + return; + + if ((detector = fm_nvlist_create(NULL)) == NULL) { + fm_nvlist_destroy(ereport, FM_NVA_FREE); + return; + } + + /* + * Serialize ereport generation + */ + mutex_enter(&spa->spa_errlist_lock); + + /* + * Determine the ENA to use for this event. 
If we are in a loading + * state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use + * a root zio-wide ENA. Otherwise, simply use a unique ENA. + */ + if (spa_load_state(spa) != SPA_LOAD_NONE) { + if (spa->spa_ena == 0) + spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1); + ena = spa->spa_ena; + } else if (zio != NULL && zio->io_logical != NULL) { + if (zio->io_logical->io_ena == 0) + zio->io_logical->io_ena = + fm_ena_generate(0, FM_ENA_FMT1); + ena = zio->io_logical->io_ena; + } else { + ena = fm_ena_generate(0, FM_ENA_FMT1); + } + + /* + * Construct the full class, detector, and other standard FMA fields. + */ + (void) snprintf(class, sizeof (class), "%s.%s", + ZFS_ERROR_CLASS, subclass); + + fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa), + vd != NULL ? vd->vdev_guid : 0); + + fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); + + /* + * Construct the per-ereport payload, depending on which parameters are + * passed in. + */ + + /* + * Generic payload members common to all ereports. + */ + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL, + DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, + DATA_TYPE_UINT64, spa_guid(spa), + FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32, + spa_load_state(spa), NULL); + + if (spa != NULL) { + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, + DATA_TYPE_STRING, + spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ? + FM_EREPORT_FAILMODE_WAIT : + spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ? 
+ FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC, + NULL); + } + + if (vd != NULL) { + vdev_t *pvd = vd->vdev_parent; + + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, + DATA_TYPE_UINT64, vd->vdev_guid, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL); + if (vd->vdev_path != NULL) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, + DATA_TYPE_STRING, vd->vdev_path, NULL); + if (vd->vdev_devid != NULL) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, + DATA_TYPE_STRING, vd->vdev_devid, NULL); + if (vd->vdev_fru != NULL) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, + DATA_TYPE_STRING, vd->vdev_fru, NULL); + + if (pvd != NULL) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, + DATA_TYPE_UINT64, pvd->vdev_guid, + FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE, + DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type, + NULL); + if (pvd->vdev_path) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH, + DATA_TYPE_STRING, pvd->vdev_path, NULL); + if (pvd->vdev_devid) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, + DATA_TYPE_STRING, pvd->vdev_devid, NULL); + } + } + + if (zio != NULL) { + /* + * Payload common to all I/Os. + */ + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR, + DATA_TYPE_INT32, zio->io_error, NULL); + + /* + * If the 'size' parameter is non-zero, it indicates this is a + * RAID-Z or other I/O where the physical offset and length are + * provided for us, instead of within the zio_t. 
+ */ + if (vd != NULL) { + if (size) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, + DATA_TYPE_UINT64, stateoroffset, + FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, + DATA_TYPE_UINT64, size, NULL); + else + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, + DATA_TYPE_UINT64, zio->io_offset, + FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, + DATA_TYPE_UINT64, zio->io_size, NULL); + } + + /* + * Payload for I/Os with corresponding logical information. + */ + if (zio->io_logical != NULL) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, + DATA_TYPE_UINT64, + zio->io_logical->io_bookmark.zb_objset, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, + DATA_TYPE_UINT64, + zio->io_logical->io_bookmark.zb_object, + FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, + DATA_TYPE_INT64, + zio->io_logical->io_bookmark.zb_level, + FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, + DATA_TYPE_UINT64, + zio->io_logical->io_bookmark.zb_blkid, NULL); + } else if (vd != NULL) { + /* + * If we have a vdev but no zio, this is a device fault, and the + * 'stateoroffset' parameter indicates the previous state of the + * vdev. + */ + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_PREV_STATE, + DATA_TYPE_UINT64, stateoroffset, NULL); + } + + mutex_exit(&spa->spa_errlist_lock); + + *ereport_out = ereport; + *detector_out = detector; +} + +/* if it's <= 128 bytes, save the corruption directly */ +#define ZFM_MAX_INLINE (128 / sizeof (uint64_t)) + +#define MAX_RANGES 16 + +typedef struct zfs_ecksum_info { + /* histograms of set and cleared bits by bit number in a 64-bit word */ + uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY]; + uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY]; + + /* inline arrays of bits set and cleared. */ + uint64_t zei_bits_set[ZFM_MAX_INLINE]; + uint64_t zei_bits_cleared[ZFM_MAX_INLINE]; + + /* + * for each range, the number of bits set and cleared. The Hamming + * distance between the good and bad buffers is the sum of them all. 
+ */ + uint32_t zei_range_sets[MAX_RANGES]; + uint32_t zei_range_clears[MAX_RANGES]; + + struct zei_ranges { + uint32_t zr_start; + uint32_t zr_end; + } zei_ranges[MAX_RANGES]; + + size_t zei_range_count; + uint32_t zei_mingap; + uint32_t zei_allowed_mingap; + +} zfs_ecksum_info_t; + +static void +update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count) +{ + size_t i; + size_t bits = 0; + uint64_t value = BE_64(value_arg); + + /* We store the bits in big-endian (largest-first) order */ + for (i = 0; i < 64; i++) { + if (value & (1ull << i)) { + hist[63 - i]++; + ++bits; + } + } + /* update the count of bits changed */ + *count += bits; +} + +/* + * We've now filled up the range array, and need to increase "mingap" and + * shrink the range list accordingly. zei_mingap is always the smallest + * distance between array entries, so we set the new_allowed_gap to be + * one greater than that. We then go through the list, joining together + * any ranges which are closer than the new_allowed_gap. + * + * By construction, there will be at least one. We also update zei_mingap + * to the new smallest gap, to prepare for our next invocation. 
+ */ +static void +shrink_ranges(zfs_ecksum_info_t *eip) +{ + uint32_t mingap = UINT32_MAX; + uint32_t new_allowed_gap = eip->zei_mingap + 1; + + size_t idx, output; + size_t max = eip->zei_range_count; + + struct zei_ranges *r = eip->zei_ranges; + + ASSERT3U(eip->zei_range_count, >, 0); + ASSERT3U(eip->zei_range_count, <=, MAX_RANGES); + + output = idx = 0; + while (idx < max - 1) { + uint32_t start = r[idx].zr_start; + uint32_t end = r[idx].zr_end; + + while (idx < max - 1) { + idx++; + + uint32_t nstart = r[idx].zr_start; + uint32_t nend = r[idx].zr_end; + + uint32_t gap = nstart - end; + if (gap < new_allowed_gap) { + end = nend; + continue; + } + if (gap < mingap) + mingap = gap; + break; + } + r[output].zr_start = start; + r[output].zr_end = end; + output++; + } + ASSERT3U(output, <, eip->zei_range_count); + eip->zei_range_count = output; + eip->zei_mingap = mingap; + eip->zei_allowed_mingap = new_allowed_gap; +} + +static void +add_range(zfs_ecksum_info_t *eip, int start, int end) +{ + struct zei_ranges *r = eip->zei_ranges; + size_t count = eip->zei_range_count; + + if (count >= MAX_RANGES) { + shrink_ranges(eip); + count = eip->zei_range_count; + } + if (count == 0) { + eip->zei_mingap = UINT32_MAX; + eip->zei_allowed_mingap = 1; + } else { + int gap = start - r[count - 1].zr_end; + + if (gap < eip->zei_allowed_mingap) { + r[count - 1].zr_end = end; + return; + } + if (gap < eip->zei_mingap) + eip->zei_mingap = gap; + } + r[count].zr_start = start; + r[count].zr_end = end; + eip->zei_range_count++; +} + +static size_t +range_total_size(zfs_ecksum_info_t *eip) +{ + struct zei_ranges *r = eip->zei_ranges; + size_t count = eip->zei_range_count; + size_t result = 0; + size_t idx; + + for (idx = 0; idx < count; idx++) + result += (r[idx].zr_end - r[idx].zr_start); + + return (result); +} + +static zfs_ecksum_info_t * +annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, + const uint8_t *goodbuf, const uint8_t *badbuf, size_t size, + boolean_t 
drop_if_identical) +{ + const uint64_t *good = (const uint64_t *)goodbuf; + const uint64_t *bad = (const uint64_t *)badbuf; + + uint64_t allset = 0; + uint64_t allcleared = 0; + + size_t nui64s = size / sizeof (uint64_t); + + size_t inline_size; + int no_inline = 0; + size_t idx; + size_t range; + + size_t offset = 0; + ssize_t start = -1; + + zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_SLEEP); + + /* don't do any annotation for injected checksum errors */ + if (info != NULL && info->zbc_injected) + return (eip); + + if (info != NULL && info->zbc_has_cksum) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED, + DATA_TYPE_UINT64_ARRAY, + sizeof (info->zbc_expected) / sizeof (uint64_t), + (uint64_t *)&info->zbc_expected, + FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL, + DATA_TYPE_UINT64_ARRAY, + sizeof (info->zbc_actual) / sizeof (uint64_t), + (uint64_t *)&info->zbc_actual, + FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO, + DATA_TYPE_STRING, + info->zbc_checksum_name, + NULL); + + if (info->zbc_byteswapped) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP, + DATA_TYPE_BOOLEAN, 1, + NULL); + } + } + + if (badbuf == NULL || goodbuf == NULL) + return (eip); + + ASSERT3U(nui64s, <=, UINT16_MAX); + ASSERT3U(size, ==, nui64s * sizeof (uint64_t)); + ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); + ASSERT3U(size, <=, UINT32_MAX); + + /* build up the range list by comparing the two buffers. */ + for (idx = 0; idx < nui64s; idx++) { + if (good[idx] == bad[idx]) { + if (start == -1) + continue; + + add_range(eip, start, idx); + start = -1; + } else { + if (start != -1) + continue; + + start = idx; + } + } + if (start != -1) + add_range(eip, start, idx); + + /* See if it will fit in our inline buffers */ + inline_size = range_total_size(eip); + if (inline_size > ZFM_MAX_INLINE) + no_inline = 1; + + /* + * If there is no change and we want to drop if the buffers are + * identical, do so. 
+ */ + if (inline_size == 0 && drop_if_identical) { + kmem_free(eip, sizeof (*eip)); + return (NULL); + } + + /* + * Now walk through the ranges, filling in the details of the + * differences. Also convert our uint64_t-array offsets to byte + * offsets. + */ + for (range = 0; range < eip->zei_range_count; range++) { + size_t start = eip->zei_ranges[range].zr_start; + size_t end = eip->zei_ranges[range].zr_end; + + for (idx = start; idx < end; idx++) { + uint64_t set, cleared; + + // bits set in bad, but not in good + set = ((~good[idx]) & bad[idx]); + // bits set in good, but not in bad + cleared = (good[idx] & (~bad[idx])); + + allset |= set; + allcleared |= cleared; + + if (!no_inline) { + ASSERT3U(offset, <, inline_size); + eip->zei_bits_set[offset] = set; + eip->zei_bits_cleared[offset] = cleared; + offset++; + } + + update_histogram(set, eip->zei_histogram_set, + &eip->zei_range_sets[range]); + update_histogram(cleared, eip->zei_histogram_cleared, + &eip->zei_range_clears[range]); + } + + /* convert to byte offsets */ + eip->zei_ranges[range].zr_start *= sizeof (uint64_t); + eip->zei_ranges[range].zr_end *= sizeof (uint64_t); + } + eip->zei_allowed_mingap *= sizeof (uint64_t); + inline_size *= sizeof (uint64_t); + + /* fill in ereport */ + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES, + DATA_TYPE_UINT32_ARRAY, 2 * eip->zei_range_count, + (uint32_t *)eip->zei_ranges, + FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP, + DATA_TYPE_UINT32, eip->zei_allowed_mingap, + FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS, + DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets, + FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS, + DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears, + NULL); + + if (!no_inline) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS, + DATA_TYPE_UINT8_ARRAY, + inline_size, (uint8_t *)eip->zei_bits_set, + FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS, + DATA_TYPE_UINT8_ARRAY, + inline_size, (uint8_t 
*)eip->zei_bits_cleared, + NULL); + } else { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM, + DATA_TYPE_UINT16_ARRAY, + NBBY * sizeof (uint64_t), eip->zei_histogram_set, + FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM, + DATA_TYPE_UINT16_ARRAY, + NBBY * sizeof (uint64_t), eip->zei_histogram_cleared, + NULL); + } + return (eip); +} +#endif + +void +zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, + uint64_t stateoroffset, uint64_t size) +{ +#ifdef _KERNEL + nvlist_t *ereport = NULL; + nvlist_t *detector = NULL; + + zfs_ereport_start(&ereport, &detector, + subclass, spa, vd, zio, stateoroffset, size); + + if (ereport == NULL) + return; + + fm_ereport_post(ereport, EVCH_SLEEP); + + fm_nvlist_destroy(ereport, FM_NVA_FREE); + fm_nvlist_destroy(detector, FM_NVA_FREE); +#endif +} + +void +zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, + struct zio *zio, uint64_t offset, uint64_t length, void *arg, + zio_bad_cksum_t *info) +{ + zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP); + + if (zio->io_vsd != NULL) + zio->io_vsd_ops->vsd_cksum_report(zio, report, arg); + else + zio_vsd_default_cksum_report(zio, report, arg); + + /* copy the checksum failure information if it was provided */ + if (info != NULL) { + report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_SLEEP); + bcopy(info, report->zcr_ckinfo, sizeof (*info)); + } + + report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift; + report->zcr_length = length; + +#ifdef _KERNEL + zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, + FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); + + if (report->zcr_ereport == NULL) { + report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); + kmem_free(report, sizeof (*report)); + return; + } +#endif + + mutex_enter(&spa->spa_errlist_lock); + report->zcr_next = zio->io_logical->io_cksum_report; + zio->io_logical->io_cksum_report = report; + mutex_exit(&spa->spa_errlist_lock); +} + +void 
+zfs_ereport_finish_checksum(zio_cksum_report_t *report, + const void *good_data, const void *bad_data, boolean_t drop_if_identical) +{ +#ifdef _KERNEL + zfs_ecksum_info_t *info = NULL; + info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo, + good_data, bad_data, report->zcr_length, drop_if_identical); + + if (info != NULL) + fm_ereport_post(report->zcr_ereport, EVCH_SLEEP); + + fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE); + fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE); + report->zcr_ereport = report->zcr_detector = NULL; + + if (info != NULL) + kmem_free(info, sizeof (*info)); +#endif +} + +void +zfs_ereport_free_checksum(zio_cksum_report_t *rpt) +{ +#ifdef _KERNEL + if (rpt->zcr_ereport != NULL) { + fm_nvlist_destroy(rpt->zcr_ereport, + FM_NVA_FREE); + fm_nvlist_destroy(rpt->zcr_detector, + FM_NVA_FREE); + } +#endif + rpt->zcr_free(rpt->zcr_cbdata, rpt->zcr_cbinfo); + + if (rpt->zcr_ckinfo != NULL) + kmem_free(rpt->zcr_ckinfo, sizeof (*rpt->zcr_ckinfo)); + + kmem_free(rpt, sizeof (*rpt)); +} + +void +zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) +{ +#ifdef _KERNEL + fm_ereport_post(report->zcr_ereport, EVCH_SLEEP); +#endif +} + +void +zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, + struct zio *zio, uint64_t offset, uint64_t length, + const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc) +{ +#ifdef _KERNEL + nvlist_t *ereport = NULL; + nvlist_t *detector = NULL; + zfs_ecksum_info_t *info; + + zfs_ereport_start(&ereport, &detector, + FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); + + if (ereport == NULL) + return; + + info = annotate_ecksum(ereport, zbc, good_data, bad_data, length, + B_FALSE); + + if (info != NULL) + fm_ereport_post(ereport, EVCH_SLEEP); + + fm_nvlist_destroy(ereport, FM_NVA_FREE); + fm_nvlist_destroy(detector, FM_NVA_FREE); + + if (info != NULL) + kmem_free(info, sizeof (*info)); +#endif +} + +static void +zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) +{ +#ifdef 
_KERNEL + nvlist_t *resource; + char class[64]; + + if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT) + return; + + if ((resource = fm_nvlist_create(NULL)) == NULL) + return; + + (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, + ZFS_ERROR_CLASS, name); + VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); + VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); + VERIFY(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); + if (vd) + VERIFY(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); + + fm_ereport_post(resource, EVCH_SLEEP); + + fm_nvlist_destroy(resource, FM_NVA_FREE); +#endif +} + +/* + * The 'resource.fs.zfs.removed' event is an internal signal that the given vdev + * has been removed from the system. This will cause the DE to ignore any + * recent I/O errors, inferring that they are due to the asynchronous device + * removal. + */ +void +zfs_post_remove(spa_t *spa, vdev_t *vd) +{ + zfs_post_common(spa, vd, FM_RESOURCE_REMOVED); +} + +/* + * The 'resource.fs.zfs.autoreplace' event is an internal signal that the pool + * has the 'autoreplace' property set, and therefore any broken vdevs will be + * handled by higher level logic, and no vdev fault should be generated. + */ +void +zfs_post_autoreplace(spa_t *spa, vdev_t *vd) +{ + zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE); +} + +/* + * The 'resource.fs.zfs.statechange' event is an internal signal that the + * given vdev has transitioned its state to DEGRADED or HEALTHY. This will + * cause the retire agent to repair any outstanding fault management cases + * open because the device was not found (fault.fs.zfs.device). 
+ */ +void +zfs_post_state_change(spa_t *spa, vdev_t *vd) +{ + zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE); +} diff --git a/uts/common/fs/zfs/zfs_fuid.c b/uts/common/fs/zfs/zfs_fuid.c new file mode 100644 index 000000000000..a853f4d73561 --- /dev/null +++ b/uts/common/fs/zfs/zfs_fuid.c @@ -0,0 +1,756 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/dmu.h> +#include <sys/avl.h> +#include <sys/zap.h> +#include <sys/refcount.h> +#include <sys/nvpair.h> +#ifdef _KERNEL +#include <sys/kidmap.h> +#include <sys/sid.h> +#include <sys/zfs_vfsops.h> +#include <sys/zfs_znode.h> +#endif +#include <sys/zfs_fuid.h> + +/* + * FUID Domain table(s). + * + * The FUID table is stored as a packed nvlist of an array + * of nvlists which contain an index, domain string and offset + * + * During file system initialization the nvlist(s) are read and + * two AVL trees are created. One tree is keyed by the index number + * and the other by the domain string. Nodes are never removed from + * trees, but new entries may be added. 
If a new entry is added then + * the zfsvfs->z_fuid_dirty flag is set to true and the caller will then + * be responsible for calling zfs_fuid_sync() to sync the changes to disk. + * + */ + +#define FUID_IDX "fuid_idx" +#define FUID_DOMAIN "fuid_domain" +#define FUID_OFFSET "fuid_offset" +#define FUID_NVP_ARRAY "fuid_nvlist" + +typedef struct fuid_domain { + avl_node_t f_domnode; + avl_node_t f_idxnode; + ksiddomain_t *f_ksid; + uint64_t f_idx; +} fuid_domain_t; + +static char *nulldomain = ""; + +/* + * Compare two indexes. + */ +static int +idx_compare(const void *arg1, const void *arg2) +{ + const fuid_domain_t *node1 = arg1; + const fuid_domain_t *node2 = arg2; + + if (node1->f_idx < node2->f_idx) + return (-1); + else if (node1->f_idx > node2->f_idx) + return (1); + return (0); +} + +/* + * Compare two domain strings. + */ +static int +domain_compare(const void *arg1, const void *arg2) +{ + const fuid_domain_t *node1 = arg1; + const fuid_domain_t *node2 = arg2; + int val; + + val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name); + if (val == 0) + return (0); + return (val > 0 ? 1 : -1); +} + +void +zfs_fuid_avl_tree_create(avl_tree_t *idx_tree, avl_tree_t *domain_tree) +{ + avl_create(idx_tree, idx_compare, + sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode)); + avl_create(domain_tree, domain_compare, + sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode)); +} + +/* + * load initial fuid domain and idx trees. This function is used by + * both the kernel and zdb. 
+ */ +uint64_t +zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree, + avl_tree_t *domain_tree) +{ + dmu_buf_t *db; + uint64_t fuid_size; + + ASSERT(fuid_obj != 0); + VERIFY(0 == dmu_bonus_hold(os, fuid_obj, + FTAG, &db)); + fuid_size = *(uint64_t *)db->db_data; + dmu_buf_rele(db, FTAG); + + if (fuid_size) { + nvlist_t **fuidnvp; + nvlist_t *nvp = NULL; + uint_t count; + char *packed; + int i; + + packed = kmem_alloc(fuid_size, KM_SLEEP); + VERIFY(dmu_read(os, fuid_obj, 0, + fuid_size, packed, DMU_READ_PREFETCH) == 0); + VERIFY(nvlist_unpack(packed, fuid_size, + &nvp, 0) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY, + &fuidnvp, &count) == 0); + + for (i = 0; i != count; i++) { + fuid_domain_t *domnode; + char *domain; + uint64_t idx; + + VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN, + &domain) == 0); + VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX, + &idx) == 0); + + domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); + + domnode->f_idx = idx; + domnode->f_ksid = ksid_lookupdomain(domain); + avl_add(idx_tree, domnode); + avl_add(domain_tree, domnode); + } + nvlist_free(nvp); + kmem_free(packed, fuid_size); + } + return (fuid_size); +} + +void +zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree) +{ + fuid_domain_t *domnode; + void *cookie; + + cookie = NULL; + while (domnode = avl_destroy_nodes(domain_tree, &cookie)) + ksiddomain_rele(domnode->f_ksid); + + avl_destroy(domain_tree); + cookie = NULL; + while (domnode = avl_destroy_nodes(idx_tree, &cookie)) + kmem_free(domnode, sizeof (fuid_domain_t)); + avl_destroy(idx_tree); +} + +char * +zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx) +{ + fuid_domain_t searchnode, *findnode; + avl_index_t loc; + + searchnode.f_idx = idx; + + findnode = avl_find(idx_tree, &searchnode, &loc); + + return (findnode ? findnode->f_ksid->kd_name : nulldomain); +} + +#ifdef _KERNEL +/* + * Load the fuid table(s) into memory. 
+ */ +static void +zfs_fuid_init(zfsvfs_t *zfsvfs) +{ + rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); + + if (zfsvfs->z_fuid_loaded) { + rw_exit(&zfsvfs->z_fuid_lock); + return; + } + + zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain); + + (void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj); + if (zfsvfs->z_fuid_obj != 0) { + zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os, + zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx, + &zfsvfs->z_fuid_domain); + } + + zfsvfs->z_fuid_loaded = B_TRUE; + rw_exit(&zfsvfs->z_fuid_lock); +} + +/* + * sync out AVL trees to persistent storage. + */ +void +zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +{ + nvlist_t *nvp; + nvlist_t **fuids; + size_t nvsize = 0; + char *packed; + dmu_buf_t *db; + fuid_domain_t *domnode; + int numnodes; + int i; + + if (!zfsvfs->z_fuid_dirty) { + return; + } + + rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); + + /* + * First see if table needs to be created? + */ + if (zfsvfs->z_fuid_obj == 0) { + zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os, + DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE, + sizeof (uint64_t), tx); + VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_FUID_TABLES, sizeof (uint64_t), 1, + &zfsvfs->z_fuid_obj, tx) == 0); + } + + VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + numnodes = avl_numnodes(&zfsvfs->z_fuid_idx); + fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP); + for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++, + domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) { + VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, + domnode->f_idx) == 0); + VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0); + VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN, + domnode->f_ksid->kd_name) == 0); + } + VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY, + fuids, numnodes) == 0); + for (i = 0; i != numnodes; i++) + 
nvlist_free(fuids[i]); + kmem_free(fuids, numnodes * sizeof (void *)); + VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0); + packed = kmem_alloc(nvsize, KM_SLEEP); + VERIFY(nvlist_pack(nvp, &packed, &nvsize, + NV_ENCODE_XDR, KM_SLEEP) == 0); + nvlist_free(nvp); + zfsvfs->z_fuid_size = nvsize; + dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, + zfsvfs->z_fuid_size, packed, tx); + kmem_free(packed, zfsvfs->z_fuid_size); + VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, + FTAG, &db)); + dmu_buf_will_dirty(db, tx); + *(uint64_t *)db->db_data = zfsvfs->z_fuid_size; + dmu_buf_rele(db, FTAG); + + zfsvfs->z_fuid_dirty = B_FALSE; + rw_exit(&zfsvfs->z_fuid_lock); +} + +/* + * Query domain table for a given domain. + * + * If domain isn't found and addok is set, it is added to AVL trees and + * the zfsvfs->z_fuid_dirty flag will be set to TRUE. It will then be + * necessary for the caller or another thread to detect the dirty table + * and sync out the changes. + */ +int +zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, + char **retdomain, boolean_t addok) +{ + fuid_domain_t searchnode, *findnode; + avl_index_t loc; + krw_t rw = RW_READER; + + /* + * If the dummy "nobody" domain then return an index of 0 + * to cause the created FUID to be a standard POSIX id + * for the user nobody. 
+ */ + if (domain[0] == '\0') { + if (retdomain) + *retdomain = nulldomain; + return (0); + } + + searchnode.f_ksid = ksid_lookupdomain(domain); + if (retdomain) + *retdomain = searchnode.f_ksid->kd_name; + if (!zfsvfs->z_fuid_loaded) + zfs_fuid_init(zfsvfs); + +retry: + rw_enter(&zfsvfs->z_fuid_lock, rw); + findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc); + + if (findnode) { + rw_exit(&zfsvfs->z_fuid_lock); + ksiddomain_rele(searchnode.f_ksid); + return (findnode->f_idx); + } else if (addok) { + fuid_domain_t *domnode; + uint64_t retidx; + + if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) { + rw_exit(&zfsvfs->z_fuid_lock); + rw = RW_WRITER; + goto retry; + } + + domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); + domnode->f_ksid = searchnode.f_ksid; + + retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1; + + avl_add(&zfsvfs->z_fuid_domain, domnode); + avl_add(&zfsvfs->z_fuid_idx, domnode); + zfsvfs->z_fuid_dirty = B_TRUE; + rw_exit(&zfsvfs->z_fuid_lock); + return (retidx); + } else { + rw_exit(&zfsvfs->z_fuid_lock); + return (-1); + } +} + +/* + * Query domain table by index, returning domain string + * + * Returns a pointer from an avl node of the domain string. 
+ * + */ +const char * +zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx) +{ + char *domain; + + if (idx == 0 || !zfsvfs->z_use_fuids) + return (NULL); + + if (!zfsvfs->z_fuid_loaded) + zfs_fuid_init(zfsvfs); + + rw_enter(&zfsvfs->z_fuid_lock, RW_READER); + + if (zfsvfs->z_fuid_obj || zfsvfs->z_fuid_dirty) + domain = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx); + else + domain = nulldomain; + rw_exit(&zfsvfs->z_fuid_lock); + + ASSERT(domain); + return (domain); +} + +void +zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp) +{ + *uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + *gidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_gid, cr, ZFS_GROUP); +} + +uid_t +zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid, + cred_t *cr, zfs_fuid_type_t type) +{ + uint32_t index = FUID_INDEX(fuid); + const char *domain; + uid_t id; + + if (index == 0) + return (fuid); + + domain = zfs_fuid_find_by_idx(zfsvfs, index); + ASSERT(domain != NULL); + + if (type == ZFS_OWNER || type == ZFS_ACE_USER) { + (void) kidmap_getuidbysid(crgetzone(cr), domain, + FUID_RID(fuid), &id); + } else { + (void) kidmap_getgidbysid(crgetzone(cr), domain, + FUID_RID(fuid), &id); + } + return (id); +} + +/* + * Add a FUID node to the list of fuid's being created for this + * ACL + * + * If ACL has multiple domains, then keep only one copy of each unique + * domain. + */ +void +zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid, + uint64_t idx, uint64_t id, zfs_fuid_type_t type) +{ + zfs_fuid_t *fuid; + zfs_fuid_domain_t *fuid_domain; + zfs_fuid_info_t *fuidp; + uint64_t fuididx; + boolean_t found = B_FALSE; + + if (*fuidpp == NULL) + *fuidpp = zfs_fuid_info_alloc(); + + fuidp = *fuidpp; + /* + * First find fuid domain index in linked list + * + * If one isn't found then create an entry. 
+ */ + + for (fuididx = 1, fuid_domain = list_head(&fuidp->z_domains); + fuid_domain; fuid_domain = list_next(&fuidp->z_domains, + fuid_domain), fuididx++) { + if (idx == fuid_domain->z_domidx) { + found = B_TRUE; + break; + } + } + + if (!found) { + fuid_domain = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP); + fuid_domain->z_domain = domain; + fuid_domain->z_domidx = idx; + list_insert_tail(&fuidp->z_domains, fuid_domain); + fuidp->z_domain_str_sz += strlen(domain) + 1; + fuidp->z_domain_cnt++; + } + + if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) { + + /* + * Now allocate fuid entry and add it on the end of the list + */ + + fuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); + fuid->z_id = id; + fuid->z_domidx = idx; + fuid->z_logfuid = FUID_ENCODE(fuididx, rid); + + list_insert_tail(&fuidp->z_fuids, fuid); + fuidp->z_fuid_cnt++; + } else { + if (type == ZFS_OWNER) + fuidp->z_fuid_owner = FUID_ENCODE(fuididx, rid); + else + fuidp->z_fuid_group = FUID_ENCODE(fuididx, rid); + } +} + +/* + * Create a file system FUID, based on information in the users cred + * + * If cred contains KSID_OWNER then it should be used to determine + * the uid otherwise cred's uid will be used. By default cred's gid + * is used unless it's an ephemeral ID in which case KSID_GROUP will + * be used if it exists. + */ +uint64_t +zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type, + cred_t *cr, zfs_fuid_info_t **fuidp) +{ + uint64_t idx; + ksid_t *ksid; + uint32_t rid; + char *kdomain; + const char *domain; + uid_t id; + + VERIFY(type == ZFS_OWNER || type == ZFS_GROUP); + + ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP); + + if (!zfsvfs->z_use_fuids || (ksid == NULL)) { + id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr); + + if (IS_EPHEMERAL(id)) + return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY); + + return ((uint64_t)id); + } + + /* + * ksid is present and FUID is supported + */ + id = (type == ZFS_OWNER) ? 
ksid_getid(ksid) : crgetgid(cr); + + if (!IS_EPHEMERAL(id)) + return ((uint64_t)id); + + if (type == ZFS_GROUP) + id = ksid_getid(ksid); + + rid = ksid_getrid(ksid); + domain = ksid_getdomain(ksid); + + idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE); + + zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type); + + return (FUID_ENCODE(idx, rid)); +} + +/* + * Create a file system FUID for an ACL ace + * or a chown/chgrp of the file. + * This is similar to zfs_fuid_create_cred, except that + * we can't find the domain + rid information in the + * cred. Instead we have to query Winchester for the + * domain and rid. + * + * During replay operations the domain+rid information is + * found in the zfs_fuid_info_t that the replay code has + * attached to the zfsvfs of the file system. + */ +uint64_t +zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr, + zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp) +{ + const char *domain; + char *kdomain; + uint32_t fuid_idx = FUID_INDEX(id); + uint32_t rid; + idmap_stat status; + uint64_t idx; + zfs_fuid_t *zfuid = NULL; + zfs_fuid_info_t *fuidp; + + /* + * If POSIX ID, or entry is already a FUID then + * just return the id + * + * We may also be handed an already FUID'ized id via + * chmod. + */ + + if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0) + return (id); + + if (zfsvfs->z_replay) { + fuidp = zfsvfs->z_fuid_replay; + + /* + * If we are passed an ephemeral id, but no + * fuid_info was logged then return NOBODY. + * This is most likely a result of idmap service + * not being available. 
+ */ + if (fuidp == NULL) + return (UID_NOBODY); + + switch (type) { + case ZFS_ACE_USER: + case ZFS_ACE_GROUP: + zfuid = list_head(&fuidp->z_fuids); + rid = FUID_RID(zfuid->z_logfuid); + idx = FUID_INDEX(zfuid->z_logfuid); + break; + case ZFS_OWNER: + rid = FUID_RID(fuidp->z_fuid_owner); + idx = FUID_INDEX(fuidp->z_fuid_owner); + break; + case ZFS_GROUP: + rid = FUID_RID(fuidp->z_fuid_group); + idx = FUID_INDEX(fuidp->z_fuid_group); + break; + }; + domain = fuidp->z_domain_table[idx -1]; + } else { + if (type == ZFS_OWNER || type == ZFS_ACE_USER) + status = kidmap_getsidbyuid(crgetzone(cr), id, + &domain, &rid); + else + status = kidmap_getsidbygid(crgetzone(cr), id, + &domain, &rid); + + if (status != 0) { + /* + * When returning nobody we will need to + * make a dummy fuid table entry for logging + * purposes. + */ + rid = UID_NOBODY; + domain = nulldomain; + } + } + + idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE); + + if (!zfsvfs->z_replay) + zfs_fuid_node_add(fuidpp, kdomain, + rid, idx, id, type); + else if (zfuid != NULL) { + list_remove(&fuidp->z_fuids, zfuid); + kmem_free(zfuid, sizeof (zfs_fuid_t)); + } + return (FUID_ENCODE(idx, rid)); +} + +void +zfs_fuid_destroy(zfsvfs_t *zfsvfs) +{ + rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); + if (!zfsvfs->z_fuid_loaded) { + rw_exit(&zfsvfs->z_fuid_lock); + return; + } + zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain); + rw_exit(&zfsvfs->z_fuid_lock); +} + +/* + * Allocate zfs_fuid_info for tracking FUIDs created during + * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR() + */ +zfs_fuid_info_t * +zfs_fuid_info_alloc(void) +{ + zfs_fuid_info_t *fuidp; + + fuidp = kmem_zalloc(sizeof (zfs_fuid_info_t), KM_SLEEP); + list_create(&fuidp->z_domains, sizeof (zfs_fuid_domain_t), + offsetof(zfs_fuid_domain_t, z_next)); + list_create(&fuidp->z_fuids, sizeof (zfs_fuid_t), + offsetof(zfs_fuid_t, z_next)); + return (fuidp); +} + +/* + * Release all memory associated with zfs_fuid_info_t + */ 
+void +zfs_fuid_info_free(zfs_fuid_info_t *fuidp) +{ + zfs_fuid_t *zfuid; + zfs_fuid_domain_t *zdomain; + + while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) { + list_remove(&fuidp->z_fuids, zfuid); + kmem_free(zfuid, sizeof (zfs_fuid_t)); + } + + if (fuidp->z_domain_table != NULL) + kmem_free(fuidp->z_domain_table, + (sizeof (char **)) * fuidp->z_domain_cnt); + + while ((zdomain = list_head(&fuidp->z_domains)) != NULL) { + list_remove(&fuidp->z_domains, zdomain); + kmem_free(zdomain, sizeof (zfs_fuid_domain_t)); + } + + kmem_free(fuidp, sizeof (zfs_fuid_info_t)); +} + +/* + * Check to see if id is a groupmember. If cred + * has ksid info then sidlist is checked first + * and if still not found then POSIX groups are checked + * + * Will use a straight FUID compare when possible. + */ +boolean_t +zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr) +{ + ksid_t *ksid = crgetsid(cr, KSID_GROUP); + ksidlist_t *ksidlist = crgetsidlist(cr); + uid_t gid; + + if (ksid && ksidlist) { + int i; + ksid_t *ksid_groups; + uint32_t idx = FUID_INDEX(id); + uint32_t rid = FUID_RID(id); + + ksid_groups = ksidlist->ksl_sids; + + for (i = 0; i != ksidlist->ksl_nsid; i++) { + if (idx == 0) { + if (id != IDMAP_WK_CREATOR_GROUP_GID && + id == ksid_groups[i].ks_id) { + return (B_TRUE); + } + } else { + const char *domain; + + domain = zfs_fuid_find_by_idx(zfsvfs, idx); + ASSERT(domain != NULL); + + if (strcmp(domain, + IDMAP_WK_CREATOR_SID_AUTHORITY) == 0) + return (B_FALSE); + + if ((strcmp(domain, + ksid_groups[i].ks_domain->kd_name) == 0) && + rid == ksid_groups[i].ks_rid) + return (B_TRUE); + } + } + } + + /* + * Not found in ksidlist, check posix groups + */ + gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP); + return (groupmember(gid, cr)); +} + +void +zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +{ + if (zfsvfs->z_fuid_obj == 0) { + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, + FUID_SIZE_ESTIMATE(zfsvfs)); + dmu_tx_hold_zap(tx, 
MASTER_NODE_OBJ, FALSE, NULL); + } else { + dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); + dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, + FUID_SIZE_ESTIMATE(zfsvfs)); + } +} +#endif diff --git a/uts/common/fs/zfs/zfs_ioctl.c b/uts/common/fs/zfs/zfs_ioctl.c new file mode 100644 index 000000000000..1b63c9bf45ef --- /dev/null +++ b/uts/common/fs/zfs/zfs_ioctl.c @@ -0,0 +1,5122 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/uio.h> +#include <sys/buf.h> +#include <sys/modctl.h> +#include <sys/open.h> +#include <sys/file.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/cmn_err.h> +#include <sys/stat.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_vfsops.h> +#include <sys/zfs_znode.h> +#include <sys/zap.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/vdev.h> +#include <sys/priv_impl.h> +#include <sys/dmu.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_deleg.h> +#include <sys/dmu_objset.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/policy.h> +#include <sys/zone.h> +#include <sys/nvpair.h> +#include <sys/pathname.h> +#include <sys/mount.h> +#include <sys/sdt.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_ctldir.h> +#include <sys/zfs_dir.h> +#include <sys/zfs_onexit.h> +#include <sys/zvol.h> +#include <sys/dsl_scan.h> +#include <sharefs/share.h> +#include <sys/dmu_objset.h> + +#include "zfs_namecheck.h" +#include "zfs_prop.h" +#include "zfs_deleg.h" +#include "zfs_comutil.h" + +extern struct modlfs zfs_modlfs; + +extern void zfs_init(void); +extern void zfs_fini(void); + +ldi_ident_t zfs_li = NULL; +dev_info_t *zfs_dip; + +typedef int zfs_ioc_func_t(zfs_cmd_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); + +typedef enum { + NO_NAME, + POOL_NAME, + DATASET_NAME +} zfs_ioc_namecheck_t; + +typedef enum { + POOL_CHECK_NONE = 1 << 0, + POOL_CHECK_SUSPENDED = 1 << 1, + POOL_CHECK_READONLY = 1 << 2 +} zfs_ioc_poolcheck_t; + +typedef struct zfs_ioc_vec { + zfs_ioc_func_t *zvec_func; + zfs_secpolicy_func_t *zvec_secpolicy; + zfs_ioc_namecheck_t zvec_namecheck; + boolean_t zvec_his_log; + zfs_ioc_poolcheck_t zvec_pool_check; +} zfs_ioc_vec_t; + +/* This array is indexed by zfs_userquota_prop_t */ +static const char *userquota_perms[] = { + 
ZFS_DELEG_PERM_USERUSED, + ZFS_DELEG_PERM_USERQUOTA, + ZFS_DELEG_PERM_GROUPUSED, + ZFS_DELEG_PERM_GROUPQUOTA, +}; + +static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); +static int zfs_check_settable(const char *name, nvpair_t *property, + cred_t *cr); +static int zfs_check_clearable(char *dataset, nvlist_t *props, + nvlist_t **errors); +static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, + boolean_t *); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); + +/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */ +void +__dprintf(const char *file, const char *func, int line, const char *fmt, ...) +{ + const char *newfile; + char buf[512]; + va_list adx; + + /* + * Get rid of annoying "../common/" prefix to filename. + */ + newfile = strrchr(file, '/'); + if (newfile != NULL) { + newfile = newfile + 1; /* Get rid of leading / */ + } else { + newfile = file; + } + + va_start(adx, fmt); + (void) vsnprintf(buf, sizeof (buf), fmt, adx); + va_end(adx); + + /* + * To get this data, use the zfs-dprintf probe as so: + * dtrace -q -n 'zfs-dprintf \ + * /stringof(arg0) == "dbuf.c"/ \ + * {printf("%s: %s", stringof(arg1), stringof(arg3))}' + * arg0 = file name + * arg1 = function name + * arg2 = line number + * arg3 = message + */ + DTRACE_PROBE4(zfs__dprintf, + char *, newfile, char *, func, int, line, char *, buf); +} + +static void +history_str_free(char *buf) +{ + kmem_free(buf, HIS_MAX_RECORD_LEN); +} + +static char * +history_str_get(zfs_cmd_t *zc) +{ + char *buf; + + if (zc->zc_history == NULL) + return (NULL); + + buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); + if (copyinstr((void *)(uintptr_t)zc->zc_history, + buf, HIS_MAX_RECORD_LEN, NULL) != 0) { + history_str_free(buf); + return (NULL); + } + + buf[HIS_MAX_RECORD_LEN -1] = '\0'; + + return (buf); +} + +/* + * Check to see if the named dataset is currently defined as bootable + */ +static boolean_t +zfs_is_bootfs(const char *name) +{ + objset_t *os; + 
+ if (dmu_objset_hold(name, FTAG, &os) == 0) { + boolean_t ret; + ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os))); + dmu_objset_rele(os, FTAG); + return (ret); + } + return (B_FALSE); +} + +/* + * zfs_earlier_version + * + * Return non-zero if the spa version is less than requested version. + */ +static int +zfs_earlier_version(const char *name, int version) +{ + spa_t *spa; + + if (spa_open(name, &spa, FTAG) == 0) { + if (spa_version(spa) < version) { + spa_close(spa, FTAG); + return (1); + } + spa_close(spa, FTAG); + } + return (0); +} + +/* + * zpl_earlier_version + * + * Return TRUE if the ZPL version is less than requested version. + */ +static boolean_t +zpl_earlier_version(const char *name, int version) +{ + objset_t *os; + boolean_t rc = B_TRUE; + + if (dmu_objset_hold(name, FTAG, &os) == 0) { + uint64_t zplversion; + + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (B_TRUE); + } + /* XXX reading from non-owned objset */ + if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) + rc = zplversion < version; + dmu_objset_rele(os, FTAG); + } + return (rc); +} + +static void +zfs_log_history(zfs_cmd_t *zc) +{ + spa_t *spa; + char *buf; + + if ((buf = history_str_get(zc)) == NULL) + return; + + if (spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) + (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); + spa_close(spa, FTAG); + } + history_str_free(buf); +} + +/* + * Policy for top-level read operations (list pools). Requires no privileges, + * and can be used in the local zone, as there is no associated dataset. + */ +/* ARGSUSED */ +static int +zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +{ + return (0); +} + +/* + * Policy for dataset read operations (list children, get statistics). Requires + * no privileges, but must be visible in the local zone. 
+ */ +/* ARGSUSED */ +static int +zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +{ + if (INGLOBALZONE(curproc) || + zone_dataset_visible(zc->zc_name, NULL)) + return (0); + + return (ENOENT); +} + +static int +zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) +{ + int writable = 1; + + /* + * The dataset must be visible by this zone -- check this first + * so they don't see EPERM on something they shouldn't know about. + */ + if (!INGLOBALZONE(curproc) && + !zone_dataset_visible(dataset, &writable)) + return (ENOENT); + + if (INGLOBALZONE(curproc)) { + /* + * If the fs is zoned, only root can access it from the + * global zone. + */ + if (secpolicy_zfs(cr) && zoned) + return (EPERM); + } else { + /* + * If we are in a local zone, the 'zoned' property must be set. + */ + if (!zoned) + return (EPERM); + + /* must be writable by this zone */ + if (!writable) + return (EPERM); + } + return (0); +} + +static int +zfs_dozonecheck(const char *dataset, cred_t *cr) +{ + uint64_t zoned; + + if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) + return (ENOENT); + + return (zfs_dozonecheck_impl(dataset, zoned, cr)); +} + +static int +zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) +{ + uint64_t zoned; + + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + return (ENOENT); + } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + + return (zfs_dozonecheck_impl(dataset, zoned, cr)); +} + +int +zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) +{ + int error; + + error = zfs_dozonecheck(name, cr); + if (error == 0) { + error = secpolicy_zfs(cr); + if (error) + error = dsl_deleg_access(name, perm, cr); + } + return (error); +} + +int +zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, + const char *perm, cred_t *cr) +{ + int error; + + error = zfs_dozonecheck_ds(name, ds, cr); 
+ if (error == 0) { + error = secpolicy_zfs(cr); + if (error) + error = dsl_deleg_access_impl(ds, perm, cr); + } + return (error); +} + +/* + * Policy for setting the security label property. + * + * Returns 0 for success, non-zero for access and other errors. + */ +static int +zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) +{ + char ds_hexsl[MAXNAMELEN]; + bslabel_t ds_sl, new_sl; + boolean_t new_default = FALSE; + uint64_t zoned; + int needed_priv = -1; + int error; + + /* First get the existing dataset label. */ + error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), + 1, sizeof (ds_hexsl), &ds_hexsl, NULL); + if (error) + return (EPERM); + + if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) + new_default = TRUE; + + /* The label must be translatable */ + if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) + return (EINVAL); + + /* + * In a non-global zone, disallow attempts to set a label that + * doesn't match that of the zone; otherwise no other checks + * are needed. + */ + if (!INGLOBALZONE(curproc)) { + if (new_default || !blequal(&new_sl, CR_SL(CRED()))) + return (EPERM); + return (0); + } + + /* + * For global-zone datasets (i.e., those whose zoned property is + * "off", verify that the specified new label is valid for the + * global zone. + */ + if (dsl_prop_get_integer(name, + zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) + return (EPERM); + if (!zoned) { + if (zfs_check_global_label(name, strval) != 0) + return (EPERM); + } + + /* + * If the existing dataset label is nondefault, check if the + * dataset is mounted (label cannot be changed while mounted). + * Get the zfsvfs; if there isn't one, then the dataset isn't + * mounted (or isn't a dataset, doesn't exist, ...). + */ + if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { + objset_t *os; + static char *setsl_tag = "setsl_tag"; + + /* + * Try to own the dataset; abort if there is any error, + * (e.g., already mounted, in use, or other error). 
+ */ + error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, + setsl_tag, &os); + if (error) + return (EPERM); + + dmu_objset_disown(os, setsl_tag); + + if (new_default) { + needed_priv = PRIV_FILE_DOWNGRADE_SL; + goto out_check; + } + + if (hexstr_to_label(strval, &new_sl) != 0) + return (EPERM); + + if (blstrictdom(&ds_sl, &new_sl)) + needed_priv = PRIV_FILE_DOWNGRADE_SL; + else if (blstrictdom(&new_sl, &ds_sl)) + needed_priv = PRIV_FILE_UPGRADE_SL; + } else { + /* dataset currently has a default label */ + if (!new_default) + needed_priv = PRIV_FILE_UPGRADE_SL; + } + +out_check: + if (needed_priv != -1) + return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); + return (0); +} + +static int +zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, + cred_t *cr) +{ + char *strval; + + /* + * Check permissions for special properties. + */ + switch (prop) { + case ZFS_PROP_ZONED: + /* + * Disallow setting of 'zoned' from within a local zone. + */ + if (!INGLOBALZONE(curproc)) + return (EPERM); + break; + + case ZFS_PROP_QUOTA: + if (!INGLOBALZONE(curproc)) { + uint64_t zoned; + char setpoint[MAXNAMELEN]; + /* + * Unprivileged users are allowed to modify the + * quota on things *under* (ie. contained by) + * the thing they own. 
+ */ + if (dsl_prop_get_integer(dsname, "zoned", &zoned, + setpoint)) + return (EPERM); + if (!zoned || strlen(dsname) <= strlen(setpoint)) + return (EPERM); + } + break; + + case ZFS_PROP_MLSLABEL: + if (!is_system_labeled()) + return (EPERM); + + if (nvpair_value_string(propval, &strval) == 0) { + int err; + + err = zfs_set_slabel_policy(dsname, strval, CRED()); + if (err != 0) + return (err); + } + break; + } + + return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); +} + +int +zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +{ + int error; + + error = zfs_dozonecheck(zc->zc_name, cr); + if (error) + return (error); + + /* + * permission to set permissions will be evaluated later in + * dsl_deleg_can_allow() + */ + return (0); +} + +int +zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_ROLLBACK, cr)); +} + +int +zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +{ + spa_t *spa; + dsl_pool_t *dp; + dsl_dataset_t *ds; + char *cp; + int error; + + /* + * Generate the current snapshot name from the given objsetid, then + * use that name for the secpolicy/zone checks. 
+ */ + cp = strchr(zc->zc_name, '@'); + if (cp == NULL) + return (EINVAL); + error = spa_open(zc->zc_name, &spa, FTAG); + if (error) + return (error); + + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); + if (error) + return (error); + + dsl_dataset_name(ds, zc->zc_name); + + error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, + ZFS_DELEG_PERM_SEND, cr); + dsl_dataset_rele(ds, FTAG); + + return (error); +} + +static int +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +{ + vnode_t *vp; + int error; + + if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, + NO_FOLLOW, NULL, &vp)) != 0) + return (error); + + /* Now make sure mntpnt and dataset are ZFS */ + + if (vp->v_vfsp->vfs_fstype != zfsfstype || + (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), + zc->zc_name) != 0)) { + VN_RELE(vp); + return (EPERM); + } + + VN_RELE(vp); + return (dsl_deleg_access(zc->zc_name, + ZFS_DELEG_PERM_SHARE, cr)); +} + +int +zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +{ + if (!INGLOBALZONE(curproc)) + return (EPERM); + + if (secpolicy_nfs(cr) == 0) { + return (0); + } else { + return (zfs_secpolicy_deleg_share(zc, cr)); + } +} + +int +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +{ + if (!INGLOBALZONE(curproc)) + return (EPERM); + + if (secpolicy_smb(cr) == 0) { + return (0); + } else { + return (zfs_secpolicy_deleg_share(zc, cr)); + } +} + +static int +zfs_get_parent(const char *datasetname, char *parent, int parentsize) +{ + char *cp; + + /* + * Remove the @bla or /bla from the end of the name to get the parent. 
+ */ + (void) strncpy(parent, datasetname, parentsize); + cp = strrchr(parent, '@'); + if (cp != NULL) { + cp[0] = '\0'; + } else { + cp = strrchr(parent, '/'); + if (cp == NULL) + return (ENOENT); + cp[0] = '\0'; + } + + return (0); +} + +int +zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) +{ + int error; + + if ((error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_MOUNT, cr)) != 0) + return (error); + + return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); +} + +static int +zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); +} + +/* + * Destroying snapshots with delegated permissions requires + * descendent mount and destroy permissions. + * Reassemble the full filesystem@snap name so dsl_deleg_access() + * can do the correct permission check. + * + * Since this routine is used when doing a recursive destroy of snapshots + * and destroying snapshots requires descendent permissions, a successfull + * check of the top level snapshot applies to snapshots of all descendent + * datasets as well. 
+ */ +static int +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) +{ + int error; + char *dsname; + + dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); + + error = zfs_secpolicy_destroy_perms(dsname, cr); + + strfree(dsname); + return (error); +} + +int +zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) +{ + char parentname[MAXNAMELEN]; + int error; + + if ((error = zfs_secpolicy_write_perms(from, + ZFS_DELEG_PERM_RENAME, cr)) != 0) + return (error); + + if ((error = zfs_secpolicy_write_perms(from, + ZFS_DELEG_PERM_MOUNT, cr)) != 0) + return (error); + + if ((error = zfs_get_parent(to, parentname, + sizeof (parentname))) != 0) + return (error); + + if ((error = zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_CREATE, cr)) != 0) + return (error); + + if ((error = zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr)) != 0) + return (error); + + return (error); +} + +static int +zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); +} + +static int +zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +{ + char parentname[MAXNAMELEN]; + objset_t *clone; + int error; + + error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_PROMOTE, cr); + if (error) + return (error); + + error = dmu_objset_hold(zc->zc_name, FTAG, &clone); + + if (error == 0) { + dsl_dataset_t *pclone = NULL; + dsl_dir_t *dd; + dd = clone->os_dsl_dataset->ds_dir; + + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_origin_obj, FTAG, &pclone); + rw_exit(&dd->dd_pool->dp_config_rwlock); + if (error) { + dmu_objset_rele(clone, FTAG); + return (error); + } + + error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_MOUNT, cr); + + dsl_dataset_name(pclone, parentname); + dmu_objset_rele(clone, FTAG); + dsl_dataset_rele(pclone, FTAG); + if (error == 0) + error = zfs_secpolicy_write_perms(parentname, + 
ZFS_DELEG_PERM_PROMOTE, cr); + } + return (error); +} + +static int +zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +{ + int error; + + if ((error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_RECEIVE, cr)) != 0) + return (error); + + if ((error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_MOUNT, cr)) != 0) + return (error); + + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_CREATE, cr)); +} + +int +zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_SNAPSHOT, cr)); +} + +static int +zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +{ + + return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); +} + +static int +zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +{ + char parentname[MAXNAMELEN]; + int error; + + if ((error = zfs_get_parent(zc->zc_name, parentname, + sizeof (parentname))) != 0) + return (error); + + if (zc->zc_value[0] != '\0') { + if ((error = zfs_secpolicy_write_perms(zc->zc_value, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); + } + + if ((error = zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_CREATE, cr)) != 0) + return (error); + + error = zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr); + + return (error); +} + +static int +zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) +{ + int error; + + error = secpolicy_fs_unmount(cr, NULL); + if (error) { + error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); + } + return (error); +} + +/* + * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires + * SYS_CONFIG privilege, which is not available in a local zone. + */ +/* ARGSUSED */ +static int +zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +{ + if (secpolicy_sys_config(cr, B_FALSE) != 0) + return (EPERM); + + return (0); +} + +/* + * Policy for object to name lookups. 
+ */ +/* ARGSUSED */ +static int +zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +{ + int error; + + if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) + return (0); + + error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); + return (error); +} + +/* + * Policy for fault injection. Requires all privileges. + */ +/* ARGSUSED */ +static int +zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +{ + return (secpolicy_zinject(cr)); +} + +static int +zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +{ + zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); + + if (prop == ZPROP_INVAL) { + if (!zfs_prop_user(zc->zc_value)) + return (EINVAL); + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_USERPROP, cr)); + } else { + return (zfs_secpolicy_setprop(zc->zc_name, prop, + NULL, cr)); + } +} + +static int +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +{ + int err = zfs_secpolicy_read(zc, cr); + if (err) + return (err); + + if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + if (zc->zc_value[0] == 0) { + /* + * They are asking about a posix uid/gid. If it's + * themself, allow it. 
+ */ + if (zc->zc_objset_type == ZFS_PROP_USERUSED || + zc->zc_objset_type == ZFS_PROP_USERQUOTA) { + if (zc->zc_guid == crgetuid(cr)) + return (0); + } else { + if (groupmember(zc->zc_guid, cr)) + return (0); + } + } + + return (zfs_secpolicy_write_perms(zc->zc_name, + userquota_perms[zc->zc_objset_type], cr)); +} + +static int +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +{ + int err = zfs_secpolicy_read(zc, cr); + if (err) + return (err); + + if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + return (zfs_secpolicy_write_perms(zc->zc_name, + userquota_perms[zc->zc_objset_type], cr)); +} + +static int +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, + NULL, cr)); +} + +static int +zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_HOLD, cr)); +} + +static int +zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_RELEASE, cr)); +} + +/* + * Policy for allowing temporary snapshots to be taken or released + */ +static int +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) +{ + /* + * A temporary snapshot is the same as a snapshot, + * hold, destroy and release all rolled into one. + * Delegated diff alone is sufficient that we allow this. + */ + int error; + + if ((error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_DIFF, cr)) == 0) + return (0); + + error = zfs_secpolicy_snapshot(zc, cr); + if (!error) + error = zfs_secpolicy_hold(zc, cr); + if (!error) + error = zfs_secpolicy_release(zc, cr); + if (!error) + error = zfs_secpolicy_destroy(zc, cr); + return (error); +} + +/* + * Returns the nvlist as specified by the user in the zfs_cmd_t. 
+ */ +static int +get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) +{ + char *packed; + int error; + nvlist_t *list = NULL; + + /* + * Read in and unpack the user-supplied nvlist. + */ + if (size == 0) + return (EINVAL); + + packed = kmem_alloc(size, KM_SLEEP); + + if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, + iflag)) != 0) { + kmem_free(packed, size); + return (error); + } + + if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) { + kmem_free(packed, size); + return (error); + } + + kmem_free(packed, size); + + *nvp = list; + return (0); +} + +static int +fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +{ + size_t size; + + VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + + if (size > zc->zc_nvlist_dst_size) { + nvpair_t *more_errors; + int n = 0; + + if (zc->zc_nvlist_dst_size < 1024) + return (ENOMEM); + + VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); + more_errors = nvlist_prev_nvpair(*errors, NULL); + + do { + nvpair_t *pair = nvlist_prev_nvpair(*errors, + more_errors); + VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); + n++; + VERIFY(nvlist_size(*errors, &size, + NV_ENCODE_NATIVE) == 0); + } while (size > zc->zc_nvlist_dst_size); + + VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); + VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); + ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + ASSERT(size <= zc->zc_nvlist_dst_size); + } + + return (0); +} + +static int +put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) +{ + char *packed = NULL; + int error = 0; + size_t size; + + VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); + + if (size > zc->zc_nvlist_dst_size) { + error = ENOMEM; + } else { + packed = kmem_alloc(size, KM_SLEEP); + VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, + KM_SLEEP) == 0); + if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, + size, zc->zc_iflags) != 0) + error = EFAULT; + kmem_free(packed, size); + } + + 
zc->zc_nvlist_dst_size = size; + return (error); +} + +static int +getzfsvfs(const char *dsname, zfsvfs_t **zfvp) +{ + objset_t *os; + int error; + + error = dmu_objset_hold(dsname, FTAG, &os); + if (error) + return (error); + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (EINVAL); + } + + mutex_enter(&os->os_user_ptr_lock); + *zfvp = dmu_objset_get_user(os); + if (*zfvp) { + VFS_HOLD((*zfvp)->z_vfs); + } else { + error = ESRCH; + } + mutex_exit(&os->os_user_ptr_lock); + dmu_objset_rele(os, FTAG); + return (error); +} + +/* + * Find a zfsvfs_t for a mounted filesystem, or create our own, in which + * case its z_vfs will be NULL, and it will be opened as the owner. + */ +static int +zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) +{ + int error = 0; + + if (getzfsvfs(name, zfvp) != 0) + error = zfsvfs_create(name, zfvp); + if (error == 0) { + rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER : + RW_READER, tag); + if ((*zfvp)->z_unmounted) { + /* + * XXX we could probably try again, since the unmounting + * thread should be just about to disassociate the + * objset from the zfsvfs. 
+ */ + rrw_exit(&(*zfvp)->z_teardown_lock, tag); + return (EBUSY); + } + } + return (error); +} + +static void +zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) +{ + rrw_exit(&zfsvfs->z_teardown_lock, tag); + + if (zfsvfs->z_vfs) { + VFS_RELE(zfsvfs->z_vfs); + } else { + dmu_objset_disown(zfsvfs->z_os, zfsvfs); + zfsvfs_free(zfsvfs); + } +} + +static int +zfs_ioc_pool_create(zfs_cmd_t *zc) +{ + int error; + nvlist_t *config, *props = NULL; + nvlist_t *rootprops = NULL; + nvlist_t *zplprops = NULL; + char *buf; + + if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, + zc->zc_iflags, &config)) + return (error); + + if (zc->zc_nvlist_src_size != 0 && (error = + get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props))) { + nvlist_free(config); + return (error); + } + + if (props) { + nvlist_t *nvl = NULL; + uint64_t version = SPA_VERSION; + + (void) nvlist_lookup_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); + if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) { + error = EINVAL; + goto pool_props_bad; + } + (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); + if (nvl) { + error = nvlist_dup(nvl, &rootprops, KM_SLEEP); + if (error != 0) { + nvlist_free(config); + nvlist_free(props); + return (error); + } + (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); + } + VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops_root(version, rootprops, + zplprops, NULL); + if (error) + goto pool_props_bad; + } + + buf = history_str_get(zc); + + error = spa_create(zc->zc_name, config, props, buf, zplprops); + + /* + * Set the remaining root properties + */ + if (!error && (error = zfs_set_prop_nvlist(zc->zc_name, + ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) + (void) spa_destroy(zc->zc_name); + + if (buf != NULL) + history_str_free(buf); + +pool_props_bad: + nvlist_free(rootprops); + nvlist_free(zplprops); + nvlist_free(config); + nvlist_free(props); + + return (error); +} + 
+static int +zfs_ioc_pool_destroy(zfs_cmd_t *zc) +{ + int error; + zfs_log_history(zc); + error = spa_destroy(zc->zc_name); + if (error == 0) + zvol_remove_minors(zc->zc_name); + return (error); +} + +static int +zfs_ioc_pool_import(zfs_cmd_t *zc) +{ + nvlist_t *config, *props = NULL; + uint64_t guid; + int error; + + if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, + zc->zc_iflags, &config)) != 0) + return (error); + + if (zc->zc_nvlist_src_size != 0 && (error = + get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props))) { + nvlist_free(config); + return (error); + } + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || + guid != zc->zc_guid) + error = EINVAL; + else + error = spa_import(zc->zc_name, config, props, zc->zc_cookie); + + if (zc->zc_nvlist_dst != 0) { + int err; + + if ((err = put_nvlist(zc, config)) != 0) + error = err; + } + + nvlist_free(config); + + if (props) + nvlist_free(props); + + return (error); +} + +static int +zfs_ioc_pool_export(zfs_cmd_t *zc) +{ + int error; + boolean_t force = (boolean_t)zc->zc_cookie; + boolean_t hardforce = (boolean_t)zc->zc_guid; + + zfs_log_history(zc); + error = spa_export(zc->zc_name, NULL, force, hardforce); + if (error == 0) + zvol_remove_minors(zc->zc_name); + return (error); +} + +static int +zfs_ioc_pool_configs(zfs_cmd_t *zc) +{ + nvlist_t *configs; + int error; + + if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) + return (EEXIST); + + error = put_nvlist(zc, configs); + + nvlist_free(configs); + + return (error); +} + +static int +zfs_ioc_pool_stats(zfs_cmd_t *zc) +{ + nvlist_t *config; + int error; + int ret = 0; + + error = spa_get_stats(zc->zc_name, &config, zc->zc_value, + sizeof (zc->zc_value)); + + if (config != NULL) { + ret = put_nvlist(zc, config); + nvlist_free(config); + + /* + * The config may be present even if 'error' is non-zero. + * In this case we return success, and preserve the real errno + * in 'zc_cookie'. 
+ */ + zc->zc_cookie = error; + } else { + ret = error; + } + + return (ret); +} + +/* + * Try to import the given pool, returning pool stats as appropriate so that + * user land knows which devices are available and overall pool health. + */ +static int +zfs_ioc_pool_tryimport(zfs_cmd_t *zc) +{ + nvlist_t *tryconfig, *config; + int error; + + if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, + zc->zc_iflags, &tryconfig)) != 0) + return (error); + + config = spa_tryimport(tryconfig); + + nvlist_free(tryconfig); + + if (config == NULL) + return (EINVAL); + + error = put_nvlist(zc, config); + nvlist_free(config); + + return (error); +} + +/* + * inputs: + * zc_name name of the pool + * zc_cookie scan func (pool_scan_func_t) + */ +static int +zfs_ioc_pool_scan(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + if (zc->zc_cookie == POOL_SCAN_NONE) + error = spa_scan_stop(spa); + else + error = spa_scan(spa, zc->zc_cookie); + + spa_close(spa, FTAG); + + return (error); +} + +static int +zfs_ioc_pool_freeze(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error == 0) { + spa_freeze(spa); + spa_close(spa, FTAG); + } + return (error); +} + +static int +zfs_ioc_pool_upgrade(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) { + spa_close(spa, FTAG); + return (EINVAL); + } + + spa_upgrade(spa, zc->zc_cookie); + spa_close(spa, FTAG); + + return (error); +} + +static int +zfs_ioc_pool_get_history(zfs_cmd_t *zc) +{ + spa_t *spa; + char *hist_buf; + uint64_t size; + int error; + + if ((size = zc->zc_history_len) == 0) + return (EINVAL); + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { + spa_close(spa, FTAG); + return 
(ENOTSUP); + } + + hist_buf = kmem_alloc(size, KM_SLEEP); + if ((error = spa_history_get(spa, &zc->zc_history_offset, + &zc->zc_history_len, hist_buf)) == 0) { + error = ddi_copyout(hist_buf, + (void *)(uintptr_t)zc->zc_history, + zc->zc_history_len, zc->zc_iflags); + } + + spa_close(spa, FTAG); + kmem_free(hist_buf, size); + return (error); +} + +static int +zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) +{ + int error; + + if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value)) + return (error); + + return (0); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_obj object to find + * + * outputs: + * zc_value name of object + */ +static int +zfs_ioc_obj_to_path(zfs_cmd_t *zc) +{ + objset_t *os; + int error; + + /* XXX reading from objset not owned */ + if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) + return (error); + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (EINVAL); + } + error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, + sizeof (zc->zc_value)); + dmu_objset_rele(os, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_obj object to find + * + * outputs: + * zc_stat stats on object + * zc_value path to object + */ +static int +zfs_ioc_obj_to_stats(zfs_cmd_t *zc) +{ + objset_t *os; + int error; + + /* XXX reading from objset not owned */ + if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) + return (error); + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (EINVAL); + } + error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, + sizeof (zc->zc_value)); + dmu_objset_rele(os, FTAG); + + return (error); +} + +static int +zfs_ioc_vdev_add(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + nvlist_t *config, **l2cache, **spares; + uint_t nl2cache = 0, nspares = 0; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error != 0) + return (error); + + error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, + 
zc->zc_iflags, &config); + (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache); + + (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES, + &spares, &nspares); + + /* + * A root pool with concatenated devices is not supported. + * Thus, can not add a device to a root pool. + * + * Intent log device can not be added to a rootpool because + * during mountroot, zil is replayed, a seperated log device + * can not be accessed during the mountroot time. + * + * l2cache and spare devices are ok to be added to a rootpool. + */ + if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) { + nvlist_free(config); + spa_close(spa, FTAG); + return (EDOM); + } + + if (error == 0) { + error = spa_vdev_add(spa, config); + nvlist_free(config); + } + spa_close(spa, FTAG); + return (error); +} + +/* + * inputs: + * zc_name name of the pool + * zc_nvlist_conf nvlist of devices to remove + * zc_cookie to stop the remove? + */ +static int +zfs_ioc_vdev_remove(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error != 0) + return (error); + error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); + spa_close(spa, FTAG); + return (error); +} + +static int +zfs_ioc_vdev_set_state(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + vdev_state_t newstate = VDEV_STATE_UNKNOWN; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + switch (zc->zc_cookie) { + case VDEV_STATE_ONLINE: + error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate); + break; + + case VDEV_STATE_OFFLINE: + error = vdev_offline(spa, zc->zc_guid, zc->zc_obj); + break; + + case VDEV_STATE_FAULTED: + if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && + zc->zc_obj != VDEV_AUX_EXTERNAL) + zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; + + error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); + break; + + case VDEV_STATE_DEGRADED: + if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && + zc->zc_obj != VDEV_AUX_EXTERNAL) + zc->zc_obj = 
VDEV_AUX_ERR_EXCEEDED; + + error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); + break; + + default: + error = EINVAL; + } + zc->zc_cookie = newstate; + spa_close(spa, FTAG); + return (error); +} + +static int +zfs_ioc_vdev_attach(zfs_cmd_t *zc) +{ + spa_t *spa; + int replacing = zc->zc_cookie; + nvlist_t *config; + int error; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, + zc->zc_iflags, &config)) == 0) { + error = spa_vdev_attach(spa, zc->zc_guid, config, replacing); + nvlist_free(config); + } + + spa_close(spa, FTAG); + return (error); +} + +static int +zfs_ioc_vdev_detach(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE); + + spa_close(spa, FTAG); + return (error); +} + +static int +zfs_ioc_vdev_split(zfs_cmd_t *zc) +{ + spa_t *spa; + nvlist_t *config, *props = NULL; + int error; + boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT); + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, + zc->zc_iflags, &config)) { + spa_close(spa, FTAG); + return (error); + } + + if (zc->zc_nvlist_src_size != 0 && (error = + get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props))) { + spa_close(spa, FTAG); + nvlist_free(config); + return (error); + } + + error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp); + + spa_close(spa, FTAG); + + nvlist_free(config); + nvlist_free(props); + + return (error); +} + +static int +zfs_ioc_vdev_setpath(zfs_cmd_t *zc) +{ + spa_t *spa; + char *path = zc->zc_value; + uint64_t guid = zc->zc_guid; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error != 0) + return (error); + + error = spa_vdev_setpath(spa, guid, path); + spa_close(spa, FTAG); + return (error); 
+} + +static int +zfs_ioc_vdev_setfru(zfs_cmd_t *zc) +{ + spa_t *spa; + char *fru = zc->zc_value; + uint64_t guid = zc->zc_guid; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error != 0) + return (error); + + error = spa_vdev_setfru(spa, guid, fru); + spa_close(spa, FTAG); + return (error); +} + +static int +zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) +{ + int error = 0; + nvlist_t *nv; + + dmu_objset_fast_stat(os, &zc->zc_objset_stats); + + if (zc->zc_nvlist_dst != 0 && + (error = dsl_prop_get_all(os, &nv)) == 0) { + dmu_objset_stats(os, nv); + /* + * NB: zvol_get_stats() will read the objset contents, + * which we aren't supposed to do with a + * DS_MODE_USER hold, because it could be + * inconsistent. So this is a bit of a workaround... + * XXX reading with out owning + */ + if (!zc->zc_objset_stats.dds_inconsistent) { + if (dmu_objset_type(os) == DMU_OST_ZVOL) + VERIFY(zvol_get_stats(os, nv) == 0); + } + error = put_nvlist(zc, nv); + nvlist_free(nv); + } + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + */ +static int +zfs_ioc_objset_stats(zfs_cmd_t *zc) +{ + objset_t *os = NULL; + int error; + + if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) + return (error); + + error = zfs_ioc_objset_stats_impl(zc, os); + + dmu_objset_rele(os, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_nvlist_dst received property nvlist + * zc_nvlist_dst_size size of received property nvlist + * + * Gets received properties (distinct from local properties on or after + * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from + * local property values. 
+ */ +static int +zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) +{ + objset_t *os = NULL; + int error; + nvlist_t *nv; + + if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) + return (error); + + /* + * Without this check, we would return local property values if the + * caller has not already received properties on or after + * SPA_VERSION_RECVD_PROPS. + */ + if (!dsl_prop_get_hasrecvd(os)) { + dmu_objset_rele(os, FTAG); + return (ENOTSUP); + } + + if (zc->zc_nvlist_dst != 0 && + (error = dsl_prop_get_received(os, &nv)) == 0) { + error = put_nvlist(zc, nv); + nvlist_free(nv); + } + + dmu_objset_rele(os, FTAG); + return (error); +} + +static int +nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) +{ + uint64_t value; + int error; + + /* + * zfs_get_zplprop() will either find a value or give us + * the default value (if there is one). + */ + if ((error = zfs_get_zplprop(os, prop, &value)) != 0) + return (error); + VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); + return (0); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_dst_size size of buffer for zpl property nvlist + * + * outputs: + * zc_nvlist_dst zpl property nvlist + * zc_nvlist_dst_size size of zpl property nvlist + */ +static int +zfs_ioc_objset_zplprops(zfs_cmd_t *zc) +{ + objset_t *os; + int err; + + /* XXX reading without owning */ + if (err = dmu_objset_hold(zc->zc_name, FTAG, &os)) + return (err); + + dmu_objset_fast_stat(os, &zc->zc_objset_stats); + + /* + * NB: nvl_add_zplprop() will read the objset contents, + * which we aren't supposed to do with a DS_MODE_USER + * hold, because it could be inconsistent. 
+ */ + if (zc->zc_nvlist_dst != NULL && + !zc->zc_objset_stats.dds_inconsistent && + dmu_objset_type(os) == DMU_OST_ZFS) { + nvlist_t *nv; + + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && + (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && + (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && + (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0) + err = put_nvlist(zc, nv); + nvlist_free(nv); + } else { + err = ENOENT; + } + dmu_objset_rele(os, FTAG); + return (err); +} + +static boolean_t +dataset_name_hidden(const char *name) +{ + /* + * Skip over datasets that are not visible in this zone, + * internal datasets (which have a $ in their name), and + * temporary datasets (which have a % in their name). + */ + if (strchr(name, '$') != NULL) + return (B_TRUE); + if (strchr(name, '%') != NULL) + return (B_TRUE); + if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL)) + return (B_TRUE); + return (B_FALSE); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_cookie zap cursor + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_name name of next filesystem + * zc_cookie zap cursor + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + */ +static int +zfs_ioc_dataset_list_next(zfs_cmd_t *zc) +{ + objset_t *os; + int error; + char *p; + size_t orig_len = strlen(zc->zc_name); + +top: + if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) { + if (error == ENOENT) + error = ESRCH; + return (error); + } + + p = strrchr(zc->zc_name, '/'); + if (p == NULL || p[1] != '\0') + (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); + p = zc->zc_name + strlen(zc->zc_name); + + /* + * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 + * but is not declared void because its called by dmu_objset_find(). 
+ */ + if (zc->zc_cookie == 0) { + uint64_t cookie = 0; + int len = sizeof (zc->zc_name) - (p - zc->zc_name); + + while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) + (void) dmu_objset_prefetch(p, NULL); + } + + do { + error = dmu_dir_list_next(os, + sizeof (zc->zc_name) - (p - zc->zc_name), p, + NULL, &zc->zc_cookie); + if (error == ENOENT) + error = ESRCH; + } while (error == 0 && dataset_name_hidden(zc->zc_name) && + !(zc->zc_iflags & FKIOCTL)); + dmu_objset_rele(os, FTAG); + + /* + * If it's an internal dataset (ie. with a '$' in its name), + * don't try to get stats for it, otherwise we'll return ENOENT. + */ + if (error == 0 && strchr(zc->zc_name, '$') == NULL) { + error = zfs_ioc_objset_stats(zc); /* fill in the stats */ + if (error == ENOENT) { + /* We lost a race with destroy, get the next one. */ + zc->zc_name[orig_len] = '\0'; + goto top; + } + } + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_cookie zap cursor + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_name name of next snapshot + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + */ +static int +zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) +{ + objset_t *os; + int error; + +top: + if (zc->zc_cookie == 0) + (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, + NULL, DS_FIND_SNAPSHOTS); + + error = dmu_objset_hold(zc->zc_name, FTAG, &os); + if (error) + return (error == ENOENT ? ESRCH : error); + + /* + * A dataset name of maximum length cannot have any snapshots, + * so exit immediately. 
+ */ + if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { + dmu_objset_rele(os, FTAG); + return (ESRCH); + } + + error = dmu_snapshot_list_next(os, + sizeof (zc->zc_name) - strlen(zc->zc_name), + zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, + NULL); + + if (error == 0) { + dsl_dataset_t *ds; + dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; + + /* + * Since we probably don't have a hold on this snapshot, + * it's possible that the objsetid could have been destroyed + * and reused for a new objset. It's OK if this happens during + * a zfs send operation, since the new createtxg will be + * beyond the range we're interested in. + */ + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (error) { + if (error == ENOENT) { + /* Racing with destroy, get the next one. */ + *strchr(zc->zc_name, '@') = '\0'; + dmu_objset_rele(os, FTAG); + goto top; + } + } else { + objset_t *ossnap; + + error = dmu_objset_from_ds(ds, &ossnap); + if (error == 0) + error = zfs_ioc_objset_stats_impl(zc, ossnap); + dsl_dataset_rele(ds, FTAG); + } + } else if (error == ENOENT) { + error = ESRCH; + } + + dmu_objset_rele(os, FTAG); + /* if we failed, undo the @ that we tacked on to zc_name */ + if (error) + *strchr(zc->zc_name, '@') = '\0'; + return (error); +} + +static int +zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) +{ + const char *propname = nvpair_name(pair); + uint64_t *valary; + unsigned int vallen; + const char *domain; + char *dash; + zfs_userquota_prop_t type; + uint64_t rid; + uint64_t quota; + zfsvfs_t *zfsvfs; + int err; + + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &pair) != 0) + return (EINVAL); + } + + /* + * A correctly constructed propname is encoded as + * userquota@<rid>-<domain>. 
+ */ + if ((dash = strchr(propname, '-')) == NULL || + nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || + vallen != 3) + return (EINVAL); + + domain = dash + 1; + type = valary[0]; + rid = valary[1]; + quota = valary[2]; + + err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE); + if (err == 0) { + err = zfs_set_userquota(zfsvfs, type, domain, rid, quota); + zfsvfs_rele(zfsvfs, FTAG); + } + + return (err); +} + +/* + * If the named property is one that has a special function to set its value, + * return 0 on success and a positive error code on failure; otherwise if it is + * not one of the special properties handled by this function, return -1. + * + * XXX: It would be better for callers of the property interface if we handled + * these special cases in dsl_prop.c (in the dsl layer). + */ +static int +zfs_prop_set_special(const char *dsname, zprop_source_t source, + nvpair_t *pair) +{ + const char *propname = nvpair_name(pair); + zfs_prop_t prop = zfs_name_to_prop(propname); + uint64_t intval; + int err; + + if (prop == ZPROP_INVAL) { + if (zfs_prop_userquota(propname)) + return (zfs_prop_set_userquota(dsname, pair)); + return (-1); + } + + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &pair) == 0); + } + + if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) + return (-1); + + VERIFY(0 == nvpair_value_uint64(pair, &intval)); + + switch (prop) { + case ZFS_PROP_QUOTA: + err = dsl_dir_set_quota(dsname, source, intval); + break; + case ZFS_PROP_REFQUOTA: + err = dsl_dataset_set_quota(dsname, source, intval); + break; + case ZFS_PROP_RESERVATION: + err = dsl_dir_set_reservation(dsname, source, intval); + break; + case ZFS_PROP_REFRESERVATION: + err = dsl_dataset_set_reservation(dsname, source, intval); + break; + case ZFS_PROP_VOLSIZE: + err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip), + intval); + break; + case ZFS_PROP_VERSION: + { + 
zfsvfs_t *zfsvfs; + + if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0) + break; + + err = zfs_set_version(zfsvfs, intval); + zfsvfs_rele(zfsvfs, FTAG); + + if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { + zfs_cmd_t *zc; + + zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); + (void) strcpy(zc->zc_name, dsname); + (void) zfs_ioc_userspace_upgrade(zc); + kmem_free(zc, sizeof (zfs_cmd_t)); + } + break; + } + + default: + err = -1; + } + + return (err); +} + +/* + * This function is best effort. If it fails to set any of the given properties, + * it continues to set as many as it can and returns the first error + * encountered. If the caller provides a non-NULL errlist, it also gives the + * complete list of names of all the properties it failed to set along with the + * corresponding error numbers. The caller is responsible for freeing the + * returned errlist. + * + * If every property is set successfully, zero is returned and the list pointed + * at by errlist is NULL. + */ +int +zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, + nvlist_t **errlist) +{ + nvpair_t *pair; + nvpair_t *propval; + int rv = 0; + uint64_t intval; + char *strval; + nvlist_t *genericnvl; + nvlist_t *errors; + nvlist_t *retrynvl; + + VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + +retry: + pair = NULL; + while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { + const char *propname = nvpair_name(pair); + zfs_prop_t prop = zfs_name_to_prop(propname); + int err = 0; + + /* decode the property value */ + propval = pair; + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &propval) != 0) + err = EINVAL; + } + + /* Validate value type */ + if (err == 0 && prop == ZPROP_INVAL) { + if 
(zfs_prop_user(propname)) { + if (nvpair_type(propval) != DATA_TYPE_STRING) + err = EINVAL; + } else if (zfs_prop_userquota(propname)) { + if (nvpair_type(propval) != + DATA_TYPE_UINT64_ARRAY) + err = EINVAL; + } + } else if (err == 0) { + if (nvpair_type(propval) == DATA_TYPE_STRING) { + if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) + err = EINVAL; + } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { + const char *unused; + + VERIFY(nvpair_value_uint64(propval, + &intval) == 0); + + switch (zfs_prop_get_type(prop)) { + case PROP_TYPE_NUMBER: + break; + case PROP_TYPE_STRING: + err = EINVAL; + break; + case PROP_TYPE_INDEX: + if (zfs_prop_index_to_string(prop, + intval, &unused) != 0) + err = EINVAL; + break; + default: + cmn_err(CE_PANIC, + "unknown property type"); + } + } else { + err = EINVAL; + } + } + + /* Validate permissions */ + if (err == 0) + err = zfs_check_settable(dsname, pair, CRED()); + + if (err == 0) { + err = zfs_prop_set_special(dsname, source, pair); + if (err == -1) { + /* + * For better performance we build up a list of + * properties to set in a single transaction. + */ + err = nvlist_add_nvpair(genericnvl, pair); + } else if (err != 0 && nvl != retrynvl) { + /* + * This may be a spurious error caused by + * receiving quota and reservation out of order. + * Try again in a second pass. + */ + err = nvlist_add_nvpair(retrynvl, pair); + } + } + + if (err != 0) + VERIFY(nvlist_add_int32(errors, propname, err) == 0); + } + + if (nvl != retrynvl && !nvlist_empty(retrynvl)) { + nvl = retrynvl; + goto retry; + } + + if (!nvlist_empty(genericnvl) && + dsl_props_set(dsname, source, genericnvl) != 0) { + /* + * If this fails, we still want to set as many properties as we + * can, so try setting them individually. 
+ */ + pair = NULL; + while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { + const char *propname = nvpair_name(pair); + int err = 0; + + propval = pair; + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &propval) == 0); + } + + if (nvpair_type(propval) == DATA_TYPE_STRING) { + VERIFY(nvpair_value_string(propval, + &strval) == 0); + err = dsl_prop_set(dsname, propname, source, 1, + strlen(strval) + 1, strval); + } else { + VERIFY(nvpair_value_uint64(propval, + &intval) == 0); + err = dsl_prop_set(dsname, propname, source, 8, + 1, &intval); + } + + if (err != 0) { + VERIFY(nvlist_add_int32(errors, propname, + err) == 0); + } + } + } + nvlist_free(genericnvl); + nvlist_free(retrynvl); + + if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { + nvlist_free(errors); + errors = NULL; + } else { + VERIFY(nvpair_value_int32(pair, &rv) == 0); + } + + if (errlist == NULL) + nvlist_free(errors); + else + *errlist = errors; + + return (rv); +} + +/* + * Check that all the properties are valid user properties. 
+ */ +static int +zfs_check_userprops(char *fsname, nvlist_t *nvl) +{ + nvpair_t *pair = NULL; + int error = 0; + + while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { + const char *propname = nvpair_name(pair); + char *valstr; + + if (!zfs_prop_user(propname) || + nvpair_type(pair) != DATA_TYPE_STRING) + return (EINVAL); + + if (error = zfs_secpolicy_write_perms(fsname, + ZFS_DELEG_PERM_USERPROP, CRED())) + return (error); + + if (strlen(propname) >= ZAP_MAXNAMELEN) + return (ENAMETOOLONG); + + VERIFY(nvpair_value_string(pair, &valstr) == 0); + if (strlen(valstr) >= ZAP_MAXVALUELEN) + return (E2BIG); + } + return (0); +} + +static void +props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) +{ + nvpair_t *pair; + + VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + pair = NULL; + while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { + if (nvlist_exists(skipped, nvpair_name(pair))) + continue; + + VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); + } +} + +static int +clear_received_props(objset_t *os, const char *fs, nvlist_t *props, + nvlist_t *skipped) +{ + int err = 0; + nvlist_t *cleared_props = NULL; + props_skip(props, skipped, &cleared_props); + if (!nvlist_empty(cleared_props)) { + /* + * Acts on local properties until the dataset has received + * properties at least once on or after SPA_VERSION_RECVD_PROPS. + */ + zprop_source_t flags = (ZPROP_SRC_NONE | + (dsl_prop_get_hasrecvd(os) ? 
ZPROP_SRC_RECEIVED : 0)); + err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); + } + nvlist_free(cleared_props); + return (err); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value name of property to set + * zc_nvlist_src{_size} nvlist of properties to apply + * zc_cookie received properties flag + * + * outputs: + * zc_nvlist_dst{_size} error for each unapplied received property + */ +static int +zfs_ioc_set_prop(zfs_cmd_t *zc) +{ + nvlist_t *nvl; + boolean_t received = zc->zc_cookie; + zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : + ZPROP_SRC_LOCAL); + nvlist_t *errors = NULL; + int error; + + if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvl)) != 0) + return (error); + + if (received) { + nvlist_t *origprops; + objset_t *os; + + if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { + if (dsl_prop_get_received(os, &origprops) == 0) { + (void) clear_received_props(os, + zc->zc_name, origprops, nvl); + nvlist_free(origprops); + } + + dsl_prop_set_hasrecvd(os); + dmu_objset_rele(os, FTAG); + } + } + + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); + + if (zc->zc_nvlist_dst != NULL && errors != NULL) { + (void) put_nvlist(zc, errors); + } + + nvlist_free(errors); + nvlist_free(nvl); + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value name of property to inherit + * zc_cookie revert to received value if TRUE + * + * outputs: none + */ +static int +zfs_ioc_inherit_prop(zfs_cmd_t *zc) +{ + const char *propname = zc->zc_value; + zfs_prop_t prop = zfs_name_to_prop(propname); + boolean_t received = zc->zc_cookie; + zprop_source_t source = (received + ? ZPROP_SRC_NONE /* revert to received value, if any */ + : ZPROP_SRC_INHERITED); /* explicitly inherit */ + + if (received) { + nvlist_t *dummy; + nvpair_t *pair; + zprop_type_t type; + int err; + + /* + * zfs_prop_set_special() expects properties in the form of an + * nvpair with type info. 
+ */ + if (prop == ZPROP_INVAL) { + if (!zfs_prop_user(propname)) + return (EINVAL); + + type = PROP_TYPE_STRING; + } else if (prop == ZFS_PROP_VOLSIZE || + prop == ZFS_PROP_VERSION) { + return (EINVAL); + } else { + type = zfs_prop_get_type(prop); + } + + VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + switch (type) { + case PROP_TYPE_STRING: + VERIFY(0 == nvlist_add_string(dummy, propname, "")); + break; + case PROP_TYPE_NUMBER: + case PROP_TYPE_INDEX: + VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); + break; + default: + nvlist_free(dummy); + return (EINVAL); + } + + pair = nvlist_next_nvpair(dummy, NULL); + err = zfs_prop_set_special(zc->zc_name, source, pair); + nvlist_free(dummy); + if (err != -1) + return (err); /* special property already handled */ + } else { + /* + * Only check this in the non-received case. We want to allow + * 'inherit -S' to revert non-inheritable properties like quota + * and reservation to the received or default values even though + * they are not considered inheritable. + */ + if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop)) + return (EINVAL); + } + + /* the property name has been validated by zfs_secpolicy_inherit() */ + return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); +} + +static int +zfs_ioc_pool_set_props(zfs_cmd_t *zc) +{ + nvlist_t *props; + spa_t *spa; + int error; + nvpair_t *pair; + + if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props)) + return (error); + + /* + * If the only property is the configfile, then just do a spa_lookup() + * to handle the faulted case. 
+ */ + pair = nvlist_next_nvpair(props, NULL); + if (pair != NULL && strcmp(nvpair_name(pair), + zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && + nvlist_next_nvpair(props, pair) == NULL) { + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(zc->zc_name)) != NULL) { + spa_configfile_set(spa, props, B_FALSE); + spa_config_sync(spa, B_FALSE, B_TRUE); + } + mutex_exit(&spa_namespace_lock); + if (spa != NULL) { + nvlist_free(props); + return (0); + } + } + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { + nvlist_free(props); + return (error); + } + + error = spa_prop_set(spa, props); + + nvlist_free(props); + spa_close(spa, FTAG); + + return (error); +} + +static int +zfs_ioc_pool_get_props(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + nvlist_t *nvp = NULL; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { + /* + * If the pool is faulted, there may be properties we can still + * get (such as altroot and cachefile), so attempt to get them + * anyway. + */ + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(zc->zc_name)) != NULL) + error = spa_prop_get(spa, &nvp); + mutex_exit(&spa_namespace_lock); + } else { + error = spa_prop_get(spa, &nvp); + spa_close(spa, FTAG); + } + + if (error == 0 && zc->zc_nvlist_dst != NULL) + error = put_nvlist(zc, nvp); + else + error = EFAULT; + + nvlist_free(nvp); + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_src{_size} nvlist of delegated permissions + * zc_perm_action allow/unallow flag + * + * outputs: none + */ +static int +zfs_ioc_set_fsacl(zfs_cmd_t *zc) +{ + int error; + nvlist_t *fsaclnv = NULL; + + if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &fsaclnv)) != 0) + return (error); + + /* + * Verify nvlist is constructed correctly + */ + if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { + nvlist_free(fsaclnv); + return (EINVAL); + } + + /* + * If we don't have PRIV_SYS_MOUNT, then validate + * that user is allowed to 
hand out each permission in + * the nvlist(s) + */ + + error = secpolicy_zfs(CRED()); + if (error) { + if (zc->zc_perm_action == B_FALSE) { + error = dsl_deleg_can_allow(zc->zc_name, + fsaclnv, CRED()); + } else { + error = dsl_deleg_can_unallow(zc->zc_name, + fsaclnv, CRED()); + } + } + + if (error == 0) + error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action); + + nvlist_free(fsaclnv); + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * + * outputs: + * zc_nvlist_dst{_size} nvlist of delegated permissions + */ +static int +zfs_ioc_get_fsacl(zfs_cmd_t *zc) +{ + nvlist_t *nvp; + int error; + + if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) { + error = put_nvlist(zc, nvp); + nvlist_free(nvp); + } + + return (error); +} + +/* + * Search the vfs list for a specified resource. Returns a pointer to it + * or NULL if no suitable entry is found. The caller of this routine + * is responsible for releasing the returned vfs pointer. + */ +static vfs_t * +zfs_get_vfs(const char *resource) +{ + struct vfs *vfsp; + struct vfs *vfs_found = NULL; + + vfs_list_read_lock(); + vfsp = rootvfs; + do { + if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) { + VFS_HOLD(vfsp); + vfs_found = vfsp; + break; + } + vfsp = vfsp->vfs_next; + } while (vfsp != rootvfs); + vfs_list_unlock(); + return (vfs_found); +} + +/* ARGSUSED */ +static void +zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) +{ + zfs_creat_t *zct = arg; + + zfs_create_fs(os, cr, zct->zct_zplprops, tx); +} + +#define ZFS_PROP_UNDEFINED ((uint64_t)-1) + +/* + * inputs: + * createprops list of properties requested by creator + * default_zplver zpl version to use if unspecified in createprops + * fuids_ok fuids allowed in this version of the spa? 
+ * os parent objset pointer (NULL if root fs) + * + * outputs: + * zplprops values for the zplprops we attach to the master node object + * is_ci true if requested file system will be purely case-insensitive + * + * Determine the settings for utf8only, normalization and + * casesensitivity. Specific values may have been requested by the + * creator and/or we can inherit values from the parent dataset. If + * the file system is of too early a vintage, a creator can not + * request settings for these properties, even if the requested + * setting is the default value. We don't actually want to create dsl + * properties for these, so remove them from the source nvlist after + * processing. + */ +static int +zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, + boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops, + nvlist_t *zplprops, boolean_t *is_ci) +{ + uint64_t sense = ZFS_PROP_UNDEFINED; + uint64_t norm = ZFS_PROP_UNDEFINED; + uint64_t u8 = ZFS_PROP_UNDEFINED; + + ASSERT(zplprops != NULL); + + /* + * Pull out creator prop choices, if any. + */ + if (createprops) { + (void) nvlist_lookup_uint64(createprops, + zfs_prop_to_name(ZFS_PROP_VERSION), &zplver); + (void) nvlist_lookup_uint64(createprops, + zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm); + (void) nvlist_remove_all(createprops, + zfs_prop_to_name(ZFS_PROP_NORMALIZE)); + (void) nvlist_lookup_uint64(createprops, + zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8); + (void) nvlist_remove_all(createprops, + zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); + (void) nvlist_lookup_uint64(createprops, + zfs_prop_to_name(ZFS_PROP_CASE), &sense); + (void) nvlist_remove_all(createprops, + zfs_prop_to_name(ZFS_PROP_CASE)); + } + + /* + * If the zpl version requested is whacky or the file system + * or pool version is too "young" to support normalization + * and the creator tried to set a value for one of the props, + * error out. 
+ */ + if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) || + (zplver >= ZPL_VERSION_FUID && !fuids_ok) || + (zplver >= ZPL_VERSION_SA && !sa_ok) || + (zplver < ZPL_VERSION_NORMALIZATION && + (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || + sense != ZFS_PROP_UNDEFINED))) + return (ENOTSUP); + + /* + * Put the version in the zplprops + */ + VERIFY(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); + + if (norm == ZFS_PROP_UNDEFINED) + VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0); + VERIFY(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); + + /* + * If we're normalizing, names must always be valid UTF-8 strings. + */ + if (norm) + u8 = 1; + if (u8 == ZFS_PROP_UNDEFINED) + VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0); + VERIFY(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); + + if (sense == ZFS_PROP_UNDEFINED) + VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0); + VERIFY(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); + + if (is_ci) + *is_ci = (sense == ZFS_CASE_INSENSITIVE); + + return (0); +} + +static int +zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, + nvlist_t *zplprops, boolean_t *is_ci) +{ + boolean_t fuids_ok, sa_ok; + uint64_t zplver = ZPL_VERSION; + objset_t *os = NULL; + char parentname[MAXNAMELEN]; + char *cp; + spa_t *spa; + uint64_t spa_vers; + int error; + + (void) strlcpy(parentname, dataset, sizeof (parentname)); + cp = strrchr(parentname, '/'); + ASSERT(cp != NULL); + cp[0] = '\0'; + + if ((error = spa_open(dataset, &spa, FTAG)) != 0) + return (error); + + spa_vers = spa_version(spa); + spa_close(spa, FTAG); + + zplver = zfs_zpl_version_map(spa_vers); + fuids_ok = (zplver >= ZPL_VERSION_FUID); + sa_ok = (zplver >= ZPL_VERSION_SA); + + /* + * Open parent object set so we can inherit zplprop values. 
+ */ + if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) + return (error); + + error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops, + zplprops, is_ci); + dmu_objset_rele(os, FTAG); + return (error); +} + +static int +zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, + nvlist_t *zplprops, boolean_t *is_ci) +{ + boolean_t fuids_ok; + boolean_t sa_ok; + uint64_t zplver = ZPL_VERSION; + int error; + + zplver = zfs_zpl_version_map(spa_vers); + fuids_ok = (zplver >= ZPL_VERSION_FUID); + sa_ok = (zplver >= ZPL_VERSION_SA); + + error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok, + createprops, zplprops, is_ci); + return (error); +} + +/* + * inputs: + * zc_objset_type type of objset to create (fs vs zvol) + * zc_name name of new objset + * zc_value name of snapshot to clone from (may be empty) + * zc_nvlist_src{_size} nvlist of properties to apply + * + * outputs: none + */ +static int +zfs_ioc_create(zfs_cmd_t *zc) +{ + objset_t *clone; + int error = 0; + zfs_creat_t zct; + nvlist_t *nvprops = NULL; + void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); + dmu_objset_type_t type = zc->zc_objset_type; + + switch (type) { + + case DMU_OST_ZFS: + cbfunc = zfs_create_cb; + break; + + case DMU_OST_ZVOL: + cbfunc = zvol_create_cb; + break; + + default: + cbfunc = NULL; + break; + } + if (strchr(zc->zc_name, '@') || + strchr(zc->zc_name, '%')) + return (EINVAL); + + if (zc->zc_nvlist_src != NULL && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvprops)) != 0) + return (error); + + zct.zct_zplprops = NULL; + zct.zct_props = nvprops; + + if (zc->zc_value[0] != '\0') { + /* + * We're creating a clone of an existing snapshot. 
+ */ + zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; + if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { + nvlist_free(nvprops); + return (EINVAL); + } + + error = dmu_objset_hold(zc->zc_value, FTAG, &clone); + if (error) { + nvlist_free(nvprops); + return (error); + } + + error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); + dmu_objset_rele(clone, FTAG); + if (error) { + nvlist_free(nvprops); + return (error); + } + } else { + boolean_t is_insensitive = B_FALSE; + + if (cbfunc == NULL) { + nvlist_free(nvprops); + return (EINVAL); + } + + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; + + if (nvprops == NULL || + nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), + &volsize) != 0) { + nvlist_free(nvprops); + return (EINVAL); + } + + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) { + nvlist_free(nvprops); + return (EINVAL); + } + + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); + + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) { + nvlist_free(nvprops); + return (error); + } + } else if (type == DMU_OST_ZFS) { + int error; + + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(zc->zc_name, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(nvprops); + nvlist_free(zct.zct_zplprops); + return (error); + } + } + error = dmu_objset_create(zc->zc_name, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + } + + /* + * It would be nice to do this atomically. 
+ */ + if (error == 0) { + error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, + nvprops, NULL); + if (error != 0) + (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + } + nvlist_free(nvprops); + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value short name of snapshot + * zc_cookie recursive flag + * zc_nvlist_src[_size] property list + * + * outputs: + * zc_value short snapname (i.e. part after the '@') + */ +static int +zfs_ioc_snapshot(zfs_cmd_t *zc) +{ + nvlist_t *nvprops = NULL; + int error; + boolean_t recursive = zc->zc_cookie; + + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + return (EINVAL); + + if (zc->zc_nvlist_src != NULL && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvprops)) != 0) + return (error); + + error = zfs_check_userprops(zc->zc_name, nvprops); + if (error) + goto out; + + if (!nvlist_empty(nvprops) && + zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { + error = ENOTSUP; + goto out; + } + + error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, + nvprops, recursive, B_FALSE, -1); + +out: + nvlist_free(nvprops); + return (error); +} + +int +zfs_unmount_snap(const char *name, void *arg) +{ + vfs_t *vfsp = NULL; + + if (arg) { + char *snapname = arg; + char *fullname = kmem_asprintf("%s@%s", name, snapname); + vfsp = zfs_get_vfs(fullname); + strfree(fullname); + } else if (strchr(name, '@')) { + vfsp = zfs_get_vfs(name); + } + + if (vfsp) { + /* + * Always force the unmount for snapshots. 
+ */ + int flag = MS_FORCE; + int err; + + if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { + VFS_RELE(vfsp); + return (err); + } + VFS_RELE(vfsp); + if ((err = dounmount(vfsp, flag, kcred)) != 0) + return (err); + } + return (0); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value short name of snapshot + * zc_defer_destroy mark for deferred destroy + * + * outputs: none + */ +static int +zfs_ioc_destroy_snaps(zfs_cmd_t *zc) +{ + int err; + + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + return (EINVAL); + err = dmu_objset_find(zc->zc_name, + zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN); + if (err) + return (err); + return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value, + zc->zc_defer_destroy)); +} + +/* + * inputs: + * zc_name name of dataset to destroy + * zc_objset_type type of objset + * zc_defer_destroy mark for deferred destroy + * + * outputs: none + */ +static int +zfs_ioc_destroy(zfs_cmd_t *zc) +{ + int err; + if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { + err = zfs_unmount_snap(zc->zc_name, NULL); + if (err) + return (err); + } + + err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); + if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) + (void) zvol_remove_minor(zc->zc_name); + return (err); +} + +/* + * inputs: + * zc_name name of dataset to rollback (to most recent snapshot) + * + * outputs: none + */ +static int +zfs_ioc_rollback(zfs_cmd_t *zc) +{ + dsl_dataset_t *ds, *clone; + int error; + zfsvfs_t *zfsvfs; + char *clone_name; + + error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); + if (error) + return (error); + + /* must not be a snapshot */ + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + /* must have a most recent snapshot */ + if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + /* + * Create clone of most recent snapshot. 
+ */ + clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); + error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); + if (error) + goto out; + + error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); + if (error) + goto out; + + /* + * Do clone swap. + */ + if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { + error = zfs_suspend_fs(zfsvfs); + if (error == 0) { + int resume_err; + + if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { + error = dsl_dataset_clone_swap(clone, ds, + B_TRUE); + dsl_dataset_disown(ds, FTAG); + ds = NULL; + } else { + error = EBUSY; + } + resume_err = zfs_resume_fs(zfsvfs, zc->zc_name); + error = error ? error : resume_err; + } + VFS_RELE(zfsvfs->z_vfs); + } else { + if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { + error = dsl_dataset_clone_swap(clone, ds, B_TRUE); + dsl_dataset_disown(ds, FTAG); + ds = NULL; + } else { + error = EBUSY; + } + } + + /* + * Destroy clone (which also closes it). + */ + (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); + +out: + strfree(clone_name); + if (ds) + dsl_dataset_rele(ds, FTAG); + return (error); +} + +/* + * inputs: + * zc_name old name of dataset + * zc_value new name of dataset + * zc_cookie recursive flag (only valid for snapshots) + * + * outputs: none + */ +static int +zfs_ioc_rename(zfs_cmd_t *zc) +{ + boolean_t recursive = zc->zc_cookie & 1; + + zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; + if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || + strchr(zc->zc_value, '%')) + return (EINVAL); + + /* + * Unmount snapshot unless we're doing a recursive rename, + * in which case the dataset code figures out which snapshots + * to unmount. 
+ */ + if (!recursive && strchr(zc->zc_name, '@') != NULL && + zc->zc_objset_type == DMU_OST_ZFS) { + int err = zfs_unmount_snap(zc->zc_name, NULL); + if (err) + return (err); + } + if (zc->zc_objset_type == DMU_OST_ZVOL) + (void) zvol_remove_minor(zc->zc_name); + return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive)); +} + +static int +zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) +{ + const char *propname = nvpair_name(pair); + boolean_t issnap = (strchr(dsname, '@') != NULL); + zfs_prop_t prop = zfs_name_to_prop(propname); + uint64_t intval; + int err; + + if (prop == ZPROP_INVAL) { + if (zfs_prop_user(propname)) { + if (err = zfs_secpolicy_write_perms(dsname, + ZFS_DELEG_PERM_USERPROP, cr)) + return (err); + return (0); + } + + if (!issnap && zfs_prop_userquota(propname)) { + const char *perm = NULL; + const char *uq_prefix = + zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA]; + const char *gq_prefix = + zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA]; + + if (strncmp(propname, uq_prefix, + strlen(uq_prefix)) == 0) { + perm = ZFS_DELEG_PERM_USERQUOTA; + } else if (strncmp(propname, gq_prefix, + strlen(gq_prefix)) == 0) { + perm = ZFS_DELEG_PERM_GROUPQUOTA; + } else { + /* USERUSED and GROUPUSED are read-only */ + return (EINVAL); + } + + if (err = zfs_secpolicy_write_perms(dsname, perm, cr)) + return (err); + return (0); + } + + return (EINVAL); + } + + if (issnap) + return (EINVAL); + + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + /* + * dsl_prop_get_all_impl() returns properties in this + * format. + */ + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &pair) == 0); + } + + /* + * Check that this value is valid for this pool version + */ + switch (prop) { + case ZFS_PROP_COMPRESSION: + /* + * If the user specified gzip compression, make sure + * the SPA supports it. We ignore any errors here since + * we'll catch them later. 
+ */ + if (nvpair_type(pair) == DATA_TYPE_UINT64 && + nvpair_value_uint64(pair, &intval) == 0) { + if (intval >= ZIO_COMPRESS_GZIP_1 && + intval <= ZIO_COMPRESS_GZIP_9 && + zfs_earlier_version(dsname, + SPA_VERSION_GZIP_COMPRESSION)) { + return (ENOTSUP); + } + + if (intval == ZIO_COMPRESS_ZLE && + zfs_earlier_version(dsname, + SPA_VERSION_ZLE_COMPRESSION)) + return (ENOTSUP); + + /* + * If this is a bootable dataset then + * verify that the compression algorithm + * is supported for booting. We must return + * something other than ENOTSUP since it + * implies a downrev pool version. + */ + if (zfs_is_bootfs(dsname) && + !BOOTFS_COMPRESS_VALID(intval)) { + return (ERANGE); + } + } + break; + + case ZFS_PROP_COPIES: + if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) + return (ENOTSUP); + break; + + case ZFS_PROP_DEDUP: + if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) + return (ENOTSUP); + break; + + case ZFS_PROP_SHARESMB: + if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) + return (ENOTSUP); + break; + + case ZFS_PROP_ACLINHERIT: + if (nvpair_type(pair) == DATA_TYPE_UINT64 && + nvpair_value_uint64(pair, &intval) == 0) { + if (intval == ZFS_ACL_PASSTHROUGH_X && + zfs_earlier_version(dsname, + SPA_VERSION_PASSTHROUGH_X)) + return (ENOTSUP); + } + break; + } + + return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); +} + +/* + * Removes properties from the given props list that fail permission checks + * needed to clear them and to restore them in case of a receive error. For each + * property, make sure we have both set and inherit permissions. + * + * Returns the first error encountered if any permission checks fail. If the + * caller provides a non-NULL errlist, it also gives the complete list of names + * of all the properties that failed a permission check along with the + * corresponding error numbers. The caller is responsible for freeing the + * returned errlist. 
+ * + * If every property checks out successfully, zero is returned and the list + * pointed at by errlist is NULL. + */ +static int +zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) +{ + zfs_cmd_t *zc; + nvpair_t *pair, *next_pair; + nvlist_t *errors; + int err, rv = 0; + + if (props == NULL) + return (0); + + VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP); + (void) strcpy(zc->zc_name, dataset); + pair = nvlist_next_nvpair(props, NULL); + while (pair != NULL) { + next_pair = nvlist_next_nvpair(props, pair); + + (void) strcpy(zc->zc_value, nvpair_name(pair)); + if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || + (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { + VERIFY(nvlist_remove_nvpair(props, pair) == 0); + VERIFY(nvlist_add_int32(errors, + zc->zc_value, err) == 0); + } + pair = next_pair; + } + kmem_free(zc, sizeof (zfs_cmd_t)); + + if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { + nvlist_free(errors); + errors = NULL; + } else { + VERIFY(nvpair_value_int32(pair, &rv) == 0); + } + + if (errlist == NULL) + nvlist_free(errors); + else + *errlist = errors; + + return (rv); +} + +static boolean_t +propval_equals(nvpair_t *p1, nvpair_t *p2) +{ + if (nvpair_type(p1) == DATA_TYPE_NVLIST) { + /* dsl_prop_get_all_impl() format */ + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &p1) == 0); + } + + if (nvpair_type(p2) == DATA_TYPE_NVLIST) { + nvlist_t *attrs; + VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &p2) == 0); + } + + if (nvpair_type(p1) != nvpair_type(p2)) + return (B_FALSE); + + if (nvpair_type(p1) == DATA_TYPE_STRING) { + char *valstr1, *valstr2; + + VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0); + VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0); + return (strcmp(valstr1, valstr2) == 0); + } else { + uint64_t 
intval1, intval2; + + VERIFY(nvpair_value_uint64(p1, &intval1) == 0); + VERIFY(nvpair_value_uint64(p2, &intval2) == 0); + return (intval1 == intval2); + } +} + +/* + * Remove properties from props if they are not going to change (as determined + * by comparison with origprops). Remove them from origprops as well, since we + * do not need to clear or restore properties that won't change. + */ +static void +props_reduce(nvlist_t *props, nvlist_t *origprops) +{ + nvpair_t *pair, *next_pair; + + if (origprops == NULL) + return; /* all props need to be received */ + + pair = nvlist_next_nvpair(props, NULL); + while (pair != NULL) { + const char *propname = nvpair_name(pair); + nvpair_t *match; + + next_pair = nvlist_next_nvpair(props, pair); + + if ((nvlist_lookup_nvpair(origprops, propname, + &match) != 0) || !propval_equals(pair, match)) + goto next; /* need to set received value */ + + /* don't clear the existing received value */ + (void) nvlist_remove_nvpair(origprops, match); + /* don't bother receiving the property */ + (void) nvlist_remove_nvpair(props, pair); +next: + pair = next_pair; + } +} + +#ifdef DEBUG +static boolean_t zfs_ioc_recv_inject_err; +#endif + +/* + * inputs: + * zc_name name of containing filesystem + * zc_nvlist_src{_size} nvlist of properties to apply + * zc_value name of snapshot to create + * zc_string name of clone origin (if DRR_FLAG_CLONE) + * zc_cookie file descriptor to recv from + * zc_begin_record the BEGIN record of the stream (not byteswapped) + * zc_guid force flag + * zc_cleanup_fd cleanup-on-exit file descriptor + * zc_action_handle handle for this guid/ds mapping (or zero on first call) + * + * outputs: + * zc_cookie number of bytes read + * zc_nvlist_dst{_size} error for each unapplied received property + * zc_obj zprop_errflags_t + * zc_action_handle handle for this guid/ds mapping + */ +static int +zfs_ioc_recv(zfs_cmd_t *zc) +{ + file_t *fp; + objset_t *os; + dmu_recv_cookie_t drc; + boolean_t force = 
(boolean_t)zc->zc_guid; + int fd; + int error = 0; + int props_error = 0; + nvlist_t *errors; + offset_t off; + nvlist_t *props = NULL; /* sent properties */ + nvlist_t *origprops = NULL; /* existing properties */ + objset_t *origin = NULL; + char *tosnap; + char tofs[ZFS_MAXNAMELEN]; + boolean_t first_recvd_props = B_FALSE; + + if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || + strchr(zc->zc_value, '@') == NULL || + strchr(zc->zc_value, '%')) + return (EINVAL); + + (void) strcpy(tofs, zc->zc_value); + tosnap = strchr(tofs, '@'); + *tosnap++ = '\0'; + + if (zc->zc_nvlist_src != NULL && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props)) != 0) + return (error); + + fd = zc->zc_cookie; + fp = getf(fd); + if (fp == NULL) { + nvlist_free(props); + return (EBADF); + } + + VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { + if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && + !dsl_prop_get_hasrecvd(os)) { + first_recvd_props = B_TRUE; + } + + /* + * If new received properties are supplied, they are to + * completely replace the existing received properties, so stash + * away the existing ones. + */ + if (dsl_prop_get_received(os, &origprops) == 0) { + nvlist_t *errlist = NULL; + /* + * Don't bother writing a property if its value won't + * change (and avoid the unnecessary security checks). + * + * The first receive after SPA_VERSION_RECVD_PROPS is a + * special case where we blow away all local properties + * regardless. 
+ */ + if (!first_recvd_props) + props_reduce(props, origprops); + if (zfs_check_clearable(tofs, origprops, + &errlist) != 0) + (void) nvlist_merge(errors, errlist, 0); + nvlist_free(errlist); + } + + dmu_objset_rele(os, FTAG); + } + + if (zc->zc_string[0]) { + error = dmu_objset_hold(zc->zc_string, FTAG, &origin); + if (error) + goto out; + } + + error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, + &zc->zc_begin_record, force, origin, &drc); + if (origin) + dmu_objset_rele(origin, FTAG); + if (error) + goto out; + + /* + * Set properties before we receive the stream so that they are applied + * to the new data. Note that we must call dmu_recv_stream() if + * dmu_recv_begin() succeeds. + */ + if (props) { + nvlist_t *errlist; + + if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { + if (drc.drc_newfs) { + if (spa_version(os->os_spa) >= + SPA_VERSION_RECVD_PROPS) + first_recvd_props = B_TRUE; + } else if (origprops != NULL) { + if (clear_received_props(os, tofs, origprops, + first_recvd_props ? NULL : props) != 0) + zc->zc_obj |= ZPROP_ERR_NOCLEAR; + } else { + zc->zc_obj |= ZPROP_ERR_NOCLEAR; + } + dsl_prop_set_hasrecvd(os); + } else if (!drc.drc_newfs) { + zc->zc_obj |= ZPROP_ERR_NOCLEAR; + } + + (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, + props, &errlist); + (void) nvlist_merge(errors, errlist, 0); + nvlist_free(errlist); + } + + if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { + /* + * Caller made zc->zc_nvlist_dst less than the minimum expected + * size or supplied an invalid address. + */ + props_error = EINVAL; + } + + off = fp->f_offset; + error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, + &zc->zc_action_handle); + + if (error == 0) { + zfsvfs_t *zfsvfs = NULL; + + if (getzfsvfs(tofs, &zfsvfs) == 0) { + /* online recv */ + int end_err; + + error = zfs_suspend_fs(zfsvfs); + /* + * If the suspend fails, then the recv_end will + * likely also fail, and clean up after itself. 
+ */ + end_err = dmu_recv_end(&drc); + if (error == 0) + error = zfs_resume_fs(zfsvfs, tofs); + error = error ? error : end_err; + VFS_RELE(zfsvfs->z_vfs); + } else { + error = dmu_recv_end(&drc); + } + } + + zc->zc_cookie = off - fp->f_offset; + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + +#ifdef DEBUG + if (zfs_ioc_recv_inject_err) { + zfs_ioc_recv_inject_err = B_FALSE; + error = 1; + } +#endif + /* + * On error, restore the original props. + */ + if (error && props) { + if (dmu_objset_hold(tofs, FTAG, &os) == 0) { + if (clear_received_props(os, tofs, props, NULL) != 0) { + /* + * We failed to clear the received properties. + * Since we may have left a $recvd value on the + * system, we can't clear the $hasrecvd flag. + */ + zc->zc_obj |= ZPROP_ERR_NORESTORE; + } else if (first_recvd_props) { + dsl_prop_unset_hasrecvd(os); + } + dmu_objset_rele(os, FTAG); + } else if (!drc.drc_newfs) { + /* We failed to clear the received properties. */ + zc->zc_obj |= ZPROP_ERR_NORESTORE; + } + + if (origprops == NULL && !drc.drc_newfs) { + /* We failed to stash the original properties. */ + zc->zc_obj |= ZPROP_ERR_NORESTORE; + } + + /* + * dsl_props_set() will not convert RECEIVED to LOCAL on or + * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL + * explicitly if we're restoring local properties cleared in the + * first new-style receive. + */ + if (origprops != NULL && + zfs_set_prop_nvlist(tofs, (first_recvd_props ? + ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED), + origprops, NULL) != 0) { + /* + * We stashed the original properties but failed to + * restore them. 
+ */ + zc->zc_obj |= ZPROP_ERR_NORESTORE; + } + } +out: + nvlist_free(props); + nvlist_free(origprops); + nvlist_free(errors); + releasef(fd); + + if (error == 0) + error = props_error; + + return (error); +} + +/* + * inputs: + * zc_name name of snapshot to send + * zc_cookie file descriptor to send stream to + * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) + * zc_sendobj objsetid of snapshot to send + * zc_fromobj objsetid of incremental fromsnap (may be zero) + * + * outputs: none + */ +static int +zfs_ioc_send(zfs_cmd_t *zc) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + file_t *fp; + int error; + offset_t off; + dsl_dataset_t *ds; + dsl_dataset_t *dsfrom = NULL; + spa_t *spa; + dsl_pool_t *dp; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error) + return (error); + + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + if (error) { + spa_close(spa, FTAG); + return (error); + } + + error = dmu_objset_from_ds(ds, &tosnap); + if (error) { + dsl_dataset_rele(ds, FTAG); + spa_close(spa, FTAG); + return (error); + } + + if (zc->zc_fromobj != 0) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + error = dmu_objset_from_ds(dsfrom, &fromsnap); + if (error) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); + } + } else { + spa_close(spa, FTAG); + } + + fp = getf(zc->zc_cookie); + if (fp == NULL) { + dsl_dataset_rele(ds, FTAG); + if (dsfrom) + dsl_dataset_rele(dsfrom, FTAG); + return (EBADF); + } + + off = fp->f_offset; + error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off); + + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + releasef(zc->zc_cookie); + if 
(dsfrom) + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); +} + +static int +zfs_ioc_inject_fault(zfs_cmd_t *zc) +{ + int id, error; + + error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, + &zc->zc_inject_record); + + if (error == 0) + zc->zc_guid = (uint64_t)id; + + return (error); +} + +static int +zfs_ioc_clear_fault(zfs_cmd_t *zc) +{ + return (zio_clear_fault((int)zc->zc_guid)); +} + +static int +zfs_ioc_inject_list_next(zfs_cmd_t *zc) +{ + int id = (int)zc->zc_guid; + int error; + + error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), + &zc->zc_inject_record); + + zc->zc_guid = id; + + return (error); +} + +static int +zfs_ioc_error_log(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + size_t count = (size_t)zc->zc_nvlist_dst_size; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, + &count); + if (error == 0) + zc->zc_nvlist_dst_size = count; + else + zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); + + spa_close(spa, FTAG); + + return (error); +} + +static int +zfs_ioc_clear(zfs_cmd_t *zc) +{ + spa_t *spa; + vdev_t *vd; + int error; + + /* + * On zpool clear we also fix up missing slogs + */ + mutex_enter(&spa_namespace_lock); + spa = spa_lookup(zc->zc_name); + if (spa == NULL) { + mutex_exit(&spa_namespace_lock); + return (EIO); + } + if (spa_get_log_state(spa) == SPA_LOG_MISSING) { + /* we need to let spa_open/spa_load clear the chains */ + spa_set_log_state(spa, SPA_LOG_CLEAR); + } + spa->spa_last_open_failed = 0; + mutex_exit(&spa_namespace_lock); + + if (zc->zc_cookie & ZPOOL_NO_REWIND) { + error = spa_open(zc->zc_name, &spa, FTAG); + } else { + nvlist_t *policy; + nvlist_t *config = NULL; + + if (zc->zc_nvlist_src == NULL) + return (EINVAL); + + if ((error = get_nvlist(zc->zc_nvlist_src, + zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { + error = spa_open_rewind(zc->zc_name, &spa, FTAG, + 
policy, &config); + if (config != NULL) { + int err; + + if ((err = put_nvlist(zc, config)) != 0) + error = err; + nvlist_free(config); + } + nvlist_free(policy); + } + } + + if (error) + return (error); + + spa_vdev_state_enter(spa, SCL_NONE); + + if (zc->zc_guid == 0) { + vd = NULL; + } else { + vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE); + if (vd == NULL) { + (void) spa_vdev_state_exit(spa, NULL, ENODEV); + spa_close(spa, FTAG); + return (ENODEV); + } + } + + vdev_clear(spa, vd); + + (void) spa_vdev_state_exit(spa, NULL, 0); + + /* + * Resume any suspended I/Os. + */ + if (zio_resume(spa) != 0) + error = EIO; + + spa_close(spa, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value name of origin snapshot + * + * outputs: + * zc_string name of conflicting snapshot, if there is one + */ +static int +zfs_ioc_promote(zfs_cmd_t *zc) +{ + char *cp; + + /* + * We don't need to unmount *all* the origin fs's snapshots, but + * it's easier. + */ + cp = strchr(zc->zc_value, '@'); + if (cp) + *cp = '\0'; + (void) dmu_objset_find(zc->zc_value, + zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); + return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); +} + +/* + * Retrieve a single {user|group}{used|quota}@... property. + * + * inputs: + * zc_name name of filesystem + * zc_objset_type zfs_userquota_prop_t + * zc_value domain name (eg. 
"S-1-234-567-89") + * zc_guid RID/UID/GID + * + * outputs: + * zc_cookie property value + */ +static int +zfs_ioc_userspace_one(zfs_cmd_t *zc) +{ + zfsvfs_t *zfsvfs; + int error; + + if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); + if (error) + return (error); + + error = zfs_userspace_one(zfsvfs, + zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); + zfsvfs_rele(zfsvfs, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_cookie zap cursor + * zc_objset_type zfs_userquota_prop_t + * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) + * + * outputs: + * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) + * zc_cookie zap cursor + */ +static int +zfs_ioc_userspace_many(zfs_cmd_t *zc) +{ + zfsvfs_t *zfsvfs; + int bufsize = zc->zc_nvlist_dst_size; + + if (bufsize <= 0) + return (ENOMEM); + + int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); + if (error) + return (error); + + void *buf = kmem_alloc(bufsize, KM_SLEEP); + + error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie, + buf, &zc->zc_nvlist_dst_size); + + if (error == 0) { + error = xcopyout(buf, + (void *)(uintptr_t)zc->zc_nvlist_dst, + zc->zc_nvlist_dst_size); + } + kmem_free(buf, bufsize); + zfsvfs_rele(zfsvfs, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * + * outputs: + * none + */ +static int +zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) +{ + objset_t *os; + int error = 0; + zfsvfs_t *zfsvfs; + + if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { + if (!dmu_objset_userused_enabled(zfsvfs->z_os)) { + /* + * If userused is not enabled, it may be because the + * objset needs to be closed & reopened (to grow the + * objset_phys_t). Suspend/resume the fs will do that. 
+ */ + error = zfs_suspend_fs(zfsvfs); + if (error == 0) + error = zfs_resume_fs(zfsvfs, zc->zc_name); + } + if (error == 0) + error = dmu_objset_userspace_upgrade(zfsvfs->z_os); + VFS_RELE(zfsvfs->z_vfs); + } else { + /* XXX kind of reading contents without owning */ + error = dmu_objset_hold(zc->zc_name, FTAG, &os); + if (error) + return (error); + + error = dmu_objset_userspace_upgrade(os); + dmu_objset_rele(os, FTAG); + } + + return (error); +} + +/* + * We don't want to have a hard dependency + * against some special symbols in sharefs + * nfs, and smbsrv. Determine them if needed when + * the first file system is shared. + * Neither sharefs, nfs or smbsrv are unloadable modules. + */ +int (*znfsexport_fs)(void *arg); +int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t); +int (*zsmbexport_fs)(void *arg, boolean_t add_share); + +int zfs_nfsshare_inited; +int zfs_smbshare_inited; + +ddi_modhandle_t nfs_mod; +ddi_modhandle_t sharefs_mod; +ddi_modhandle_t smbsrv_mod; +kmutex_t zfs_share_lock; + +static int +zfs_init_sharefs() +{ + int error; + + ASSERT(MUTEX_HELD(&zfs_share_lock)); + /* Both NFS and SMB shares also require sharetab support. 
*/ + if (sharefs_mod == NULL && ((sharefs_mod = + ddi_modopen("fs/sharefs", + KRTLD_MODE_FIRST, &error)) == NULL)) { + return (ENOSYS); + } + if (zshare_fs == NULL && ((zshare_fs = + (int (*)(enum sharefs_sys_op, share_t *, uint32_t)) + ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) { + return (ENOSYS); + } + return (0); +} + +static int +zfs_ioc_share(zfs_cmd_t *zc) +{ + int error; + int opcode; + + switch (zc->zc_share.z_sharetype) { + case ZFS_SHARE_NFS: + case ZFS_UNSHARE_NFS: + if (zfs_nfsshare_inited == 0) { + mutex_enter(&zfs_share_lock); + if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs", + KRTLD_MODE_FIRST, &error)) == NULL)) { + mutex_exit(&zfs_share_lock); + return (ENOSYS); + } + if (znfsexport_fs == NULL && + ((znfsexport_fs = (int (*)(void *)) + ddi_modsym(nfs_mod, + "nfs_export", &error)) == NULL)) { + mutex_exit(&zfs_share_lock); + return (ENOSYS); + } + error = zfs_init_sharefs(); + if (error) { + mutex_exit(&zfs_share_lock); + return (ENOSYS); + } + zfs_nfsshare_inited = 1; + mutex_exit(&zfs_share_lock); + } + break; + case ZFS_SHARE_SMB: + case ZFS_UNSHARE_SMB: + if (zfs_smbshare_inited == 0) { + mutex_enter(&zfs_share_lock); + if (smbsrv_mod == NULL && ((smbsrv_mod = + ddi_modopen("drv/smbsrv", + KRTLD_MODE_FIRST, &error)) == NULL)) { + mutex_exit(&zfs_share_lock); + return (ENOSYS); + } + if (zsmbexport_fs == NULL && ((zsmbexport_fs = + (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod, + "smb_server_share", &error)) == NULL)) { + mutex_exit(&zfs_share_lock); + return (ENOSYS); + } + error = zfs_init_sharefs(); + if (error) { + mutex_exit(&zfs_share_lock); + return (ENOSYS); + } + zfs_smbshare_inited = 1; + mutex_exit(&zfs_share_lock); + } + break; + default: + return (EINVAL); + } + + switch (zc->zc_share.z_sharetype) { + case ZFS_SHARE_NFS: + case ZFS_UNSHARE_NFS: + if (error = + znfsexport_fs((void *) + (uintptr_t)zc->zc_share.z_exportdata)) + return (error); + break; + case ZFS_SHARE_SMB: + case ZFS_UNSHARE_SMB: + if 
(error = zsmbexport_fs((void *) + (uintptr_t)zc->zc_share.z_exportdata, + zc->zc_share.z_sharetype == ZFS_SHARE_SMB ? + B_TRUE: B_FALSE)) { + return (error); + } + break; + } + + opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS || + zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ? + SHAREFS_ADD : SHAREFS_REMOVE; + + /* + * Add or remove share from sharetab + */ + error = zshare_fs(opcode, + (void *)(uintptr_t)zc->zc_share.z_sharedata, + zc->zc_share.z_sharemax); + + return (error); + +} + +ace_t full_access[] = { + {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0} +}; + +/* + * inputs: + * zc_name name of containing filesystem + * zc_obj object # beyond which we want next in-use object # + * + * outputs: + * zc_obj next in-use object # + */ +static int +zfs_ioc_next_obj(zfs_cmd_t *zc) +{ + objset_t *os = NULL; + int error; + + error = dmu_objset_hold(zc->zc_name, FTAG, &os); + if (error) + return (error); + + error = dmu_object_next(os, &zc->zc_obj, B_FALSE, + os->os_dsl_dataset->ds_phys->ds_prev_snap_txg); + + dmu_objset_rele(os, FTAG); + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value prefix name for snapshot + * zc_cleanup_fd cleanup-on-exit file descriptor for calling process + * + * outputs: + */ +static int +zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) +{ + char *snap_name; + int error; + + snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, + (u_longlong_t)ddi_get_lbolt64()); + + if (strlen(snap_name) >= MAXNAMELEN) { + strfree(snap_name); + return (E2BIG); + } + + error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, + NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); + if (error != 0) { + strfree(snap_name); + return (error); + } + + (void) strcpy(zc->zc_value, snap_name); + strfree(snap_name); + return (0); +} + +/* + * inputs: + * zc_name name of "to" snapshot + * zc_value name of "from" snapshot + * zc_cookie file descriptor to write diff data on + * + * outputs: + * dmu_diff_record_t's to the file descriptor + */ +static int 
+zfs_ioc_diff(zfs_cmd_t *zc) +{ + objset_t *fromsnap; + objset_t *tosnap; + file_t *fp; + offset_t off; + int error; + + error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); + if (error) + return (error); + + error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + + fp = getf(zc->zc_cookie); + if (fp == NULL) { + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (EBADF); + } + + off = fp->f_offset; + + error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off); + + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + releasef(zc->zc_cookie); + + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + +/* + * Remove all ACL files in shares dir + */ +static int +zfs_smb_acl_purge(znode_t *dzp) +{ + zap_cursor_t zc; + zap_attribute_t zap; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + int error; + + for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); + (error = zap_cursor_retrieve(&zc, &zap)) == 0; + zap_cursor_advance(&zc)) { + if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred, + NULL, 0)) != 0) + break; + } + zap_cursor_fini(&zc); + return (error); +} + +static int +zfs_ioc_smb_acl(zfs_cmd_t *zc) +{ + vnode_t *vp; + znode_t *dzp; + vnode_t *resourcevp = NULL; + znode_t *sharedir; + zfsvfs_t *zfsvfs; + nvlist_t *nvlist; + char *src, *target; + vattr_t vattr; + vsecattr_t vsec; + int error = 0; + + if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, + NO_FOLLOW, NULL, &vp)) != 0) + return (error); + + /* Now make sure mntpnt and dataset are ZFS */ + + if (vp->v_vfsp->vfs_fstype != zfsfstype || + (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), + zc->zc_name) != 0)) { + VN_RELE(vp); + return (EINVAL); + } + + dzp = VTOZ(vp); + zfsvfs = dzp->z_zfsvfs; + ZFS_ENTER(zfsvfs); + + /* + * Create share dir if its missing. 
+ */ + mutex_enter(&zfsvfs->z_lock); + if (zfsvfs->z_shares_dir == 0) { + dmu_tx_t *tx; + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, + ZFS_SHARES_DIR); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + error = zfs_create_share_dir(zfsvfs, tx); + dmu_tx_commit(tx); + } + if (error) { + mutex_exit(&zfsvfs->z_lock); + VN_RELE(vp); + ZFS_EXIT(zfsvfs); + return (error); + } + } + mutex_exit(&zfsvfs->z_lock); + + ASSERT(zfsvfs->z_shares_dir); + if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) { + VN_RELE(vp); + ZFS_EXIT(zfsvfs); + return (error); + } + + switch (zc->zc_cookie) { + case ZFS_SMB_ACL_ADD: + vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; + vattr.va_type = VREG; + vattr.va_mode = S_IFREG|0777; + vattr.va_uid = 0; + vattr.va_gid = 0; + + vsec.vsa_mask = VSA_ACE; + vsec.vsa_aclentp = &full_access; + vsec.vsa_aclentsz = sizeof (full_access); + vsec.vsa_aclcnt = 1; + + error = VOP_CREATE(ZTOV(sharedir), zc->zc_string, + &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec); + if (resourcevp) + VN_RELE(resourcevp); + break; + + case ZFS_SMB_ACL_REMOVE: + error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred, + NULL, 0); + break; + + case ZFS_SMB_ACL_RENAME: + if ((error = get_nvlist(zc->zc_nvlist_src, + zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) { + VN_RELE(vp); + ZFS_EXIT(zfsvfs); + return (error); + } + if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) || + nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET, + &target)) { + VN_RELE(vp); + VN_RELE(ZTOV(sharedir)); + ZFS_EXIT(zfsvfs); + nvlist_free(nvlist); + return (error); + } + error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target, + kcred, NULL, 0); + nvlist_free(nvlist); + break; + + case ZFS_SMB_ACL_PURGE: + error = zfs_smb_acl_purge(sharedir); + break; + + default: + error = EINVAL; + break; + } + + VN_RELE(vp); + 
VN_RELE(ZTOV(sharedir)); + + ZFS_EXIT(zfsvfs); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_value short name of snap + * zc_string user-supplied tag for this hold + * zc_cookie recursive flag + * zc_temphold set if hold is temporary + * zc_cleanup_fd cleanup-on-exit file descriptor for calling process + * zc_sendobj if non-zero, the objid for zc_name@zc_value + * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg + * + * outputs: none + */ +static int +zfs_ioc_hold(zfs_cmd_t *zc) +{ + boolean_t recursive = zc->zc_cookie; + spa_t *spa; + dsl_pool_t *dp; + dsl_dataset_t *ds; + int error; + minor_t minor = 0; + + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + return (EINVAL); + + if (zc->zc_sendobj == 0) { + return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, + zc->zc_string, recursive, zc->zc_temphold, + zc->zc_cleanup_fd)); + } + + if (recursive) + return (EINVAL); + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error) + return (error); + + dp = spa_get_dsl(spa); + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); + rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); + if (error) + return (error); + + /* + * Until we have a hold on this snapshot, it's possible that + * zc_sendobj could've been destroyed and reused as part + * of a later txg. Make sure we're looking at the right object. 
+ */ + if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { + dsl_dataset_rele(ds, FTAG); + return (ENOENT); + } + + if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { + error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + } + + error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, + zc->zc_temphold); + if (minor != 0) { + if (error == 0) { + dsl_register_onexit_hold_cleanup(ds, zc->zc_string, + minor); + } + zfs_onexit_fd_rele(zc->zc_cleanup_fd); + } + dsl_dataset_rele(ds, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of dataset from which we're releasing a user hold + * zc_value short name of snap + * zc_string user-supplied tag for this hold + * zc_cookie recursive flag + * + * outputs: none + */ +static int +zfs_ioc_release(zfs_cmd_t *zc) +{ + boolean_t recursive = zc->zc_cookie; + + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + return (EINVAL); + + return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, + zc->zc_string, recursive)); +} + +/* + * inputs: + * zc_name name of filesystem + * + * outputs: + * zc_nvlist_src{_size} nvlist of snapshot holds + */ +static int +zfs_ioc_get_holds(zfs_cmd_t *zc) +{ + nvlist_t *nvp; + int error; + + if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { + error = put_nvlist(zc, nvp); + nvlist_free(nvp); + } + + return (error); +} + +/* + * pool create, destroy, and export don't log the history as part of + * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export + * do the logging of those commands. 
+ */ +static zfs_ioc_vec_t zfs_ioc_vec[] = { + { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, + POOL_CHECK_READONLY }, + { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_objset_zplprops, 
zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, + POOL_CHECK_NONE }, + { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_NONE }, + { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME, + B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { 
zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, + B_FALSE, POOL_CHECK_NONE }, + { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, + B_FALSE, POOL_CHECK_NONE }, + { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, + B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED } +}; + +int +pool_status_check(const char *name, 
zfs_ioc_namecheck_t type, + zfs_ioc_poolcheck_t check) +{ + spa_t *spa; + int error; + + ASSERT(type == POOL_NAME || type == DATASET_NAME); + + if (check & POOL_CHECK_NONE) + return (0); + + error = spa_open(name, &spa, FTAG); + if (error == 0) { + if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) + error = EAGAIN; + else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) + error = EROFS; + spa_close(spa, FTAG); + } + return (error); +} + +/* + * Find a free minor number. + */ +minor_t +zfsdev_minor_alloc(void) +{ + static minor_t last_minor; + minor_t m; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + for (m = last_minor + 1; m != last_minor; m++) { + if (m > ZFSDEV_MAX_MINOR) + m = 1; + if (ddi_get_soft_state(zfsdev_state, m) == NULL) { + last_minor = m; + return (m); + } + } + + return (0); +} + +static int +zfs_ctldev_init(dev_t *devp) +{ + minor_t minor; + zfs_soft_state_t *zs; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + ASSERT(getminor(*devp) == 0); + + minor = zfsdev_minor_alloc(); + if (minor == 0) + return (ENXIO); + + if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) + return (EAGAIN); + + *devp = makedevice(getemajor(*devp), minor); + + zs = ddi_get_soft_state(zfsdev_state, minor); + zs->zss_type = ZSST_CTLDEV; + zfs_onexit_init((zfs_onexit_t **)&zs->zss_data); + + return (0); +} + +static void +zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor) +{ + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + zfs_onexit_destroy(zo); + ddi_soft_state_free(zfsdev_state, minor); +} + +void * +zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which) +{ + zfs_soft_state_t *zp; + + zp = ddi_get_soft_state(zfsdev_state, minor); + if (zp == NULL || zp->zss_type != which) + return (NULL); + + return (zp->zss_data); +} + +static int +zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr) +{ + int error = 0; + + if (getminor(*devp) != 0) + return (zvol_open(devp, flag, otyp, cr)); + + /* This is the control device. 
Allocate a new minor if requested. */ + if (flag & FEXCL) { + mutex_enter(&zfsdev_state_lock); + error = zfs_ctldev_init(devp); + mutex_exit(&zfsdev_state_lock); + } + + return (error); +} + +static int +zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr) +{ + zfs_onexit_t *zo; + minor_t minor = getminor(dev); + + if (minor == 0) + return (0); + + mutex_enter(&zfsdev_state_lock); + zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); + if (zo == NULL) { + mutex_exit(&zfsdev_state_lock); + return (zvol_close(dev, flag, otyp, cr)); + } + zfs_ctldev_destroy(zo, minor); + mutex_exit(&zfsdev_state_lock); + + return (0); +} + +static int +zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) +{ + zfs_cmd_t *zc; + uint_t vec; + int error, rc; + minor_t minor = getminor(dev); + + if (minor != 0 && + zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL) + return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); + + vec = cmd - ZFS_IOC; + ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); + + if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) + return (EINVAL); + + zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); + + error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); + if (error != 0) + error = EFAULT; + + if ((error == 0) && !(flag & FKIOCTL)) + error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr); + + /* + * Ensure that all pool/dataset names are valid before we pass down to + * the lower layers. 
+ */ + if (error == 0) { + zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; + zc->zc_iflags = flag & FKIOCTL; + switch (zfs_ioc_vec[vec].zvec_namecheck) { + case POOL_NAME: + if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + error = pool_status_check(zc->zc_name, + zfs_ioc_vec[vec].zvec_namecheck, + zfs_ioc_vec[vec].zvec_pool_check); + break; + + case DATASET_NAME: + if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + error = pool_status_check(zc->zc_name, + zfs_ioc_vec[vec].zvec_namecheck, + zfs_ioc_vec[vec].zvec_pool_check); + break; + + case NO_NAME: + break; + } + } + + if (error == 0) + error = zfs_ioc_vec[vec].zvec_func(zc); + + rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); + if (error == 0) { + if (rc != 0) + error = EFAULT; + if (zfs_ioc_vec[vec].zvec_his_log) + zfs_log_history(zc); + } + + kmem_free(zc, sizeof (zfs_cmd_t)); + return (error); +} + +static int +zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0, + DDI_PSEUDO, 0) == DDI_FAILURE) + return (DDI_FAILURE); + + zfs_dip = dip; + + ddi_report_dev(dip); + + return (DDI_SUCCESS); +} + +static int +zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + if (spa_busy() || zfs_busy() || zvol_busy()) + return (DDI_FAILURE); + + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + zfs_dip = NULL; + + ddi_prop_remove_all(dip); + ddi_remove_minor_node(dip, NULL); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = zfs_dip; + return (DDI_SUCCESS); + + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + return (DDI_SUCCESS); + } + + return (DDI_FAILURE); +} + +/* + * OK, so this is a little weird. + * + * /dev/zfs is the control node, i.e. minor 0. + * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0. 
+ * + * /dev/zfs has basically nothing to do except serve up ioctls, + * so most of the standard driver entry points are in zvol.c. + */ +static struct cb_ops zfs_cb_ops = { + zfsdev_open, /* open */ + zfsdev_close, /* close */ + zvol_strategy, /* strategy */ + nodev, /* print */ + zvol_dump, /* dump */ + zvol_read, /* read */ + zvol_write, /* write */ + zfsdev_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* prop_op */ + NULL, /* streamtab */ + D_NEW | D_MP | D_64BIT, /* Driver compatibility flag */ + CB_REV, /* version */ + nodev, /* async read */ + nodev, /* async write */ +}; + +static struct dev_ops zfs_dev_ops = { + DEVO_REV, /* version */ + 0, /* refcnt */ + zfs_info, /* info */ + nulldev, /* identify */ + nulldev, /* probe */ + zfs_attach, /* attach */ + zfs_detach, /* detach */ + nodev, /* reset */ + &zfs_cb_ops, /* driver operations */ + NULL, /* no bus operations */ + NULL, /* power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +static struct modldrv zfs_modldrv = { + &mod_driverops, + "ZFS storage pool", + &zfs_dev_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&zfs_modlfs, + (void *)&zfs_modldrv, + NULL +}; + + +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; + +int +_init(void) +{ + int error; + + spa_init(FREAD | FWRITE); + zfs_init(); + zvol_init(); + + if ((error = mod_install(&modlinkage)) != 0) { + zvol_fini(); + zfs_fini(); + spa_fini(); + return (error); + } + + tsd_create(&zfs_fsyncer_key, NULL); + tsd_create(&rrw_tsd_key, NULL); + + error = ldi_ident_from_mod(&modlinkage, &zfs_li); + ASSERT(error == 0); + mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL); + + return (0); +} + +int +_fini(void) +{ + int error; + + if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled) + return (EBUSY); + + if ((error = mod_remove(&modlinkage)) != 0) + return (error); + + zvol_fini(); + zfs_fini(); + spa_fini(); + if 
(zfs_nfsshare_inited) + (void) ddi_modclose(nfs_mod); + if (zfs_smbshare_inited) + (void) ddi_modclose(smbsrv_mod); + if (zfs_nfsshare_inited || zfs_smbshare_inited) + (void) ddi_modclose(sharefs_mod); + + tsd_destroy(&zfs_fsyncer_key); + ldi_ident_release(zfs_li); + zfs_li = NULL; + mutex_destroy(&zfs_share_lock); + + return (error); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/uts/common/fs/zfs/zfs_log.c b/uts/common/fs/zfs/zfs_log.c new file mode 100644 index 000000000000..26ab78279b31 --- /dev/null +++ b/uts/common/fs/zfs/zfs_log.c @@ -0,0 +1,676 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/cmn_err.h> +#include <sys/kmem.h> +#include <sys/thread.h> +#include <sys/file.h> +#include <sys/vfs.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_dir.h> +#include <sys/zil.h> +#include <sys/zil_impl.h> +#include <sys/byteorder.h> +#include <sys/policy.h> +#include <sys/stat.h> +#include <sys/mode.h> +#include <sys/acl.h> +#include <sys/dmu.h> +#include <sys/spa.h> +#include <sys/zfs_fuid.h> +#include <sys/ddi.h> +#include <sys/dsl_dataset.h> + +/* + * These zfs_log_* functions must be called within a dmu tx, in one + * of 2 contexts depending on zilog->z_replay: + * + * Non replay mode + * --------------- + * We need to record the transaction so that if it is committed to + * the Intent Log then it can be replayed. An intent log transaction + * structure (itx_t) is allocated and all the information necessary to + * possibly replay the transaction is saved in it. The itx is then assigned + * a sequence number and inserted in the in-memory list anchored in the zilog. + * + * Replay mode + * ----------- + * We need to mark the intent log record as replayed in the log header. + * This is done in the same transaction as the replay so that they + * commit atomically. 
+ */ + +int +zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) +{ + int isxvattr = (vap->va_mask & AT_XVATTR); + switch (type) { + case Z_FILE: + if (vsecp == NULL && !isxvattr) + return (TX_CREATE); + if (vsecp && isxvattr) + return (TX_CREATE_ACL_ATTR); + if (vsecp) + return (TX_CREATE_ACL); + else + return (TX_CREATE_ATTR); + /*NOTREACHED*/ + case Z_DIR: + if (vsecp == NULL && !isxvattr) + return (TX_MKDIR); + if (vsecp && isxvattr) + return (TX_MKDIR_ACL_ATTR); + if (vsecp) + return (TX_MKDIR_ACL); + else + return (TX_MKDIR_ATTR); + case Z_XATTRDIR: + return (TX_MKXATTR); + } + ASSERT(0); + return (TX_MAX_TYPE); +} + +/* + * build up the log data necessary for logging xvattr_t + * First lr_attr_t is initialized. following the lr_attr_t + * is the mapsize and attribute bitmap copied from the xvattr_t. + * Following the bitmap and bitmapsize two 64 bit words are reserved + * for the create time which may be set. Following the create time + * records a single 64 bit integer which has the bits to set on + * replay for the xvattr. + */ +static void +zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) +{ + uint32_t *bitmap; + uint64_t *attrs; + uint64_t *crtime; + xoptattr_t *xoap; + void *scanstamp; + int i; + + xoap = xva_getxoptattr(xvap); + ASSERT(xoap); + + lrattr->lr_attr_masksize = xvap->xva_mapsize; + bitmap = &lrattr->lr_attr_bitmap; + for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) { + *bitmap = xvap->xva_reqattrmap[i]; + } + + /* Now pack the attributes up in a single uint64_t */ + attrs = (uint64_t *)bitmap; + crtime = attrs + 1; + scanstamp = (caddr_t)(crtime + 2); + *attrs = 0; + if (XVA_ISSET_REQ(xvap, XAT_READONLY)) + *attrs |= (xoap->xoa_readonly == 0) ? 0 : + XAT0_READONLY; + if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) + *attrs |= (xoap->xoa_hidden == 0) ? 0 : + XAT0_HIDDEN; + if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) + *attrs |= (xoap->xoa_system == 0) ? 
0 : + XAT0_SYSTEM; + if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) + *attrs |= (xoap->xoa_archive == 0) ? 0 : + XAT0_ARCHIVE; + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) + *attrs |= (xoap->xoa_immutable == 0) ? 0 : + XAT0_IMMUTABLE; + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) + *attrs |= (xoap->xoa_nounlink == 0) ? 0 : + XAT0_NOUNLINK; + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) + *attrs |= (xoap->xoa_appendonly == 0) ? 0 : + XAT0_APPENDONLY; + if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) + *attrs |= (xoap->xoa_opaque == 0) ? 0 : + XAT0_APPENDONLY; + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) + *attrs |= (xoap->xoa_nodump == 0) ? 0 : + XAT0_NODUMP; + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) + *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 : + XAT0_AV_QUARANTINED; + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) + *attrs |= (xoap->xoa_av_modified == 0) ? 0 : + XAT0_AV_MODIFIED; + if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) + ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime); + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) + bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) + *attrs |= (xoap->xoa_reparse == 0) ? 0 : + XAT0_REPARSE; + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) + *attrs |= (xoap->xoa_offline == 0) ? 0 : + XAT0_OFFLINE; + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) + *attrs |= (xoap->xoa_sparse == 0) ? 
0 : + XAT0_SPARSE; +} + +static void * +zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start) +{ + zfs_fuid_t *zfuid; + uint64_t *fuidloc = start; + + /* First copy in the ACE FUIDs */ + for (zfuid = list_head(&fuidp->z_fuids); zfuid; + zfuid = list_next(&fuidp->z_fuids, zfuid)) { + *fuidloc++ = zfuid->z_logfuid; + } + return (fuidloc); +} + + +static void * +zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) +{ + zfs_fuid_domain_t *zdomain; + + /* now copy in the domain info, if any */ + if (fuidp->z_domain_str_sz != 0) { + for (zdomain = list_head(&fuidp->z_domains); zdomain; + zdomain = list_next(&fuidp->z_domains, zdomain)) { + bcopy((void *)zdomain->z_domain, start, + strlen(zdomain->z_domain) + 1); + start = (caddr_t)start + + strlen(zdomain->z_domain) + 1; + } + } + return (start); +} + +/* + * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, + * TX_MKDIR_ATTR and TX_MKXATTR + * transactions. + * + * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID + * domain information appended prior to the name. In this case the + * uid/gid in the log record will be a log centric FUID. + * + * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that + * may contain attributes, ACL and optional fuid information. + * + * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify + * and ACL and normal users/groups in the ACEs. + * + * There may be an optional xvattr attribute information similar + * to zfs_log_setattr. + * + * Also, after the file name "domain" strings may be appended. 
+ */
+void
+zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
+    zfs_fuid_info_t *fuidp, vattr_t *vap)
+{
+	itx_t *itx;
+	lr_create_t *lr;
+	lr_acl_create_t *lracl;
+	size_t aclsize = 0;	/* stays 0 for the non-ACL record types */
+	size_t xvatsize = 0;
+	size_t txsize;
+	xvattr_t *xvap = (xvattr_t *)vap;
+	void *end;
+	size_t lrsize;
+	size_t namesize = strlen(name) + 1;
+	size_t fuidsz = 0;
+
+	if (zil_replaying(zilog, tx))
+		return;
+
+	/*
+	 * If we have FUIDs present then add in space for
+	 * domains and ACE fuid's if any.
+	 */
+	if (fuidp) {
+		fuidsz += fuidp->z_domain_str_sz;
+		fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
+	}
+
+	if (vap->va_mask & AT_XVATTR)
+		xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
+
+	/*
+	 * The non-ACL types use lr_create_t as the fixed part of the
+	 * record; everything else uses the larger lr_acl_create_t and
+	 * also carries the (padded) ACE data.
+	 */
+	if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
+	    (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
+	    (int)txtype == TX_MKXATTR) {
+		txsize = sizeof (*lr) + namesize + fuidsz + xvatsize;
+		lrsize = sizeof (*lr);
+	} else {
+		aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0;
+		txsize =
+		    sizeof (lr_acl_create_t) + namesize + fuidsz +
+		    ZIL_ACE_LENGTH(aclsize) + xvatsize;
+		lrsize = sizeof (lr_acl_create_t);
+	}
+
+	itx = zil_itx_create(txtype, txsize);
+
+	lr = (lr_create_t *)&itx->itx_lr;
+	lr->lr_doid = dzp->z_id;
+	lr->lr_foid = zp->z_id;
+	lr->lr_mode = zp->z_mode;
+	/* Ephemeral ids are logged as the FUIDs recorded in fuidp. */
+	if (!IS_EPHEMERAL(zp->z_uid)) {
+		lr->lr_uid = (uint64_t)zp->z_uid;
+	} else {
+		lr->lr_uid = fuidp->z_fuid_owner;
+	}
+	if (!IS_EPHEMERAL(zp->z_gid)) {
+		lr->lr_gid = (uint64_t)zp->z_gid;
+	} else {
+		lr->lr_gid = fuidp->z_fuid_group;
+	}
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
+	    sizeof (uint64_t));
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+	    lr->lr_crtime, sizeof (uint64_t) * 2);
+
+	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs), &lr->lr_rdev,
+	    sizeof (lr->lr_rdev)) != 0)
+		lr->lr_rdev = 0;
+
+	/*
+	 * Fill in xvattr info if any
+	 */
+	if (vap->va_mask & AT_XVATTR) {
+		zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
+		end = (caddr_t)lr + lrsize + xvatsize;
+	} else {
+		end = (caddr_t)lr + lrsize;
+	}
+
+	/* Now fill in any ACL info */
+
+	if (vsecp) {
+		lracl = (lr_acl_create_t *)&itx->itx_lr;
+		lracl->lr_aclcnt = vsecp->vsa_aclcnt;
+		lracl->lr_acl_bytes = aclsize;
+		lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
+		lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
+		/*
+		 * VSA_ACE_ACLFLAGS is a vsa_mask bit saying that
+		 * vsa_aclflags is valid; it must be tested against
+		 * vsa_mask, as zfs_log_acl() does.
+		 */
+		if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS)
+			lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
+		else
+			lracl->lr_acl_flags = 0;
+
+		bcopy(vsecp->vsa_aclentp, end, aclsize);
+		end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
+	}
+
+	/* drop in FUID info */
+	if (fuidp) {
+		end = zfs_log_fuid_ids(fuidp, end);
+		end = zfs_log_fuid_domains(fuidp, end);
+	}
+	/*
+	 * Now place file name in log record
+	 */
+	bcopy(name, end, namesize);
+
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions.
+ */
+void
+zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+    znode_t *dzp, char *name, uint64_t foid)
+{
+	itx_t *itx;
+	lr_remove_t *lr;
+	size_t namesize = strlen(name) + 1;	/* name plus its NUL */
+
+	if (zil_replaying(zilog, tx))
+		return;
+
+	/* record is an lr_remove_t immediately followed by the name */
+	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
+	lr = (lr_remove_t *)&itx->itx_lr;
+	lr->lr_doid = dzp->z_id;
+	bcopy(name, (char *)(lr + 1), namesize);
+
+	itx->itx_oid = foid;	/* id of the object being removed */
+
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_link() handles TX_LINK transactions.
+ *
+ * The record stores the object id of the directory (dzp) and of the
+ * znode being linked (zp), followed by the NUL-terminated name.
+ * Nothing is logged when zil_replaying() returns true.
+ */
+void
+zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+    znode_t *dzp, znode_t *zp, char *name)
+{
+	itx_t *itx;
+	lr_link_t *lr;
+	size_t namesize = strlen(name) + 1;
+
+	if (zil_replaying(zilog, tx))
+		return;
+
+	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
+	lr = (lr_link_t *)&itx->itx_lr;
+	lr->lr_doid = dzp->z_id;
+	lr->lr_link_obj = zp->z_id;
+	bcopy(name, (char *)(lr + 1), namesize);
+
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_symlink() handles TX_SYMLINK transactions.
+ *
+ * The record is an lr_create_t followed by the NUL-terminated name and
+ * then the NUL-terminated link target.  uid, gid and mode are copied
+ * directly from the znode; the generation and creation time are
+ * fetched with sa_lookup() and their return values are ignored.
+ */
+void
+zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+    znode_t *dzp, znode_t *zp, char *name, char *link)
+{
+	itx_t *itx;
+	lr_create_t *lr;
+	size_t namesize = strlen(name) + 1;
+	size_t linksize = strlen(link) + 1;
+
+	if (zil_replaying(zilog, tx))
+		return;
+
+	itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
+	lr = (lr_create_t *)&itx->itx_lr;
+	lr->lr_doid = dzp->z_id;
+	lr->lr_foid = zp->z_id;
+	lr->lr_uid = zp->z_uid;
+	lr->lr_gid = zp->z_gid;
+	lr->lr_mode = zp->z_mode;
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
+	    sizeof (uint64_t));
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+	    lr->lr_crtime, sizeof (uint64_t) * 2);
+	bcopy(name, (char *)(lr + 1), namesize);
+	bcopy(link, (char *)(lr + 1) + namesize, linksize);
+
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_rename() handles TX_RENAME transactions.
+ */
+void
+zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
+{
+	itx_t *itx;
+	lr_rename_t *lr;
+	size_t snamesize = strlen(sname) + 1;
+	size_t dnamesize = strlen(dname) + 1;
+
+	if (zil_replaying(zilog, tx))
+		return;
+
+	/* record is an lr_rename_t, then source name, then target name */
+	itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
+	lr = (lr_rename_t *)&itx->itx_lr;
+	lr->lr_sdoid = sdzp->z_id;
+	lr->lr_tdoid = tdzp->z_id;
+	bcopy(sname, (char *)(lr + 1), snamesize);
+	bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
+	itx->itx_oid = szp->z_id;	/* id of the object being renamed */
+
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_write() handles TX_WRITE transactions.
+ *
+ * The write is logged as one or more itxs.  A write state is chosen
+ * once, before the loop:
+ *	WR_INDIRECT	resid exceeds the immediate-write threshold, does
+ *			not exceed zp->z_blksz, and the pool is not
+ *			"slogging" (has slogs with logbias=latency)
+ *	WR_COPIED	otherwise, if ioflag has FSYNC or FDSYNC; the
+ *			data is read into the itx with dmu_read()
+ *	WR_NEED_COPY	everything else
+ * The threshold is zfs_immediate_write_sz, or 0 when logbias is
+ * throughput.  If the dmu_read() for a WR_COPIED itx fails, the itx is
+ * recreated without data and the state drops to WR_NEED_COPY for this
+ * and all remaining pieces.
+ *
+ * Nothing is logged while replaying or for an unlinked znode.
+ */
+ssize_t zfs_immediate_write_sz = 32768;
+
+void
+zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+    znode_t *zp, offset_t off, ssize_t resid, int ioflag)
+{
+	itx_wr_state_t write_state;
+	boolean_t slogging;
+	uintptr_t fsync_cnt;
+	ssize_t immediate_write_sz;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	    ? 0 : zfs_immediate_write_sz;
+
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+	if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
+		write_state = WR_INDIRECT;
+	else if (ioflag & (FSYNC | FDSYNC))
+		write_state = WR_COPIED;
+	else
+		write_state = WR_NEED_COPY;
+
+	/* a non-zero per-thread zfs_fsyncer_key count is decremented */
+	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
+		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
+	}
+
+	while (resid) {
+		itx_t *itx;
+		lr_write_t *lr;
+		ssize_t len;
+
+		/*
+		 * If the write would overflow the largest block then split it.
+		 */
+		if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
+			len = SPA_MAXBLOCKSIZE >> 1;
+		else
+			len = resid;
+
+		/* only WR_COPIED reserves room for the data in the itx */
+		itx = zil_itx_create(txtype, sizeof (*lr) +
+		    (write_state == WR_COPIED ? len : 0));
+		lr = (lr_write_t *)&itx->itx_lr;
+		if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
+		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
+			zil_itx_destroy(itx);
+			itx = zil_itx_create(txtype, sizeof (*lr));
+			lr = (lr_write_t *)&itx->itx_lr;
+			write_state = WR_NEED_COPY;
+		}
+
+		itx->itx_wr_state = write_state;
+		if (write_state == WR_NEED_COPY)
+			itx->itx_sod += len;
+		lr->lr_foid = zp->z_id;
+		lr->lr_offset = off;
+		lr->lr_length = len;
+		lr->lr_blkoff = 0;
+		BP_ZERO(&lr->lr_blkptr);
+
+		itx->itx_private = zp->z_zfsvfs;
+
+		/*
+		 * itx_sync is cleared only when neither the ioflag, the
+		 * znode's sync count nor the thread's fsync count asks
+		 * for a synchronous write.
+		 */
+		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
+		    (fsync_cnt == 0))
+			itx->itx_sync = B_FALSE;
+
+		zil_itx_assign(zilog, itx, tx);
+
+		off += len;
+		resid -= len;
+	}
+}
+
+/*
+ * zfs_log_truncate() handles TX_TRUNCATE transactions.
+ *
+ * Logs the object id, offset and length.  The itx is marked sync only
+ * when zp->z_sync_cnt is non-zero.  Nothing is logged while replaying
+ * or for an unlinked znode.
+ */
+void
+zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+    znode_t *zp, uint64_t off, uint64_t len)
+{
+	itx_t *itx;
+	lr_truncate_t *lr;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	itx = zil_itx_create(txtype, sizeof (*lr));
+	lr = (lr_truncate_t *)&itx->itx_lr;
+	lr->lr_foid = zp->z_id;
+	lr->lr_offset = off;
+	lr->lr_length = len;
+
+	itx->itx_sync = (zp->z_sync_cnt != 0);
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_setattr() handles TX_SETATTR transactions.
+ */
+void
+zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+    znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
+{
+	itx_t		*itx;
+	lr_setattr_t	*lr;
+	xvattr_t	*xvap = (xvattr_t *)vap;
+	size_t		recsize = sizeof (lr_setattr_t);
+	void		*start;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	/*
+	 * If XVATTR set, then log record size needs to allow
+	 * for lr_attr_t + xvattr mask, mapsize and create time
+	 * plus actual attribute values
+	 */
+	if (vap->va_mask & AT_XVATTR)
+		recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
+
+	if (fuidp)
+		recsize += fuidp->z_domain_str_sz;
+
+	itx = zil_itx_create(txtype, recsize);
+	lr = (lr_setattr_t *)&itx->itx_lr;
+	lr->lr_foid = zp->z_id;
+	lr->lr_mask = (uint64_t)mask_applied;
+	lr->lr_mode = (uint64_t)vap->va_mode;
+	/* an ephemeral id that was applied is logged as the FUID in fuidp */
+	if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid))
+		lr->lr_uid = fuidp->z_fuid_owner;
+	else
+		lr->lr_uid = (uint64_t)vap->va_uid;
+
+	if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid))
+		lr->lr_gid = fuidp->z_fuid_group;
+	else
+		lr->lr_gid = (uint64_t)vap->va_gid;
+
+	lr->lr_size = (uint64_t)vap->va_size;
+	ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
+	ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
+	start = (lr_setattr_t *)(lr + 1);	/* first byte past lr */
+	if (vap->va_mask & AT_XVATTR) {
+		zfs_log_xvattr((lr_attr_t *)start, xvap);
+		start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
+	}
+
+	/*
+	 * Now stick on domain information if any on end
+	 */
+
+	if (fuidp)
+		(void) zfs_log_fuid_domains(fuidp, start);
+
+	itx->itx_sync = (zp->z_sync_cnt != 0);
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_acl() handles TX_ACL transactions.
+ */
+void
+zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
+    vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
+{
+	itx_t *itx;
+	lr_acl_v0_t *lrv0;
+	lr_acl_t *lr;
+	int txtype;
+	int lrsize;
+	size_t txsize;
+	size_t aclbytes = vsecp->vsa_aclentsz;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	/* file systems older than ZPL_VERSION_FUID log the V0 record */
+	txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
+	    TX_ACL_V0 : TX_ACL;
+
+	if (txtype == TX_ACL)
+		lrsize = sizeof (*lr);
+	else
+		lrsize = sizeof (*lrv0);
+
+	/* only TX_ACL rounds the ACE bytes up with ZIL_ACE_LENGTH() */
+	txsize = lrsize +
+	    ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) +
+	    (fuidp ? fuidp->z_domain_str_sz : 0) +
+	    sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0);
+
+	itx = zil_itx_create(txtype, txsize);
+
+	lr = (lr_acl_t *)&itx->itx_lr;
+	lr->lr_foid = zp->z_id;
+	if (txtype == TX_ACL) {
+		lr->lr_acl_bytes = aclbytes;
+		lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
+		lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
+		if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS)
+			lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
+		else
+			lr->lr_acl_flags = 0;
+	}
+	lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt;
+
+	if (txtype == TX_ACL_V0) {
+		lrv0 = (lr_acl_v0_t *)lr;
+		bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes);
+	} else {
+		void *start = (ace_t *)(lr + 1);
+
+		bcopy(vsecp->vsa_aclentp, start, aclbytes);
+
+		start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
+
+		/* FUID ids first, then the domain strings */
+		if (fuidp) {
+			start = zfs_log_fuid_ids(fuidp, start);
+			(void) zfs_log_fuid_domains(fuidp, start);
+		}
+	}
+
+	itx->itx_sync = (zp->z_sync_cnt != 0);
+	zil_itx_assign(zilog, itx, tx);
+}
diff --git a/uts/common/fs/zfs/zfs_onexit.c b/uts/common/fs/zfs/zfs_onexit.c
new file mode 100644
index 000000000000..9706de2b42c2
--- /dev/null
+++ b/uts/common/fs/zfs/zfs_onexit.c
@@ -0,0 +1,246 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/open.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/zfs_ioctl.h> +#include <sys/mkdev.h> +#include <sys/zfs_onexit.h> +#include <sys/zvol.h> + +/* + * ZFS kernel routines may add/delete callback routines to be invoked + * upon process exit (triggered via the close operation from the /dev/zfs + * driver). + * + * These cleanup callbacks are intended to allow for the accumulation + * of kernel state across multiple ioctls. User processes participate + * by opening ZFS_DEV with O_EXCL. This causes the ZFS driver to do a + * clone-open, generating a unique minor number. The process then passes + * along that file descriptor to each ioctl that might have a cleanup operation. + * + * Consumers of the onexit routines should call zfs_onexit_fd_hold() early + * on to validate the given fd and add a reference to its file table entry. + * This allows the consumer to do its work and then add a callback, knowing + * that zfs_onexit_add_cb() won't fail with EBADF. When finished, consumers + * should call zfs_onexit_fd_rele(). 
+ *
+ * A simple example is zfs_ioc_recv(), where we might create an AVL tree
+ * with dataset/GUID mappings and then reuse that tree on subsequent
+ * zfs_ioc_recv() calls.
+ *
+ * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc()
+ * the AVL tree and pass it along with a callback function to
+ * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the
+ * callback and return an action handle.
+ *
+ * The action handle is then passed from user space to subsequent
+ * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree
+ * by calling zfs_onexit_cb_data() with the device minor number and
+ * action handle.
+ *
+ * If the user process exits abnormally, the callback is invoked implicitly
+ * as part of the driver close operation.  Once the user space process is
+ * finished with the accumulated kernel state, it can also just call close(2)
+ * on the cleanup fd to trigger the cleanup callback.
+ */
+/*
+ * Allocate a zeroed zfs_onexit_t, store it in *zop, and initialize its
+ * lock and its (empty) list of actions.
+ */
+void
+zfs_onexit_init(zfs_onexit_t **zop)
+{
+	zfs_onexit_t *zo;
+
+	zo = *zop = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP);
+	mutex_init(&zo->zo_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zo->zo_actions, sizeof (zfs_onexit_action_node_t),
+	    offsetof(zfs_onexit_action_node_t, za_link));
+}
+/*
+ * Run and free every registered action, head first, then tear down and
+ * free 'zo' itself.  zo_lock is dropped around each callback and each
+ * node is unlinked before its callback runs.
+ */
+void
+zfs_onexit_destroy(zfs_onexit_t *zo)
+{
+	zfs_onexit_action_node_t *ap;
+
+	mutex_enter(&zo->zo_lock);
+	while ((ap = list_head(&zo->zo_actions)) != NULL) {
+		list_remove(&zo->zo_actions, ap);
+		mutex_exit(&zo->zo_lock);
+		ap->za_func(ap->za_data);
+		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
+		mutex_enter(&zo->zo_lock);
+	}
+	mutex_exit(&zo->zo_lock);
+
+	list_destroy(&zo->zo_actions);
+	mutex_destroy(&zo->zo_lock);
+	kmem_free(zo, sizeof (zfs_onexit_t));
+}
+/*
+ * Look up the ZSST_CTLDEV soft state for 'minor' and store it in *zo.
+ * Returns 0 on success or EBADF when there is no such state.
+ */
+static int
+zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
+{
+	*zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
+	if (*zo == NULL)
+		return (EBADF);
+
+	return (0);
+}
+
+/*
+ * Consumers might need to operate by minor number instead of fd,
since + * they might be running in another thread (e.g. txg_sync_thread). Callers + * of this function must call zfs_onexit_fd_rele() when they're finished + * using the minor number. + */ +int +zfs_onexit_fd_hold(int fd, minor_t *minorp) +{ + file_t *fp; + zfs_onexit_t *zo; + + fp = getf(fd); + if (fp == NULL) + return (EBADF); + + *minorp = getminor(fp->f_vnode->v_rdev); + return (zfs_onexit_minor_to_state(*minorp, &zo)); +} + +void +zfs_onexit_fd_rele(int fd) +{ + releasef(fd); +} + +/* + * Add a callback to be invoked when the calling process exits. + */ +int +zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, + uint64_t *action_handle) +{ + zfs_onexit_t *zo; + zfs_onexit_action_node_t *ap; + int error; + + error = zfs_onexit_minor_to_state(minor, &zo); + if (error) + return (error); + + ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP); + list_link_init(&ap->za_link); + ap->za_func = func; + ap->za_data = data; + + mutex_enter(&zo->zo_lock); + list_insert_tail(&zo->zo_actions, ap); + mutex_exit(&zo->zo_lock); + if (action_handle) + *action_handle = (uint64_t)(uintptr_t)ap; + + return (0); +} + +static zfs_onexit_action_node_t * +zfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle) +{ + zfs_onexit_action_node_t *match; + zfs_onexit_action_node_t *ap; + list_t *l; + + ASSERT(MUTEX_HELD(&zo->zo_lock)); + + match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle; + l = &zo->zo_actions; + for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) { + if (match == ap) + break; + } + return (ap); +} + +/* + * Delete the callback, triggering it first if 'fire' is set. 
+ */
+int
+zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
+{
+	zfs_onexit_t *zo;
+	zfs_onexit_action_node_t *ap;
+	int error;
+
+	error = zfs_onexit_minor_to_state(minor, &zo);
+	if (error)
+		return (error);
+
+	mutex_enter(&zo->zo_lock);
+	ap = zfs_onexit_find_cb(zo, action_handle);
+	if (ap != NULL) {
+		/* unlink under the lock, run the callback without it */
+		list_remove(&zo->zo_actions, ap);
+		mutex_exit(&zo->zo_lock);
+		if (fire)
+			ap->za_func(ap->za_data);
+		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
+	} else {
+		mutex_exit(&zo->zo_lock);
+		error = ENOENT;		/* no action with that handle */
+	}
+
+	return (error);
+}
+
+/*
+ * Return the data associated with this callback.  This allows consumers
+ * of the cleanup-on-exit interfaces to stash kernel data across system
+ * calls, knowing that it will be cleaned up if the calling process exits.
+ *
+ * *data is set to NULL before anything else, so it is NULL on every
+ * error return.  Returns ENOENT when no action matches 'action_handle',
+ * or the error from looking up the onexit state for 'minor'.
+ */
+int
+zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
+{
+	zfs_onexit_t *zo;
+	zfs_onexit_action_node_t *ap;
+	int error;
+
+	*data = NULL;
+
+	error = zfs_onexit_minor_to_state(minor, &zo);
+	if (error)
+		return (error);
+
+	mutex_enter(&zo->zo_lock);
+	ap = zfs_onexit_find_cb(zo, action_handle);
+	if (ap != NULL)
+		*data = ap->za_data;
+	else
+		error = ENOENT;
+	mutex_exit(&zo->zo_lock);
+
+	return (error);
+}
diff --git a/uts/common/fs/zfs/zfs_replay.c b/uts/common/fs/zfs/zfs_replay.c
new file mode 100644
index 000000000000..9fb336856990
--- /dev/null
+++ b/uts/common/fs/zfs/zfs_replay.c
@@ -0,0 +1,931 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/cmn_err.h> +#include <sys/kmem.h> +#include <sys/thread.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/vfs.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_dir.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_fuid.h> +#include <sys/spa.h> +#include <sys/zil.h> +#include <sys/byteorder.h> +#include <sys/stat.h> +#include <sys/mode.h> +#include <sys/acl.h> +#include <sys/atomic.h> +#include <sys/cred.h> + +/* + * Functions to replay ZFS intent log (ZIL) records + * The functions are called through a function vector (zfs_replay_vector) + * which is indexed by the transaction type. + */ + +static void +zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, + uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) +{ + bzero(vap, sizeof (*vap)); + vap->va_mask = (uint_t)mask; + vap->va_type = IFTOVT(mode); + vap->va_mode = mode & MODEMASK; + vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; + vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? 
-1 : gid; + vap->va_rdev = zfs_cmpldev(rdev); + vap->va_nodeid = nodeid; +} + +/* ARGSUSED */ +static int +zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap) +{ + return (ENOTSUP); +} + +static void +zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) +{ + xoptattr_t *xoap = NULL; + uint64_t *attrs; + uint64_t *crtime; + uint32_t *bitmap; + void *scanstamp; + int i; + + xvap->xva_vattr.va_mask |= AT_XVATTR; + if ((xoap = xva_getxoptattr(xvap)) == NULL) { + xvap->xva_vattr.va_mask &= ~AT_XVATTR; /* shouldn't happen */ + return; + } + + ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize); + + bitmap = &lrattr->lr_attr_bitmap; + for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++) + xvap->xva_reqattrmap[i] = *bitmap; + + attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1); + crtime = attrs + 1; + scanstamp = (caddr_t)(crtime + 2); + + if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) + xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0); + if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) + xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0); + if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) + xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_READONLY)) + xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0); + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) + xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) + xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0); + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) + xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0); + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) + xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0); + if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) + xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) + xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0); + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) + xoap->xoa_av_quarantined = + ((*attrs & XAT0_AV_QUARANTINED) != 0); + if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) + 
ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime); + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) + bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ); + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) + xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) + xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0); + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) + xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0); +} + +static int +zfs_replay_domain_cnt(uint64_t uid, uint64_t gid) +{ + uint64_t uid_idx; + uint64_t gid_idx; + int domcnt = 0; + + uid_idx = FUID_INDEX(uid); + gid_idx = FUID_INDEX(gid); + if (uid_idx) + domcnt++; + if (gid_idx > 0 && gid_idx != uid_idx) + domcnt++; + + return (domcnt); +} + +static void * +zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start, + int domcnt) +{ + int i; + + for (i = 0; i != domcnt; i++) { + fuid_infop->z_domain_table[i] = start; + start = (caddr_t)start + strlen(start) + 1; + } + + return (start); +} + +/* + * Set the uid/gid in the fuid_info structure. 
+ */ +static void +zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid) +{ + /* + * If owner or group are log specific FUIDs then slurp up + * domain information and build zfs_fuid_info_t + */ + if (IS_EPHEMERAL(uid)) + fuid_infop->z_fuid_owner = uid; + + if (IS_EPHEMERAL(gid)) + fuid_infop->z_fuid_group = gid; +} + +/* + * Load fuid domains into fuid_info_t + */ +static zfs_fuid_info_t * +zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid) +{ + int domcnt; + + zfs_fuid_info_t *fuid_infop; + + fuid_infop = zfs_fuid_info_alloc(); + + domcnt = zfs_replay_domain_cnt(uid, gid); + + if (domcnt == 0) + return (fuid_infop); + + fuid_infop->z_domain_table = + kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP); + + zfs_replay_fuid_ugid(fuid_infop, uid, gid); + + fuid_infop->z_domain_cnt = domcnt; + *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt); + return (fuid_infop); +} + +/* + * load zfs_fuid_t's and fuid_domains into fuid_info_t + */ +static zfs_fuid_info_t * +zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid, + uint64_t gid) +{ + uint64_t *log_fuid = (uint64_t *)start; + zfs_fuid_info_t *fuid_infop; + int i; + + fuid_infop = zfs_fuid_info_alloc(); + fuid_infop->z_domain_cnt = domcnt; + + fuid_infop->z_domain_table = + kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP); + + for (i = 0; i != idcnt; i++) { + zfs_fuid_t *zfuid; + + zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); + zfuid->z_logfuid = *log_fuid; + zfuid->z_id = -1; + zfuid->z_domidx = 0; + list_insert_tail(&fuid_infop->z_fuids, zfuid); + log_fuid++; + } + + zfs_replay_fuid_ugid(fuid_infop, uid, gid); + + *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt); + return (fuid_infop); +} + +static void +zfs_replay_swap_attrs(lr_attr_t *lrattr) +{ + /* swap the lr_attr structure */ + byteswap_uint32_array(lrattr, sizeof (*lrattr)); + /* swap the bitmap */ + byteswap_uint32_array(lrattr + 1, 
(lrattr->lr_attr_masksize - 1) * + sizeof (uint32_t)); + /* swap the attributes, create time + 64 bit word for attributes */ + byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) * + (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t)); +} + +/* + * Replay file create with optional ACL, xvattr information as well + * as option FUID information. + */ +static int +zfs_replay_create_acl(zfsvfs_t *zfsvfs, + lr_acl_create_t *lracl, boolean_t byteswap) +{ + char *name = NULL; /* location determined later */ + lr_create_t *lr = (lr_create_t *)lracl; + znode_t *dzp; + vnode_t *vp = NULL; + xvattr_t xva; + int vflg = 0; + vsecattr_t vsec = { 0 }; + lr_attr_t *lrattr; + void *aclstart; + void *fuidstart; + size_t xvatlen = 0; + uint64_t txtype; + int error; + + txtype = (lr->lr_common.lrc_txtype & ~TX_CI); + if (byteswap) { + byteswap_uint64_array(lracl, sizeof (*lracl)); + if (txtype == TX_CREATE_ACL_ATTR || + txtype == TX_MKDIR_ACL_ATTR) { + lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); + zfs_replay_swap_attrs(lrattr); + xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); + } + + aclstart = (caddr_t)(lracl + 1) + xvatlen; + zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE); + /* swap fuids */ + if (lracl->lr_fuidcnt) { + byteswap_uint64_array((caddr_t)aclstart + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes), + lracl->lr_fuidcnt * sizeof (uint64_t)); + } + } + + if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + return (error); + + xva_init(&xva); + zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID, + lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); + + /* + * All forms of zfs create (create, mkdir, mkxattrdir, symlink) + * eventually end up in zfs_mknode(), which assigns the object's + * creation time and generation number. The generic VOP_CREATE() + * doesn't have either concept, so we smuggle the values inside + * the vattr's otherwise unused va_ctime and va_nblocks fields. 
+ */ + ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); + xva.xva_vattr.va_nblocks = lr->lr_gen; + + error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); + if (error != ENOENT) + goto bail; + + if (lr->lr_common.lrc_txtype & TX_CI) + vflg |= FIGNORECASE; + switch (txtype) { + case TX_CREATE_ACL: + aclstart = (caddr_t)(lracl + 1); + fuidstart = (caddr_t)aclstart + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, + (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, + lr->lr_uid, lr->lr_gid); + /*FALLTHROUGH*/ + case TX_CREATE_ACL_ATTR: + if (name == NULL) { + lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); + xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); + xva.xva_vattr.va_mask |= AT_XVATTR; + zfs_replay_xvattr(lrattr, &xva); + } + vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; + vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; + vsec.vsa_aclcnt = lracl->lr_aclcnt; + vsec.vsa_aclentsz = lracl->lr_acl_bytes; + vsec.vsa_aclflags = lracl->lr_acl_flags; + if (zfsvfs->z_fuid_replay == NULL) { + fuidstart = (caddr_t)(lracl + 1) + xvatlen + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + zfsvfs->z_fuid_replay = + zfs_replay_fuids(fuidstart, + (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, + lr->lr_uid, lr->lr_gid); + } + + error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, + 0, 0, &vp, kcred, vflg, NULL, &vsec); + break; + case TX_MKDIR_ACL: + aclstart = (caddr_t)(lracl + 1); + fuidstart = (caddr_t)aclstart + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, + (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, + lr->lr_uid, lr->lr_gid); + /*FALLTHROUGH*/ + case TX_MKDIR_ACL_ATTR: + if (name == NULL) { + lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); + xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); + zfs_replay_xvattr(lrattr, &xva); + } + vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; + vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; + vsec.vsa_aclcnt = lracl->lr_aclcnt; + 
vsec.vsa_aclentsz = lracl->lr_acl_bytes; + vsec.vsa_aclflags = lracl->lr_acl_flags; + if (zfsvfs->z_fuid_replay == NULL) { + fuidstart = (caddr_t)(lracl + 1) + xvatlen + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + zfsvfs->z_fuid_replay = + zfs_replay_fuids(fuidstart, + (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, + lr->lr_uid, lr->lr_gid); + } + error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, + &vp, kcred, NULL, vflg, &vsec); + break; + default: + error = ENOTSUP; + } + +bail: + if (error == 0 && vp != NULL) + VN_RELE(vp); + + VN_RELE(ZTOV(dzp)); + + if (zfsvfs->z_fuid_replay) + zfs_fuid_info_free(zfsvfs->z_fuid_replay); + zfsvfs->z_fuid_replay = NULL; + + return (error); +} + +static int +zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) +{ + char *name = NULL; /* location determined later */ + char *link; /* symlink content follows name */ + znode_t *dzp; + vnode_t *vp = NULL; + xvattr_t xva; + int vflg = 0; + size_t lrsize = sizeof (lr_create_t); + lr_attr_t *lrattr; + void *start; + size_t xvatlen; + uint64_t txtype; + int error; + + txtype = (lr->lr_common.lrc_txtype & ~TX_CI); + if (byteswap) { + byteswap_uint64_array(lr, sizeof (*lr)); + if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) + zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); + } + + + if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + return (error); + + xva_init(&xva); + zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID, + lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); + + /* + * All forms of zfs create (create, mkdir, mkxattrdir, symlink) + * eventually end up in zfs_mknode(), which assigns the object's + * creation time and generation number. The generic VOP_CREATE() + * doesn't have either concept, so we smuggle the values inside + * the vattr's otherwise unused va_ctime and va_nblocks fields. 
+ */ + ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); + xva.xva_vattr.va_nblocks = lr->lr_gen; + + error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); + if (error != ENOENT) + goto out; + + if (lr->lr_common.lrc_txtype & TX_CI) + vflg |= FIGNORECASE; + + /* + * Symlinks don't have fuid info, and CIFS never creates + * symlinks. + * + * The _ATTR versions will grab the fuid info in their subcases. + */ + if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK && + (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR && + (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) { + start = (lr + 1); + zfsvfs->z_fuid_replay = + zfs_replay_fuid_domain(start, &start, + lr->lr_uid, lr->lr_gid); + } + + switch (txtype) { + case TX_CREATE_ATTR: + lrattr = (lr_attr_t *)(caddr_t)(lr + 1); + xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); + zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); + start = (caddr_t)(lr + 1) + xvatlen; + zfsvfs->z_fuid_replay = + zfs_replay_fuid_domain(start, &start, + lr->lr_uid, lr->lr_gid); + name = (char *)start; + + /*FALLTHROUGH*/ + case TX_CREATE: + if (name == NULL) + name = (char *)start; + + error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, + 0, 0, &vp, kcred, vflg, NULL, NULL); + break; + case TX_MKDIR_ATTR: + lrattr = (lr_attr_t *)(caddr_t)(lr + 1); + xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); + zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); + start = (caddr_t)(lr + 1) + xvatlen; + zfsvfs->z_fuid_replay = + zfs_replay_fuid_domain(start, &start, + lr->lr_uid, lr->lr_gid); + name = (char *)start; + + /*FALLTHROUGH*/ + case TX_MKDIR: + if (name == NULL) + name = (char *)(lr + 1); + + error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, + &vp, kcred, NULL, vflg, NULL); + break; + case TX_MKXATTR: + error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred); + break; + case TX_SYMLINK: + name = (char *)(lr + 1); + link = name + strlen(name) + 1; + error = VOP_SYMLINK(ZTOV(dzp), name, &xva.xva_vattr, + link, 
kcred, NULL, vflg); + break; + default: + error = ENOTSUP; + } + +out: + if (error == 0 && vp != NULL) + VN_RELE(vp); + + VN_RELE(ZTOV(dzp)); + + if (zfsvfs->z_fuid_replay) + zfs_fuid_info_free(zfsvfs->z_fuid_replay); + zfsvfs->z_fuid_replay = NULL; + return (error); +} + +static int +zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) +{ + char *name = (char *)(lr + 1); /* name follows lr_remove_t */ + znode_t *dzp; + int error; + int vflg = 0; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + return (error); + + if (lr->lr_common.lrc_txtype & TX_CI) + vflg |= FIGNORECASE; + + switch ((int)lr->lr_common.lrc_txtype) { + case TX_REMOVE: + error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg); + break; + case TX_RMDIR: + error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg); + break; + default: + error = ENOTSUP; + } + + VN_RELE(ZTOV(dzp)); + + return (error); +} + +static int +zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap) +{ + char *name = (char *)(lr + 1); /* name follows lr_link_t */ + znode_t *dzp, *zp; + int error; + int vflg = 0; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + return (error); + + if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) { + VN_RELE(ZTOV(dzp)); + return (error); + } + + if (lr->lr_common.lrc_txtype & TX_CI) + vflg |= FIGNORECASE; + + error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg); + + VN_RELE(ZTOV(zp)); + VN_RELE(ZTOV(dzp)); + + return (error); +} + +static int +zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap) +{ + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + znode_t *sdzp, *tdzp; + int error; + int vflg = 0; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, 
&sdzp)) != 0) + return (error); + + if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) { + VN_RELE(ZTOV(sdzp)); + return (error); + } + + if (lr->lr_common.lrc_txtype & TX_CI) + vflg |= FIGNORECASE; + + error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred, + NULL, vflg); + + VN_RELE(ZTOV(tdzp)); + VN_RELE(ZTOV(sdzp)); + + return (error); +} + +static int +zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) +{ + char *data = (char *)(lr + 1); /* data follows lr_write_t */ + znode_t *zp; + int error; + ssize_t resid; + uint64_t eod, offset, length; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { + /* + * As we can log writes out of order, it's possible the + * file has been removed. In this case just drop the write + * and return success. + */ + if (error == ENOENT) + error = 0; + return (error); + } + + offset = lr->lr_offset; + length = lr->lr_length; + eod = offset + length; /* end of data for this write */ + + /* + * This may be a write from a dmu_sync() for a whole block, + * and may extend beyond the current end of the file. + * We can't just replay what was written for this TX_WRITE as + * a future TX_WRITE2 may extend the eof and the data for that + * write needs to be there. So we write the whole block and + * reduce the eof. This needs to be done within the single dmu + * transaction created within vn_rdwr -> zfs_write. 
So a possible + * new end of file is passed through in zfsvfs->z_replay_eof + */ + + zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */ + + /* If it's a dmu_sync() block, write the whole block */ + if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { + uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); + if (length < blocksize) { + offset -= offset % blocksize; + length = blocksize; + } + if (zp->z_size < eod) + zfsvfs->z_replay_eof = eod; + } + + error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset, + UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + + VN_RELE(ZTOV(zp)); + zfsvfs->z_replay_eof = 0; /* safety */ + + return (error); +} + +/* + * TX_WRITE2 are only generated when dmu_sync() returns EALREADY + * meaning the pool block is already being synced. So now that we always write + * out full blocks, all we have to do is expand the eof if + * the file is grown. + */ +static int +zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) +{ + znode_t *zp; + int error; + uint64_t end; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + +top: + end = lr->lr_offset + lr->lr_length; + if (end > zp->z_size) { + dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + + zp->z_size = end; + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + VN_RELE(ZTOV(zp)); + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + return (error); + } + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void *)&zp->z_size, sizeof (uint64_t), tx); + + /* Ensure the replayed seq is updated */ + (void) zil_replaying(zfsvfs->z_log, tx); + + dmu_tx_commit(tx); + } + + VN_RELE(ZTOV(zp)); + + return (error); +} + +static int +zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) +{ + znode_t *zp; + flock64_t fl; + int error; + + if (byteswap) + 
byteswap_uint64_array(lr, sizeof (*lr)); + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + + bzero(&fl, sizeof (fl)); + fl.l_type = F_WRLCK; + fl.l_whence = 0; + fl.l_start = lr->lr_offset; + fl.l_len = lr->lr_length; + + error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX, + lr->lr_offset, kcred, NULL); + + VN_RELE(ZTOV(zp)); + + return (error); +} + +static int +zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap) +{ + znode_t *zp; + xvattr_t xva; + vattr_t *vap = &xva.xva_vattr; + int error; + void *start; + + xva_init(&xva); + if (byteswap) { + byteswap_uint64_array(lr, sizeof (*lr)); + + if ((lr->lr_mask & AT_XVATTR) && + zfsvfs->z_version >= ZPL_VERSION_INITIAL) + zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); + } + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + + zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, + lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); + + vap->va_size = lr->lr_size; + ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime); + ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime); + + /* + * Fill in xvattr_t portions if necessary. 
+ */ + + start = (lr_setattr_t *)(lr + 1); + if (vap->va_mask & AT_XVATTR) { + zfs_replay_xvattr((lr_attr_t *)start, &xva); + start = (caddr_t)start + + ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); + } else + xva.xva_vattr.va_mask &= ~AT_XVATTR; + + zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, + lr->lr_uid, lr->lr_gid); + + error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL); + + zfs_fuid_info_free(zfsvfs->z_fuid_replay); + zfsvfs->z_fuid_replay = NULL; + VN_RELE(ZTOV(zp)); + + return (error); +} + +static int +zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) +{ + ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ + vsecattr_t vsa; + znode_t *zp; + int error; + + if (byteswap) { + byteswap_uint64_array(lr, sizeof (*lr)); + zfs_oldace_byteswap(ace, lr->lr_aclcnt); + } + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + + bzero(&vsa, sizeof (vsa)); + vsa.vsa_mask = VSA_ACE | VSA_ACECNT; + vsa.vsa_aclcnt = lr->lr_aclcnt; + vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt; + vsa.vsa_aclflags = 0; + vsa.vsa_aclentp = ace; + + error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); + + VN_RELE(ZTOV(zp)); + + return (error); +} + +/* + * Replaying ACLs is complicated by FUID support. + * The log record may contain some optional data + * to be used for replaying FUID's. These pieces + * are the actual FUIDs that were created initially. + * The FUID table index may no longer be valid and + * during zfs_create() a new index may be assigned. + * Because of this the log will contain the original + * doman+rid in order to create a new FUID. + * + * The individual ACEs may contain an ephemeral uid/gid which is no + * longer valid and will need to be replaced with an actual FUID. 
+ * + */ +static int +zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) +{ + ace_t *ace = (ace_t *)(lr + 1); + vsecattr_t vsa; + znode_t *zp; + int error; + + if (byteswap) { + byteswap_uint64_array(lr, sizeof (*lr)); + zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); + if (lr->lr_fuidcnt) { + byteswap_uint64_array((caddr_t)ace + + ZIL_ACE_LENGTH(lr->lr_acl_bytes), + lr->lr_fuidcnt * sizeof (uint64_t)); + } + } + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + + bzero(&vsa, sizeof (vsa)); + vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS; + vsa.vsa_aclcnt = lr->lr_aclcnt; + vsa.vsa_aclentp = ace; + vsa.vsa_aclentsz = lr->lr_acl_bytes; + vsa.vsa_aclflags = lr->lr_acl_flags; + + if (lr->lr_fuidcnt) { + void *fuidstart = (caddr_t)ace + + ZIL_ACE_LENGTH(lr->lr_acl_bytes); + + zfsvfs->z_fuid_replay = + zfs_replay_fuids(fuidstart, &fuidstart, + lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); + } + + error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); + + if (zfsvfs->z_fuid_replay) + zfs_fuid_info_free(zfsvfs->z_fuid_replay); + + zfsvfs->z_fuid_replay = NULL; + VN_RELE(ZTOV(zp)); + + return (error); +} + +/* + * Callback vectors for replaying records + */ +zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { + zfs_replay_error, /* 0 no such transaction type */ + zfs_replay_create, /* TX_CREATE */ + zfs_replay_create, /* TX_MKDIR */ + zfs_replay_create, /* TX_MKXATTR */ + zfs_replay_create, /* TX_SYMLINK */ + zfs_replay_remove, /* TX_REMOVE */ + zfs_replay_remove, /* TX_RMDIR */ + zfs_replay_link, /* TX_LINK */ + zfs_replay_rename, /* TX_RENAME */ + zfs_replay_write, /* TX_WRITE */ + zfs_replay_truncate, /* TX_TRUNCATE */ + zfs_replay_setattr, /* TX_SETATTR */ + zfs_replay_acl_v0, /* TX_ACL_V0 */ + zfs_replay_acl, /* TX_ACL */ + zfs_replay_create_acl, /* TX_CREATE_ACL */ + zfs_replay_create, /* TX_CREATE_ATTR */ + zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ + zfs_replay_create_acl, /* TX_MKDIR_ACL */ + zfs_replay_create, 
/* TX_MKDIR_ATTR */ + zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ + zfs_replay_write2, /* TX_WRITE2 */ +}; diff --git a/uts/common/fs/zfs/zfs_rlock.c b/uts/common/fs/zfs/zfs_rlock.c new file mode 100644 index 000000000000..7fd8f6020d08 --- /dev/null +++ b/uts/common/fs/zfs/zfs_rlock.c @@ -0,0 +1,602 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * This file contains the code to implement file range locking in + * ZFS, although there isn't much specific to ZFS (all that comes to mind + * support for growing the blocksize). + * + * Interface + * --------- + * Defined in zfs_rlock.h but essentially: + * rl = zfs_range_lock(zp, off, len, lock_type); + * zfs_range_unlock(rl); + * zfs_range_reduce(rl, off, len); + * + * AVL tree + * -------- + * An AVL tree is used to maintain the state of the existing ranges + * that are locked for exclusive (writer) or shared (reader) use. + * The starting range offset is used for searching and sorting the tree. + * + * Common case + * ----------- + * The (hopefully) usual case is of no overlaps or contention for + * locks. 
On entry to zfs_range_lock() a rl_t is allocated; the tree + * is searched and, finding no overlap, *this* rl_t is placed in the tree. + * + * Overlaps/Reference counting/Proxy locks + * --------------------------------------- + * The avl code only allows one node at a particular offset. Also it's very + * inefficient to search through all previous entries looking for overlaps + * (because the very 1st in the ordered list might be at offset 0 but + * cover the whole file). + * So this implementation uses reference counts and proxy range locks. + * Firstly, only reader locks use reference counts and proxy locks, + * because writer locks are exclusive. + * When a reader lock overlaps with another then a proxy lock is created + * for that range and replaces the original lock. If the overlap + * is exact then the reference count of the proxy is simply incremented. + * Otherwise, the proxy lock is split into smaller lock ranges and + * new proxy locks created for non-overlapping ranges. + * The reference counts are adjusted accordingly. + * Meanwhile, the original lock is kept around (this is the caller's handle) + * and its offset and length are used when releasing the lock. + * + * Thread coordination + * ------------------- + * In order to make wakeups efficient and to ensure multiple continuous + * readers on a range don't starve a writer for the same range lock, + * two condition variables are allocated in each rl_t. + * If a writer (or reader) can't get a range it initialises the writer + * (or reader) cv; sets a flag saying there's a writer (or reader) waiting; + * and waits on that cv. When a thread unlocks that range it wakes up all + * writers then all readers before destroying the lock. + * + * Append mode writes + * ------------------ + * Append mode writes need to lock a range at the end of a file. 
+ * The offset of the end of the file is determined under the + * range locking mutex, and the lock type converted from RL_APPEND to + * RL_WRITER and the range locked. + * + * Grow block handling + * ------------------- + * ZFS supports multiple block sizes currently upto 128K. The smallest + * block size is used for the file which is grown as needed. During this + * growth all other writers and readers must be excluded. + * So if the block size needs to be grown then the whole file is + * exclusively locked, then later the caller will reduce the lock + * range to just the range to be written using zfs_reduce_range. + */ + +#include <sys/zfs_rlock.h> + +/* + * Check if a write lock can be grabbed, or wait and recheck until available. + */ +static void +zfs_range_lock_writer(znode_t *zp, rl_t *new) +{ + avl_tree_t *tree = &zp->z_range_avl; + rl_t *rl; + avl_index_t where; + uint64_t end_size; + uint64_t off = new->r_off; + uint64_t len = new->r_len; + + for (;;) { + /* + * Range locking is also used by zvol and uses a + * dummied up znode. However, for zvol, we don't need to + * append or grow blocksize, and besides we don't have + * a "sa" data or z_zfsvfs - so skip that processing. + * + * Yes, this is ugly, and would be solved by not handling + * grow or append in range lock code. If that was done then + * we could make the range locking code generically available + * to other non-zfs consumers. + */ + if (zp->z_vnode) { /* caller is ZPL */ + /* + * If in append mode pick up the current end of file. + * This is done under z_range_lock to avoid races. + */ + if (new->r_type == RL_APPEND) + new->r_off = zp->z_size; + + /* + * If we need to grow the block size then grab the whole + * file range. This is also done under z_range_lock to + * avoid races. 
+ */ + end_size = MAX(zp->z_size, new->r_off + len); + if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || + zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) { + new->r_off = 0; + new->r_len = UINT64_MAX; + } + } + + /* + * First check for the usual case of no locks + */ + if (avl_numnodes(tree) == 0) { + new->r_type = RL_WRITER; /* convert to writer */ + avl_add(tree, new); + return; + } + + /* + * Look for any locks in the range. + */ + rl = avl_find(tree, new, &where); + if (rl) + goto wait; /* already locked at same offset */ + + rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER); + if (rl && (rl->r_off < new->r_off + new->r_len)) + goto wait; + + rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE); + if (rl && rl->r_off + rl->r_len > new->r_off) + goto wait; + + new->r_type = RL_WRITER; /* convert possible RL_APPEND */ + avl_insert(tree, new, where); + return; +wait: + if (!rl->r_write_wanted) { + cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL); + rl->r_write_wanted = B_TRUE; + } + cv_wait(&rl->r_wr_cv, &zp->z_range_lock); + + /* reset to original */ + new->r_off = off; + new->r_len = len; + } +} + +/* + * If this is an original (non-proxy) lock then replace it by + * a proxy and return the proxy. + */ +static rl_t * +zfs_range_proxify(avl_tree_t *tree, rl_t *rl) +{ + rl_t *proxy; + + if (rl->r_proxy) + return (rl); /* already a proxy */ + + ASSERT3U(rl->r_cnt, ==, 1); + ASSERT(rl->r_write_wanted == B_FALSE); + ASSERT(rl->r_read_wanted == B_FALSE); + avl_remove(tree, rl); + rl->r_cnt = 0; + + /* create a proxy range lock */ + proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP); + proxy->r_off = rl->r_off; + proxy->r_len = rl->r_len; + proxy->r_cnt = 1; + proxy->r_type = RL_READER; + proxy->r_proxy = B_TRUE; + proxy->r_write_wanted = B_FALSE; + proxy->r_read_wanted = B_FALSE; + avl_add(tree, proxy); + + return (proxy); +} + +/* + * Split the range lock at the supplied offset + * returning the *front* proxy. 
+ */ +static rl_t * +zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off) +{ + rl_t *front, *rear; + + ASSERT3U(rl->r_len, >, 1); + ASSERT3U(off, >, rl->r_off); + ASSERT3U(off, <, rl->r_off + rl->r_len); + ASSERT(rl->r_write_wanted == B_FALSE); + ASSERT(rl->r_read_wanted == B_FALSE); + + /* create the rear proxy range lock */ + rear = kmem_alloc(sizeof (rl_t), KM_SLEEP); + rear->r_off = off; + rear->r_len = rl->r_off + rl->r_len - off; + rear->r_cnt = rl->r_cnt; + rear->r_type = RL_READER; + rear->r_proxy = B_TRUE; + rear->r_write_wanted = B_FALSE; + rear->r_read_wanted = B_FALSE; + + front = zfs_range_proxify(tree, rl); + front->r_len = off - rl->r_off; + + avl_insert_here(tree, rear, front, AVL_AFTER); + return (front); +} + +/* + * Create and add a new proxy range lock for the supplied range. + */ +static void +zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) +{ + rl_t *rl; + + ASSERT(len); + rl = kmem_alloc(sizeof (rl_t), KM_SLEEP); + rl->r_off = off; + rl->r_len = len; + rl->r_cnt = 1; + rl->r_type = RL_READER; + rl->r_proxy = B_TRUE; + rl->r_write_wanted = B_FALSE; + rl->r_read_wanted = B_FALSE; + avl_add(tree, rl); +} + +static void +zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where) +{ + rl_t *next; + uint64_t off = new->r_off; + uint64_t len = new->r_len; + + /* + * prev arrives either: + * - pointing to an entry at the same offset + * - pointing to the entry with the closest previous offset whose + * range may overlap with the new range + * - null, if there were no ranges starting before the new one + */ + if (prev) { + if (prev->r_off + prev->r_len <= off) { + prev = NULL; + } else if (prev->r_off != off) { + /* + * convert to proxy if needed then + * split this entry and bump ref count + */ + prev = zfs_range_split(tree, prev, off); + prev = AVL_NEXT(tree, prev); /* move to rear range */ + } + } + ASSERT((prev == NULL) || (prev->r_off == off)); + + if (prev) + next = prev; + else + next = (rl_t 
*)avl_nearest(tree, where, AVL_AFTER); + + if (next == NULL || off + len <= next->r_off) { + /* no overlaps, use the original new rl_t in the tree */ + avl_insert(tree, new, where); + return; + } + + if (off < next->r_off) { + /* Add a proxy for initial range before the overlap */ + zfs_range_new_proxy(tree, off, next->r_off - off); + } + + new->r_cnt = 0; /* will use proxies in tree */ + /* + * We now search forward through the ranges, until we go past the end + * of the new range. For each entry we make it a proxy if it + * isn't already, then bump its reference count. If there's any + * gaps between the ranges then we create a new proxy range. + */ + for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) { + if (off + len <= next->r_off) + break; + if (prev && prev->r_off + prev->r_len < next->r_off) { + /* there's a gap */ + ASSERT3U(next->r_off, >, prev->r_off + prev->r_len); + zfs_range_new_proxy(tree, prev->r_off + prev->r_len, + next->r_off - (prev->r_off + prev->r_len)); + } + if (off + len == next->r_off + next->r_len) { + /* exact overlap with end */ + next = zfs_range_proxify(tree, next); + next->r_cnt++; + return; + } + if (off + len < next->r_off + next->r_len) { + /* new range ends in the middle of this block */ + next = zfs_range_split(tree, next, off + len); + next->r_cnt++; + return; + } + ASSERT3U(off + len, >, next->r_off + next->r_len); + next = zfs_range_proxify(tree, next); + next->r_cnt++; + } + + /* Add the remaining end range. */ + zfs_range_new_proxy(tree, prev->r_off + prev->r_len, + (off + len) - (prev->r_off + prev->r_len)); +} + +/* + * Check if a reader lock can be grabbed, or wait and recheck until available. + */ +static void +zfs_range_lock_reader(znode_t *zp, rl_t *new) +{ + avl_tree_t *tree = &zp->z_range_avl; + rl_t *prev, *next; + avl_index_t where; + uint64_t off = new->r_off; + uint64_t len = new->r_len; + + /* + * Look for any writer locks in the range. 
+ */ +retry: + prev = avl_find(tree, new, &where); + if (prev == NULL) + prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE); + + /* + * Check the previous range for a writer lock overlap. + */ + if (prev && (off < prev->r_off + prev->r_len)) { + if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) { + if (!prev->r_read_wanted) { + cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL); + prev->r_read_wanted = B_TRUE; + } + cv_wait(&prev->r_rd_cv, &zp->z_range_lock); + goto retry; + } + if (off + len < prev->r_off + prev->r_len) + goto got_lock; + } + + /* + * Search through the following ranges to see if there's + * write lock any overlap. + */ + if (prev) + next = AVL_NEXT(tree, prev); + else + next = (rl_t *)avl_nearest(tree, where, AVL_AFTER); + for (; next; next = AVL_NEXT(tree, next)) { + if (off + len <= next->r_off) + goto got_lock; + if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) { + if (!next->r_read_wanted) { + cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL); + next->r_read_wanted = B_TRUE; + } + cv_wait(&next->r_rd_cv, &zp->z_range_lock); + goto retry; + } + if (off + len <= next->r_off + next->r_len) + goto got_lock; + } + +got_lock: + /* + * Add the read lock, which may involve splitting existing + * locks and bumping ref counts (r_cnt). + */ + zfs_range_add_reader(tree, new, prev, where); +} + +/* + * Lock a range (offset, length) as either shared (RL_READER) + * or exclusive (RL_WRITER). Returns the range lock structure + * for later unlocking or reduce range (if entire file + * previously locked as RL_WRITER). 
+ */ +rl_t * +zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) +{ + rl_t *new; + + ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); + + new = kmem_alloc(sizeof (rl_t), KM_SLEEP); + new->r_zp = zp; + new->r_off = off; + if (len + off < off) /* overflow */ + len = UINT64_MAX - off; + new->r_len = len; + new->r_cnt = 1; /* assume it's going to be in the tree */ + new->r_type = type; + new->r_proxy = B_FALSE; + new->r_write_wanted = B_FALSE; + new->r_read_wanted = B_FALSE; + + mutex_enter(&zp->z_range_lock); + if (type == RL_READER) { + /* + * First check for the usual case of no locks + */ + if (avl_numnodes(&zp->z_range_avl) == 0) + avl_add(&zp->z_range_avl, new); + else + zfs_range_lock_reader(zp, new); + } else + zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */ + mutex_exit(&zp->z_range_lock); + return (new); +} + +/* + * Unlock a reader lock + */ +static void +zfs_range_unlock_reader(znode_t *zp, rl_t *remove) +{ + avl_tree_t *tree = &zp->z_range_avl; + rl_t *rl, *next; + uint64_t len; + + /* + * The common case is when the remove entry is in the tree + * (cnt == 1) meaning there's been no other reader locks overlapping + * with this one. Otherwise the remove entry will have been + * removed from the tree and replaced by proxies (one or + * more ranges mapping to the entire range). + */ + if (remove->r_cnt == 1) { + avl_remove(tree, remove); + if (remove->r_write_wanted) { + cv_broadcast(&remove->r_wr_cv); + cv_destroy(&remove->r_wr_cv); + } + if (remove->r_read_wanted) { + cv_broadcast(&remove->r_rd_cv); + cv_destroy(&remove->r_rd_cv); + } + } else { + ASSERT3U(remove->r_cnt, ==, 0); + ASSERT3U(remove->r_write_wanted, ==, 0); + ASSERT3U(remove->r_read_wanted, ==, 0); + /* + * Find start proxy representing this reader lock, + * then decrement ref count on all proxies + * that make up this range, freeing them as needed. 
+ */ + rl = avl_find(tree, remove, NULL); + ASSERT(rl); + ASSERT(rl->r_cnt); + ASSERT(rl->r_type == RL_READER); + for (len = remove->r_len; len != 0; rl = next) { + len -= rl->r_len; + if (len) { + next = AVL_NEXT(tree, rl); + ASSERT(next); + ASSERT(rl->r_off + rl->r_len == next->r_off); + ASSERT(next->r_cnt); + ASSERT(next->r_type == RL_READER); + } + rl->r_cnt--; + if (rl->r_cnt == 0) { + avl_remove(tree, rl); + if (rl->r_write_wanted) { + cv_broadcast(&rl->r_wr_cv); + cv_destroy(&rl->r_wr_cv); + } + if (rl->r_read_wanted) { + cv_broadcast(&rl->r_rd_cv); + cv_destroy(&rl->r_rd_cv); + } + kmem_free(rl, sizeof (rl_t)); + } + } + } + kmem_free(remove, sizeof (rl_t)); +} + +/* + * Unlock range and destroy range lock structure. + */ +void +zfs_range_unlock(rl_t *rl) +{ + znode_t *zp = rl->r_zp; + + ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER); + ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0); + ASSERT(!rl->r_proxy); + + mutex_enter(&zp->z_range_lock); + if (rl->r_type == RL_WRITER) { + /* writer locks can't be shared or split */ + avl_remove(&zp->z_range_avl, rl); + mutex_exit(&zp->z_range_lock); + if (rl->r_write_wanted) { + cv_broadcast(&rl->r_wr_cv); + cv_destroy(&rl->r_wr_cv); + } + if (rl->r_read_wanted) { + cv_broadcast(&rl->r_rd_cv); + cv_destroy(&rl->r_rd_cv); + } + kmem_free(rl, sizeof (rl_t)); + } else { + /* + * lock may be shared, let zfs_range_unlock_reader() + * release the lock and free the rl_t + */ + zfs_range_unlock_reader(zp, rl); + mutex_exit(&zp->z_range_lock); + } +} + +/* + * Reduce range locked as RL_WRITER from whole file to specified range. + * Asserts the whole file is exclusivly locked and so there's only one + * entry in the tree. 
+ */ +void +zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len) +{ + znode_t *zp = rl->r_zp; + + /* Ensure there are no other locks */ + ASSERT(avl_numnodes(&zp->z_range_avl) == 1); + ASSERT(rl->r_off == 0); + ASSERT(rl->r_type == RL_WRITER); + ASSERT(!rl->r_proxy); + ASSERT3U(rl->r_len, ==, UINT64_MAX); + ASSERT3U(rl->r_cnt, ==, 1); + + mutex_enter(&zp->z_range_lock); + rl->r_off = off; + rl->r_len = len; + mutex_exit(&zp->z_range_lock); + if (rl->r_write_wanted) + cv_broadcast(&rl->r_wr_cv); + if (rl->r_read_wanted) + cv_broadcast(&rl->r_rd_cv); +} + +/* + * AVL comparison function used to order range locks + * Locks are ordered on the start offset of the range. + */ +int +zfs_range_compare(const void *arg1, const void *arg2) +{ + const rl_t *rl1 = arg1; + const rl_t *rl2 = arg2; + + if (rl1->r_off > rl2->r_off) + return (1); + if (rl1->r_off < rl2->r_off) + return (-1); + return (0); +} diff --git a/uts/common/fs/zfs/zfs_sa.c b/uts/common/fs/zfs/zfs_sa.c new file mode 100644 index 000000000000..d141e43d722a --- /dev/null +++ b/uts/common/fs/zfs/zfs_sa.c @@ -0,0 +1,334 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. 
All rights reserved. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/sa.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_sa.h> + +/* + * ZPL attribute registration table. + * Order of attributes doesn't matter + * a unique value will be assigned for each + * attribute that is file system specific + * + * This is just the set of ZPL attributes that this + * version of ZFS deals with natively. The file system + * could have other attributes stored in files, but they will be + * ignored. The SA framework will preserve them, just that + * this version of ZFS won't change or delete them. + */ + +sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { + {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, + {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, + {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, + {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, + {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, + {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, + {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, + {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, + {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, + {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, + {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, + {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, + {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, + {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, + {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, + {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, + {"ZPL_DACL_COUNT", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, + {"ZPL_SYMLINK", 0, SA_UINT8_ARRAY, 0}, + {"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0}, + {"ZPL_DACL_ACES", 0, SA_ACL, 0}, + {NULL, 0, 0, 0} +}; + +#ifdef _KERNEL + +int +zfs_sa_readlink(znode_t *zp, uio_t *uio) +{ + dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); + size_t bufsz; + int error; + + bufsz = zp->z_size; + if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= 
db->db_size) { + error = uiomove((caddr_t)db->db_data + + ZFS_OLD_ZNODE_PHYS_SIZE, + MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); + } else { + dmu_buf_t *dbp; + if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id, + 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) { + error = uiomove(dbp->db_data, + MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); + dmu_buf_rele(dbp, FTAG); + } + } + return (error); +} + +void +zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx) +{ + dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); + + if (ZFS_OLD_ZNODE_PHYS_SIZE + len <= dmu_bonus_max()) { + VERIFY(dmu_set_bonus(db, + len + ZFS_OLD_ZNODE_PHYS_SIZE, tx) == 0); + if (len) { + bcopy(link, (caddr_t)db->db_data + + ZFS_OLD_ZNODE_PHYS_SIZE, len); + } + } else { + dmu_buf_t *dbp; + + zfs_grow_blocksize(zp, len, tx); + VERIFY(0 == dmu_buf_hold(zp->z_zfsvfs->z_os, + zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)); + + dmu_buf_will_dirty(dbp, tx); + + ASSERT3U(len, <=, dbp->db_size); + bcopy(link, dbp->db_data, len); + dmu_buf_rele(dbp, FTAG); + } +} + +void +zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + xoptattr_t *xoap; + + ASSERT(MUTEX_HELD(&zp->z_lock)); + VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); + if (zp->z_is_sa) { + if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs), + &xoap->xoa_av_scanstamp, + sizeof (xoap->xoa_av_scanstamp)) != 0) + return; + } else { + dmu_object_info_t doi; + dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); + int len; + + if (!(zp->z_pflags & ZFS_BONUS_SCANSTAMP)) + return; + + sa_object_info(zp->z_sa_hdl, &doi); + len = sizeof (xoap->xoa_av_scanstamp) + + ZFS_OLD_ZNODE_PHYS_SIZE; + + if (len <= doi.doi_bonus_size) { + (void) memcpy(xoap->xoa_av_scanstamp, + (caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, + sizeof (xoap->xoa_av_scanstamp)); + } + } + XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); +} + +void +zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + 
xoptattr_t *xoap; + + ASSERT(MUTEX_HELD(&zp->z_lock)); + VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); + if (zp->z_is_sa) + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs), + &xoap->xoa_av_scanstamp, + sizeof (xoap->xoa_av_scanstamp), tx)); + else { + dmu_object_info_t doi; + dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); + int len; + + sa_object_info(zp->z_sa_hdl, &doi); + len = sizeof (xoap->xoa_av_scanstamp) + + ZFS_OLD_ZNODE_PHYS_SIZE; + if (len > doi.doi_bonus_size) + VERIFY(dmu_set_bonus(db, len, tx) == 0); + (void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, + xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp)); + + zp->z_pflags |= ZFS_BONUS_SCANSTAMP; + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + &zp->z_pflags, sizeof (uint64_t), tx)); + } +} + +/* + * I'm not convinced we should do any of this upgrade. + * since the SA code can read both old/new znode formats + * with probably little to know performance difference. + * + * All new files will be created with the new format. + */ + +void +zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) +{ + dmu_buf_t *db = sa_get_db(hdl); + znode_t *zp = sa_get_userdata(hdl); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + sa_bulk_attr_t bulk[20]; + int count = 0; + sa_bulk_attr_t sa_attrs[20] = { 0 }; + zfs_acl_locator_cb_t locate = { 0 }; + uint64_t uid, gid, mode, rdev, xattr, parent; + uint64_t crtime[2], mtime[2], ctime[2]; + zfs_acl_phys_t znode_acl; + char scanstamp[AV_SCANSTAMP_SZ]; + boolean_t drop_lock = B_FALSE; + + /* + * No upgrade if ACL isn't cached + * since we won't know which locks are held + * and ready the ACL would require special "locked" + * interfaces that would be messy + */ + if (zp->z_acl_cached == NULL || ZTOV(zp)->v_type == VLNK) + return; + + /* + * If the z_lock is held and we aren't the owner + * the just return since we don't want to deadlock + * trying to update the status of z_is_sa. This + * file can then be upgraded at a later time. 
+ * + * Otherwise, we know we are doing the + * sa_update() that caused us to enter this function. + */ + if (mutex_owner(&zp->z_lock) != curthread) { + if (mutex_tryenter(&zp->z_lock) == 0) + return; + else + drop_lock = B_TRUE; + } + + /* First do a bulk query of the attributes that aren't cached */ + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + &znode_acl, 88); + + if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) + goto done; + + + /* + * While the order here doesn't matter its best to try and organize + * it is such a way to pick up an already existing layout number + */ + count = 0; + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL, + &zp->z_size, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs), + NULL, &zp->z_gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs), + NULL, &parent, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL, + zp->z_atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL, + &mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, 
count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL, + &crtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL, + &zp->z_links, 8); + if (zp->z_vnode->v_type == VBLK || zp->z_vnode->v_type == VCHR) + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL, + &rdev, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL, + &zp->z_acl_cached->z_acl_count, 8); + + if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID) + zfs_acl_xform(zp, zp->z_acl_cached, CRED()); + + locate.cb_aclp = zp->z_acl_cached; + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs), + zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes); + + if (xattr) + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs), + NULL, &xattr, 8); + + /* if scanstamp then add scanstamp */ + + if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) { + bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, + scanstamp, AV_SCANSTAMP_SZ); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs), + NULL, scanstamp, AV_SCANSTAMP_SZ); + zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP; + } + + VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0); + VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs, + count, tx) == 0); + if (znode_acl.z_acl_extern_obj) + VERIFY(0 == dmu_object_free(zfsvfs->z_os, + znode_acl.z_acl_extern_obj, tx)); + + zp->z_is_sa = B_TRUE; +done: + if (drop_lock) + mutex_exit(&zp->z_lock); +} + +void +zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) +{ + if (!zp->z_zfsvfs->z_use_sa || zp->z_is_sa) + return; + + + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + + if (zfs_external_acl(zp)) { + dmu_tx_hold_free(tx, zfs_external_acl(zp), 0, + DMU_OBJECT_END); + } +} + +#endif diff --git a/uts/common/fs/zfs/zfs_vfsops.c b/uts/common/fs/zfs/zfs_vfsops.c new file mode 100644 index 000000000000..4970552d0cb7 --- /dev/null +++ b/uts/common/fs/zfs/zfs_vfsops.c @@ -0,0 +1,2303 @@ +/* + * CDDL HEADER START + * + * The 
contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/kmem.h> +#include <sys/pathname.h> +#include <sys/vnode.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> +#include <sys/mntent.h> +#include <sys/mount.h> +#include <sys/cmn_err.h> +#include "fs/fs_subr.h" +#include <sys/zfs_znode.h> +#include <sys/zfs_dir.h> +#include <sys/zil.h> +#include <sys/fs/zfs.h> +#include <sys/dmu.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_deleg.h> +#include <sys/spa.h> +#include <sys/zap.h> +#include <sys/sa.h> +#include <sys/varargs.h> +#include <sys/policy.h> +#include <sys/atomic.h> +#include <sys/mkdev.h> +#include <sys/modctl.h> +#include <sys/refstr.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_ctldir.h> +#include <sys/zfs_fuid.h> +#include <sys/bootconf.h> +#include <sys/sunddi.h> +#include <sys/dnlc.h> +#include <sys/dmu_objset.h> +#include <sys/spa_boot.h> +#include <sys/sa.h> +#include "zfs_comutil.h" + +int zfsfstype; +vfsops_t *zfs_vfsops = 
NULL; +static major_t zfs_major; +static minor_t zfs_minor; +static kmutex_t zfs_dev_mtx; + +extern int sys_shutdown; + +static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); +static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); +static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); +static int zfs_root(vfs_t *vfsp, vnode_t **vpp); +static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); +static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); +static void zfs_freevfs(vfs_t *vfsp); + +static const fs_operation_def_t zfs_vfsops_template[] = { + VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, + VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, + VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, + VFSNAME_ROOT, { .vfs_root = zfs_root }, + VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, + VFSNAME_SYNC, { .vfs_sync = zfs_sync }, + VFSNAME_VGET, { .vfs_vget = zfs_vget }, + VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, + NULL, NULL +}; + +static const fs_operation_def_t zfs_vfsops_eio_template[] = { + VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, + NULL, NULL +}; + +/* + * We need to keep a count of active fs's. + * This is necessary to prevent our module + * from being unloaded after a umount -f + */ +static uint32_t zfs_active_fs_count = 0; + +static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; +static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; +static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; +static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; + +/* + * MO_DEFAULT is not used since the default value is determined + * by the equivalent property. 
+ */ +static mntopt_t mntopts[] = { + { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, + { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, + { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, + { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } +}; + +static mntopts_t zfs_mntopts = { + sizeof (mntopts) / sizeof (mntopt_t), + mntopts +}; + +/*ARGSUSED*/ +int +zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) +{ + /* + * Data integrity is job one. We don't want a compromised kernel + * writing to the storage pool, so we never sync during panic. + */ + if (panicstr) + return (0); + + /* + * SYNC_ATTR is used by fsflush() to force old filesystems like UFS + * to sync metadata, which they would otherwise cache indefinitely. + * Semantically, the only requirement is that the sync be initiated. + * The DMU syncs out txgs frequently, so there's nothing to do. + */ + if (flag & SYNC_ATTR) + return (0); + + if (vfsp != NULL) { + /* + * Sync a specific filesystem. + */ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + dsl_pool_t *dp; + + ZFS_ENTER(zfsvfs); + dp = dmu_objset_pool(zfsvfs->z_os); + + /* + * If the system is shutting down, then skip any + * filesystems which may exist on a suspended pool. + */ + if (sys_shutdown && spa_suspended(dp->dp_spa)) { + ZFS_EXIT(zfsvfs); + return (0); + } + + if (zfsvfs->z_log != NULL) + zil_commit(zfsvfs->z_log, 0); + + ZFS_EXIT(zfsvfs); + } else { + /* + * Sync all ZFS filesystems. This is what happens when you + * run sync(1M). Unlike other filesystems, ZFS honors the + * request by waiting for all pools to commit all dirty data. + */ + spa_sync_allpools(); + } + + return (0); +} + +static int +zfs_create_unique_device(dev_t *dev) +{ + major_t new_major; + + do { + ASSERT3U(zfs_minor, <=, MAXMIN32); + minor_t start = zfs_minor; + do { + mutex_enter(&zfs_dev_mtx); + if (zfs_minor >= MAXMIN32) { + /* + * If we're still using the real major + * keep out of /dev/zfs and /dev/zvol minor + * number space. 
If we're using a getudev()'ed + * major number, we can use all of its minors. + */ + if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) + zfs_minor = ZFS_MIN_MINOR; + else + zfs_minor = 0; + } else { + zfs_minor++; + } + *dev = makedevice(zfs_major, zfs_minor); + mutex_exit(&zfs_dev_mtx); + } while (vfs_devismounted(*dev) && zfs_minor != start); + if (zfs_minor == start) { + /* + * We are using all ~262,000 minor numbers for the + * current major number. Create a new major number. + */ + if ((new_major = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, + "zfs_mount: Can't get unique major " + "device number."); + return (-1); + } + mutex_enter(&zfs_dev_mtx); + zfs_major = new_major; + zfs_minor = 0; + + mutex_exit(&zfs_dev_mtx); + } else { + break; + } + /* CONSTANTCONDITION */ + } while (1); + + return (0); +} + +static void +atime_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval == TRUE) { + zfsvfs->z_atime = TRUE; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); + } else { + zfsvfs->z_atime = FALSE; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); + } +} + +static void +xattr_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval == TRUE) { + /* XXX locking on vfs_flag? */ + zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); + } else { + /* XXX locking on vfs_flag? 
*/ + zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); + } +} + +static void +blksz_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval < SPA_MINBLOCKSIZE || + newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) + newval = SPA_MAXBLOCKSIZE; + + zfsvfs->z_max_blksz = newval; + zfsvfs->z_vfs->vfs_bsize = newval; +} + +static void +readonly_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval) { + /* XXX locking on vfs_flag? */ + zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); + } else { + /* XXX locking on vfs_flag? */ + zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); + } +} + +static void +devices_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval == FALSE) { + zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); + } else { + zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); + } +} + +static void +setuid_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval == FALSE) { + zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); + } else { + zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); + } +} + +static void +exec_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval == FALSE) { + zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); + 
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); + } else { + zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); + } +} + +/* + * The nbmand mount option can be changed at mount time. + * We can't allow it to be toggled on live file systems or incorrect + * behavior may be seen from cifs clients + * + * This property isn't registered via dsl_prop_register(), but this callback + * will be called when a file system is first mounted + */ +static void +nbmand_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == FALSE) { + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); + } else { + vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); + vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); + } +} + +static void +snapdir_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + zfsvfs->z_show_ctldir = newval; +} + +static void +vscan_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + zfsvfs->z_vscan = newval; +} + +static void +acl_inherit_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + zfsvfs->z_acl_inherit = newval; +} + +static int +zfs_register_callbacks(vfs_t *vfsp) +{ + struct dsl_dataset *ds = NULL; + objset_t *os = NULL; + zfsvfs_t *zfsvfs = NULL; + uint64_t nbmand; + int readonly, do_readonly = B_FALSE; + int setuid, do_setuid = B_FALSE; + int exec, do_exec = B_FALSE; + int devices, do_devices = B_FALSE; + int xattr, do_xattr = B_FALSE; + int atime, do_atime = B_FALSE; + int error = 0; + + ASSERT(vfsp); + zfsvfs = vfsp->vfs_data; + ASSERT(zfsvfs); + os = zfsvfs->z_os; + + /* + * The act of registering our callbacks will destroy any mount + * options we may have. In order to enable temporary overrides + * of mount options, we stash away the current values and + * restore them after we register the callbacks. 
+ */ + if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || + !spa_writeable(dmu_objset_spa(os))) { + readonly = B_TRUE; + do_readonly = B_TRUE; + } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { + readonly = B_FALSE; + do_readonly = B_TRUE; + } + if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { + devices = B_FALSE; + setuid = B_FALSE; + do_devices = B_TRUE; + do_setuid = B_TRUE; + } else { + if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { + devices = B_FALSE; + do_devices = B_TRUE; + } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { + devices = B_TRUE; + do_devices = B_TRUE; + } + + if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { + setuid = B_FALSE; + do_setuid = B_TRUE; + } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { + setuid = B_TRUE; + do_setuid = B_TRUE; + } + } + if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { + exec = B_FALSE; + do_exec = B_TRUE; + } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { + exec = B_TRUE; + do_exec = B_TRUE; + } + if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { + xattr = B_FALSE; + do_xattr = B_TRUE; + } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { + xattr = B_TRUE; + do_xattr = B_TRUE; + } + if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { + atime = B_FALSE; + do_atime = B_TRUE; + } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { + atime = B_TRUE; + do_atime = B_TRUE; + } + + /* + * nbmand is a special property. It can only be changed at + * mount time. + * + * This is weird, but it is documented to only be changeable + * at mount time. + */ + if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { + nbmand = B_FALSE; + } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { + nbmand = B_TRUE; + } else { + char osname[MAXNAMELEN]; + + dmu_objset_name(os, osname); + if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, + NULL)) { + return (error); + } + } + + /* + * Register property callbacks. 
+ * + * It would probably be fine to just check for i/o error from + * the first prop_register(), but I guess I like to go + * overboard... + */ + ds = dmu_objset_ds(os); + error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "xattr", xattr_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "recordsize", blksz_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "readonly", readonly_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "devices", devices_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "setuid", setuid_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "exec", exec_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "snapdir", snapdir_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "aclinherit", acl_inherit_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + "vscan", vscan_changed_cb, zfsvfs); + if (error) + goto unregister; + + /* + * Invoke our callbacks to restore temporary mount options. + */ + if (do_readonly) + readonly_changed_cb(zfsvfs, readonly); + if (do_setuid) + setuid_changed_cb(zfsvfs, setuid); + if (do_exec) + exec_changed_cb(zfsvfs, exec); + if (do_devices) + devices_changed_cb(zfsvfs, devices); + if (do_xattr) + xattr_changed_cb(zfsvfs, xattr); + if (do_atime) + atime_changed_cb(zfsvfs, atime); + + nbmand_changed_cb(zfsvfs, nbmand); + + return (0); + +unregister: + /* + * We may attempt to unregister some callbacks that are not + * registered, but this is OK; it will simply return ENOMSG, + * which we will ignore. 
+ */ + (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, + zfsvfs); + (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); + return (error); + +} + +static int +zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, + uint64_t *userp, uint64_t *groupp) +{ + znode_phys_t *znp = data; + int error = 0; + + /* + * Is it a valid type of object to track? + */ + if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) + return (ENOENT); + + /* + * If we have a NULL data pointer + * then assume the id's aren't changing and + * return EEXIST to the dmu to let it know to + * use the same ids + */ + if (data == NULL) + return (EEXIST); + + if (bonustype == DMU_OT_ZNODE) { + *userp = znp->zp_uid; + *groupp = znp->zp_gid; + } else { + int hdrsize; + + ASSERT(bonustype == DMU_OT_SA); + hdrsize = sa_hdrsize(data); + + if (hdrsize != 0) { + *userp = *((uint64_t *)((uintptr_t)data + hdrsize + + SA_UID_OFFSET)); + *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + + SA_GID_OFFSET)); + } else { + /* + * This should only happen for newly created + * files that haven't had the znode data filled + * in yet. 
+ */ + *userp = 0; + *groupp = 0; + } + } + return (error); +} + +static void +fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, + char *domainbuf, int buflen, uid_t *ridp) +{ + uint64_t fuid; + const char *domain; + + fuid = strtonum(fuidstr, NULL); + + domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); + if (domain) + (void) strlcpy(domainbuf, domain, buflen); + else + domainbuf[0] = '\0'; + *ridp = FUID_RID(fuid); +} + +static uint64_t +zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) +{ + switch (type) { + case ZFS_PROP_USERUSED: + return (DMU_USERUSED_OBJECT); + case ZFS_PROP_GROUPUSED: + return (DMU_GROUPUSED_OBJECT); + case ZFS_PROP_USERQUOTA: + return (zfsvfs->z_userquota_obj); + case ZFS_PROP_GROUPQUOTA: + return (zfsvfs->z_groupquota_obj); + } + return (0); +} + +int +zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) +{ + int error; + zap_cursor_t zc; + zap_attribute_t za; + zfs_useracct_t *buf = vbuf; + uint64_t obj; + + if (!dmu_objset_userspace_present(zfsvfs->z_os)) + return (ENOTSUP); + + obj = zfs_userquota_prop_to_obj(zfsvfs, type); + if (obj == 0) { + *bufsizep = 0; + return (0); + } + + for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); + (error = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > + *bufsizep) + break; + + fuidstr_to_sid(zfsvfs, za.za_name, + buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); + + buf->zu_space = za.za_first_integer; + buf++; + } + if (error == ENOENT) + error = 0; + + ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); + *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; + *cookiep = zap_cursor_serialize(&zc); + zap_cursor_fini(&zc); + return (error); +} + +/* + * buf must be big enough (eg, 32 bytes) + */ +static int +id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, + char *buf, boolean_t addok) +{ 
+ uint64_t fuid; + int domainid = 0; + + if (domain && domain[0]) { + domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); + if (domainid == -1) + return (ENOENT); + } + fuid = FUID_ENCODE(domainid, rid); + (void) sprintf(buf, "%llx", (longlong_t)fuid); + return (0); +} + +int +zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + const char *domain, uint64_t rid, uint64_t *valp) +{ + char buf[32]; + int err; + uint64_t obj; + + *valp = 0; + + if (!dmu_objset_userspace_present(zfsvfs->z_os)) + return (ENOTSUP); + + obj = zfs_userquota_prop_to_obj(zfsvfs, type); + if (obj == 0) + return (0); + + err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); + if (err) + return (err); + + err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); + if (err == ENOENT) + err = 0; + return (err); +} + +int +zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + const char *domain, uint64_t rid, uint64_t quota) +{ + char buf[32]; + int err; + dmu_tx_t *tx; + uint64_t *objp; + boolean_t fuid_dirtied; + + if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) + return (EINVAL); + + if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) + return (ENOTSUP); + + objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : + &zfsvfs->z_groupquota_obj; + + err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); + if (err) + return (err); + fuid_dirtied = zfsvfs->z_fuid_dirty; + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_zap(tx, *objp ? 
*objp : DMU_NEW_OBJECT, B_TRUE, NULL); + if (*objp == 0) { + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, + zfs_userquota_prop_prefixes[type]); + } + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + return (err); + } + + mutex_enter(&zfsvfs->z_lock); + if (*objp == 0) { + *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, + DMU_OT_NONE, 0, tx); + VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); + } + mutex_exit(&zfsvfs->z_lock); + + if (quota == 0) { + err = zap_remove(zfsvfs->z_os, *objp, buf, tx); + if (err == ENOENT) + err = 0; + } else { + err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx); + } + ASSERT(err == 0); + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + dmu_tx_commit(tx); + return (err); +} + +boolean_t +zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) +{ + char buf[32]; + uint64_t used, quota, usedobj, quotaobj; + int err; + + usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; + quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; + + if (quotaobj == 0 || zfsvfs->z_replay) + return (B_FALSE); + + (void) sprintf(buf, "%llx", (longlong_t)fuid); + err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota); + if (err != 0) + return (B_FALSE); + + err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); + if (err != 0) + return (B_FALSE); + return (used >= quota); +} + +boolean_t +zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) +{ + uint64_t fuid; + uint64_t quotaobj; + + quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; + + fuid = isgroup ? 
zp->z_gid : zp->z_uid; + + if (quotaobj == 0 || zfsvfs->z_replay) + return (B_FALSE); + + return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); +} + +int +zfsvfs_create(const char *osname, zfsvfs_t **zfvp) +{ + objset_t *os; + zfsvfs_t *zfsvfs; + uint64_t zval; + int i, error; + uint64_t sa_obj; + + zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); + + /* + * We claim to always be readonly so we can open snapshots; + * other ZPL code will prevent us from writing to snapshots. + */ + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + if (error) { + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + return (error); + } + + /* + * Initialize the zfs-specific filesystem structure. + * Should probably make this a kmem cache, shuffle fields, + * and just bzero up to z_hold_mtx[]. + */ + zfsvfs->z_vfs = NULL; + zfsvfs->z_parent = zfsvfs; + zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; + zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; + zfsvfs->z_os = os; + + error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); + if (error) { + goto out; + } else if (zfsvfs->z_version > + zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { + (void) printf("Can't mount a version %lld file system " + "on a version %lld pool\n. Pool must be upgraded to mount " + "this file system.", (u_longlong_t)zfsvfs->z_version, + (u_longlong_t)spa_version(dmu_objset_spa(os))); + error = ENOTSUP; + goto out; + } + if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) + goto out; + zfsvfs->z_norm = (int)zval; + + if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) + goto out; + zfsvfs->z_utf8 = (zval != 0); + + if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) + goto out; + zfsvfs->z_case = (uint_t)zval; + + /* + * Fold case on file systems that are always or sometimes case + * insensitive. 
+ */ + if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || + zfsvfs->z_case == ZFS_CASE_MIXED) + zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; + + zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); + + if (zfsvfs->z_use_sa) { + /* should either have both of these objects or none */ + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, + &sa_obj); + if (error) + return (error); + } else { + /* + * Pre SA versions file systems should never touch + * either the attribute registration or layout objects. + */ + sa_obj = 0; + } + + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zfsvfs->z_attr_table); + if (error) + goto out; + + if (zfsvfs->z_version >= ZPL_VERSION_SA) + sa_register_update_callback(os, zfs_sa_upgrade); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, + &zfsvfs->z_root); + if (error) + goto out; + ASSERT(zfsvfs->z_root != 0); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, + &zfsvfs->z_unlinkedobj); + if (error) + goto out; + + error = zap_lookup(os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], + 8, 1, &zfsvfs->z_userquota_obj); + if (error && error != ENOENT) + goto out; + + error = zap_lookup(os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], + 8, 1, &zfsvfs->z_groupquota_obj); + if (error && error != ENOENT) + goto out; + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, + &zfsvfs->z_fuid_obj); + if (error && error != ENOENT) + goto out; + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, + &zfsvfs->z_shares_dir); + if (error && error != ENOENT) + goto out; + + mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), + offsetof(znode_t, z_link_node)); + rrw_init(&zfsvfs->z_teardown_lock); + rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, 
RW_DEFAULT, NULL); + rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); + + *zfvp = zfsvfs; + return (0); + +out: + dmu_objset_disown(os, zfsvfs); + *zfvp = NULL; + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + return (error); +} + +static int +zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) +{ + int error; + + error = zfs_register_callbacks(zfsvfs->z_vfs); + if (error) + return (error); + + /* + * Set the objset user_ptr to track its zfsvfs. + */ + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); + dmu_objset_set_user(zfsvfs->z_os, zfsvfs); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); + + zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); + + /* + * If we are not mounting (ie: online recv), then we don't + * have to worry about replaying the log as we blocked all + * operations out since we closed the ZIL. + */ + if (mounting) { + boolean_t readonly; + + /* + * During replay we remove the read only flag to + * allow replays to succeed. + */ + readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; + if (readonly != 0) + zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; + else + zfs_unlinked_drain(zfsvfs); + + /* + * Parse and replay the intent log. + * + * Because of ziltest, this must be done after + * zfs_unlinked_drain(). (Further note: ziltest + * doesn't use readonly mounts, where + * zfs_unlinked_drain() isn't called.) This is because + * ziltest causes spa_sync() to think it's committed, + * but actually it is not, so the intent log contains + * many txg's worth of changes. + * + * In particular, if object N is in the unlinked set in + * the last txg to actually sync, then it could be + * actually freed in a later txg and then reallocated + * in a yet later txg. This would write a "create + * object N" record to the intent log. 
Normally, this + * would be fine because the spa_sync() would have + * written out the fact that object N is free, before + * we could write the "create object N" intent log + * record. + * + * But when we are in ziltest mode, we advance the "open + * txg" without actually spa_sync()-ing the changes to + * disk. So we would see that object N is still + * allocated and in the unlinked set, and there is an + * intent log record saying to allocate it. + */ + if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { + if (zil_replay_disable) { + zil_destroy(zfsvfs->z_log, B_FALSE); + } else { + zfsvfs->z_replay = B_TRUE; + zil_replay(zfsvfs->z_os, zfsvfs, + zfs_replay_vector); + zfsvfs->z_replay = B_FALSE; + } + } + zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ + } + + return (0); +} + +void +zfsvfs_free(zfsvfs_t *zfsvfs) +{ + int i; + extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ + + /* + * This is a barrier to prevent the filesystem from going away in + * zfs_znode_move() until we can safely ensure that the filesystem is + * not unmounted. We consider the filesystem valid before the barrier + * and invalid after the barrier. 
+ */ + rw_enter(&zfsvfs_lock, RW_READER); + rw_exit(&zfsvfs_lock); + + zfs_fuid_destroy(zfsvfs); + + mutex_destroy(&zfsvfs->z_znodes_lock); + mutex_destroy(&zfsvfs->z_lock); + list_destroy(&zfsvfs->z_all_znodes); + rrw_destroy(&zfsvfs->z_teardown_lock); + rw_destroy(&zfsvfs->z_teardown_inactive_lock); + rw_destroy(&zfsvfs->z_fuid_lock); + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_destroy(&zfsvfs->z_hold_mtx[i]); + kmem_free(zfsvfs, sizeof (zfsvfs_t)); +} + +static void +zfs_set_fuid_feature(zfsvfs_t *zfsvfs) +{ + zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); + if (zfsvfs->z_vfs) { + if (zfsvfs->z_use_fuids) { + vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); + } else { + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); + } + } + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); +} + +static int +zfs_domount(vfs_t *vfsp, char *osname) +{ + dev_t mount_dev; + uint64_t recordsize, fsid_guid; + int error = 0; + zfsvfs_t *zfsvfs; + + ASSERT(vfsp); + ASSERT(osname); + + error = zfsvfs_create(osname, &zfsvfs); + if (error) + return (error); + zfsvfs->z_vfs = vfsp; + + /* Initialize the generic filesystem structure. 
*/ + vfsp->vfs_bcount = 0; + vfsp->vfs_data = NULL; + + if (zfs_create_unique_device(&mount_dev) == -1) { + error = ENODEV; + goto out; + } + ASSERT(vfs_devismounted(mount_dev) == 0); + + if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, + NULL)) + goto out; + + vfsp->vfs_dev = mount_dev; + vfsp->vfs_fstype = zfsfstype; + vfsp->vfs_bsize = recordsize; + vfsp->vfs_flag |= VFS_NOTRUNC; + vfsp->vfs_data = zfsvfs; + + /* + * The fsid is 64 bits, composed of an 8-bit fs type, which + * separates our fsid from any other filesystem types, and a + * 56-bit objset unique ID. The objset unique ID is unique to + * all objsets open on this system, provided by unique_create(). + * The 8-bit fs type must be put in the low bits of fsid[1] + * because that's where other Solaris filesystems put it. + */ + fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); + ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); + vfsp->vfs_fsid.val[0] = fsid_guid; + vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | + zfsfstype & 0xFF; + + /* + * Set features for file system. 
+ */ + zfs_set_fuid_feature(zfsvfs); + if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { + vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); + vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); + vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); + } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { + vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); + vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); + } + vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); + + if (dmu_objset_is_snapshot(zfsvfs->z_os)) { + uint64_t pval; + + atime_changed_cb(zfsvfs, B_FALSE); + readonly_changed_cb(zfsvfs, B_TRUE); + if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) + goto out; + xattr_changed_cb(zfsvfs, pval); + zfsvfs->z_issnap = B_TRUE; + zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; + + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); + dmu_objset_set_user(zfsvfs->z_os, zfsvfs); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); + } else { + error = zfsvfs_setup(zfsvfs, B_TRUE); + } + + if (!zfsvfs->z_issnap) + zfsctl_create(zfsvfs); +out: + if (error) { + dmu_objset_disown(zfsvfs->z_os, zfsvfs); + zfsvfs_free(zfsvfs); + } else { + atomic_add_32(&zfs_active_fs_count, 1); + } + + return (error); +} + +void +zfs_unregister_callbacks(zfsvfs_t *zfsvfs) +{ + objset_t *os = zfsvfs->z_os; + struct dsl_dataset *ds; + + /* + * Unregister properties. 
+ */ + if (!dmu_objset_is_snapshot(os)) { + ds = dmu_objset_ds(os); + VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, + zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "aclinherit", + acl_inherit_changed_cb, zfsvfs) == 0); + + VERIFY(dsl_prop_unregister(ds, "vscan", + vscan_changed_cb, zfsvfs) == 0); + } +} + +/* + * Convert a decimal digit string to a uint64_t integer. + */ +static int +str_to_uint64(char *str, uint64_t *objnum) +{ + uint64_t num = 0; + + while (*str) { + if (*str < '0' || *str > '9') + return (EINVAL); + + num = num*10 + *str++ - '0'; + } + + *objnum = num; + return (0); +} + +/* + * The boot path passed from the boot loader is in the form of + * "rootpool-name/root-filesystem-object-number". Convert this + * string to a dataset name: "rootpool-name/root-filesystem-name". 
+ */ +static int +zfs_parse_bootfs(char *bpath, char *outpath) +{ + char *slashp; + uint64_t objnum; + int error; + + if (*bpath == 0 || *bpath == '/') + return (EINVAL); + + (void) strcpy(outpath, bpath); + + slashp = strchr(bpath, '/'); + + /* if no '/', just return the pool name */ + if (slashp == NULL) { + return (0); + } + + /* if not a number, just return the root dataset name */ + if (str_to_uint64(slashp+1, &objnum)) { + return (0); + } + + *slashp = '\0'; + error = dsl_dsobj_to_dsname(bpath, objnum, outpath); + *slashp = '/'; + + return (error); +} + +/* + * zfs_check_global_label: + * Check that the hex label string is appropriate for the dataset + * being mounted into the global_zone proper. + * + * Return an error if the hex label string is not default or + * admin_low/admin_high. For admin_low labels, the corresponding + * dataset must be readonly. + */ +int +zfs_check_global_label(const char *dsname, const char *hexsl) +{ + if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) + return (0); + if (strcasecmp(hexsl, ADMIN_HIGH) == 0) + return (0); + if (strcasecmp(hexsl, ADMIN_LOW) == 0) { + /* must be readonly */ + uint64_t rdonly; + + if (dsl_prop_get_integer(dsname, + zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) + return (EACCES); + return (rdonly ? 0 : EACCES); + } + return (EACCES); +} + +/* + * zfs_mount_label_policy: + * Determine whether the mount is allowed according to MAC check. + * by comparing (where appropriate) label of the dataset against + * the label of the zone being mounted into. If the dataset has + * no label, create one. + * + * Returns: + * 0 : access allowed + * >0 : error code, such as EACCES + */ +static int +zfs_mount_label_policy(vfs_t *vfsp, char *osname) +{ + int error, retv; + zone_t *mntzone = NULL; + ts_label_t *mnt_tsl; + bslabel_t *mnt_sl; + bslabel_t ds_sl; + char ds_hexsl[MAXNAMELEN]; + + retv = EACCES; /* assume the worst */ + + /* + * Start by getting the dataset label if it exists. 
+ */ + error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), + 1, sizeof (ds_hexsl), &ds_hexsl, NULL); + if (error) + return (EACCES); + + /* + * If labeling is NOT enabled, then disallow the mount of datasets + * which have a non-default label already. No other label checks + * are needed. + */ + if (!is_system_labeled()) { + if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) + return (0); + return (EACCES); + } + + /* + * Get the label of the mountpoint. If mounting into the global + * zone (i.e. mountpoint is not within an active zone and the + * zoned property is off), the label must be default or + * admin_low/admin_high only; no other checks are needed. + */ + mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); + if (mntzone->zone_id == GLOBAL_ZONEID) { + uint64_t zoned; + + zone_rele(mntzone); + + if (dsl_prop_get_integer(osname, + zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) + return (EACCES); + if (!zoned) + return (zfs_check_global_label(osname, ds_hexsl)); + else + /* + * This is the case of a zone dataset being mounted + * initially, before the zone has been fully created; + * allow this mount into global zone. + */ + return (0); + } + + mnt_tsl = mntzone->zone_slabel; + ASSERT(mnt_tsl != NULL); + label_hold(mnt_tsl); + mnt_sl = label2bslabel(mnt_tsl); + + if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { + /* + * The dataset doesn't have a real label, so fabricate one. + */ + char *str = NULL; + + if (l_to_str_internal(mnt_sl, &str) == 0 && + dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), + ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) + retv = 0; + if (str != NULL) + kmem_free(str, strlen(str) + 1); + } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { + /* + * Now compare labels to complete the MAC check. If the + * labels are equal then allow access. If the mountpoint + * label dominates the dataset label, allow readonly access. + * Otherwise, access is denied. 
+ */ + if (blequal(mnt_sl, &ds_sl)) + retv = 0; + else if (bldominates(mnt_sl, &ds_sl)) { + vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); + retv = 0; + } + } + + label_rele(mnt_tsl); + zone_rele(mntzone); + return (retv); +} + +static int +zfs_mountroot(vfs_t *vfsp, enum whymountroot why) +{ + int error = 0; + static int zfsrootdone = 0; + zfsvfs_t *zfsvfs = NULL; + znode_t *zp = NULL; + vnode_t *vp = NULL; + char *zfs_bootfs; + char *zfs_devid; + + ASSERT(vfsp); + + /* + * The filesystem that we mount as root is defined in the + * boot property "zfs-bootfs" with a format of + * "poolname/root-dataset-objnum". + */ + if (why == ROOT_INIT) { + if (zfsrootdone++) + return (EBUSY); + /* + * the process of doing a spa_load will require the + * clock to be set before we could (for example) do + * something better by looking at the timestamp on + * an uberblock, so just set it to -1. + */ + clkset(-1); + + if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { + cmn_err(CE_NOTE, "spa_get_bootfs: can not get " + "bootfs name"); + return (EINVAL); + } + zfs_devid = spa_get_bootprop("diskdevid"); + error = spa_import_rootpool(rootfs.bo_name, zfs_devid); + if (zfs_devid) + spa_free_bootprop(zfs_devid); + if (error) { + spa_free_bootprop(zfs_bootfs); + cmn_err(CE_NOTE, "spa_import_rootpool: error %d", + error); + return (error); + } + if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { + spa_free_bootprop(zfs_bootfs); + cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", + error); + return (error); + } + + spa_free_bootprop(zfs_bootfs); + + if (error = vfs_lock(vfsp)) + return (error); + + if (error = zfs_domount(vfsp, rootfs.bo_name)) { + cmn_err(CE_NOTE, "zfs_domount: error %d", error); + goto out; + } + + zfsvfs = (zfsvfs_t *)vfsp->vfs_data; + ASSERT(zfsvfs); + if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { + cmn_err(CE_NOTE, "zfs_zget: error %d", error); + goto out; + } + + vp = ZTOV(zp); + mutex_enter(&vp->v_lock); + vp->v_flag |= VROOT; + 
mutex_exit(&vp->v_lock); + rootvp = vp; + + /* + * Leave rootvp held. The root file system is never unmounted. + */ + + vfs_add((struct vnode *)0, vfsp, + (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); +out: + vfs_unlock(vfsp); + return (error); + } else if (why == ROOT_REMOUNT) { + readonly_changed_cb(vfsp->vfs_data, B_FALSE); + vfsp->vfs_flag |= VFS_REMOUNT; + + /* refresh mount options */ + zfs_unregister_callbacks(vfsp->vfs_data); + return (zfs_register_callbacks(vfsp)); + + } else if (why == ROOT_UNMOUNT) { + zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); + (void) zfs_sync(vfsp, 0, 0); + return (0); + } + + /* + * if "why" is equal to anything else other than ROOT_INIT, + * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. + */ + return (ENOTSUP); +} + +/*ARGSUSED*/ +static int +zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) +{ + char *osname; + pathname_t spn; + int error = 0; + uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? + UIO_SYSSPACE : UIO_USERSPACE; + int canwrite; + + if (mvp->v_type != VDIR) + return (ENOTDIR); + + mutex_enter(&mvp->v_lock); + if ((uap->flags & MS_REMOUNT) == 0 && + (uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { + mutex_exit(&mvp->v_lock); + return (EBUSY); + } + mutex_exit(&mvp->v_lock); + + /* + * ZFS does not support passing unparsed data in via MS_DATA. + * Users should use the MS_OPTIONSTR interface; this means + * that all option parsing is already done and the options struct + * can be interrogated. + */ + if ((uap->flags & MS_DATA) && uap->datalen > 0) + return (EINVAL); + + /* + * Get the objset name (the "special" mount argument). + */ + if (error = pn_get(uap->spec, fromspace, &spn)) + return (error); + + osname = spn.pn_path; + + /* + * Check for mount privilege? 
+ * + * If we don't have privilege then see if + * we have local permission to allow it + */ + error = secpolicy_fs_mount(cr, mvp, vfsp); + if (error) { + if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) { + vattr_t vattr; + + /* + * Make sure user is the owner of the mount point + * or has sufficient privileges. + */ + + vattr.va_mask = AT_UID; + + if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { + goto out; + } + + if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 && + VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) { + goto out; + } + secpolicy_fs_mount_clearopts(cr, vfsp); + } else { + goto out; + } + } + + /* + * Refuse to mount a filesystem if we are in a local zone and the + * dataset is not visible. + */ + if (!INGLOBALZONE(curproc) && + (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { + error = EPERM; + goto out; + } + + error = zfs_mount_label_policy(vfsp, osname); + if (error) + goto out; + + /* + * When doing a remount, we simply refresh our temporary properties + * according to those options set in the current VFS options. + */ + if (uap->flags & MS_REMOUNT) { + /* refresh mount options */ + zfs_unregister_callbacks(vfsp->vfs_data); + error = zfs_register_callbacks(vfsp); + goto out; + } + + error = zfs_domount(vfsp, osname); + + /* + * Add an extra VFS_HOLD on our parent vfs so that it can't + * disappear due to a forced unmount. + */ + if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) + VFS_HOLD(mvp->v_vfsp); + +out: + pn_free(&spn); + return (error); +} + +static int +zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) +{ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + dev32_t d32; + uint64_t refdbytes, availbytes, usedobjs, availobjs; + + ZFS_ENTER(zfsvfs); + + dmu_objset_space(zfsvfs->z_os, + &refdbytes, &availbytes, &usedobjs, &availobjs); + + /* + * The underlying storage pool actually uses multiple block sizes. 
+ * We report the fragsize as the smallest block size we support, + * and we report our blocksize as the filesystem's maximum blocksize. + */ + statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; + statp->f_bsize = zfsvfs->z_max_blksz; + + /* + * The following report "total" blocks of various kinds in the + * file system, but reported in terms of f_frsize - the + * "fragment" size. + */ + + statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; + statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; + statp->f_bavail = statp->f_bfree; /* no root reservation */ + + /* + * statvfs() should really be called statufs(), because it assumes + * static metadata. ZFS doesn't preallocate files, so the best + * we can do is report the max that could possibly fit in f_files, + * and that minus the number actually used in f_ffree. + * For f_ffree, report the smaller of the number of object available + * and the number of blocks (each object will take at least a block). + */ + statp->f_ffree = MIN(availobjs, statp->f_bfree); + statp->f_favail = statp->f_ffree; /* no "root reservation" */ + statp->f_files = statp->f_ffree + usedobjs; + + (void) cmpldev(&d32, vfsp->vfs_dev); + statp->f_fsid = d32; + + /* + * We're a zfs filesystem. + */ + (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); + + statp->f_flag = vf_to_stf(vfsp->vfs_flag); + + statp->f_namemax = ZFS_MAXNAMELEN; + + /* + * We have all of 32 characters to stuff a string here. + * Is there anything useful we could/should provide? + */ + bzero(statp->f_fstr, sizeof (statp->f_fstr)); + + ZFS_EXIT(zfsvfs); + return (0); +} + +static int +zfs_root(vfs_t *vfsp, vnode_t **vpp) +{ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + znode_t *rootzp; + int error; + + ZFS_ENTER(zfsvfs); + + error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); + if (error == 0) + *vpp = ZTOV(rootzp); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Teardown the zfsvfs::z_os. 
+ * + * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' + * and 'z_teardown_inactive_lock' held. + */ +static int +zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) +{ + znode_t *zp; + + rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); + + if (!unmounting) { + /* + * We purge the parent filesystem's vfsp as the parent + * filesystem and all of its snapshots have their vnode's + * v_vfsp set to the parent's filesystem's vfsp. Note, + * 'z_parent' is self referential for non-snapshots. + */ + (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); + } + + /* + * Close the zil. NB: Can't close the zil while zfs_inactive + * threads are blocked as zil_close can call zfs_inactive. + */ + if (zfsvfs->z_log) { + zil_close(zfsvfs->z_log); + zfsvfs->z_log = NULL; + } + + rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); + + /* + * If we are not unmounting (ie: online recv) and someone already + * unmounted this file system while we were doing the switcheroo, + * or a reopen of z_os failed then just bail out now. + */ + if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { + rw_exit(&zfsvfs->z_teardown_inactive_lock); + rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + return (EIO); + } + + /* + * At this point there are no vops active, and any new vops will + * fail with EIO since we have z_teardown_lock for writer (only + * relevant for forced unmount). + * + * Release all holds on dbufs. + */ + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; + zp = list_next(&zfsvfs->z_all_znodes, zp)) + if (zp->z_sa_hdl) { + ASSERT(ZTOV(zp)->v_count > 0); + zfs_znode_dmu_fini(zp); + } + mutex_exit(&zfsvfs->z_znodes_lock); + + /* + * If we are unmounting, set the unmounted flag and let new vops + * unblock. zfs_inactive will have the unmounted behavior, and all + * other vops will fail with EIO. 
+ */ + if (unmounting) { + zfsvfs->z_unmounted = B_TRUE; + rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + rw_exit(&zfsvfs->z_teardown_inactive_lock); + } + + /* + * z_os will be NULL if there was an error in attempting to reopen + * zfsvfs, so just return as the properties had already been + * unregistered and cached data had been evicted before. + */ + if (zfsvfs->z_os == NULL) + return (0); + + /* + * Unregister properties. + */ + zfs_unregister_callbacks(zfsvfs); + + /* + * Evict cached data + */ + if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os)) + if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) + txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + (void) dmu_objset_evict_dbufs(zfsvfs->z_os); + + return (0); +} + +/*ARGSUSED*/ +static int +zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) +{ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + objset_t *os; + int ret; + + ret = secpolicy_fs_unmount(cr, vfsp); + if (ret) { + if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), + ZFS_DELEG_PERM_MOUNT, cr)) + return (ret); + } + + /* + * We purge the parent filesystem's vfsp as the parent filesystem + * and all of its snapshots have their vnode's v_vfsp set to the + * parent's filesystem's vfsp. Note, 'z_parent' is self + * referential for non-snapshots. + */ + (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); + + /* + * Unmount any snapshots mounted under .zfs before unmounting the + * dataset itself. + */ + if (zfsvfs->z_ctldir != NULL && + (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { + return (ret); + } + + if (!(fflag & MS_FORCE)) { + /* + * Check the number of active vnodes in the file system. + * Our count is maintained in the vfs structure, but the + * number is off by 1 to indicate a hold on the vfs + * structure itself. + * + * The '.zfs' directory maintains a reference of its + * own, and any active references underneath are + * reflected in the vnode count. 
+ */ + if (zfsvfs->z_ctldir == NULL) { + if (vfsp->vfs_count > 1) + return (EBUSY); + } else { + if (vfsp->vfs_count > 2 || + zfsvfs->z_ctldir->v_count > 1) + return (EBUSY); + } + } + + vfsp->vfs_flag |= VFS_UNMOUNTED; + + VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); + os = zfsvfs->z_os; + + /* + * z_os will be NULL if there was an error in + * attempting to reopen zfsvfs. + */ + if (os != NULL) { + /* + * Unset the objset user_ptr. + */ + mutex_enter(&os->os_user_ptr_lock); + dmu_objset_set_user(os, NULL); + mutex_exit(&os->os_user_ptr_lock); + + /* + * Finally release the objset + */ + dmu_objset_disown(os, zfsvfs); + } + + /* + * We can now safely destroy the '.zfs' directory node. + */ + if (zfsvfs->z_ctldir != NULL) + zfsctl_destroy(zfsvfs); + + return (0); +} + +static int +zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) +{ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + znode_t *zp; + uint64_t object = 0; + uint64_t fid_gen = 0; + uint64_t gen_mask; + uint64_t zp_gen; + int i, err; + + *vpp = NULL; + + ZFS_ENTER(zfsvfs); + + if (fidp->fid_len == LONG_FID_LEN) { + zfid_long_t *zlfid = (zfid_long_t *)fidp; + uint64_t objsetid = 0; + uint64_t setgen = 0; + + for (i = 0; i < sizeof (zlfid->zf_setid); i++) + objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); + + for (i = 0; i < sizeof (zlfid->zf_setgen); i++) + setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); + + ZFS_EXIT(zfsvfs); + + err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); + if (err) + return (EINVAL); + ZFS_ENTER(zfsvfs); + } + + if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { + zfid_short_t *zfid = (zfid_short_t *)fidp; + + for (i = 0; i < sizeof (zfid->zf_object); i++) + object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); + + for (i = 0; i < sizeof (zfid->zf_gen); i++) + fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); + } else { + ZFS_EXIT(zfsvfs); + return (EINVAL); + } + + /* A zero fid_gen means we are in the .zfs control directories */ + if (fid_gen == 0 && + 
(object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { + *vpp = zfsvfs->z_ctldir; + ASSERT(*vpp != NULL); + if (object == ZFSCTL_INO_SNAPDIR) { + VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, + 0, NULL, NULL, NULL, NULL, NULL) == 0); + } else { + VN_HOLD(*vpp); + } + ZFS_EXIT(zfsvfs); + return (0); + } + + gen_mask = -1ULL >> (64 - 8 * i); + + dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); + if (err = zfs_zget(zfsvfs, object, &zp)) { + ZFS_EXIT(zfsvfs); + return (err); + } + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, + sizeof (uint64_t)); + zp_gen = zp_gen & gen_mask; + if (zp_gen == 0) + zp_gen = 1; + if (zp->z_unlinked || zp_gen != fid_gen) { + dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); + VN_RELE(ZTOV(zp)); + ZFS_EXIT(zfsvfs); + return (EINVAL); + } + + *vpp = ZTOV(zp); + ZFS_EXIT(zfsvfs); + return (0); +} + +/* + * Block out VOPs and close zfsvfs_t::z_os + * + * Note, if successful, then we return with the 'z_teardown_lock' and + * 'z_teardown_inactive_lock' write held. + */ +int +zfs_suspend_fs(zfsvfs_t *zfsvfs) +{ + int error; + + if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) + return (error); + dmu_objset_disown(zfsvfs->z_os, zfsvfs); + + return (0); +} + +/* + * Reopen zfsvfs_t::z_os and release VOPs. 
+ */ +int +zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) +{ + int err; + + ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); + ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); + + err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, + &zfsvfs->z_os); + if (err) { + zfsvfs->z_os = NULL; + } else { + znode_t *zp; + uint64_t sa_obj = 0; + + /* + * Make sure version hasn't changed + */ + + err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION, + &zfsvfs->z_version); + + if (err) + goto bail; + + err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj); + + if (err && zfsvfs->z_version >= ZPL_VERSION_SA) + goto bail; + + if ((err = sa_setup(zfsvfs->z_os, sa_obj, + zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) + goto bail; + + if (zfsvfs->z_version >= ZPL_VERSION_SA) + sa_register_update_callback(zfsvfs->z_os, + zfs_sa_upgrade); + + VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); + + zfs_set_fuid_feature(zfsvfs); + + /* + * Attempt to re-establish all the active znodes with + * their dbufs. If a zfs_rezget() fails, then we'll let + * any potential callers discover that via ZFS_ENTER_VERIFY_VP + * when they try to use their znode. + */ + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp; + zp = list_next(&zfsvfs->z_all_znodes, zp)) { + (void) zfs_rezget(zp); + } + mutex_exit(&zfsvfs->z_znodes_lock); + } + +bail: + /* release the VOPs */ + rw_exit(&zfsvfs->z_teardown_inactive_lock); + rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + + if (err) { + /* + * Since we couldn't reopen zfsvfs::z_os, or + * setup the sa framework force unmount this file system. + */ + if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) + (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED()); + } + return (err); +} + +static void +zfs_freevfs(vfs_t *vfsp) +{ + zfsvfs_t *zfsvfs = vfsp->vfs_data; + + /* + * If this is a snapshot, we have an extra VFS_HOLD on our parent + * from zfs_mount(). Release it here. 
If we came through + * zfs_mountroot() instead, we didn't grab an extra hold, so + * skip the VFS_RELE for rootvfs. + */ + if (zfsvfs->z_issnap && (vfsp != rootvfs)) + VFS_RELE(zfsvfs->z_parent->z_vfs); + + zfsvfs_free(zfsvfs); + + atomic_add_32(&zfs_active_fs_count, -1); +} + +/* + * VFS_INIT() initialization. Note that there is no VFS_FINI(), + * so we can't safely do any non-idempotent initialization here. + * Leave that to zfs_init() and zfs_fini(), which are called + * from the module's _init() and _fini() entry points. + */ +/*ARGSUSED*/ +static int +zfs_vfsinit(int fstype, char *name) +{ + int error; + + zfsfstype = fstype; + + /* + * Setup vfsops and vnodeops tables. + */ + error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); + if (error != 0) { + cmn_err(CE_WARN, "zfs: bad vfs ops template"); + } + + error = zfs_create_op_tables(); + if (error) { + zfs_remove_op_tables(); + cmn_err(CE_WARN, "zfs: bad vnode ops template"); + (void) vfs_freevfsops_by_type(zfsfstype); + return (error); + } + + mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); + + /* + * Unique major number for all zfs mounts. + * If we run out of 32-bit minors, we'll getudev() another major. + */ + zfs_major = ddi_name_to_major(ZFS_DRIVER); + zfs_minor = ZFS_MIN_MINOR; + + return (0); +} + +void +zfs_init(void) +{ + /* + * Initialize .zfs directory structures + */ + zfsctl_init(); + + /* + * Initialize znode cache, vnode ops, etc... 
+ */ + zfs_znode_init(); + + dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); +} + +void +zfs_fini(void) +{ + zfsctl_fini(); + zfs_znode_fini(); +} + +int +zfs_busy(void) +{ + return (zfs_active_fs_count != 0); +} + +int +zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) +{ + int error; + objset_t *os = zfsvfs->z_os; + dmu_tx_t *tx; + + if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) + return (EINVAL); + + if (newvers < zfsvfs->z_version) + return (EINVAL); + + if (zfs_spa_version_map(newvers) > + spa_version(dmu_objset_spa(zfsvfs->z_os))) + return (ENOTSUP); + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); + if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, + ZFS_SA_ATTRS); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + } + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, + 8, 1, &newvers, tx); + + if (error) { + dmu_tx_commit(tx); + return (error); + } + + if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + uint64_t sa_obj; + + ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, + SPA_VERSION_SA); + sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + + error = zap_add(os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT3U(error, ==, 0); + + VERIFY(0 == sa_set_sa_object(os, sa_obj)); + sa_register_update_callback(os, zfs_sa_upgrade); + } + + spa_history_log_internal(LOG_DS_UPGRADE, + dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", + zfsvfs->z_version, newvers, dmu_objset_id(os)); + + dmu_tx_commit(tx); + + zfsvfs->z_version = newvers; + + zfs_set_fuid_feature(zfsvfs); + + return (0); +} + +/* + * Read a property stored within the master node. 
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+	const char *pname;
+	int error = ENOENT;	/* stays ENOENT when os is NULL */
+
+	/*
+	 * Look up the file system's value for the property.  For the
+	 * version property, we look up a slightly different string.
+	 */
+	if (prop == ZFS_PROP_VERSION)
+		pname = ZPL_VERSION_STR;
+	else
+		pname = zfs_prop_to_name(prop);
+
+	if (os != NULL)
+		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+
+	if (error == ENOENT) {
+		/*
+		 * No value set, use the default value.  Properties with
+		 * no default listed here return ENOENT to the caller.
+		 */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			*value = ZPL_VERSION;
+			break;
+		case ZFS_PROP_NORMALIZE:
+		case ZFS_PROP_UTF8ONLY:
+			*value = 0;
+			break;
+		case ZFS_PROP_CASE:
+			*value = ZFS_CASE_SENSITIVE;
+			break;
+		default:
+			return (error);
+		}
+		error = 0;	/* a default was supplied */
+	}
+	return (error);
+}
+
+static vfsdef_t vfw = {
+	VFSDEF_VERSION,
+	MNTTYPE_ZFS,
+	zfs_vfsinit,
+	VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
+	    VSW_XID|VSW_ZMOUNT,
+	&zfs_mntopts
+};
+
+struct modlfs zfs_modlfs = {
+	&mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
+};
diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c
new file mode 100644
index 000000000000..a0720079cf46
--- /dev/null
+++ b/uts/common/fs/zfs/zfs_vnops.c
@@ -0,0 +1,5243 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2007 Jeremy Teo */ +/* Portions Copyright 2010 Robert Milkowski */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/resource.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/kmem.h> +#include <sys/taskq.h> +#include <sys/uio.h> +#include <sys/vmsystm.h> +#include <sys/atomic.h> +#include <sys/vm.h> +#include <vm/seg_vn.h> +#include <vm/pvn.h> +#include <vm/as.h> +#include <vm/kpm.h> +#include <vm/seg_kpm.h> +#include <sys/mman.h> +#include <sys/pathname.h> +#include <sys/cmn_err.h> +#include <sys/errno.h> +#include <sys/unistd.h> +#include <sys/zfs_dir.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/fs/zfs.h> +#include <sys/dmu.h> +#include <sys/dmu_objset.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/dbuf.h> +#include <sys/zap.h> +#include <sys/sa.h> +#include <sys/dirent.h> +#include <sys/policy.h> +#include <sys/sunddi.h> +#include <sys/filio.h> +#include <sys/sid.h> +#include "fs/fs_subr.h" +#include <sys/zfs_ctldir.h> +#include <sys/zfs_fuid.h> +#include <sys/zfs_sa.h> +#include <sys/dnlc.h> +#include <sys/zfs_rlock.h> +#include <sys/extdirent.h> +#include <sys/kidmap.h> +#include <sys/cred.h> +#include <sys/attr.h> + +/* + * Programming rules. + * + * Each vnode op performs some logical unit of work. 
To do this, the ZPL must + * properly lock its in-core state, create a DMU transaction, do the work, + * record this work in the intent log (ZIL), commit the DMU transaction, + * and wait for the intent log to commit if it is a synchronous operation. + * Moreover, the vnode ops must work in both normal and log replay context. + * The ordering of events is important to avoid deadlocks and references + * to freed memory. The example below illustrates the following Big Rules: + * + * (1) A check must be made in each zfs thread for a mounted file system. + * This is done avoiding races using ZFS_ENTER(zfsvfs). + * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes + * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros + * can return EIO from the calling function. + * + * (2) VN_RELE() should always be the last thing except for zil_commit() + * (if necessary) and ZFS_EXIT(). This is for 3 reasons: + * First, if it's the last reference, the vnode/znode + * can be freed, so the zp may point to freed memory. Second, the last + * reference will call zfs_zinactive(), which may induce a lot of work -- + * pushing cached pages (which acquires range locks) and syncing out + * cached atime changes. Third, zfs_zinactive() may require a new tx, + * which could deadlock the system if you were already holding one. + * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). + * + * (3) All range locks must be grabbed before calling dmu_tx_assign(), + * as they can span dmu_tx_assign() calls. + * + * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). + * This is critical because we don't want to block while holding locks. + * Note, in particular, that if a lock is sometimes acquired before + * the tx assigns, and sometimes after (e.g. z_lock), then failing to + * use a non-blocking assign can deadlock the system. The scenario: + * + * Thread A has grabbed a lock before calling dmu_tx_assign(). 
+ * Thread B is in an already-assigned tx, and blocks for this lock. + * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() + * forever, because the previous txg can't quiesce until B's tx commits. + * + * If dmu_tx_assign(tx, TXG_NOWAIT) returns ERESTART, then drop all + * locks, call dmu_tx_wait(), abort the tx, and try again. + * + * (5) If the operation succeeded, generate the intent log entry for it + * before dropping locks. This ensures that the ordering of events + * in the intent log matches the order in which they actually occurred. + * During ZIL replay the zfs_log_* functions will update the sequence + * number to indicate the zil transaction has replayed. + * + * (6) At the end of each vnode op, the DMU tx must always commit, + * regardless of whether there were any errors. + * + * (7) After dropping all locks, invoke zil_commit(zilog, foid) + * to ensure that synchronous semantics are provided when necessary. + * + * In general, this is how things should be ordered in each vnode op: + * + * ZFS_ENTER(zfsvfs); // exit if unmounted + * top: + * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD())
+ *	rw_enter(...);			// grab any other locks you need
+ *	tx = dmu_tx_create(...);	// get DMU tx
+ *	dmu_tx_hold_*();		// hold each object you might modify
+ *	error = dmu_tx_assign(tx, TXG_NOWAIT);	// try to assign
+ *	if (error) {
+ *		rw_exit(...);		// drop locks
+ *		zfs_dirent_unlock(dl);	// unlock directory entry
+ *		VN_RELE(...);		// release held vnodes
+ *		if (error == ERESTART) {
+ *			dmu_tx_wait(tx);
+ *			dmu_tx_abort(tx);
+ *			goto top;
+ *		}
+ *		dmu_tx_abort(tx);	// abort DMU tx
+ *		ZFS_EXIT(zfsvfs);	// finished in zfs
+ *		return (error);		// really out of space
+ *	}
+ *	error = do_real_work();		// do whatever this VOP does
+ *	if (error == 0)
+ *		zfs_log_*(...);		// on success, make ZIL entry
+ *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
+ *	rw_exit(...);			// drop locks
+ *	zfs_dirent_unlock(dl);		// unlock directory entry
+ *	VN_RELE(...);			// release held vnodes
+ *	zil_commit(zilog, foid);	// synchronous when necessary
+ *	ZFS_EXIT(zfsvfs);		// finished in zfs
+ *	return (error);			// done, report error
+ */
+
+/* ARGSUSED */
+static int
+zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+	znode_t	*zp = VTOZ(*vpp);	/* znode of the vnode being opened */
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
+	    ((flag & FAPPEND) == 0)) {
+		ZFS_EXIT(zfsvfs);
+		return (EPERM);	/* append-only file, write without FAPPEND */
+	}
+
+	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
+	    ZTOV(zp)->v_type == VREG &&
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
+		if (fs_vscan(*vpp, cr, 0) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (EACCES);	/* fs_vscan() reported failure */
+		}
+	}
+
+	/*
+	 * Keep a count of the synchronous opens in the znode; zfs_close()
+	 * decrements it again.
+	 */
+	if (flag & (FSYNC | FDSYNC))
+		atomic_inc_32(&zp->z_sync_cnt);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+    caller_context_t *ct)
+{
+	znode_t	*zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	/*
+	 * Clean up any locks held by this process on the vp.  This is
+	 * done before ZFS_ENTER(), so it happens even if that macro
+	 * returns early.
+	 */
+	cleanlocks(vp, ddi_get_pid(), 0);
+	cleanshares(vp, ddi_get_pid());
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/*
+	 * Decrement the synchronous opens in the znode, but only when
+	 * count == 1.
+	 */
+	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
+		atomic_dec_32(&zp->z_sync_cnt);
+
+	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
+	    ZTOV(zp)->v_type == VREG &&
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
+		VERIFY(fs_vscan(vp, cr, 1) == 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
+ * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
+ *
+ * Returns ENXIO if the starting offset is at or beyond the file size, or
+ * if a data seek runs off the end of the file.  A hole seek that runs off
+ * the end reports the file size.  Otherwise the result of
+ * dmu_offset_next() is returned; *off is updated unless the new offset
+ * is lower than the offset passed in.
+ */
+static int
+zfs_holey(vnode_t *vp, int cmd, offset_t *off)
+{
+	znode_t	*zp = VTOZ(vp);
+	uint64_t noff = (uint64_t)*off; /* new offset */
+	uint64_t file_sz;
+	int error;
+	boolean_t hole;
+
+	file_sz = zp->z_size;
+	if (noff >= file_sz)  {
+		return (ENXIO);
+	}
+
+	if (cmd == _FIO_SEEK_HOLE)
+		hole = B_TRUE;
+	else
+		hole = B_FALSE;
+
+	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);
+
+	/* end of file? */
+	if ((error == ESRCH) || (noff > file_sz)) {
+		/*
+		 * Handle the virtual hole at the end of file.
+		 */
+		if (hole) {
+			*off = file_sz;
+			return (0);
+		}
+		return (ENXIO);
+	}
+
+	if (noff < *off)
+		return (error);
+	*off = noff;
+	return (error);
+}
+
+/* ARGSUSED */
+static int
+zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
+    int *rvalp, caller_context_t *ct)
+{
+	offset_t off;	/* in/out offset for the seek ioctls */
+	int error;
+	zfsvfs_t *zfsvfs;
+	znode_t *zp;
+
+	switch (com) {
+	case _FIOFFS:
+		return (zfs_sync(vp->v_vfsp, 0, cred));
+
+		/*
+		 * The following two ioctls are used by bfu.  Faking out,
+		 * necessary to avoid bfu errors.
+		 */
+	case _FIOGDIO:
+	case _FIOSDIO:
+		return (0);
+
+	case _FIO_SEEK_DATA:
+	case _FIO_SEEK_HOLE:
+		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
+			return (EFAULT);
+
+		zp = VTOZ(vp);
+		zfsvfs = zp->z_zfsvfs;
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
+
+		/* offset parameter is in/out */
+		error = zfs_holey(vp, com, &off);
+		ZFS_EXIT(zfsvfs);
+		if (error)
+			return (error);
+		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
+			return (EFAULT);
+		return (0);
+	}
+	return (ENOTTY);	/* any other ioctl is unsupported here */
+}
+
+/*
+ * Utility functions to map and unmap a single physical page.  These
+ * are used to manage the mappable copies of ZFS file data, and therefore
+ * do not update ref/mod bits.
+ *
+ * When kpm_enable is set the page is mapped through hat_kpm_mapin() and
+ * "rw" is ignored; otherwise ppmapin() is used with PROT_WRITE added
+ * only for S_WRITE.
+ */
+caddr_t
+zfs_map_page(page_t *pp, enum seg_rw rw)
+{
+	if (kpm_enable)
+		return (hat_kpm_mapin(pp, 0));
+	ASSERT(rw == S_READ || rw == S_WRITE);
+	return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0),
+	    (caddr_t)-1));
+}
+
+void
+zfs_unmap_page(page_t *pp, caddr_t addr)
+{
+	if (kpm_enable) {
+		hat_kpm_mapout(pp, 0, addr);
+	} else {
+		ppmapout(addr);
+	}
+}
+
+/*
+ * When a file is memory mapped, we must keep the IO data synchronized
+ * between the DMU cache and the memory mapped pages.  What this means:
+ *
+ * On Write:	If we find a memory mapped page, we write to *both*
+ *		the page and the dmu buffer.
+ *
+ * In this function the page copy is refreshed by reading the affected
+ * byte range back from the DMU with dmu_read().  The range
+ * [start, start + len) is walked one page at a time; pages that
+ * page_lookup() does not find are skipped.
+ */
+static void
+update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid)
+{
+	int64_t	off;	/* offset within the current page */
+
+	off = start & PAGEOFFSET;
+	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
+		page_t *pp;
+		uint64_t nbytes = MIN(PAGESIZE - off, len);
+
+		if (pp = page_lookup(vp, start, SE_SHARED)) {
+			caddr_t va;
+
+			va = zfs_map_page(pp, S_WRITE);
+			(void) dmu_read(os, oid, start+off, nbytes, va+off,
+			    DMU_READ_PREFETCH);
+			zfs_unmap_page(pp, va);
+			page_unlock(pp);
+		}
+		len -= nbytes;
+		off = 0;	/* only the first page can start mid-page */
+	}
+}
+
+/*
+ * When a file is memory mapped, we must keep the IO data synchronized
+ * between the DMU cache and the memory mapped pages.
What this means:
+ *
+ * On Read:	We "read" preferentially from memory mapped pages,
+ *		else we default from the dmu buffer.
+ *
+ * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
+ *	the file is memory mapped.
+ *
+ * Copies up to nbytes starting at uio->uio_loffset and returns 0 or the
+ * first error from uiomove()/dmu_read_uio(); the loop stops at the first
+ * error.
+ */
+static int
+mappedread(vnode_t *vp, int nbytes, uio_t *uio)
+{
+	znode_t *zp = VTOZ(vp);
+	objset_t *os = zp->z_zfsvfs->z_os;
+	int64_t	start, off;	/* page-aligned start, offset in page */
+	int len = nbytes;	/* bytes still to copy */
+	int error = 0;
+
+	start = uio->uio_loffset;
+	off = start & PAGEOFFSET;
+	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
+		page_t *pp;
+		uint64_t bytes = MIN(PAGESIZE - off, len);
+
+		if (pp = page_lookup(vp, start, SE_SHARED)) {
+			caddr_t va;
+
+			va = zfs_map_page(pp, S_READ);
+			error = uiomove(va + off, bytes, UIO_READ, uio);
+			zfs_unmap_page(pp, va);
+			page_unlock(pp);
+		} else {
+			error = dmu_read_uio(os, zp->z_id, uio, bytes);
+		}
+		len -= bytes;
+		off = 0;	/* later pages are read from their start */
+		if (error)
+			break;
+	}
+	return (error);
+}
+
+offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
+
+/*
+ * Read bytes from specified file into supplied buffer.
+ *
+ *	IN:	vp	- vnode of file to be read from.
+ *		uio	- structure supplying read location, range info,
+ *			  and return buffer.
+ *		ioflag	- SYNC flags; used to provide FRSYNC semantics.
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *
+ *	OUT:	uio	- updated offset and range, buffer filled.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Side Effects:
+ *	vp - atime updated if byte count > 0
+ */
+/* ARGSUSED */
+static int
+zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
+{
+	znode_t		*zp = VTOZ(vp);
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	objset_t	*os;
+	ssize_t		n, nbytes;	/* bytes left / bytes this chunk */
+	int		error;
+	rl_t		*rl;
+	xuio_t		*xuio = NULL;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	os = zfsvfs->z_os;
+
+	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
+		ZFS_EXIT(zfsvfs);
+		return (EACCES);
+	}
+
+	/*
+	 * Validate file offset
+	 */
+	if (uio->uio_loffset < (offset_t)0) {
+		ZFS_EXIT(zfsvfs);
+		return (EINVAL);
+	}
+
+	/*
+	 * Fasttrack empty reads
+	 */
+	if (uio->uio_resid == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/*
+	 * Check for mandatory locks
+	 */
+	if (MANDMODE(zp->z_mode)) {
+		if (error = chklock(vp, FREAD,
+		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	/*
+	 * If we're in FRSYNC mode, sync out this znode before reading it.
+	 * The same is done when the objset's os_sync is ZFS_SYNC_ALWAYS.
+	 */
+	if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zfsvfs->z_log, zp->z_id);
+
+	/*
+	 * Lock the range against changes.
+	 */
+	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
+
+	/*
+	 * If we are reading past end-of-file we can skip
+	 * to the end; but we might still need to set atime.
+	 */
+	if (uio->uio_loffset >= zp->z_size) {
+		error = 0;
+		goto out;
+	}
+
+	ASSERT(uio->uio_loffset < zp->z_size);
+	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
+
+	if ((uio->uio_extflg == UIO_XUIO) &&
+	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
+		int nblk;	/* number of blocks the read touches */
+		int blksz = zp->z_blksz;
+		uint64_t offset = uio->uio_loffset;
+
+		xuio = (xuio_t *)uio;
+		if ((ISP2(blksz))) {
+			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
+			    blksz)) / blksz;
+		} else {
+			ASSERT(offset + n <= blksz);
+			nblk = 1;
+		}
+		(void) dmu_xuio_init(xuio, nblk);
+
+		if (vn_has_cached_data(vp)) {
+			/*
+			 * For simplicity, we always allocate a full buffer
+			 * even if we only expect to read a portion of a block.
+			 */
+			while (--nblk >= 0) {
+				(void) dmu_xuio_add(xuio,
+				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+				    blksz), 0, blksz);
+			}
+		}
+	}
+
+	while (n > 0) {
+		nbytes = MIN(n, zfs_read_chunk_size -
+		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
+
+		if (vn_has_cached_data(vp))
+			error = mappedread(vp, nbytes, uio);
+		else
+			error = dmu_read_uio(os, zp->z_id, uio, nbytes);
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = EIO;
+			break;
+		}
+
+		n -= nbytes;
+	}
+out:
+	zfs_range_unlock(rl);
+
+	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);	/* reached on every path via out: */
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Write the bytes to a file.
+ *
+ *	IN:	vp	- vnode of file to be written to.
+ *		uio	- structure supplying write location, range info,
+ *			  and data buffer.
+ *		ioflag	- FAPPEND flag set if in append mode.
+ *		cr	- credentials of caller.
+ *		ct	- caller context (NFS/CIFS fem monitor only)
+ *
+ *	OUT:	uio	- updated offset and range.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	vp - ctime|mtime updated if byte count > 0
+ */
+
+/* ARGSUSED */
+static int
+zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
+{
+	znode_t		*zp = VTOZ(vp);
+	rlim64_t	limit = uio->uio_llimit;
+	ssize_t		start_resid = uio->uio_resid;
+	ssize_t		tx_bytes;	/* bytes written in the current tx */
+	uint64_t	end_size;
+	dmu_tx_t	*tx;
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	zilog_t		*zilog;
+	offset_t	woff;		/* offset of the current chunk */
+	ssize_t		n, nbytes;	/* bytes left / bytes this chunk */
+	rl_t		*rl;
+	int		max_blksz = zfsvfs->z_max_blksz;
+	int		error;
+	arc_buf_t	*abuf;
+	iovec_t		*aiov;
+	xuio_t		*xuio = NULL;
+	int		i_iov = 0;
+	int		iovcnt = uio->uio_iovcnt;
+	iovec_t		*iovp = uio->uio_iov;
+	int		write_eof;
+	int		count = 0;
+	sa_bulk_attr_t	bulk[4];
+	uint64_t	mtime[2], ctime[2];
+
+	/*
+	 * Fasttrack empty write
+	 */
+	n = start_resid;
+	if (n == 0)
+		return (0);
+
+	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
+		limit = MAXOFFSET_T;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+
+	/*
+	 * If immutable or not appending then return EPERM
+	 */
+	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
+	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
+	    (uio->uio_loffset < zp->z_size))) {
+		ZFS_EXIT(zfsvfs);
+		return (EPERM);
+	}
+
+	zilog = zfsvfs->z_log;
+
+	/*
+	 * Validate file offset
+	 */
+	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
+	if (woff < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (EINVAL);
+	}
+
+	/*
+	 * Check for mandatory locks before calling zfs_range_lock()
+	 * in order to prevent a deadlock with locks set via fcntl().
+	 */
+	if (MANDMODE((mode_t)zp->z_mode) &&
+	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Pre-fault the pages to ensure slow (eg NFS) pages
+	 * don't hold up txg.
+	 * Skip this if uio contains loaned arc_buf.
+	 */
+	if ((uio->uio_extflg == UIO_XUIO) &&
+	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
+		xuio = (xuio_t *)uio;
+	else
+		uio_prefaultpages(MIN(n, max_blksz), uio);
+
+	/*
+	 * If in append mode, set the io offset pointer to eof.
+	 */
+	if (ioflag & FAPPEND) {
+		/*
+		 * Obtain an appending range lock to guarantee file append
+		 * semantics.  We reset the write offset once we have the lock.
+		 */
+		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
+		woff = rl->r_off;
+		if (rl->r_len == UINT64_MAX) {
+			/*
+			 * We overlocked the file because this write will cause
+			 * the file block size to increase.
+			 * Note that zp_size cannot change with this lock held.
+			 */
+			woff = zp->z_size;
+		}
+		uio->uio_loffset = woff;
+	} else {
+		/*
+		 * Note that if the file block size will change as a result of
+		 * this write, then this range lock will lock the entire file
+		 * so that we can re-write the block safely.
+		 */
+		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
+	}
+
+	if (woff >= limit) {
+		zfs_range_unlock(rl);
+		ZFS_EXIT(zfsvfs);
+		return (EFBIG);
+	}
+
+	if ((woff + n) > limit || woff > (limit - n))
+		n = limit - woff;	/* clamp the write to the limit */
+
+	/* Will this write extend the file length? */
+	write_eof = (woff + n > zp->z_size);
+
+	end_size = MAX(zp->z_size, woff + n);
+
+	/*
+	 * Write the file in reasonable size chunks.  Each chunk is written
+	 * in a separate transaction; this keeps the intent log records small
+	 * and allows us to do more fine-grained space accounting.
+	 */
+	while (n > 0) {
+		abuf = NULL;
+		woff = uio->uio_loffset;
+again:
+		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
+		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
+			if (abuf != NULL)
+				dmu_return_arcbuf(abuf);
+			error = EDQUOT;
+			break;
+		}
+
+		if (xuio && abuf == NULL) {
+			ASSERT(i_iov < iovcnt);
+			aiov = &iovp[i_iov];
+			abuf = dmu_xuio_arcbuf(xuio, i_iov);
+			dmu_xuio_clear(xuio, i_iov);
+			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
+			    iovec_t *, aiov, arc_buf_t *, abuf);
+			ASSERT((aiov->iov_base == abuf->b_data) ||
+			    ((char *)aiov->iov_base - (char *)abuf->b_data +
+			    aiov->iov_len == arc_buf_size(abuf)));
+			i_iov++;
+		} else if (abuf == NULL && n >= max_blksz &&
+		    woff >= zp->z_size &&
+		    P2PHASE(woff, max_blksz) == 0 &&
+		    zp->z_blksz == max_blksz) {
+			/*
+			 * This write covers a full block.  "Borrow" a buffer
+			 * from the dmu so that we can fill it before we enter
+			 * a transaction.  This avoids the possibility of
+			 * holding up the transaction if the data copy hangs
+			 * up on a pagefault (e.g., from an NFS server mapping).
+			 */
+			size_t cbytes;
+
+			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+			    max_blksz);
+			ASSERT(abuf != NULL);
+			ASSERT(arc_buf_size(abuf) == max_blksz);
+			if (error = uiocopy(abuf->b_data, max_blksz,
+			    UIO_WRITE, uio, &cbytes)) {
+				dmu_return_arcbuf(abuf);
+				break;
+			}
+			ASSERT(cbytes == max_blksz);
+		}
+
+		/*
+		 * Start a transaction.  On ERESTART we wait, abort and
+		 * retry from "again", keeping any abuf already obtained.
+		 */
+		tx = dmu_tx_create(zfsvfs->z_os);
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
+		zfs_sa_upgrade_txholds(tx, zp);
+		error = dmu_tx_assign(tx, TXG_NOWAIT);
+		if (error) {
+			if (error == ERESTART) {
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
+				goto again;
+			}
+			dmu_tx_abort(tx);
+			if (abuf != NULL)
+				dmu_return_arcbuf(abuf);
+			break;
+		}
+
+		/*
+		 * If zfs_range_lock() over-locked we grow the blocksize
+		 * and then reduce the lock range.  This will only happen
+		 * on the first iteration since zfs_range_reduce() will
+		 * shrink down r_len to the appropriate size.
+		 */
+		if (rl->r_len == UINT64_MAX) {
+			uint64_t new_blksz;
+
+			if (zp->z_blksz > max_blksz) {
+				ASSERT(!ISP2(zp->z_blksz));
+				new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
+			} else {
+				new_blksz = MIN(end_size, max_blksz);
+			}
+			zfs_grow_blocksize(zp, new_blksz, tx);
+			zfs_range_reduce(rl, woff, n);
+		}
+
+		/*
+		 * XXX - should we really limit each write to z_max_blksz?
+		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
+		 */
+		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
+
+		if (abuf == NULL) {
+			tx_bytes = uio->uio_resid;
+			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
+			    uio, nbytes, tx);
+			tx_bytes -= uio->uio_resid;
+		} else {
+			tx_bytes = nbytes;
+			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
+			/*
+			 * If this is not a full block write, but we are
+			 * extending the file past EOF and this data starts
+			 * block-aligned, use assign_arcbuf().  Otherwise,
+			 * write via dmu_write().
+			 */
+			if (tx_bytes < max_blksz && (!write_eof ||
+			    aiov->iov_base != abuf->b_data)) {
+				ASSERT(xuio);
+				dmu_write(zfsvfs->z_os, zp->z_id, woff,
+				    aiov->iov_len, aiov->iov_base, tx);
+				dmu_return_arcbuf(abuf);
+				xuio_stat_wbuf_copied();
+			} else {
+				ASSERT(xuio || tx_bytes == max_blksz);
+				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
+				    woff, abuf, tx);
+			}
+			ASSERT(tx_bytes <= uio->uio_resid);
+			uioskip(uio, tx_bytes);
+		}
+		if (tx_bytes && vn_has_cached_data(vp)) {
+			update_pages(vp, woff,
+			    tx_bytes, zfsvfs->z_os, zp->z_id);
+		}
+
+		/*
+		 * If we made no progress, we're done.  If we made even
+		 * partial progress, update the znode and ZIL accordingly.
+		 */
+		if (tx_bytes == 0) {
+			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
+			    (void *)&zp->z_size, sizeof (uint64_t), tx);
+			dmu_tx_commit(tx);
+			ASSERT(error != 0);
+			break;
+		}
+
+		/*
+		 * Clear Set-UID/Set-GID bits on successful write if not
+		 * privileged and at least one of the execute bits is set.
+		 *
+		 * It would be nice to do this after all writes have
+		 * been done, but that would still expose the ISUID/ISGID
+		 * to another app after the partial write is committed.
+		 *
+		 * Note: we don't call zfs_fuid_map_id() here because
+		 * user 0 is not an ephemeral uid.
+		 */
+		mutex_enter(&zp->z_acl_lock);
+		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
+		    (S_IXUSR >> 6))) != 0 &&
+		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
+		    secpolicy_vnode_setid_retain(cr,
+		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
+			uint64_t newmode;
+			zp->z_mode &= ~(S_ISUID | S_ISGID);
+			newmode = zp->z_mode;
+			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
+			    (void *)&newmode, sizeof (uint64_t), tx);
+		}
+		mutex_exit(&zp->z_acl_lock);
+
+		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
+		    B_TRUE);
+
+		/*
+		 * Update the file size (zp_size) if it has changed;
+		 * account for possible concurrent updates.
+		 */
+		while ((end_size = zp->z_size) < uio->uio_loffset) {
+			(void) atomic_cas_64(&zp->z_size, end_size,
+			    uio->uio_loffset);
+			ASSERT(error == 0);
+		}
+		/*
+		 * If we are replaying and eof is non zero then force
+		 * the file size to the specified eof. Note, there's no
+		 * concurrency during replay.
+		 */
+		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
+			zp->z_size = zfsvfs->z_replay_eof;
+
+		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+
+		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
+		dmu_tx_commit(tx);
+
+		if (error != 0)
+			break;
+		ASSERT(tx_bytes == nbytes);
+		n -= nbytes;
+
+		if (!xuio && n > 0)
+			uio_prefaultpages(MIN(n, max_blksz), uio);
+	}
+
+	zfs_range_unlock(rl);
+
+	/*
+	 * If we're in replay mode, or we made no progress, return error.
+	 * Otherwise, it's at least a partial write, so it's successful.
+	 */
+	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (ioflag & (FSYNC | FDSYNC) ||
+	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, zp->z_id);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+void
+zfs_get_done(zgd_t *zgd, int error)
+{
+	znode_t *zp = zgd->zgd_private;	/* znode held by zfs_get_data() */
+	objset_t *os = zp->z_zfsvfs->z_os;
+
+	if (zgd->zgd_db)
+		dmu_buf_rele(zgd->zgd_db, zgd);
+
+	zfs_range_unlock(zgd->zgd_rl);
+
+	/*
+	 * Release the vnode asynchronously as we currently have the
+	 * txg stopped from syncing.
+	 */
+	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
+
+	if (error == 0 && zgd->zgd_bp)
+		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
+
+	kmem_free(zgd, sizeof (zgd_t));
+}
+
+#ifdef DEBUG
+static int zil_fault_io = 0;	/* non-zero: fail the next indirect write */
+#endif
+
+/*
+ * Get data to generate a TX_WRITE intent log record.
+ */
+int
+zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
+{
+	zfsvfs_t *zfsvfs = arg;
+	objset_t *os = zfsvfs->z_os;
+	znode_t *zp;
+	uint64_t object = lr->lr_foid;
+	uint64_t offset = lr->lr_offset;
+	uint64_t size = lr->lr_length;
+	blkptr_t *bp = &lr->lr_blkptr;
+	dmu_buf_t *db;
+	zgd_t *zgd;
+	int error = 0;
+
+	ASSERT(zio != NULL);
+	ASSERT(size != 0);
+
+	/*
+	 * Nothing to do if the file has been removed
+	 */
+	if (zfs_zget(zfsvfs, object, &zp) != 0)
+		return (ENOENT);
+	if (zp->z_unlinked) {
+		/*
+		 * Release the vnode asynchronously as we currently have the
+		 * txg stopped from syncing.
+		 */
+		VN_RELE_ASYNC(ZTOV(zp),
+		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
+		return (ENOENT);
+	}
+
+	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
+	zgd->zgd_zilog = zfsvfs->z_log;
+	zgd->zgd_private = zp;	/* zfs_get_done() releases this znode */
+
+	/*
+	 * Write records come in two flavors: immediate and indirect.
+	 * For small writes it's cheaper to store the data with the
+	 * log record (immediate); for large writes it's cheaper to
+	 * sync the data and get a pointer to it (indirect) so that
+	 * we don't have to write the data twice.
+	 */
+	if (buf != NULL) { /* immediate write */
+		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
+		/* test for truncation needs to be done while range locked */
+		if (offset >= zp->z_size) {
+			error = ENOENT;
+		} else {
+			error = dmu_read(os, object, offset, size, buf,
+			    DMU_READ_NO_PREFETCH);
+		}
+		ASSERT(error == 0 || error == ENOENT);
+	} else { /* indirect write */
+		/*
+		 * Have to lock the whole block to ensure when it's
+		 * written out and it's checksum is being calculated
+		 * that no one can change the data. We need to re-check
+		 * blocksize after we get the lock in case it's changed!
+		 */
+		for (;;) {
+			uint64_t blkoff;
+			size = zp->z_blksz;
+			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
+			offset -= blkoff;
+			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
+			    RL_READER);
+			if (zp->z_blksz == size)
+				break;
+			offset += blkoff;
+			zfs_range_unlock(zgd->zgd_rl);
+		}
+		/* test for truncation needs to be done while range locked */
+		if (lr->lr_offset >= zp->z_size)
+			error = ENOENT;
+#ifdef DEBUG
+		if (zil_fault_io) {
+			error = EIO;
+			zil_fault_io = 0;
+		}
+#endif
+		if (error == 0)
+			error = dmu_buf_hold(os, object, offset, zgd, &db,
+			    DMU_READ_NO_PREFETCH);
+
+		if (error == 0) {
+			zgd->zgd_db = db;
+			zgd->zgd_bp = bp;
+
+			ASSERT(db->db_offset == offset);
+			ASSERT(db->db_size == size);
+
+			error = dmu_sync(zio, lr->lr_common.lrc_txg,
+			    zfs_get_done, zgd);
+			ASSERT(error || lr->lr_length <= zp->z_blksz);
+
+			/*
+			 * On success, we need to wait for the write I/O
+			 * initiated by dmu_sync() to complete before we can
+			 * release this dbuf.  We will finish everything up
+			 * in the zfs_get_done() callback.
+			 */
+			if (error == 0)
+				return (0);
+
+			if (error == EALREADY) {
+				lr->lr_common.lrc_txtype = TX_WRITE2;
+				error = 0;
+			}
+		}
+	}
+
+	zfs_get_done(zgd, error);	/* drops lock, dbuf, vnode; frees zgd */
+
+	return (error);
+}
+
+/*ARGSUSED*/
+static int
+zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
+    caller_context_t *ct)
+{
+	znode_t *zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if (flag & V_ACE_MASK)	/* V_ACE_MASK selects zfs_zaccess() */
+		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
+	else
+		error = zfs_zaccess_rwx(zp, mode, flag, cr);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * If vnode is for a device return a specfs vnode instead.
+ *
+ * The hold on the original *vpp is released.  If specvp() returns NULL,
+ * *vpp is set to NULL and ENOSYS is returned.
+ */
+static int
+specvp_check(vnode_t **vpp, cred_t *cr)
+{
+	int error = 0;
+
+	if (IS_DEVVP(*vpp)) {
+		struct vnode *svp;
+
+		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+		VN_RELE(*vpp);
+		if (svp == NULL)
+			error = ENOSYS;
+		*vpp = svp;
+	}
+	return (error);
+}
+
+
+/*
+ * Lookup an entry in a directory, or an extended attribute directory.
+ * If it exists, return a held vnode reference for it.
+ *
+ *	IN:	dvp	- vnode of directory to search.
+ *		nm	- name of entry to lookup.
+ *		pnp	- full pathname to lookup [UNUSED].
+ *		flags	- LOOKUP_XATTR set if looking for an attribute.
+ *		rdir	- root directory vnode [UNUSED].
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *		direntflags - directory lookup flags
+ *		realpnp - returned pathname.
+ *
+ *	OUT:	vpp	- vnode of located entry, NULL if not found.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	NA
+ */
+/* ARGSUSED */
+static int
+zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	znode_t *zdp = VTOZ(dvp);
+	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
+	int error = 0;
+
+	/*
+	 * fast path: a lookup that is neither an xattr lookup nor
+	 * case-insensitive is answered for "", "." and DNLC hits
+	 * before ZFS_ENTER() is taken; a DNLC miss falls through to
+	 * the full lookup below.
+	 */
+	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
+
+		if (dvp->v_type != VDIR) {
+			return (ENOTDIR);
+		} else if (zdp->z_sa_hdl == NULL) {
+			return (EIO);
+		}
+
+		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
+			error = zfs_fastaccesschk_execute(zdp, cr);
+			if (!error) {
+				*vpp = dvp;
+				VN_HOLD(*vpp);
+				return (0);
+			}
+			return (error);
+		} else {
+			vnode_t *tvp = dnlc_lookup(dvp, nm);
+
+			if (tvp) {
+				error = zfs_fastaccesschk_execute(zdp, cr);
+				if (error) {
+					VN_RELE(tvp);
+					return (error);
+				}
+				if (tvp == DNLC_NO_VNODE) {
+					VN_RELE(tvp);
+					return (ENOENT);
+				} else {
+					*vpp = tvp;
+					return (specvp_check(vpp, cr));
+				}
+			}
+		}
+	}
+
+	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zdp);
+
+	*vpp = NULL;
+
+	if (flags & LOOKUP_XATTR) {
+		/*
+		 * If the xattr property is off, refuse the lookup request.
+		 */
+		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
+			ZFS_EXIT(zfsvfs);
+			return (EINVAL);
+		}
+
+		/*
+		 * We don't allow recursive attributes..
+		 * Maybe someday we will.
+		 */
+		if (zdp->z_pflags & ZFS_XATTR) {
+			ZFS_EXIT(zfsvfs);
+			return (EINVAL);
+		}
+
+		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+
+		/*
+		 * Do we have permission to get into attribute directory?
+		 * If not, drop the hold just obtained and return no vnode.
+		 */
+
+		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
+		    B_FALSE, cr)) {
+			VN_RELE(*vpp);
+			*vpp = NULL;
+		}
+
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (dvp->v_type != VDIR) {
+		ZFS_EXIT(zfsvfs);
+		return (ENOTDIR);
+	}
+
+	/*
+	 * Check accessibility of directory.
+	 */
+
+	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (EILSEQ);
+	}
+
+	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
+	if (error == 0)
+		error = specvp_check(vpp, cr);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Attempt to create a new entry in a directory.  If the entry
+ * already exists, truncate the file if permissible, else return
+ * an error.  Return the vp of the created or trunc'd file.
+ *
+ *	IN:	dvp	- vnode of directory to put new file entry in.
+ *		name	- name of new file entry.
+ *		vap	- attributes of new file.
+ *		excl	- flag indicating exclusive or non-exclusive mode.
+ *		mode	- mode to open file with.
+ *		cr	- credentials of caller.
+ *		flag	- large file flag [UNUSED].
+ *		ct	- caller context
+ *		vsecp	- ACL to be set
+ *
+ *	OUT:	vpp	- vnode of created or trunc'd entry.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime updated if new entry created
+ *	 vp - ctime|mtime always, atime if new
+ *
+ * If dmu_tx_assign() fails with ERESTART, the directory entry lock is
+ * dropped and the operation is retried from the "top" label; ACL ids
+ * that were already built are kept across the retry (have_acl).
+ */
+
+/* ARGSUSED */
+static int
+zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
+    int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
+    vsecattr_t *vsecp)
+{
+	znode_t		*zp, *dzp = VTOZ(dvp);
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	objset_t	*os;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	int		error;
+	ksid_t		*ksid;
+	uid_t		uid;
+	gid_t		gid = crgetgid(cr);
+	zfs_acl_ids_t   acl_ids;
+	boolean_t	fuid_dirtied;
+	boolean_t	have_acl = B_FALSE;
+
+	/*
+	 * If we have an ephemeral id, ACL, or XVATTR then
+	 * make sure file system is at proper version
+	 */
+
+	ksid = crgetsid(cr, KSID_OWNER);
+	if (ksid)
+		uid = ksid_getid(ksid);
+	else
+		uid = crgetuid(cr);
+
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (vsecp || (vap->va_mask & AT_XVATTR) ||
+	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (EINVAL);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	os = zfsvfs->z_os;
+	zilog = zfsvfs->z_log;
+
+	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (EILSEQ);
+	}
+
+	if (vap->va_mask & AT_XVATTR) {
+		if ((error = secpolicy_xvattr((xvattr_t *)vap,
+		    crgetuid(cr), cr, vap->va_type)) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+top:
+	*vpp = NULL;
+
+	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr))
+		vap->va_mode &= ~VSVTX;
+
+	if (*name == '\0') {
+		/*
+		 * Null component name refers to the directory itself.
+		 */
+		VN_HOLD(dvp);
+		zp = dzp;
+		dl = NULL;
+		error = 0;
+	} else {
+		/* possible VN_HOLD(zp) */
+		int zflg = 0;
+
+		if (flag & FIGNORECASE)
+			zflg |= ZCILOOK;
+
+		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
+		    NULL, NULL);
+		if (error) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			if (strcmp(name, "..") == 0)
+				error = EISDIR;
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	if (zp == NULL) {
+		uint64_t txtype;
+
+		/*
+		 * Create a new file object and update the directory
+		 * to reference it.
+		 */
+		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			goto out;
+		}
+
+		/*
+		 * We only support the creation of regular files in
+		 * extended attribute directories.
+		 */
+
+		if ((dzp->z_pflags & ZFS_XATTR) &&
+		    (vap->va_type != VREG)) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			error = EINVAL;
+			goto out;
+		}
+
+		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
+		    cr, vsecp, &acl_ids)) != 0)
+			goto out;
+		have_acl = B_TRUE;
+
+		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+			zfs_acl_ids_free(&acl_ids);
+			error = EDQUOT;
+			goto out;
+		}
+
+		tx = dmu_tx_create(os);
+
+		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+		    ZFS_SA_BASE_ATTR_SIZE);
+
+		fuid_dirtied = zfsvfs->z_fuid_dirty;
+		if (fuid_dirtied)
+			zfs_fuid_txhold(zfsvfs, tx);
+		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+		if (!zfsvfs->z_use_sa &&
+		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+			    0, acl_ids.z_aclp->z_acl_bytes);
+		}
+		error = dmu_tx_assign(tx, TXG_NOWAIT);
+		if (error) {
+			zfs_dirent_unlock(dl);
+			if (error == ERESTART) {
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
+				goto top;
+			}
+			zfs_acl_ids_free(&acl_ids);
+			dmu_tx_abort(tx);
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+		if (fuid_dirtied)
+			zfs_fuid_sync(zfsvfs, tx);
+
+		(void) zfs_link_create(dl, zp, tx, ZNEW);
+		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
+		if (flag & FIGNORECASE)
+			txtype |= TX_CI;
+		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
+		    vsecp, acl_ids.z_fuidp, vap);
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_commit(tx);
+	} else {
+		int aflags = (flag & FAPPEND) ? V_APPEND : 0;
+
+		if (have_acl)
+			zfs_acl_ids_free(&acl_ids);
+		have_acl = B_FALSE;
+
+		/*
+		 * A directory entry already exists for this name.
+		 */
+		/*
+		 * Can't truncate an existing file if in exclusive mode.
+		 */
+		if (excl == EXCL) {
+			error = EEXIST;
+			goto out;
+		}
+		/*
+		 * Can't open a directory for writing.
+		 */
+		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
+			error = EISDIR;
+			goto out;
+		}
+		/*
+		 * Verify requested access to file.
+		 */
+		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
+			goto out;
+		}
+
+		mutex_enter(&dzp->z_lock);
+		dzp->z_seq++;
+		mutex_exit(&dzp->z_lock);
+
+		/*
+		 * Truncate regular files if requested.
+		 */
+		if ((ZTOV(zp)->v_type == VREG) &&
+		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
+			/* we can't hold any locks when calling zfs_freesp() */
+			zfs_dirent_unlock(dl);
+			dl = NULL;
+			error = zfs_freesp(zp, 0, 0, mode, TRUE);
+			if (error == 0) {
+				vnevent_create(ZTOV(zp), ct);
+			}
+		}
+	}
+out:
+
+	if (dl)
+		zfs_dirent_unlock(dl);
+
+	if (error) {
+		if (zp)
+			VN_RELE(ZTOV(zp));
+	} else {
+		*vpp = ZTOV(zp);
+		error = specvp_check(vpp, cr);
+	}
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Remove an entry from a directory.
+ *
+ *	IN:	dvp	- vnode of directory to remove entry from.
+ *		name	- name of entry to remove.
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *		flags	- case flags
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime
+ *	 vp - ctime (if nlink > 0)
+ *
+ * A directory cannot be removed through this entry point; EPERM is
+ * returned for one.  If dmu_tx_assign() fails with ERESTART, all holds
+ * and the directory entry lock are dropped and the operation is retried
+ * from the "top" label.
+ */
+
+uint64_t null_xattr = 0;
+
+/*ARGSUSED*/
+static int
+zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
+    int flags)
+{
+	znode_t		*zp, *dzp = VTOZ(dvp);
+	znode_t		*xzp;
+	vnode_t		*vp;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	uint64_t	acl_obj, xattr_obj;
+	uint64_t 	xattr_obj_unlinked = 0;
+	uint64_t	obj = 0;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	boolean_t	may_delete_now, delete_now = FALSE;
+	boolean_t	unlinked, toobig = FALSE;
+	uint64_t	txtype;
+	pathname_t	*realnmp = NULL;
+	pathname_t	realnm;
+	int		error;
+	int		zflg = ZEXISTS;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (flags & FIGNORECASE) {
+		zflg |= ZCILOOK;
+		pn_alloc(&realnm);
+		realnmp = &realnm;
+	}
+
+top:
+	xattr_obj = 0;
+	xzp = NULL;
+	/*
+	 * Attempt to lock directory; fail if entry doesn't exist.
+	 */
+	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
+	    NULL, realnmp)) {
+		if (realnmp)
+			pn_free(realnmp);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	vp = ZTOV(zp);
+
+	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
+		goto out;
+	}
+
+	/*
+	 * Need to use rmdir for removing directories.
+	 */
+	if (vp->v_type == VDIR) {
+		error = EPERM;
+		goto out;
+	}
+
+	vnevent_remove(vp, dvp, name, ct);
+
+	if (realnmp)
+		dnlc_remove(dvp, realnmp->pn_buf);
+	else
+		dnlc_remove(dvp, name);
+
+	mutex_enter(&vp->v_lock);
+	may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp);
+	mutex_exit(&vp->v_lock);
+
+	/*
+	 * We may delete the znode now, or we may put it in the unlinked set;
+	 * it depends on whether we're the last link, and on whether there are
+	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
+	 * allow for either case.
+	 */
+	obj = zp->z_id;
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
+	if (may_delete_now) {
+		toobig =
+		    zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
+		/* if the file is too big, only hold_free a token amount */
+		dmu_tx_hold_free(tx, zp->z_id, 0,
+		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
+	}
+
+	/* are there any extended attributes? */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	if (error == 0 && xattr_obj) {
+		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
+		ASSERT3U(error, ==, 0);
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+	}
+
+	mutex_enter(&zp->z_lock);
+	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
+		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+	mutex_exit(&zp->z_lock);
+
+	/* charge as an update -- would be nice not to charge at all */
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+	error = dmu_tx_assign(tx, TXG_NOWAIT);
+	if (error) {
+		zfs_dirent_unlock(dl);
+		VN_RELE(vp);
+		if (xzp)
+			VN_RELE(ZTOV(xzp));
+		if (error == ERESTART) {
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		if (realnmp)
+			pn_free(realnmp);
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Remove the directory entry.
+	 */
+	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
+
+	if (error) {
+		dmu_tx_commit(tx);
+		goto out;
+	}
+
+	if (unlinked) {
+
+		/*
+		 * Hold z_lock so that we can make sure that the ACL obj
+		 * hasn't changed.  Could have been deleted due to
+		 * zfs_sa_upgrade().
+		 * z_lock stays held past this block; it is released in
+		 * either the delete_now or the unlinked branch below.
+		 */
+		mutex_enter(&zp->z_lock);
+		mutex_enter(&vp->v_lock);
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
+		delete_now = may_delete_now && !toobig &&
+		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
+		    xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
+		    acl_obj;
+		mutex_exit(&vp->v_lock);
+	}
+
+	if (delete_now) {
+		if (xattr_obj_unlinked) {
+			ASSERT3U(xzp->z_links, ==, 2);
+			mutex_enter(&xzp->z_lock);
+			xzp->z_unlinked = 1;
+			xzp->z_links = 0;
+			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+			    &xzp->z_links, sizeof (xzp->z_links), tx);
+			ASSERT3U(error,  ==,  0);
+			mutex_exit(&xzp->z_lock);
+			zfs_unlinked_add(xzp, tx);
+
+			if (zp->z_is_sa)
+				error = sa_remove(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zfsvfs), tx);
+			else
+				error = sa_update(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
+				    sizeof (uint64_t), tx);
+			ASSERT3U(error, ==, 0);
+		}
+		mutex_enter(&vp->v_lock);
+		vp->v_count--;
+		ASSERT3U(vp->v_count, ==, 0);
+		mutex_exit(&vp->v_lock);
+		mutex_exit(&zp->z_lock);
+		zfs_znode_delete(zp, tx);
+	} else if (unlinked) {
+		mutex_exit(&zp->z_lock);
+		zfs_unlinked_add(zp, tx);
+	}
+
+	txtype = TX_REMOVE;
+	if (flags & FIGNORECASE)
+		txtype |= TX_CI;
+	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
+
+	dmu_tx_commit(tx);
+out:
+	if (realnmp)
+		pn_free(realnmp);
+
+	zfs_dirent_unlock(dl);
+
+	if (!delete_now)
+		VN_RELE(vp);
+	if (xzp)
+		VN_RELE(ZTOV(xzp));
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Create a new directory and insert it into dvp using the name
+ * provided.  Return a pointer to the inserted directory.
+ *
+ *	IN:	dvp	- vnode of directory to add subdir to.
+ *		dirname	- name of new directory.
+ *		vap	- attributes of new directory.
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *		vsecp	- ACL to be set
+ *
+ *	OUT:	vpp	- vnode of created directory.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime updated
+ *	 vp - ctime|mtime|atime updated
+ *
+ * EINVAL is returned when dvp is an extended attribute directory.
+ * If dmu_tx_assign() fails with ERESTART, the directory entry lock is
+ * dropped and the operation is retried from the "top" label with the
+ * ACL ids that were already created.
+ */
+/*ARGSUSED*/
+static int
+zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
+    caller_context_t *ct, int flags, vsecattr_t *vsecp)
+{
+	znode_t		*zp, *dzp = VTOZ(dvp);
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	zfs_dirlock_t	*dl;
+	uint64_t	txtype;
+	dmu_tx_t	*tx;
+	int		error;
+	int		zf = ZNEW;
+	ksid_t		*ksid;
+	uid_t		uid;
+	gid_t		gid = crgetgid(cr);
+	zfs_acl_ids_t   acl_ids;
+	boolean_t	fuid_dirtied;
+
+	ASSERT(vap->va_type == VDIR);
+
+	/*
+	 * If we have an ephemeral id, ACL, or XVATTR then
+	 * make sure file system is at proper version
+	 */
+
+	ksid = crgetsid(cr, KSID_OWNER);
+	if (ksid)
+		uid = ksid_getid(ksid);
+	else
+		uid = crgetuid(cr);
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (vsecp || (vap->va_mask & AT_XVATTR) ||
+	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (EINVAL);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (dzp->z_pflags & ZFS_XATTR) {
+		ZFS_EXIT(zfsvfs);
+		return (EINVAL);
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(dirname,
+	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (EILSEQ);
+	}
+	if (flags & FIGNORECASE)
+		zf |= ZCILOOK;
+
+	if (vap->va_mask & AT_XVATTR) {
+		if ((error = secpolicy_xvattr((xvattr_t *)vap,
+		    crgetuid(cr), cr, vap->va_type)) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
+	    vsecp, &acl_ids)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	/*
+	 * First make sure the new directory doesn't exist.
+	 *
+	 * Existence is checked first to make sure we don't return
+	 * EACCES instead of EEXIST which can cause some applications
+	 * to fail.
+	 */
+top:
+	*vpp = NULL;
+
+	if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
+	    NULL, NULL)) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (EDQUOT);
+	}
+
+	/*
+	 * Add a new entry to the directory.
+	 */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
+
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+
+	error = dmu_tx_assign(tx, TXG_NOWAIT);
+	if (error) {
+		zfs_dirent_unlock(dl);
+		if (error == ERESTART) {
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Create new node.
+	 */
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	/*
+	 * Now put new name in parent dir.
+	 */
+	(void) zfs_link_create(dl, zp, tx, ZNEW);
+
+	*vpp = ZTOV(zp);
+
+	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
+	if (flags & FIGNORECASE)
+		txtype |= TX_CI;
+	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
+	    acl_ids.z_fuidp, vap);
+
+	zfs_acl_ids_free(&acl_ids);
+
+	dmu_tx_commit(tx);
+
+	zfs_dirent_unlock(dl);
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Remove a directory subdir entry.
If the current working
+ * directory is the same as the subdir to be removed, the
+ * remove will fail with EINVAL.
+ *
+ *	IN:	dvp	- vnode of directory to remove from.
+ *		name	- name of directory to be removed.
+ *		cwd	- vnode of current working directory.
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *		flags	- case flags
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+static int
+zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
+    caller_context_t *ct, int flags)
+{
+	znode_t		*dzp = VTOZ(dvp);
+	znode_t		*zp;
+	vnode_t		*vp;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	int		error;
+	int		zflg = ZEXISTS;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (flags & FIGNORECASE)
+		zflg |= ZCILOOK;
+top:
+	zp = NULL;
+
+	/*
+	 * Attempt to lock directory; fail if entry doesn't exist.
+	 */
+	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
+	    NULL, NULL)) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	vp = ZTOV(zp);
+
+	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
+		goto out;
+	}
+
+	if (vp->v_type != VDIR) {
+		error = ENOTDIR;
+		goto out;
+	}
+
+	if (vp == cwd) {
+		error = EINVAL;
+		goto out;
+	}
+
+	vnevent_rmdir(vp, dvp, name, ct);
+
+	/*
+	 * Grab a lock on the directory to make sure that no one is
+	 * trying to add (or lookup) entries while we are removing it.
+	 */
+	rw_enter(&zp->z_name_lock, RW_WRITER);
+
+	/*
+	 * Grab a lock on the parent pointer to make sure we play well
+	 * with the treewalk and directory rename code.
+	 */
+	rw_enter(&zp->z_parent_lock, RW_WRITER);
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
+	error = dmu_tx_assign(tx, TXG_NOWAIT);
+	if (error) {
+		rw_exit(&zp->z_parent_lock);
+		rw_exit(&zp->z_name_lock);
+		zfs_dirent_unlock(dl);
+		VN_RELE(vp);
+		if (error == ERESTART) {
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
+
+	if (error == 0) {
+		uint64_t txtype = TX_RMDIR;
+		if (flags & FIGNORECASE)
+			txtype |= TX_CI;
+		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
+	}
+
+	dmu_tx_commit(tx);
+
+	rw_exit(&zp->z_parent_lock);
+	rw_exit(&zp->z_name_lock);
+out:
+	zfs_dirent_unlock(dl);
+
+	VN_RELE(vp);
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Read as many directory entries as will fit into the provided
+ * buffer from the given directory cursor position (specified in
+ * the uio structure).
+ *
+ *	IN:	vp	- vnode of directory to read.
+ *		uio	- structure supplying read location, range info,
+ *			  and return buffer.
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *		flags	- case flags
+ *
+ *	OUT:	uio	- updated offset and range, buffer filled.
+ *		eofp	- set to true if end-of-file detected.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	vp - atime updated
+ *
+ * Note that the low 4 bits of the cookie returned by zap is always zero.
+ * This allows us to use the low range for "special" directory entries:
+ * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
+ * we use the offset 2 for the '.zfs' directory.
+ */
+/* ARGSUSED */
+static int
+zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
+    caller_context_t *ct, int flags)
+{
+	znode_t		*zp = VTOZ(vp);
+	iovec_t		*iovp;
+	edirent_t	*eodp;
+	dirent64_t	*odp;
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	objset_t	*os;
+	caddr_t		outbuf;
+	size_t		bufsize;
+	zap_cursor_t	zc;
+	zap_attribute_t	zap;
+	uint_t		bytes_wanted;
+	uint64_t	offset; /* must be unsigned; checks for < 1 */
+	uint64_t	parent;
+	int		local_eof;
+	int		outcount;
+	int		error;
+	uint8_t		prefetch;
+	boolean_t	check_sysattrs;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (parent))) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * If we are not given an eof variable,
+	 * use a local one.
+	 */
+	if (eofp == NULL)
+		eofp = &local_eof;
+
+	/*
+	 * Check for valid iov_len.
+	 */
+	if (uio->uio_iov->iov_len <= 0) {
+		ZFS_EXIT(zfsvfs);
+		return (EINVAL);
+	}
+
+	/*
+	 * Quit if directory has been removed (posix)
+	 */
+	if ((*eofp = zp->z_unlinked) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	error = 0;
+	os = zfsvfs->z_os;
+	offset = uio->uio_loffset;
+	prefetch = zp->z_zn_prefetch;
+
+	/*
+	 * Initialize the iterator cursor.
+	 */
+	if (offset <= 3) {
+		/*
+		 * Start iteration from the beginning of the directory.
+		 */
+		zap_cursor_init(&zc, os, zp->z_id);
+	} else {
+		/*
+		 * The offset is a serialized cursor.
+		 */
+		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
+	}
+
+	/*
+	 * Get space to change directory entries into fs independent format.
+	 * A temporary kernel buffer is used unless the caller supplied a
+	 * single kernel-space iovec, in which case entries are built
+	 * directly in the caller's buffer.
+	 */
+	iovp = uio->uio_iov;
+	bytes_wanted = iovp->iov_len;
+	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
+		bufsize = bytes_wanted;
+		outbuf = kmem_alloc(bufsize, KM_SLEEP);
+		odp = (struct dirent64 *)outbuf;
+	} else {
+		bufsize = bytes_wanted;
+		odp = (struct dirent64 *)iovp->iov_base;
+	}
+	eodp = (struct edirent *)odp;
+
+	/*
+	 * If this VFS supports the system attribute view interface; and
+	 * we're looking at an extended attribute directory; and we care
+	 * about normalization conflicts on this vfs; then we must check
+	 * for normalization conflicts with the sysattr name space.
+	 */
+	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
+	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
+	    (flags & V_RDDIR_ENTFLAGS);
+
+	/*
+	 * Transform to file-system independent format
+	 */
+	outcount = 0;
+	while (outcount < bytes_wanted) {
+		ino64_t objnum;
+		ushort_t reclen;
+		off64_t *next = NULL;
+
+		/*
+		 * Special case `.', `..', and `.zfs'.
+		 */
+		if (offset == 0) {
+			(void) strcpy(zap.za_name, ".");
+			zap.za_normalization_conflict = 0;
+			objnum = zp->z_id;
+		} else if (offset == 1) {
+			(void) strcpy(zap.za_name, "..");
+			zap.za_normalization_conflict = 0;
+			objnum = parent;
+		} else if (offset == 2 && zfs_show_ctldir(zp)) {
+			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
+			zap.za_normalization_conflict = 0;
+			objnum = ZFSCTL_INO_ROOT;
+		} else {
+			/*
+			 * Grab next entry.
+			 */
+			if (error = zap_cursor_retrieve(&zc, &zap)) {
+				if ((*eofp = (error == ENOENT)) != 0)
+					break;
+				else
+					goto update;
+			}
+
+			if (zap.za_integer_length != 8 ||
+			    zap.za_num_integers != 1) {
+				cmn_err(CE_WARN, "zap_readdir: bad directory "
+				    "entry, obj = %lld, offset = %lld\n",
+				    (u_longlong_t)zp->z_id,
+				    (u_longlong_t)offset);
+				error = ENXIO;
+				goto update;
+			}
+
+			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
+			/*
+			 * MacOS X can extract the object type here such as:
+			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
+			 */
+
+			if (check_sysattrs && !zap.za_normalization_conflict) {
+				zap.za_normalization_conflict =
+				    xattr_sysattr_casechk(zap.za_name);
+			}
+		}
+
+		if (flags & V_RDDIR_ACCFILTER) {
+			/*
+			 * If we have no access at all, don't include
+			 * this entry in the returned information
+			 */
+			znode_t	*ezp;
+			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
+				goto skip_entry;
+			if (!zfs_has_access(ezp, cr)) {
+				VN_RELE(ZTOV(ezp));
+				goto skip_entry;
+			}
+			VN_RELE(ZTOV(ezp));
+		}
+
+		if (flags & V_RDDIR_ENTFLAGS)
+			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
+		else
+			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
+
+		/*
+		 * Will this entry fit in the buffer?
+		 */
+		if (outcount + reclen > bufsize) {
+			/*
+			 * Did we manage to fit anything in the buffer?
+			 */
+			if (!outcount) {
+				error = EINVAL;
+				goto update;
+			}
+			break;
+		}
+		if (flags & V_RDDIR_ENTFLAGS) {
+			/*
+			 * Add extended flag entry:
+			 */
+			eodp->ed_ino = objnum;
+			eodp->ed_reclen = reclen;
+			/* NOTE: ed_off is the offset for the *next* entry */
+			next = &(eodp->ed_off);
+			eodp->ed_eflags = zap.za_normalization_conflict ?
+			    ED_CASE_CONFLICT : 0;
+			(void) strncpy(eodp->ed_name, zap.za_name,
+			    EDIRENT_NAMELEN(reclen));
+			eodp = (edirent_t *)((intptr_t)eodp + reclen);
+		} else {
+			/*
+			 * Add normal entry:
+			 */
+			odp->d_ino = objnum;
+			odp->d_reclen = reclen;
+			/* NOTE: d_off is the offset for the *next* entry */
+			next = &(odp->d_off);
+			(void) strncpy(odp->d_name, zap.za_name,
+			    DIRENT64_NAMELEN(reclen));
+			odp = (dirent64_t *)((intptr_t)odp + reclen);
+		}
+		outcount += reclen;
+
+		ASSERT(outcount <= bufsize);
+
+		/* Prefetch znode */
+		if (prefetch)
+			dmu_prefetch(os, objnum, 0, 0);
+
+	skip_entry:
+		/*
+		 * Move to the next entry, fill in the previous offset.
+		 * A filtered-out entry reaches here with next == NULL, so
+		 * only the cursor advances and nothing is written.
+		 */
+		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
+			zap_cursor_advance(&zc);
+			offset = zap_cursor_serialize(&zc);
+		} else {
+			offset += 1;
+		}
+		if (next)
+			*next = offset;
+	}
+	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
+
+	if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
+		iovp->iov_base += outcount;
+		iovp->iov_len -= outcount;
+		uio->uio_resid -= outcount;
+	} else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
+		/*
+		 * Reset the pointer.
+		 */
+		offset = uio->uio_loffset;
+	}
+
+update:
+	zap_cursor_fini(&zc);
+	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
+		kmem_free(outbuf, bufsize);
+
+	if (error == ENOENT)
+		error = 0;
+
+	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
+
+	uio->uio_loffset = offset;
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+ulong_t zfs_fsync_sync_cnt = 4;
+
+static int
+zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
+{
+	znode_t	*zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	/*
+	 * Regardless of whether this is required for standards conformance,
+	 * this is the logical behavior when fsync() is called on a file with
+	 * dirty pages.  We use B_ASYNC since the ZIL transactions are already
+	 * going to be pushed out as part of the zil_commit().
+	 */
+	if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) &&
+	    (vp->v_type == VREG) && !(IS_SWAPVP(vp)))
+		(void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct);
+
+	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
+
+	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
+		zil_commit(zfsvfs->z_log, zp->z_id);
+		ZFS_EXIT(zfsvfs);
+	}
+	return (0);
+}
+
+
+/*
+ * Get the requested file attributes and place them in the provided
+ * vattr structure.
+ *
+ *	IN:	vp	- vnode of file.
+ *		vap	- va_mask identifies requested attributes.
+ *			  If AT_XVATTR set, then optional attrs are requested
+ *		flags	- ATTR_NOACLCHECK (CIFS server context)
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *
+ *	OUT:	vap	- attribute values.
+ *
+ *	RETURN:	0 if success
+ *		error code if the mtime/ctime lookup or the
+ *		ACE_READ_ATTRIBUTES access check fails
+ */
+/* ARGSUSED */
+static int
+zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	znode_t *zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int	error = 0;
+	uint64_t links;
+	uint64_t mtime[2], ctime[2];
+	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
+	xoptattr_t *xoap = NULL;
+	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+	sa_bulk_attr_t bulk[2];
+	int count = 0;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+
+	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
+	 * Also, if we are the owner don't bother, since owner should
+	 * always be allowed to read basic attributes of file.
+	 */
+	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
+	    (vap->va_uid != crgetuid(cr))) {
+		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
+		    skipaclchk, cr)) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	/*
+	 * Return all attributes.  It's cheaper to provide the answer
+	 * than to determine whether we were asked the question.
+	 */
+
+	mutex_enter(&zp->z_lock);
+	vap->va_type = vp->v_type;
+	vap->va_mode = zp->z_mode & MODEMASK;
+	vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
+	vap->va_nodeid = zp->z_id;
+	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
+		links = zp->z_links + 1;
+	else
+		links = zp->z_links;
+	vap->va_nlink = MIN(links, UINT32_MAX);	/* nlink_t limit! */
+	vap->va_size = zp->z_size;
+	vap->va_rdev = vp->v_rdev;
+	vap->va_seq = zp->z_seq;
+
+	/*
+	 * Add in any requested optional attributes and the create time.
+	 * Also set the corresponding bits in the returned attribute bitmap.
+	 */
+	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
+		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+			xoap->xoa_archive =
+			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
+			XVA_SET_RTN(xvap, XAT_ARCHIVE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+			xoap->xoa_readonly =
+			    ((zp->z_pflags & ZFS_READONLY) != 0);
+			XVA_SET_RTN(xvap, XAT_READONLY);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+			xoap->xoa_system =
+			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
+			XVA_SET_RTN(xvap, XAT_SYSTEM);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+			xoap->xoa_hidden =
+			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
+			XVA_SET_RTN(xvap, XAT_HIDDEN);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+			xoap->xoa_nounlink =
+			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
+			XVA_SET_RTN(xvap, XAT_NOUNLINK);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+			xoap->xoa_immutable =
+			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
+			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+			xoap->xoa_appendonly =
+			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
+			XVA_SET_RTN(xvap, XAT_APPENDONLY);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+			xoap->xoa_nodump =
+			    ((zp->z_pflags & ZFS_NODUMP) != 0);
+			XVA_SET_RTN(xvap, XAT_NODUMP);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+			xoap->xoa_opaque =
+			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
+			XVA_SET_RTN(xvap, XAT_OPAQUE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+			xoap->xoa_av_quarantined =
+			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
+			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+			xoap->xoa_av_modified =
+			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
+			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
+		    vp->v_type == VREG) {
+			zfs_sa_get_scanstamp(zp, xvap);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+			uint64_t times[2];
+
+			(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
+			    times, sizeof (times));
+			ZFS_TIME_DECODE(&xoap->xoa_createtime, times);
+			XVA_SET_RTN(xvap, XAT_CREATETIME);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
+			XVA_SET_RTN(xvap, XAT_REPARSE);
+		}
+		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
+			xoap->xoa_generation = zp->z_gen;
+			XVA_SET_RTN(xvap, XAT_GEN);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+			xoap->xoa_offline =
+			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
+			XVA_SET_RTN(xvap, XAT_OFFLINE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+			xoap->xoa_sparse =
+			    ((zp->z_pflags & ZFS_SPARSE) != 0);
+			XVA_SET_RTN(xvap, XAT_SPARSE);
+		}
+	}
+
+	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
+	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
+	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
+
+	mutex_exit(&zp->z_lock);
+
+	sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks);
+
+	if (zp->z_blksz == 0) {
+		/*
+		 * Block size hasn't been set; suggest maximal I/O transfers.
+		 */
+		vap->va_blksize = zfsvfs->z_max_blksz;
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Set the file attributes to the values contained in the
+ * vattr structure.
+ *
+ *	IN:	vp	- vnode of file to be modified.
+ *		vap	- new attribute values.
+ *			  If AT_XVATTR set, then optional attrs are being set
+ *		flags	- ATTR_UTIME set if non-default time values provided.
+ *			- ATTR_NOACLCHECK (CIFS context only).
+ *		cr	- credentials of caller.
+ *		ct	- caller context
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	vp - ctime updated, mtime updated if size changed.
+ */
+/* ARGSUSED */
+static int
+zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+	caller_context_t *ct)
+{
+	znode_t		*zp = VTOZ(vp);
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	zilog_t		*zilog;
+	dmu_tx_t	*tx;
+	vattr_t		oldva;
+	xvattr_t	tmpxvattr;
+	uint_t		mask = vap->va_mask;
+	uint_t		saved_mask;
+	int		trim_mask = 0;
+	uint64_t	new_mode;
+	uint64_t	new_uid, new_gid;
+	uint64_t	xattr_obj;
+	uint64_t	mtime[2], ctime[2];
+	znode_t		*attrzp;
+	int		need_policy = FALSE;
+	int		err, err2;
+	zfs_fuid_info_t *fuidp = NULL;
+	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
+	xoptattr_t	*xoap;
+	zfs_acl_t	*aclp;
+	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ?
B_TRUE : B_FALSE; + boolean_t fuid_dirtied = B_FALSE; + sa_bulk_attr_t bulk[7], xattr_bulk[7]; + int count = 0, xattr_count = 0; + + if (mask == 0) + return (0); + + if (mask & AT_NOSET) + return (EINVAL); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + zilog = zfsvfs->z_log; + + /* + * Make sure that if we have ephemeral uid/gid or xvattr specified + * that file system is at proper version level + */ + + if (zfsvfs->z_use_fuids == B_FALSE && + (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || + ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || + (mask & AT_XVATTR))) { + ZFS_EXIT(zfsvfs); + return (EINVAL); + } + + if (mask & AT_SIZE && vp->v_type == VDIR) { + ZFS_EXIT(zfsvfs); + return (EISDIR); + } + + if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { + ZFS_EXIT(zfsvfs); + return (EINVAL); + } + + /* + * If this is an xvattr_t, then get a pointer to the structure of + * optional attributes. If this is NULL, then we have a vattr_t. + */ + xoap = xva_getxoptattr(xvap); + + xva_init(&tmpxvattr); + + /* + * Immutable files can only alter immutable bit and atime + */ + if ((zp->z_pflags & ZFS_IMMUTABLE) && + ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || + ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { + ZFS_EXIT(zfsvfs); + return (EPERM); + } + + if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { + ZFS_EXIT(zfsvfs); + return (EPERM); + } + + /* + * Verify that the timestamps don't overflow 32 bits. + * ZFS can handle large timestamps, but 32bit syscalls can't + * handle times greater than 2039. This check should be removed + * once large timestamps are fully supported. + */ + if (mask & (AT_ATIME | AT_MTIME)) { + if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || + ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { + ZFS_EXIT(zfsvfs); + return (EOVERFLOW); + } + } + + /* Retry point: re-entered when dmu_tx_assign() returns ERESTART. */ +top: + attrzp = NULL; + aclp = NULL; + + /* Can this be moved to before the top label?
*/ + if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { + ZFS_EXIT(zfsvfs); + return (EROFS); + } + + /* + * First validate permissions + */ + + if (mask & AT_SIZE) { + err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); + if (err) { + ZFS_EXIT(zfsvfs); + return (err); + } + /* + * XXX - Note, we are not providing any open + * mode flags here (like FNDELAY), so we may + * block if there are locks present... this + * should be addressed in openat(). + */ + /* XXX - would it be OK to generate a log record here? */ + err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); + if (err) { + ZFS_EXIT(zfsvfs); + return (err); + } + } + + if (mask & (AT_ATIME|AT_MTIME) || + ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || + XVA_ISSET_REQ(xvap, XAT_READONLY) || + XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || + XVA_ISSET_REQ(xvap, XAT_OFFLINE) || + XVA_ISSET_REQ(xvap, XAT_SPARSE) || + XVA_ISSET_REQ(xvap, XAT_CREATETIME) || + XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { + need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, + skipaclchk, cr); + } + + if (mask & (AT_UID|AT_GID)) { + int idmask = (mask & (AT_UID|AT_GID)); + int take_owner; + int take_group; + + /* + * NOTE: even if a new mode is being set, + * we may clear S_ISUID/S_ISGID bits. + */ + + if (!(mask & AT_MODE)) + vap->va_mode = zp->z_mode; + + /* + * Take ownership or chgrp to group we are a member of + */ + + take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); + take_group = (mask & AT_GID) && + zfs_groupmember(zfsvfs, vap->va_gid, cr); + + /* + * If both AT_UID and AT_GID are set then take_owner and + * take_group must both be set in order to allow taking + * ownership. 
+ * + * Otherwise, send the check through secpolicy_vnode_setattr() + * + */ + + if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || + ((idmask == AT_UID) && take_owner) || + ((idmask == AT_GID) && take_group)) { + if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, + skipaclchk, cr) == 0) { + /* + * Remove setuid/setgid for non-privileged users + */ + secpolicy_setid_clear(vap, cr); + trim_mask = (mask & (AT_UID|AT_GID)); + } else { + need_policy = TRUE; + } + } else { + need_policy = TRUE; + } + } + + mutex_enter(&zp->z_lock); + oldva.va_mode = zp->z_mode; + zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); + if (mask & AT_XVATTR) { + /* + * Update xvattr mask to include only those attributes + * that are actually changing. + * + * the bits will be restored prior to actually setting + * the attributes so the caller thinks they were set. + */ + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { + if (xoap->xoa_appendonly != + ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_APPENDONLY); + XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { + if (xoap->xoa_nounlink != + ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_NOUNLINK); + XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { + if (xoap->xoa_immutable != + ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_IMMUTABLE); + XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { + if (xoap->xoa_nodump != + ((zp->z_pflags & ZFS_NODUMP) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_NODUMP); + XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { + if (xoap->xoa_av_modified != + ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); + 
XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { + if ((vp->v_type != VREG && + xoap->xoa_av_quarantined) || + xoap->xoa_av_quarantined != + ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); + XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { + mutex_exit(&zp->z_lock); + ZFS_EXIT(zfsvfs); + return (EPERM); + } + + if (need_policy == FALSE && + (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || + XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { + need_policy = TRUE; + } + } + + mutex_exit(&zp->z_lock); + + if (mask & AT_MODE) { + if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { + err = secpolicy_setid_setsticky_clear(vp, vap, + &oldva, cr); + if (err) { + ZFS_EXIT(zfsvfs); + return (err); + } + trim_mask |= AT_MODE; + } else { + need_policy = TRUE; + } + } + + if (need_policy) { + /* + * If trim_mask is set then take ownership + * has been granted or write_acl is present and user + * has the ability to modify mode. In that case remove + * UID|GID and or MODE from mask so that + * secpolicy_vnode_setattr() doesn't revoke it. 
+ */ + + if (trim_mask) { + saved_mask = vap->va_mask; + vap->va_mask &= ~trim_mask; + } + err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, + (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); + if (err) { + ZFS_EXIT(zfsvfs); + return (err); + } + + if (trim_mask) + vap->va_mask |= saved_mask; + } + + /* + * secpolicy_vnode_setattr, or take ownership may have + * changed va_mask + */ + mask = vap->va_mask; + + if ((mask & (AT_UID | AT_GID))) { + err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr_obj, sizeof (xattr_obj)); + + if (err == 0 && xattr_obj) { + err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); + if (err) + goto out2; + } + if (mask & AT_UID) { + new_uid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); + if (new_uid != zp->z_uid && + zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { + if (attrzp) + VN_RELE(ZTOV(attrzp)); + err = EDQUOT; + goto out2; + } + } + + if (mask & AT_GID) { + new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, + cr, ZFS_GROUP, &fuidp); + if (new_gid != zp->z_gid && + zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { + if (attrzp) + VN_RELE(ZTOV(attrzp)); + err = EDQUOT; + goto out2; + } + } + } + tx = dmu_tx_create(zfsvfs->z_os); + + if (mask & AT_MODE) { + uint64_t pmode = zp->z_mode; + uint64_t acl_obj; + new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); + + zfs_acl_chmod_setattr(zp, &aclp, new_mode); + + mutex_enter(&zp->z_lock); + if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { + /* + * Are we upgrading ACL from old V0 format + * to V1 format? 
+ */ + if (zfsvfs->z_version >= ZPL_VERSION_FUID && + zfs_znode_acl_version(zp) == + ZFS_ACL_VERSION_INITIAL) { + dmu_tx_hold_free(tx, acl_obj, 0, + DMU_OBJECT_END); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, aclp->z_acl_bytes); + } else { + dmu_tx_hold_write(tx, acl_obj, 0, + aclp->z_acl_bytes); + } + } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, aclp->z_acl_bytes); + } + mutex_exit(&zp->z_lock); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + } else { + if ((mask & AT_XVATTR) && + XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + else + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + } + + if (attrzp) { + dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); + } + + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + + zfs_sa_upgrade_txholds(tx, zp); + + err = dmu_tx_assign(tx, TXG_NOWAIT); + if (err) { + if (err == ERESTART) + dmu_tx_wait(tx); + goto out; + } + + count = 0; + /* + * Set each attribute requested. + * We group settings according to the locks they need to acquire. + * + * Note: you cannot set ctime directly, although it will be + * updated as a side-effect of calling this function. 
+ */ + + + if (mask & (AT_UID|AT_GID|AT_MODE)) + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + + if (attrzp) { + if (mask & (AT_UID|AT_GID|AT_MODE)) + mutex_enter(&attrzp->z_acl_lock); + mutex_enter(&attrzp->z_lock); + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, + sizeof (attrzp->z_pflags)); + } + + if (mask & (AT_UID|AT_GID)) { + + if (mask & AT_UID) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &new_uid, sizeof (new_uid)); + zp->z_uid = new_uid; + if (attrzp) { + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_UID(zfsvfs), NULL, &new_uid, + sizeof (new_uid)); + attrzp->z_uid = new_uid; + } + } + + if (mask & AT_GID) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), + NULL, &new_gid, sizeof (new_gid)); + zp->z_gid = new_gid; + if (attrzp) { + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_GID(zfsvfs), NULL, &new_gid, + sizeof (new_gid)); + attrzp->z_gid = new_gid; + } + } + if (!(mask & AT_MODE)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), + NULL, &new_mode, sizeof (new_mode)); + new_mode = zp->z_mode; + } + err = zfs_acl_chown_setattr(zp); + ASSERT(err == 0); + if (attrzp) { + err = zfs_acl_chown_setattr(attrzp); + ASSERT(err == 0); + } + } + + if (mask & AT_MODE) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + &new_mode, sizeof (new_mode)); + zp->z_mode = new_mode; + ASSERT3U((uintptr_t)aclp, !=, NULL); + err = zfs_aclset_common(zp, aclp, cr, tx); + ASSERT3U(err, ==, 0); + if (zp->z_acl_cached) + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = aclp; + aclp = NULL; + } + + + if (mask & AT_ATIME) { + ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + &zp->z_atime, sizeof (zp->z_atime)); + } + + if (mask & AT_MTIME) { + ZFS_TIME_ENCODE(&vap->va_mtime, mtime); + SA_ADD_BULK_ATTR(bulk, count, 
SA_ZPL_MTIME(zfsvfs), NULL, + mtime, sizeof (mtime)); + } + + /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ + if (mask & AT_SIZE && !(mask & AT_MTIME)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), + NULL, mtime, sizeof (mtime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, sizeof (ctime)); + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, + B_TRUE); + } else if (mask != 0) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, sizeof (ctime)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, + B_TRUE); + if (attrzp) { + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, sizeof (ctime)); + zfs_tstamp_update_setup(attrzp, STATE_CHANGED, + mtime, ctime, B_TRUE); + } + } + /* + * Do this after setting timestamps to prevent timestamp + * update from toggling bit + */ + + if (xoap && (mask & AT_XVATTR)) { + + /* + * restore trimmed off masks + * so that return masks can be set for caller. 
+ */ + + if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { + XVA_SET_REQ(xvap, XAT_APPENDONLY); + } + if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { + XVA_SET_REQ(xvap, XAT_NOUNLINK); + } + if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { + XVA_SET_REQ(xvap, XAT_IMMUTABLE); + } + if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { + XVA_SET_REQ(xvap, XAT_NODUMP); + } + if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { + XVA_SET_REQ(xvap, XAT_AV_MODIFIED); + } + if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { + XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) + ASSERT(vp->v_type == VREG); + + zfs_xvattr_set(zp, xvap, tx); + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + if (mask != 0) + zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); + + mutex_exit(&zp->z_lock); + if (mask & (AT_UID|AT_GID|AT_MODE)) + mutex_exit(&zp->z_acl_lock); + + if (attrzp) { + if (mask & (AT_UID|AT_GID|AT_MODE)) + mutex_exit(&attrzp->z_acl_lock); + mutex_exit(&attrzp->z_lock); + } +out: + if (err == 0 && attrzp) { + err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, + xattr_count, tx); + ASSERT(err2 == 0); + } + + if (attrzp) + VN_RELE(ZTOV(attrzp)); + if (aclp) + zfs_acl_free(aclp); + + if (fuidp) { + zfs_fuid_info_free(fuidp); + fuidp = NULL; + } + + if (err) { + dmu_tx_abort(tx); + if (err == ERESTART) + goto top; + } else { + err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + dmu_tx_commit(tx); + } + +out2: + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (err); +} + +typedef struct zfs_zlock { + krwlock_t *zl_rwlock; /* lock we acquired */ + znode_t *zl_znode; /* znode we held */ + struct zfs_zlock *zl_next; /* next in list */ +} zfs_zlock_t; + +/* + * Drop locks and release vnodes that were held by zfs_rename_lock(). 
+ */ +static void +zfs_rename_unlock(zfs_zlock_t **zlpp) +{ + zfs_zlock_t *zl; + + while ((zl = *zlpp) != NULL) { + /* only entries that took a znode hold have zl_znode set */ + if (zl->zl_znode != NULL) + VN_RELE(ZTOV(zl->zl_znode)); + rw_exit(zl->zl_rwlock); + *zlpp = zl->zl_next; + kmem_free(zl, sizeof (*zl)); + } +} + +/* + * Search back through the directory tree, using the ".." entries. + * Lock each directory in the chain to prevent concurrent renames. + * Fail any attempt to move a directory into one of its own descendants. + * XXX - z_parent_lock can overlap with map or grow locks + * + * IN: szp - znode of the entry being renamed. + * tdzp - znode of the target directory. + * sdzp - znode of the source directory. + * + * OUT: zlpp - list of the locks (and held znodes) acquired. The list + * is left populated on failure as well, so the caller must + * always release it with zfs_rename_unlock(). + * + * RETURN: 0 if success + * EINVAL if tdzp is szp or one of its descendants + * error code from zfs_zget() if a parent cannot be obtained + */ +static int +zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) +{ + zfs_zlock_t *zl; + znode_t *zp = tdzp; + uint64_t rootid = zp->z_zfsvfs->z_root; + uint64_t oidp = zp->z_id; + krwlock_t *rwlp = &szp->z_parent_lock; + krw_t rw = RW_WRITER; + + /* + * First pass write-locks szp and compares to zp->z_id. + * Later passes read-lock zp and compare to zp->z_parent. + */ + do { + if (!rw_tryenter(rwlp, rw)) { + /* + * Another thread is renaming in this path. + * Note that if we are a WRITER, we don't have any + * parent_locks held yet. + */ + if (rw == RW_READER && zp->z_id > szp->z_id) { + /* + * Drop our locks and restart + */ + zfs_rename_unlock(&zl); + *zlpp = NULL; + zp = tdzp; + oidp = zp->z_id; + rwlp = &szp->z_parent_lock; + rw = RW_WRITER; + continue; + } else { + /* + * Wait for other thread to drop its locks + */ + rw_enter(rwlp, rw); + } + } + + zl = kmem_alloc(sizeof (*zl), KM_SLEEP); + zl->zl_rwlock = rwlp; + zl->zl_znode = NULL; + zl->zl_next = *zlpp; + *zlpp = zl; + + if (oidp == szp->z_id) /* We're a descendant of szp */ + return (EINVAL); + + if (oidp == rootid) /* We've hit the top */ + return (0); + + if (rw == RW_READER) { /* i.e.
not the first pass */ + int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); + if (error) + return (error); + zl->zl_znode = zp; + } + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), + &oidp, sizeof (oidp)); + rwlp = &zp->z_parent_lock; + rw = RW_READER; + + } while (zp->z_id != sdzp->z_id); + + return (0); +} + +/* + * Move an entry from the provided source directory to the target + * directory. Change the entry name as indicated. + * + * IN: sdvp - Source directory containing the "old entry". + * snm - Old entry name. + * tdvp - Target directory to contain the "new entry". + * tnm - New entry name. + * cr - credentials of caller. + * ct - caller context + * flags - case flags + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * sdvp,tdvp - ctime|mtime updated + */ +/*ARGSUSED*/ +static int +zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, + caller_context_t *ct, int flags) +{ + znode_t *tdzp, *szp, *tzp; + znode_t *sdzp = VTOZ(sdvp); + zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; + zilog_t *zilog; + vnode_t *realvp; + zfs_dirlock_t *sdl, *tdl; + dmu_tx_t *tx; + zfs_zlock_t *zl; + int cmp, serr, terr; + int error = 0; + int zflg = 0; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(sdzp); + zilog = zfsvfs->z_log; + + /* + * Make sure we have the real vp for the target directory. + */ + if (VOP_REALVP(tdvp, &realvp, ct) == 0) + tdvp = realvp; + + if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { + ZFS_EXIT(zfsvfs); + return (EXDEV); + } + + tdzp = VTOZ(tdvp); + ZFS_VERIFY_ZP(tdzp); + if (zfsvfs->z_utf8 && u8_validate(tnm, + strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (EILSEQ); + } + + if (flags & FIGNORECASE) + zflg |= ZCILOOK; + +top: + szp = NULL; + tzp = NULL; + zl = NULL; + + /* + * This is to prevent the creation of links into attribute space + * by renaming a linked file into/outof an attribute directory. + * See the comment in zfs_link() for why this is considered bad. 
+ */ + if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { + ZFS_EXIT(zfsvfs); + return (EINVAL); + } + + /* + * Lock source and target directory entries. To prevent deadlock, + * a lock ordering must be defined. We lock the directory with + * the smallest object id first, or if it's a tie, the one with + * the lexically first name. + */ + if (sdzp->z_id < tdzp->z_id) { + cmp = -1; + } else if (sdzp->z_id > tdzp->z_id) { + cmp = 1; + } else { + /* + * First compare the two name arguments without + * considering any case folding. + */ + int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); + + cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); + ASSERT(error == 0 || !zfsvfs->z_utf8); + if (cmp == 0) { + /* + * POSIX: "If the old argument and the new argument + * both refer to links to the same existing file, + * the rename() function shall return successfully + * and perform no other action." + */ + ZFS_EXIT(zfsvfs); + return (0); + } + /* + * If the file system is case-folding, then we may + * have some more checking to do. A case-folding file + * system is either supporting mixed case sensitivity + * access or is completely case-insensitive. Note + * that the file system is always case preserving. + * + * In mixed sensitivity mode case sensitive behavior + * is the default. FIGNORECASE must be used to + * explicitly request case insensitive behavior. + * + * If the source and target names provided differ only + * by case (e.g., a request to rename 'tim' to 'Tim'), + * we will treat this as a special case in the + * case-insensitive mode: as long as the source name + * is an exact match, we will allow this to proceed as + * a name-change request. 
+ */ + if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || + (zfsvfs->z_case == ZFS_CASE_MIXED && + flags & FIGNORECASE)) && + u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, + &error) == 0) { + /* + * case preserving rename request, require exact + * name matches + */ + zflg |= ZCIEXACT; + zflg &= ~ZCILOOK; + } + } + + /* + * If the source and destination directories are the same, we should + * grab the z_name_lock of that directory only once. + */ + if (sdzp == tdzp) { + zflg |= ZHAVELOCK; + rw_enter(&sdzp->z_name_lock, RW_READER); + } + + if (cmp < 0) { + serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, + ZEXISTS | zflg, NULL, NULL); + terr = zfs_dirent_lock(&tdl, + tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); + } else { + terr = zfs_dirent_lock(&tdl, + tdzp, tnm, &tzp, zflg, NULL, NULL); + serr = zfs_dirent_lock(&sdl, + sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, + NULL, NULL); + } + + if (serr) { + /* + * Source entry invalid or not there. + */ + if (!terr) { + zfs_dirent_unlock(tdl); + if (tzp) + VN_RELE(ZTOV(tzp)); + } + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + if (strcmp(snm, "..") == 0) + serr = EINVAL; + ZFS_EXIT(zfsvfs); + return (serr); + } + if (terr) { + zfs_dirent_unlock(sdl); + VN_RELE(ZTOV(szp)); + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + if (strcmp(tnm, "..") == 0) + terr = EINVAL; + ZFS_EXIT(zfsvfs); + return (terr); + } + + /* + * Must have write access at the source to remove the old entry + * and write access at the target to create the new entry. + * Note that if target and source are the same, this can be + * done in a single check. + */ + + if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) + goto out; + + if (ZTOV(szp)->v_type == VDIR) { + /* + * Check to make sure rename is valid. + * Can't do a move like this: /usr/a/b to /usr/a/b/c/d + */ + if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) + goto out; + } + + /* + * Does target exist? 
+ */ + if (tzp) { + /* + * Source and target must be the same type. + */ + if (ZTOV(szp)->v_type == VDIR) { + if (ZTOV(tzp)->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + } else { + if (ZTOV(tzp)->v_type == VDIR) { + error = EISDIR; + goto out; + } + } + /* + * POSIX dictates that when the source and target + * entries refer to the same file object, rename + * must do nothing and exit without error. + */ + if (szp->z_id == tzp->z_id) { + error = 0; + goto out; + } + } + + vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); + if (tzp) + vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); + + /* + * notify the target directory if it is not the same + * as source directory. + */ + if (tdvp != sdvp) { + vnevent_rename_dest_dir(tdvp, ct); + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); + dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); + dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); + if (sdzp != tdzp) { + dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, tdzp); + } + if (tzp) { + dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, tzp); + } + + zfs_sa_upgrade_txholds(tx, szp); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + if (zl != NULL) + zfs_rename_unlock(&zl); + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + VN_RELE(ZTOV(szp)); + if (tzp) + VN_RELE(ZTOV(tzp)); + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (tzp) /* Attempt to remove the existing target */ + error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + + if (error == 0) { + error = zfs_link_create(tdl, szp, tx, ZRENAMING); + if (error == 0) { + szp->z_pflags |= ZFS_AV_MODIFIED; + + error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void 
*)&szp->z_pflags, sizeof (uint64_t), tx); + ASSERT3U(error, ==, 0); + + error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); + if (error == 0) { + zfs_log_rename(zilog, tx, TX_RENAME | + (flags & FIGNORECASE ? TX_CI : 0), sdzp, + sdl->dl_name, tdzp, tdl->dl_name, szp); + + /* + * Update path information for the target vnode + */ + vn_renamepath(tdvp, ZTOV(szp), tnm, + strlen(tnm)); + } else { + /* + * At this point, we have successfully created + * the target name, but have failed to remove + * the source name. Since the create was done + * with the ZRENAMING flag, there are + * complications; for one, the link count is + * wrong. The easiest way to deal with this + * is to remove the newly created target, and + * return the original error. This must + * succeed; fortunately, it is very unlikely to + * fail, since we just created it. + */ + VERIFY3U(zfs_link_destroy(tdl, szp, tx, + ZRENAMING, NULL), ==, 0); + } + } + } + + dmu_tx_commit(tx); +out: + if (zl != NULL) + zfs_rename_unlock(&zl); + + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + + VN_RELE(ZTOV(szp)); + if (tzp) + VN_RELE(ZTOV(tzp)); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Insert the indicated symbolic reference entry into the directory. + * + * IN: dvp - Directory to contain new symbolic link. + * name - Name for new symlink entry. + * vap - Attributes of new entry. + * link - Target path of new symlink. + * cr - credentials of caller.
+ * ct - caller context + * flags - case flags + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * dvp - ctime|mtime updated + */ +/*ARGSUSED*/ +static int +zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, + caller_context_t *ct, int flags) +{ + znode_t *zp, *dzp = VTOZ(dvp); + zfs_dirlock_t *dl; + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zilog_t *zilog; + uint64_t len = strlen(link); + int error; + int zflg = ZNEW; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + uint64_t txtype = TX_SYMLINK; + + ASSERT(vap->va_type == VLNK); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + zilog = zfsvfs->z_log; + + if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), + NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (EILSEQ); + } + if (flags & FIGNORECASE) + zflg |= ZCILOOK; + + if (len > MAXPATHLEN) { + ZFS_EXIT(zfsvfs); + return (ENAMETOOLONG); + } + + if ((error = zfs_acl_ids_create(dzp, 0, + vap, cr, NULL, &acl_ids)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } +top: + /* + * Attempt to lock directory; fail if entry already exists.
+ */ + error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); + if (error) { + zfs_acl_ids_free(&acl_ids); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + ZFS_EXIT(zfsvfs); + return (EDQUOT); + } + tx = dmu_tx_create(zfsvfs->z_os); + fuid_dirtied = zfsvfs->z_fuid_dirty; + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); + dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE + len); + dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); + if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, + acl_ids.z_aclp->z_acl_bytes); + } + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + /* acl_ids is not freed here: it is reused after the retry */ + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + ZFS_EXIT(zfsvfs); + return (error); + } + + /* + * Create a new object for the symlink. + * For version 4 ZPL datasets the symlink will be an SA attribute. + */ + zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + mutex_enter(&zp->z_lock); + if (zp->z_is_sa) + error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), + link, len, tx); + else + zfs_sa_symlink(zp, link, len, tx); + mutex_exit(&zp->z_lock); + + zp->z_size = len; + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + &zp->z_size, sizeof (zp->z_size), tx); + /* + * Insert the new object into the directory.
+ */ + (void) zfs_link_create(dl, zp, tx, ZNEW); + + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + + zfs_acl_ids_free(&acl_ids); + + dmu_tx_commit(tx); + + zfs_dirent_unlock(dl); + + VN_RELE(ZTOV(zp)); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Return, in the buffer contained in the provided uio structure, + * the symbolic path referred to by vp. + * + * IN: vp - vnode of symbolic link. + * uoip - structure to contain the link path. + * cr - credentials of caller. + * ct - caller context + * + * OUT: uio - structure to contain the link path. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * vp - atime updated + */ +/* ARGSUSED */ +static int +zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + mutex_enter(&zp->z_lock); + if (zp->z_is_sa) + error = sa_lookup_uio(zp->z_sa_hdl, + SA_ZPL_SYMLINK(zfsvfs), uio); + else + error = zfs_sa_readlink(zp, uio); + mutex_exit(&zp->z_lock); + + ZFS_ACCESSTIME_STAMP(zfsvfs, zp); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Insert a new entry into directory tdvp referencing svp. + * + * IN: tdvp - Directory to contain new entry. + * svp - vnode of new entry. + * name - name of new entry. + * cr - credentials of caller. 
 *		ct	- caller context
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
/* ARGSUSED */
static int
zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(tdvp);
	znode_t		*tzp, *szp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	vnode_t		*realvp;
	int		error;
	int		zf = ZNEW;
	uint64_t	parent;
	uid_t		owner;

	ASSERT(tdvp->v_type == VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* If the source reports a real vnode, link that one instead. */
	if (VOP_REALVP(svp, &realvp, ct) == 0)
		svp = realvp;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (svp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/* Source must live on the same vfs and must not be a zfsctl node. */
	if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	szp = VTOZ(svp);
	ZFS_VERIFY_ZP(szp);

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/* On utf8-only file systems the new name must validate as UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}


	/*
	 * Only the (mapped) owner of the source, or a caller passing
	 * secpolicy_basic_link(), may create the link.
	 */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/* Caller needs ACE_ADD_FILE on the target directory. */
	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, dzp);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/*
		 * Drop the dirent lock before waiting.  On ERESTART we
		 * wait on the tx, abort it, and retry from "top", which
		 * re-acquires the lock and builds a fresh tx.
		 */
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(dl, szp, tx, 0);

	/* Only log the link if the directory entry was actually created. */
	if (error == 0) {
		uint64_t txtype = TX_LINK;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
	}

	/* The tx is committed whether or not zfs_link_create() succeeded. */
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	if (error == 0) {
		vnevent_link(svp, ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * zfs_null_putapage() is used when the file system has been force
 * unmounted. It just drops the pages.
 *
 * The page list is handed to pvn_write_done() with
 * B_INVAL|B_FORCE|B_ERROR; vp, offp, lenp, flags and cr are unused.
 * Always returns 0.
 */
/* ARGSUSED */
static int
zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
		size_t *lenp, int flags, cred_t *cr)
{
	pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR);
	return (0);
}

/*
 * Push a page out to disk, klustering if possible.
 *
 *	IN:	vp	- file to push page to.
 *		pp	- page to push.
 *		flags	- additional flags.
 *		cr	- credentials of caller.
 *
 *	OUT:	offp	- start of range pushed.
+ * lenp - len of range pushed. + * + * RETURN: 0 if success + * error code if failure + * + * NOTE: callers must have locked the page to be pushed. On + * exit, the page (and all other pages in the kluster) must be + * unlocked. + */ +/* ARGSUSED */ +static int +zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, + size_t *lenp, int flags, cred_t *cr) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_tx_t *tx; + u_offset_t off, koff; + size_t len, klen; + int err; + + off = pp->p_offset; + len = PAGESIZE; + /* + * If our blocksize is bigger than the page size, try to kluster + * multiple pages so that we write a full block (thus avoiding + * a read-modify-write). + */ + if (off < zp->z_size && zp->z_blksz > PAGESIZE) { + klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); + koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; + ASSERT(koff <= zp->z_size); + if (koff + klen > zp->z_size) + klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); + pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); + } + ASSERT3U(btop(len), ==, btopr(len)); + + /* + * Can't push pages past end-of-file. 
+ */ + if (off >= zp->z_size) { + /* ignore all pages */ + err = 0; + goto out; + } else if (off + len > zp->z_size) { + int npages = btopr(zp->z_size - off); + page_t *trunc; + + page_list_break(&pp, &trunc, npages); + /* ignore pages past end of file */ + if (trunc) + pvn_write_done(trunc, flags); + len = zp->z_size - off; + } + + if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || + zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { + err = EDQUOT; + goto out; + } +top: + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_write(tx, zp->z_id, off, len); + + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + err = dmu_tx_assign(tx, TXG_NOWAIT); + if (err != 0) { + if (err == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + goto out; + } + + if (zp->z_blksz <= PAGESIZE) { + caddr_t va = zfs_map_page(pp, S_READ); + ASSERT3U(len, <=, PAGESIZE); + dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); + zfs_unmap_page(pp, va); + } else { + err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); + } + + if (err == 0) { + uint64_t mtime[2], ctime[2]; + sa_bulk_attr_t bulk[3]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, + B_TRUE); + zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); + } + dmu_tx_commit(tx); + +out: + pvn_write_done(pp, (err ? B_ERROR : 0) | flags); + if (offp) + *offp = off; + if (lenp) + *lenp = len; + + return (err); +} + +/* + * Copy the portion of the file indicated from pages into the file. + * The pages are stored in a page list attached to the files vnode. + * + * IN: vp - vnode of file to push page data to. + * off - position in file to put data. + * len - amount of data to write. 
 *		flags	- flags to control the operation.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		*pp;
	size_t		io_len;
	u_offset_t	io_off;
	uint_t		blksz;
	rl_t		*rl;
	int		error = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Align this request to the file block size in case we kluster.
	 * XXX - this can result in pretty aggressive locking, which can
	 * impact simultaneous read/write access.  One option might be
	 * to break up long requests (len == 0) into block-by-block
	 * operations to get narrower locking.
	 */
	blksz = zp->z_blksz;
	if (ISP2(blksz))
		io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
	else
		io_off = 0;
	if (len > 0 && ISP2(blksz))
		io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
	else
		io_len = 0;

	/*
	 * io_len == 0 means either the caller passed len == 0 or the
	 * block size is not a power of two; both cases push everything
	 * from io_off onward.
	 */
	if (io_len == 0) {
		/*
		 * Search the entire vp list for pages >= io_off.
		 */
		rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
		error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
		goto out;
	}
	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);

	if (off > zp->z_size) {
		/* past end of file */
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Never walk beyond the page-rounded end of file. */
	len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);

	/*
	 * "off" is reused as the fixed start of the range; io_off is the
	 * cursor.  Each pass advances io_off by io_len, which is either
	 * the length zfs_putapage() reported or PAGESIZE when nothing
	 * was pushed.
	 */
	for (off = io_off; io_off < off + len; io_off += io_len) {
		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
			pp = page_lookup(vp, io_off,
			    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
		} else {
			pp = page_lookup_nowait(vp, io_off,
			    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
		}

		if (pp != NULL && pvn_getdirty(pp, flags)) {
			int err;

			/*
			 * Found a dirty page to push
			 */
			err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
			if (err)
				error = err;
		} else {
			io_len = PAGESIZE;
		}
	}
out:
	zfs_range_unlock(rl);
	/* Synchronous requests and sync=always datasets commit the ZIL. */
	if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zfsvfs->z_log, zp->z_id);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Called for the inactive vnode operation (see the VOPNAME_INACTIVE
 * entries in the vnodeops templates).  Pushes or drops any cached
 * pages, writes back a dirty atime, and then hands the znode to
 * zfs_zinactive() -- or frees it directly when it no longer has an SA
 * handle.  The whole operation runs under z_teardown_inactive_lock
 * held as reader.
 */
/*ARGSUSED*/
void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		if (vn_has_cached_data(vp)) {
			(void) pvn_vplist_dirty(vp, 0, zfs_null_putapage,
			    B_INVAL, cr);
		}

		/* Drop the last hold by hand, then free the znode. */
		mutex_enter(&zp->z_lock);
		mutex_enter(&vp->v_lock);
		ASSERT(vp->v_count == 1);
		vp->v_count = 0;
		mutex_exit(&vp->v_lock);
		mutex_exit(&zp->z_lock);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * Attempt to push any data in the page cache.  If this fails
	 * we will get kicked out later in zfs_zinactive().
	 */
	if (vn_has_cached_data(vp)) {
		(void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC,
		    cr);
	}

	/* Persist a pending atime update unless the file was unlinked. */
	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			/* Best effort: on failure the atime is not written. */
			dmu_tx_abort(tx);
		} else {
			mutex_enter(&zp->z_lock);
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
			zp->z_atime_dirty = 0;
			mutex_exit(&zp->z_lock);
			dmu_tx_commit(tx);
		}
	}

	zfs_zinactive(zp);
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
}

/*
 * Bounds-check the seek operation.
+ * + * IN: vp - vnode seeking within + * ooff - old file offset + * noffp - pointer to new file offset + * ct - caller context + * + * RETURN: 0 if success + * EINVAL if new offset invalid + */ +/* ARGSUSED */ +static int +zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, + caller_context_t *ct) +{ + if (vp->v_type == VDIR) + return (0); + return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); +} + +/* + * Pre-filter the generic locking function to trap attempts to place + * a mandatory lock on a memory mapped file. + */ +static int +zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, + flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + /* + * We are following the UFS semantics with respect to mapcnt + * here: If we see that the file is mapped already, then we will + * return an error, but we don't worry about races between this + * function and zfs_map(). + */ + if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { + ZFS_EXIT(zfsvfs); + return (EAGAIN); + } + ZFS_EXIT(zfsvfs); + return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); +} + +/* + * If we can't find a page in the cache, we will create a new page + * and fill it with file data. For efficiency, we may try to fill + * multiple pages at once (klustering) to fill up the supplied page + * list. Note that the pages to be filled are held with an exclusive + * lock to prevent access by other threads while they are being filled. 
+ */ +static int +zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, + caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) +{ + znode_t *zp = VTOZ(vp); + page_t *pp, *cur_pp; + objset_t *os = zp->z_zfsvfs->z_os; + u_offset_t io_off, total; + size_t io_len; + int err; + + if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { + /* + * We only have a single page, don't bother klustering + */ + io_off = off; + io_len = PAGESIZE; + pp = page_create_va(vp, io_off, io_len, + PG_EXCL | PG_WAIT, seg, addr); + } else { + /* + * Try to find enough pages to fill the page list + */ + pp = pvn_read_kluster(vp, off, seg, addr, &io_off, + &io_len, off, plsz, 0); + } + if (pp == NULL) { + /* + * The page already exists, nothing to do here. + */ + *pl = NULL; + return (0); + } + + /* + * Fill the pages in the kluster. + */ + cur_pp = pp; + for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { + caddr_t va; + + ASSERT3U(io_off, ==, cur_pp->p_offset); + va = zfs_map_page(cur_pp, S_WRITE); + err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, + DMU_READ_PREFETCH); + zfs_unmap_page(cur_pp, va); + if (err) { + /* On error, toss the entire kluster */ + pvn_read_done(pp, B_ERROR); + /* convert checksum errors into IO errors */ + if (err == ECKSUM) + err = EIO; + return (err); + } + cur_pp = cur_pp->p_next; + } + + /* + * Fill in the page list array from the kluster starting + * from the desired offset `off'. + * NOTE: the page list will always be null terminated. + */ + pvn_plist_init(pp, pl, plsz, off, io_len, rw); + ASSERT(pl == NULL || (*pl)->p_offset == off); + + return (0); +} + +/* + * Return pointers to the pages for the file region [off, off + len] + * in the pl array. If plsz is greater than len, this function may + * also return page pointers from after the specified region + * (i.e. the region [off, off + plsz]). These additional pages are + * only returned if they are already in the cache, or were created as + * part of a klustered read. 
 *
 *	IN:	vp	- vnode of file to get data from.
 *		off	- position in file to get data from.
 *		len	- amount of data to retrieve.
 *		plsz	- length of provided page list.
 *		seg	- segment to obtain pages for.
 *		addr	- virtual address of fault.
 *		rw	- mode of created pages.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	OUT:	protp	- protection mode of created pages.
 *		pl	- list of pages created.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - atime updated
 */
/* ARGSUSED */
static int
zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
	page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
	enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		**pl0 = pl;	/* start of list, for error unwind */
	int		err = 0;

	/* we do our own caching, faultahead is unnecessary */
	if (pl == NULL)
		return (0);
	else if (len > plsz)
		len = plsz;
	else
		len = P2ROUNDUP(len, PAGESIZE);
	ASSERT(plsz >= len);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (protp)
		*protp = PROT_ALL;

	/*
	 * Loop through the requested range [off, off + len) looking
	 * for pages.  If we don't find a page, we will need to create
	 * a new page and fill it with data from the file.
	 */
	while (len > 0) {
		if (*pl = page_lookup(vp, off, SE_SHARED))
			*(pl+1) = NULL;
		else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
			goto out;
		/*
		 * Step over every page just placed in the list (one from
		 * the lookup, possibly several from a klustered fill),
		 * advancing the file offset, fault address and remaining
		 * counts in lock step.
		 */
		while (*pl) {
			ASSERT3U((*pl)->p_offset, ==, off);
			off += PAGESIZE;
			addr += PAGESIZE;
			if (len > 0) {
				ASSERT3U(len, >=, PAGESIZE);
				len -= PAGESIZE;
			}
			ASSERT3U(plsz, >=, PAGESIZE);
			plsz -= PAGESIZE;
			pl++;
		}
	}

	/*
	 * Fill out the page array with any pages already in the cache.
	 */
	while (plsz > 0 &&
	    (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
		off += PAGESIZE;
		plsz -= PAGESIZE;
	}
out:
	if (err) {
		/*
		 * Release any pages we have previously locked.
		 */
		while (pl > pl0)
			page_unlock(*--pl);
	} else {
		ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	}

	/* Terminate the list (at pl0 after an error unwind). */
	*pl = NULL;

	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * Request a memory map for a section of a file.  This code interacts
 * with common code and the VM system as follows:
 *
 *	common code calls mmap(), which ends up in smmap_common()
 *
 *	this calls VOP_MAP(), which takes you into (say) zfs
 *
 *	zfs_map() calls as_map(), passing segvn_create() as the callback
 *
 *	segvn_create() creates the new segment and calls VOP_ADDMAP()
 *
 *	zfs_addmap() updates z_mapcnt
 */
/*ARGSUSED*/
static int
zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	segvn_crargs_t	vn_a;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* No writable mappings of immutable/readonly/appendonly files. */
	if ((prot & PROT_WRITE) && (zp->z_pflags &
	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/* No readable or executable mappings of AV-quarantined files. */
	if ((prot & (PROT_READ | PROT_EXEC)) &&
	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
		ZFS_EXIT(zfsvfs);
		return (EACCES);
	}

	if (vp->v_flag & VNOMAP) {
		ZFS_EXIT(zfsvfs);
		return (ENOSYS);
	}

	/* Reject negative offsets and ranges that exceed MAXOFFSET_T. */
	if (off < 0 || len > MAXOFFSET_T - off) {
		ZFS_EXIT(zfsvfs);
		return (ENXIO);
	}

	if (vp->v_type != VREG) {
		ZFS_EXIT(zfsvfs);
		return (ENODEV);
	}

	/*
	 * If file is locked, disallow mapping.
	 */
	if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) {
		ZFS_EXIT(zfsvfs);
		return (EAGAIN);
	}

	/* The address-space range lock is held from here to as_map(). */
	as_rangelock(as);
	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
	if (error != 0) {
		as_rangeunlock(as);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.cred = cr;
	vn_a.amp = NULL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);

	as_rangeunlock(as);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Account for a new mapping: atomically add the number of pages
 * covered by len (btopr(len)) to the znode's z_mapcnt.  Always
 * returns 0.
 */
/* ARGSUSED */
static int
zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	uint64_t pages = btopr(len);

	atomic_add_64(&VTOZ(vp)->z_mapcnt, pages);
	return (0);
}

/*
 * The reason we push dirty pages as part of zfs_delmap() is so that we get a
 * more accurate mtime for the associated file.  Since we don't have a way of
 * detecting when the data was actually modified, we have to resort to
 * heuristics.  If an explicit msync() is done, then we mark the mtime when the
 * last page is pushed.  The problem occurs when the msync() call is omitted,
 * which is by far the most common case:
 *
 * 	open()
 * 	mmap()
 * 	<modify memory>
 * 	munmap()
 * 	close()
 * 	<time lapse>
 * 	putpage() via fsflush
 *
 * If we wait until fsflush to come along, we can have a modification time that
 * is some arbitrary point in the future.  In order to prevent this in the
 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
 * torn down.
 */
/* ARGSUSED */
static int
zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	uint64_t pages = btopr(len);

	/* Undo the page count that zfs_addmap() added for this range. */
	ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
	atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);

	/* Kick off an asynchronous push for shared writable mappings. */
	if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
	    vn_has_cached_data(vp))
		(void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);

	return (0);
}

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 *	IN:	vp	- vnode of file to free data in.
 *		cmd	- action to take (only F_FREESP supported).
 *		bfp	- section of file to free/alloc.
 *		flag	- current file open mode flags.
 *		offset	- current file offset.
 *		cr	- credentials of caller [UNUSED].
 *		ct	- caller context.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime|mtime updated
 */
/* ARGSUSED */
static int
zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint64_t	off, len;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (cmd != F_FREESP) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* Let convoff() rewrite *bfp before l_start/l_len are read. */
	if (error = convoff(vp, bfp, 0, offset)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (bfp->l_len < 0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	off = bfp->l_start;
	len = bfp->l_len; /* 0 means from off to end of file */

	error = zfs_freesp(zp, off, len, flag, TRUE);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Build a file identifier for vp in *fidp.
 *
 * The fid carries the object number and the low 32 bits of the
 * generation number, each stored one byte at a time, least significant
 * byte first.  When zfsvfs->z_parent differs from zfsvfs the long form
 * is used, which additionally carries the objset id and a (currently
 * zero) objset generation.
 *
 *	RETURN:	0 if success
 *		ENOSPC if fidp->fid_len is too small; fid_len is set to
 *		the required size
 *		error code from sa_lookup() of the generation number
 */
/*ARGSUSED*/
static int
zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint32_t	gen;
	uint64_t	gen64;
	uint64_t	object = zp->z_id;
	zfid_short_t	*zfid;
	int		size, i, error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
	    &gen64, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	gen = (uint32_t)gen64;

	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
	if (fidp->fid_len < size) {
		/* Tell the caller how much room is needed. */
		fidp->fid_len = size;
		ZFS_EXIT(zfsvfs);
		return (ENOSPC);
	}

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = size;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* Must have a non-zero generation number to distinguish from .zfs */
	if (gen == 0)
		gen = 1;
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));

	if (size == LONG_FID_LEN) {
		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
		zfid_long_t	*zlfid;

		zlfid = (zfid_long_t *)fidp;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));

		/* XXX - this should be the generation number for the objset */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			zlfid->zf_setgen[i] = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Answer pathconf queries for vp, storing the result in *valp.
 * Commands not handled here are passed on to fs_pathconf().
 * Only _PC_XATTR_EXISTS enters the file system (ZFS_ENTER) and can
 * fail with an error from zfs_dirent_lock(); the other cases handled
 * here return 0.
 */
static int
zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp, *xzp;
	zfsvfs_t	*zfsvfs;
	zfs_dirlock_t	*dl;
	int		error;

	switch (cmd) {
	case _PC_LINK_MAX:
		*valp = ULONG_MAX;
		return (0);

	case _PC_FILESIZEBITS:
		*valp = 64;
		return (0);

	case _PC_XATTR_EXISTS:
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		*valp = 0;
		/* Look up the xattr directory; report 1 if it is non-empty. */
		error = zfs_dirent_lock(&dl, zp, "", &xzp,
		    ZXATTR | ZEXISTS | ZSHARED, NULL, NULL);
		if (error == 0) {
			zfs_dirent_unlock(dl);
			if (!zfs_dirempty(xzp))
				*valp = 1;
			VN_RELE(ZTOV(xzp));
		} else if (error == ENOENT) {
			/*
			 * If there aren't extended attributes, it's the
			 * same as having zero of them.
			 */
			error = 0;
		}
		ZFS_EXIT(zfsvfs);
		return (error);

	case _PC_SATTR_ENABLED:
	case _PC_SATTR_EXISTS:
		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
		    (vp->v_type == VREG || vp->v_type == VDIR);
		return (0);

	case _PC_ACCESS_FILTERING:
		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
		    vp->v_type == VDIR;
		return (0);

	case _PC_ACL_ENABLED:
		*valp = _ACL_ACE_ENABLED;
		return (0);

	case _PC_MIN_HOLE_SIZE:
		*valp = (ulong_t)SPA_MINBLOCKSIZE;
		return (0);

	case _PC_TIMESTAMP_RESOLUTION:
		/* nanosecond timestamp resolution */
		*valp = 1L;
		return (0);

	default:
		return (fs_pathconf(vp, cmd, valp, cr, ct));
	}
}

/*
 * Fetch the ACL of vp into *vsecp via zfs_getacl().  The ACL access
 * check is skipped when ATTR_NOACLCHECK is set in flag.
 */
/*ARGSUSED*/
static int
zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;
	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Set the ACL of vp from *vsecp via zfs_setacl().  The ACL access
 * check is skipped when ATTR_NOACLCHECK is set in flag.  The ZIL is
 * committed afterwards on sync=always datasets, whether or not
 * zfs_setacl() succeeded.
 */
/*ARGSUSED*/
static int
zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;
	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	zilog_t	*zilog = zfsvfs->z_log;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	error = zfs_setacl(zp, vsecp, skipaclchk, cr);

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Tunable, both must be a power of 2.
 *
 * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf
 * zcr_blksz_max: if set to less than the file block size, allow loaning out of
 *	an arcbuf for a partial block read
 */
int zcr_blksz_min = (1 << 10);	/* 1K */
int zcr_blksz_max = (1 << 17);	/* 128K */

/*
 * Prepare a zero-copy uio (xuio) for a read or a write of vp.
 *
 * For UIO_WRITE the request is split into an optional partial leading
 * buffer (preamble), a run of full blocks, and an optional partial
 * trailing buffer (postamble); one arc_buf is requested for each and
 * attached to the xuio.  For UIO_READ only eligibility is checked here;
 * no buffers are attached in this function.
 *
 *	RETURN:	0 if the xuio was marked for zero-copy
 *		EINVAL if the request is not eligible
 */
/*ARGSUSED*/
static int
zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr,
    caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int max_blksz = zfsvfs->z_max_blksz;
	uio_t *uio = &xuio->xu_uio;
	ssize_t size = uio->uio_resid;
	offset_t offset = uio->uio_loffset;
	int blksz;
	int fullblk, i;
	arc_buf_t *abuf;
	ssize_t maxsize;
	int preamble, postamble;

	if (xuio->xu_type != UIOTYPE_ZEROCOPY)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	switch (ioflag) {
	case UIO_WRITE:
		/*
		 * Loan out an arc_buf for write if write size is bigger than
		 * max_blksz, and the file's block size is also max_blksz.
		 */
		blksz = max_blksz;
		if (size < blksz || zp->z_blksz != blksz) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}
		/*
		 * Caller requests buffers for write before knowing where the
		 * write offset might be (e.g. NFS TCP write).
		 */
		if (offset == -1) {
			preamble = 0;
		} else {
			/* Bytes needed to reach the next block boundary. */
			preamble = P2PHASE(offset, blksz);
			if (preamble) {
				preamble = blksz - preamble;
				size -= preamble;
			}
		}

		/* Bytes left over after the last full block. */
		postamble = P2PHASE(size, blksz);
		size -= postamble;

		fullblk = size / blksz;
		(void) dmu_xuio_init(xuio,
		    (preamble != 0) + fullblk + (postamble != 0));
		DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble,
		    int, postamble, int,
		    (preamble != 0) + fullblk + (postamble != 0));

		/*
		 * Have to fix iov base/len for partial buffers.  They
		 * currently represent full arc_buf's.
		 */
		if (preamble) {
			/* data begins in the middle of the arc_buf */
			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
			    blksz);
			ASSERT(abuf);
			(void) dmu_xuio_add(xuio, abuf,
			    blksz - preamble, preamble);
		}

		for (i = 0; i < fullblk; i++) {
			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
			    blksz);
			ASSERT(abuf);
			(void) dmu_xuio_add(xuio, abuf, 0, blksz);
		}

		if (postamble) {
			/* data ends in the middle of the arc_buf */
			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
			    blksz);
			ASSERT(abuf);
			(void) dmu_xuio_add(xuio, abuf, 0, postamble);
		}
		break;
	case UIO_READ:
		/*
		 * Loan out an arc_buf for read if the read size is larger than
		 * the current file block size.  Block alignment is not
		 * considered.  Partial arc_buf will be loaned out for read.
		 */
		blksz = zp->z_blksz;
		if (blksz < zcr_blksz_min)
			blksz = zcr_blksz_min;
		if (blksz > zcr_blksz_max)
			blksz = zcr_blksz_max;
		/* avoid potential complexity of dealing with it */
		if (blksz > max_blksz) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		/* Clamp the request to what is left before end of file. */
		maxsize = zp->z_size - uio->uio_loffset;
		if (size > maxsize)
			size = maxsize;

		if (size < blksz || vn_has_cached_data(vp)) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}
		break;
	default:
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* Mark the uio as zero-copy and record the direction. */
	uio->uio_extflg = UIO_XUIO;
	XUIO_XUZC_RW(xuio) = ioflag;
	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Return the arc_bufs still attached to a zero-copy uio and tear the
 * xuio state down.  Always returns 0.
 */
/*ARGSUSED*/
static int
zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct)
{
	int i;
	arc_buf_t *abuf;
	int ioflag = XUIO_XUZC_RW(xuio);

	ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY);

	/* Walk the attached buffers from the last one down to the first. */
	i = dmu_xuio_cnt(xuio);
	while (i-- > 0) {
		abuf = dmu_xuio_arcbuf(xuio, i);
		/*
		 * if abuf == NULL, it must be a write buffer
		 * that has been returned in zfs_write().
		 */
		if (abuf)
			dmu_return_arcbuf(abuf);
		ASSERT(abuf || ioflag == UIO_WRITE);
	}

	dmu_xuio_fini(xuio);
	return (0);
}

/*
 * Predeclare these here so that the compiler assumes that
 * this is an "old style" function declaration that does
 * not include arguments => we won't get type mismatch errors
 * in the initializations that follow.
 */
static int zfs_inval();
static int zfs_isdir();

/* Generic handler for unsupported operations: always EINVAL. */
static int
zfs_inval()
{
	return (EINVAL);
}

/* Generic handler for file-only operations on directories: always EISDIR. */
static int
zfs_isdir()
{
	return (EISDIR);
}
/*
 * Directory vnode operations template
 */
vnodeops_t *zfs_dvnodeops;
const fs_operation_def_t zfs_dvnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = zfs_open },
	VOPNAME_CLOSE,		{ .vop_close = zfs_close },
	VOPNAME_READ,		{ .error = zfs_isdir },
	VOPNAME_WRITE,		{ .error = zfs_isdir },
	VOPNAME_IOCTL,		{ .vop_ioctl = zfs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = zfs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = zfs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = zfs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = zfs_lookup },
	VOPNAME_CREATE,		{ .vop_create = zfs_create },
	VOPNAME_REMOVE,		{ .vop_remove = zfs_remove },
	VOPNAME_LINK,		{ .vop_link = zfs_link },
	VOPNAME_RENAME,		{ .vop_rename = zfs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = zfs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = zfs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = zfs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = zfs_symlink },
	VOPNAME_FSYNC,		{ .vop_fsync = zfs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = zfs_inactive },
	VOPNAME_FID,		{ .vop_fid = zfs_fid },
	VOPNAME_SEEK,		{ .vop_seek = zfs_seek },
	VOPNAME_PATHCONF,	{ .vop_pathconf = zfs_pathconf },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = zfs_getsecattr },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = zfs_setsecattr },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};

/*
 * Regular file vnode operations template
 */
vnodeops_t *zfs_fvnodeops;
const fs_operation_def_t zfs_fvnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = zfs_open },
	VOPNAME_CLOSE,		{ .vop_close = zfs_close },
	VOPNAME_READ,		{ .vop_read = zfs_read },
	VOPNAME_WRITE,		{ .vop_write = zfs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = zfs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = zfs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = zfs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = zfs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = zfs_lookup },
	VOPNAME_RENAME,		{ .vop_rename = zfs_rename },
	VOPNAME_FSYNC,		{ .vop_fsync = zfs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = zfs_inactive },
	VOPNAME_FID,		{ .vop_fid = zfs_fid },
	VOPNAME_SEEK,		{ .vop_seek = zfs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = zfs_frlock },
	VOPNAME_SPACE,		{ .vop_space = zfs_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = zfs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = zfs_putpage },
	VOPNAME_MAP,		{ .vop_map = zfs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = zfs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = zfs_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = zfs_pathconf },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = zfs_getsecattr },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = zfs_setsecattr },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	VOPNAME_REQZCBUF,	{ .vop_reqzcbuf = zfs_reqzcbuf },
	VOPNAME_RETZCBUF,	{ .vop_retzcbuf = zfs_retzcbuf },
	NULL,			NULL
};

/*
 * Symbolic link vnode operations template
 */
vnodeops_t *zfs_symvnodeops;
const fs_operation_def_t zfs_symvnodeops_template[] = {
	VOPNAME_GETATTR,	{ .vop_getattr = zfs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = zfs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = zfs_access },
	VOPNAME_RENAME,		{ .vop_rename = zfs_rename },
	VOPNAME_READLINK,	{ .vop_readlink = zfs_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = zfs_inactive },
	VOPNAME_FID,		{ .vop_fid = zfs_fid },
	VOPNAME_PATHCONF,	{ .vop_pathconf = zfs_pathconf },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};

/*
 * special share hidden files vnode
operations template + */ +vnodeops_t *zfs_sharevnodeops; +const fs_operation_def_t zfs_sharevnodeops_template[] = { + VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, + VOPNAME_ACCESS, { .vop_access = zfs_access }, + VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, + VOPNAME_FID, { .vop_fid = zfs_fid }, + VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, + VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, + VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, + NULL, NULL +}; + +/* + * Extended attribute directory vnode operations template + * This template is identical to the directory vnodes + * operation template except for restricted operations: + * VOP_MKDIR() + * VOP_SYMLINK() + * Note that there are other restrictions embedded in: + * zfs_create() - restrict type to VREG + * zfs_link() - no links into/out of attribute space + * zfs_rename() - no moves into/out of attribute space + */ +vnodeops_t *zfs_xdvnodeops; +const fs_operation_def_t zfs_xdvnodeops_template[] = { + VOPNAME_OPEN, { .vop_open = zfs_open }, + VOPNAME_CLOSE, { .vop_close = zfs_close }, + VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, + VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, + VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, + VOPNAME_ACCESS, { .vop_access = zfs_access }, + VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, + VOPNAME_CREATE, { .vop_create = zfs_create }, + VOPNAME_REMOVE, { .vop_remove = zfs_remove }, + VOPNAME_LINK, { .vop_link = zfs_link }, + VOPNAME_RENAME, { .vop_rename = zfs_rename }, + VOPNAME_MKDIR, { .error = zfs_inval }, + VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, + VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, + VOPNAME_SYMLINK, { .error = zfs_inval }, + VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, + VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, + VOPNAME_FID, { .vop_fid = zfs_fid }, + VOPNAME_SEEK, { .vop_seek = zfs_seek }, + VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, + 
VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, + VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, + NULL, NULL +}; + +/* + * Error vnode operations template + */ +vnodeops_t *zfs_evnodeops; +const fs_operation_def_t zfs_evnodeops_template[] = { + VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, + VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, + NULL, NULL +}; diff --git a/uts/common/fs/zfs/zfs_znode.c b/uts/common/fs/zfs/zfs_znode.c new file mode 100644 index 000000000000..12639a44a9c4 --- /dev/null +++ b/uts/common/fs/zfs/zfs_znode.c @@ -0,0 +1,2121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +/* Portions Copyright 2007 Jeremy Teo */ + +#ifdef _KERNEL +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/resource.h> +#include <sys/mntent.h> +#include <sys/mkdev.h> +#include <sys/u8_textprep.h> +#include <sys/dsl_dataset.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/unistd.h> +#include <sys/mode.h> +#include <sys/atomic.h> +#include <vm/pvn.h> +#include "fs/fs_subr.h" +#include <sys/zfs_dir.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_rlock.h> +#include <sys/zfs_fuid.h> +#include <sys/dnode.h> +#include <sys/fs/zfs.h> +#include <sys/kidmap.h> +#endif /* _KERNEL */ + +#include <sys/dmu.h> +#include <sys/refcount.h> +#include <sys/stat.h> +#include <sys/zap.h> +#include <sys/zfs_znode.h> +#include <sys/sa.h> +#include <sys/zfs_sa.h> +#include <sys/zfs_stat.h> + +#include "zfs_prop.h" +#include "zfs_comutil.h" + +/* + * Define ZNODE_STATS to turn on statistic gathering. By default, it is only + * turned on when DEBUG is also defined. + */ +#ifdef DEBUG +#define ZNODE_STATS +#endif /* DEBUG */ + +#ifdef ZNODE_STATS +#define ZNODE_STAT_ADD(stat) ((stat)++) +#else +#define ZNODE_STAT_ADD(stat) /* nothing */ +#endif /* ZNODE_STATS */ + +/* + * Functions needed for userland (ie: libzpool) are not put under + * #ifdef_KERNEL; the rest of the functions have dependencies + * (such as VFS logic) that will not compile easily in userland. + */ +#ifdef _KERNEL +/* + * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to + * be freed before it can be safely accessed. + */ +krwlock_t zfsvfs_lock; + +static kmem_cache_t *znode_cache = NULL; + +/*ARGSUSED*/ +static void +znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) +{ + /* + * We should never drop all dbuf refs without first clearing + * the eviction callback. 
+ */ + panic("evicting znode %p\n", user_ptr); +} + +/*ARGSUSED*/ +static int +zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) +{ + znode_t *zp = buf; + + ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); + + zp->z_vnode = vn_alloc(kmflags); + if (zp->z_vnode == NULL) { + return (-1); + } + ZTOV(zp)->v_data = zp; + + list_link_init(&zp->z_link_node); + + mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); + mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); + + mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); + avl_create(&zp->z_range_avl, zfs_range_compare, + sizeof (rl_t), offsetof(rl_t, r_node)); + + zp->z_dirlocks = NULL; + zp->z_acl_cached = NULL; + zp->z_moved = 0; + return (0); +} + +/*ARGSUSED*/ +static void +zfs_znode_cache_destructor(void *buf, void *arg) +{ + znode_t *zp = buf; + + ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); + ASSERT(ZTOV(zp)->v_data == zp); + vn_free(ZTOV(zp)); + ASSERT(!list_link_active(&zp->z_link_node)); + mutex_destroy(&zp->z_lock); + rw_destroy(&zp->z_parent_lock); + rw_destroy(&zp->z_name_lock); + mutex_destroy(&zp->z_acl_lock); + avl_destroy(&zp->z_range_avl); + mutex_destroy(&zp->z_range_lock); + + ASSERT(zp->z_dirlocks == NULL); + ASSERT(zp->z_acl_cached == NULL); +} + +#ifdef ZNODE_STATS +static struct { + uint64_t zms_zfsvfs_invalid; + uint64_t zms_zfsvfs_recheck1; + uint64_t zms_zfsvfs_unmounted; + uint64_t zms_zfsvfs_recheck2; + uint64_t zms_obj_held; + uint64_t zms_vnode_locked; + uint64_t zms_not_only_dnlc; +} znode_move_stats; +#endif /* ZNODE_STATS */ + +static void +zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) +{ + vnode_t *vp; + + /* Copy fields. */ + nzp->z_zfsvfs = ozp->z_zfsvfs; + + /* Swap vnodes. 
*/ + vp = nzp->z_vnode; + nzp->z_vnode = ozp->z_vnode; + ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ + ZTOV(ozp)->v_data = ozp; + ZTOV(nzp)->v_data = nzp; + + nzp->z_id = ozp->z_id; + ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ + ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); + nzp->z_unlinked = ozp->z_unlinked; + nzp->z_atime_dirty = ozp->z_atime_dirty; + nzp->z_zn_prefetch = ozp->z_zn_prefetch; + nzp->z_blksz = ozp->z_blksz; + nzp->z_seq = ozp->z_seq; + nzp->z_mapcnt = ozp->z_mapcnt; + nzp->z_gen = ozp->z_gen; + nzp->z_sync_cnt = ozp->z_sync_cnt; + nzp->z_is_sa = ozp->z_is_sa; + nzp->z_sa_hdl = ozp->z_sa_hdl; + bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2); + nzp->z_links = ozp->z_links; + nzp->z_size = ozp->z_size; + nzp->z_pflags = ozp->z_pflags; + nzp->z_uid = ozp->z_uid; + nzp->z_gid = ozp->z_gid; + nzp->z_mode = ozp->z_mode; + + /* + * Since this is just an idle znode and kmem is already dealing with + * memory pressure, release any cached ACL. + */ + if (ozp->z_acl_cached) { + zfs_acl_free(ozp->z_acl_cached); + ozp->z_acl_cached = NULL; + } + + sa_set_userp(nzp->z_sa_hdl, nzp); + + /* + * Invalidate the original znode by clearing fields that provide a + * pointer back to the znode. Set the low bit of the vfs pointer to + * ensure that zfs_znode_move() recognizes the znode as invalid in any + * subsequent callback. + */ + ozp->z_sa_hdl = NULL; + POINTER_INVALIDATE(&ozp->z_zfsvfs); + + /* + * Mark the znode. + */ + nzp->z_moved = 1; + ozp->z_moved = (uint8_t)-1; +} + +/*ARGSUSED*/ +static kmem_cbrc_t +zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) +{ + znode_t *ozp = buf, *nzp = newbuf; + zfsvfs_t *zfsvfs; + vnode_t *vp; + + /* + * The znode is on the file system's list of known znodes if the vfs + * pointer is valid. 
We set the low bit of the vfs pointer when freeing + * the znode to invalidate it, and the memory patterns written by kmem + * (baddcafe and deadbeef) set at least one of the two low bits. A newly + * created znode sets the vfs pointer last of all to indicate that the + * znode is known and in a valid state to be moved by this function. + */ + zfsvfs = ozp->z_zfsvfs; + if (!POINTER_IS_VALID(zfsvfs)) { + ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); + return (KMEM_CBRC_DONT_KNOW); + } + + /* + * Close a small window in which it's possible that the filesystem could + * be unmounted and freed, and zfsvfs, though valid in the previous + * statement, could point to unrelated memory by the time we try to + * prevent the filesystem from being unmounted. + */ + rw_enter(&zfsvfs_lock, RW_WRITER); + if (zfsvfs != ozp->z_zfsvfs) { + rw_exit(&zfsvfs_lock); + ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); + return (KMEM_CBRC_DONT_KNOW); + } + + /* + * If the znode is still valid, then so is the file system. We know that + * no valid file system can be freed while we hold zfsvfs_lock, so we + * can safely ensure that the filesystem is not and will not be + * unmounted. The next statement is equivalent to ZFS_ENTER(). + */ + rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); + if (zfsvfs->z_unmounted) { + ZFS_EXIT(zfsvfs); + rw_exit(&zfsvfs_lock); + ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); + return (KMEM_CBRC_DONT_KNOW); + } + rw_exit(&zfsvfs_lock); + + mutex_enter(&zfsvfs->z_znodes_lock); + /* + * Recheck the vfs pointer in case the znode was removed just before + * acquiring the lock. + */ + if (zfsvfs != ozp->z_zfsvfs) { + mutex_exit(&zfsvfs->z_znodes_lock); + ZFS_EXIT(zfsvfs); + ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); + return (KMEM_CBRC_DONT_KNOW); + } + + /* + * At this point we know that as long as we hold z_znodes_lock, the + * znode cannot be freed and fields within the znode can be safely + * accessed. 
Now, prevent a race with zfs_zget(). + */ + if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { + mutex_exit(&zfsvfs->z_znodes_lock); + ZFS_EXIT(zfsvfs); + ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); + return (KMEM_CBRC_LATER); + } + + vp = ZTOV(ozp); + if (mutex_tryenter(&vp->v_lock) == 0) { + ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); + mutex_exit(&zfsvfs->z_znodes_lock); + ZFS_EXIT(zfsvfs); + ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); + return (KMEM_CBRC_LATER); + } + + /* Only move znodes that are referenced _only_ by the DNLC. */ + if (vp->v_count != 1 || !vn_in_dnlc(vp)) { + mutex_exit(&vp->v_lock); + ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); + mutex_exit(&zfsvfs->z_znodes_lock); + ZFS_EXIT(zfsvfs); + ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); + return (KMEM_CBRC_LATER); + } + + /* + * The znode is known and in a valid state to move. We're holding the + * locks needed to execute the critical section. + */ + zfs_znode_move_impl(ozp, nzp); + mutex_exit(&vp->v_lock); + ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); + + list_link_replace(&ozp->z_link_node, &nzp->z_link_node); + mutex_exit(&zfsvfs->z_znodes_lock); + ZFS_EXIT(zfsvfs); + + return (KMEM_CBRC_YES); +} + +void +zfs_znode_init(void) +{ + /* + * Initialize zcache + */ + rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); + ASSERT(znode_cache == NULL); + znode_cache = kmem_cache_create("zfs_znode_cache", + sizeof (znode_t), 0, zfs_znode_cache_constructor, + zfs_znode_cache_destructor, NULL, NULL, NULL, 0); + kmem_cache_set_move(znode_cache, zfs_znode_move); +} + +void +zfs_znode_fini(void) +{ + /* + * Cleanup vfs & vnode ops + */ + zfs_remove_op_tables(); + + /* + * Cleanup zcache + */ + if (znode_cache) + kmem_cache_destroy(znode_cache); + znode_cache = NULL; + rw_destroy(&zfsvfs_lock); +} + +struct vnodeops *zfs_dvnodeops; +struct vnodeops *zfs_fvnodeops; +struct vnodeops *zfs_symvnodeops; +struct vnodeops *zfs_xdvnodeops; +struct vnodeops *zfs_evnodeops; +struct vnodeops *zfs_sharevnodeops; + +void 
+zfs_remove_op_tables() +{ + /* + * Remove vfs ops + */ + ASSERT(zfsfstype); + (void) vfs_freevfsops_by_type(zfsfstype); + zfsfstype = 0; + + /* + * Remove vnode ops + */ + if (zfs_dvnodeops) + vn_freevnodeops(zfs_dvnodeops); + if (zfs_fvnodeops) + vn_freevnodeops(zfs_fvnodeops); + if (zfs_symvnodeops) + vn_freevnodeops(zfs_symvnodeops); + if (zfs_xdvnodeops) + vn_freevnodeops(zfs_xdvnodeops); + if (zfs_evnodeops) + vn_freevnodeops(zfs_evnodeops); + if (zfs_sharevnodeops) + vn_freevnodeops(zfs_sharevnodeops); + + zfs_dvnodeops = NULL; + zfs_fvnodeops = NULL; + zfs_symvnodeops = NULL; + zfs_xdvnodeops = NULL; + zfs_evnodeops = NULL; + zfs_sharevnodeops = NULL; +} + +extern const fs_operation_def_t zfs_dvnodeops_template[]; +extern const fs_operation_def_t zfs_fvnodeops_template[]; +extern const fs_operation_def_t zfs_xdvnodeops_template[]; +extern const fs_operation_def_t zfs_symvnodeops_template[]; +extern const fs_operation_def_t zfs_evnodeops_template[]; +extern const fs_operation_def_t zfs_sharevnodeops_template[]; + +int +zfs_create_op_tables() +{ + int error; + + /* + * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() + * due to a failure to remove the the 2nd modlinkage (zfs_modldrv). + * In this case we just return as the ops vectors are already set up. 
+ */ + if (zfs_dvnodeops) + return (0); + + error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, + &zfs_dvnodeops); + if (error) + return (error); + + error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, + &zfs_fvnodeops); + if (error) + return (error); + + error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, + &zfs_symvnodeops); + if (error) + return (error); + + error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, + &zfs_xdvnodeops); + if (error) + return (error); + + error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, + &zfs_evnodeops); + if (error) + return (error); + + error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, + &zfs_sharevnodeops); + + return (error); +} + +int +zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +{ + zfs_acl_ids_t acl_ids; + vattr_t vattr; + znode_t *sharezp; + vnode_t *vp; + znode_t *zp; + int error; + + vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; + vattr.va_type = VDIR; + vattr.va_mode = S_IFDIR|0555; + vattr.va_uid = crgetuid(kcred); + vattr.va_gid = crgetgid(kcred); + + sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); + ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs)); + sharezp->z_moved = 0; + sharezp->z_unlinked = 0; + sharezp->z_atime_dirty = 0; + sharezp->z_zfsvfs = zfsvfs; + sharezp->z_is_sa = zfsvfs->z_use_sa; + + vp = ZTOV(sharezp); + vn_reinit(vp); + vp->v_type = VDIR; + + VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, + kcred, NULL, &acl_ids)); + zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); + ASSERT3P(zp, ==, sharezp); + ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ + POINTER_INVALIDATE(&sharezp->z_zfsvfs); + error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); + zfsvfs->z_shares_dir = sharezp->z_id; + + zfs_acl_ids_free(&acl_ids); + ZTOV(sharezp)->v_count = 0; + sa_handle_destroy(sharezp->z_sa_hdl); + kmem_cache_free(znode_cache, sharezp); + + return (error); +} + +/* + * define a couple of 
values we need available + * for both 64 and 32 bit environments. + */ +#ifndef NBITSMINOR64 +#define NBITSMINOR64 32 +#endif +#ifndef MAXMAJ64 +#define MAXMAJ64 0xffffffffUL +#endif +#ifndef MAXMIN64 +#define MAXMIN64 0xffffffffUL +#endif + +/* + * Create special expldev for ZFS private use. + * Can't use standard expldev since it doesn't do + * what we want. The standard expldev() takes a + * dev32_t in LP64 and expands it to a long dev_t. + * We need an interface that takes a dev32_t in ILP32 + * and expands it to a long dev_t. + */ +static uint64_t +zfs_expldev(dev_t dev) +{ +#ifndef _LP64 + major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32; + return (((uint64_t)major << NBITSMINOR64) | + ((minor_t)dev & MAXMIN32)); +#else + return (dev); +#endif +} + +/* + * Special cmpldev for ZFS private use. + * Can't use standard cmpldev since it takes + * a long dev_t and compresses it to dev32_t in + * LP64. We need to do a compaction of a long dev_t + * to a dev32_t in ILP32. + */ +dev_t +zfs_cmpldev(uint64_t dev) +{ +#ifndef _LP64 + minor_t minor = (minor_t)dev & MAXMIN64; + major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; + + if (major > MAXMAJ32 || minor > MAXMIN32) + return (NODEV32); + + return (((dev32_t)major << NBITSMINOR32) | minor); +#else + return (dev); +#endif +} + +static void +zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, + dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) +{ + ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); + ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); + + mutex_enter(&zp->z_lock); + + ASSERT(zp->z_sa_hdl == NULL); + ASSERT(zp->z_acl_cached == NULL); + if (sa_hdl == NULL) { + VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, + SA_HDL_SHARED, &zp->z_sa_hdl)); + } else { + zp->z_sa_hdl = sa_hdl; + sa_set_userp(sa_hdl, zp); + } + + zp->z_is_sa = (obj_type == DMU_OT_SA) ? 
B_TRUE : B_FALSE; + + /* + * Slap on VROOT if we are the root znode + */ + if (zp->z_id == zfsvfs->z_root) + ZTOV(zp)->v_flag |= VROOT; + + mutex_exit(&zp->z_lock); + vn_exists(ZTOV(zp)); +} + +void +zfs_znode_dmu_fini(znode_t *zp) +{ + ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || + zp->z_unlinked || + RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); + + sa_handle_destroy(zp->z_sa_hdl); + zp->z_sa_hdl = NULL; +} + +/* + * Construct a new znode/vnode and intialize. + * + * This does not do a call to dmu_set_user() that is + * up to the caller to do, in case you don't want to + * return the znode + */ +static znode_t * +zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, + dmu_object_type_t obj_type, sa_handle_t *hdl) +{ + znode_t *zp; + vnode_t *vp; + uint64_t mode; + uint64_t parent; + sa_bulk_attr_t bulk[9]; + int count = 0; + + zp = kmem_cache_alloc(znode_cache, KM_SLEEP); + + ASSERT(zp->z_dirlocks == NULL); + ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); + zp->z_moved = 0; + + /* + * Defer setting z_zfsvfs until the znode is ready to be a candidate for + * the zfs_znode_move() callback. 
+ */ + zp->z_sa_hdl = NULL; + zp->z_unlinked = 0; + zp->z_atime_dirty = 0; + zp->z_mapcnt = 0; + zp->z_id = db->db_object; + zp->z_blksz = blksz; + zp->z_seq = 0x7A4653; + zp->z_sync_cnt = 0; + + vp = ZTOV(zp); + vn_reinit(vp); + + zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &zp->z_size, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &zp->z_links, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + &zp->z_atime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &zp->z_uid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, + &zp->z_gid, 8); + + if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) { + if (hdl == NULL) + sa_handle_destroy(zp->z_sa_hdl); + kmem_cache_free(znode_cache, zp); + return (NULL); + } + + zp->z_mode = mode; + vp->v_vfsp = zfsvfs->z_parent->z_vfs; + + vp->v_type = IFTOVT((mode_t)mode); + + switch (vp->v_type) { + case VDIR: + if (zp->z_pflags & ZFS_XATTR) { + vn_setops(vp, zfs_xdvnodeops); + vp->v_flag |= V_XATTRDIR; + } else { + vn_setops(vp, zfs_dvnodeops); + } + zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ + break; + case VBLK: + case VCHR: + { + uint64_t rdev; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), + &rdev, sizeof (rdev)) == 0); + + vp->v_rdev = zfs_cmpldev(rdev); + } + /*FALLTHROUGH*/ + case VFIFO: + case VSOCK: + case VDOOR: + vn_setops(vp, zfs_fvnodeops); + break; + case VREG: + vp->v_flag |= VMODSORT; + if (parent == zfsvfs->z_shares_dir) { + ASSERT(zp->z_uid == 0 && zp->z_gid == 0); + vn_setops(vp, zfs_sharevnodeops); + } else { + vn_setops(vp, zfs_fvnodeops); + } + 
break; + case VLNK: + vn_setops(vp, zfs_symvnodeops); + break; + default: + vn_setops(vp, zfs_evnodeops); + break; + } + + mutex_enter(&zfsvfs->z_znodes_lock); + list_insert_tail(&zfsvfs->z_all_znodes, zp); + membar_producer(); + /* + * Everything else must be valid before assigning z_zfsvfs makes the + * znode eligible for zfs_znode_move(). + */ + zp->z_zfsvfs = zfsvfs; + mutex_exit(&zfsvfs->z_znodes_lock); + + VFS_HOLD(zfsvfs->z_vfs); + return (zp); +} + +static uint64_t empty_xattr; +static uint64_t pad[4]; +static zfs_acl_phys_t acl_phys; +/* + * Create a new DMU object to hold a zfs znode. + * + * IN: dzp - parent directory for new znode + * vap - file attributes for new znode + * tx - dmu transaction id for zap operations + * cr - credentials of caller + * flag - flags: + * IS_ROOT_NODE - new object will be root + * IS_XATTR - new object is an attribute + * bonuslen - length of bonus buffer + * setaclp - File/Dir initial ACL + * fuidp - Tracks fuid allocation. + * + * OUT: zpp - allocated znode + * + */ +void +zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, + uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) +{ + uint64_t crtime[2], atime[2], mtime[2], ctime[2]; + uint64_t mode, size, links, parent, pflags; + uint64_t dzp_pflags = 0; + uint64_t rdev = 0; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + dmu_buf_t *db; + timestruc_t now; + uint64_t gen, obj; + int err; + int bonuslen; + sa_handle_t *sa_hdl; + dmu_object_type_t obj_type; + sa_bulk_attr_t sa_attrs[ZPL_END]; + int cnt = 0; + zfs_acl_locator_cb_t locate = { 0 }; + + ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); + + if (zfsvfs->z_replay) { + obj = vap->va_nodeid; + now = vap->va_ctime; /* see zfs_replay_create() */ + gen = vap->va_nblocks; /* ditto */ + } else { + obj = 0; + gethrestime(&now); + gen = dmu_tx_get_txg(tx); + } + + obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; + bonuslen = (obj_type == DMU_OT_SA) ? 
+ DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE; + + /* + * Create a new DMU object. + */ + /* + * There's currently no mechanism for pre-reading the blocks that will + * be needed to allocate a new object, so we accept the small chance + * that there will be an i/o error and we will fail one of the + * assertions below. + */ + if (vap->va_type == VDIR) { + if (zfsvfs->z_replay) { + err = zap_create_claim_norm(zfsvfs->z_os, obj, + zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, + obj_type, bonuslen, tx); + ASSERT3U(err, ==, 0); + } else { + obj = zap_create_norm(zfsvfs->z_os, + zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, + obj_type, bonuslen, tx); + } + } else { + if (zfsvfs->z_replay) { + err = dmu_object_claim(zfsvfs->z_os, obj, + DMU_OT_PLAIN_FILE_CONTENTS, 0, + obj_type, bonuslen, tx); + ASSERT3U(err, ==, 0); + } else { + obj = dmu_object_alloc(zfsvfs->z_os, + DMU_OT_PLAIN_FILE_CONTENTS, 0, + obj_type, bonuslen, tx); + } + } + + ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); + VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); + + /* + * If this is the root, fix up the half-initialized parent pointer + * to reference the just-allocated physical data area. + */ + if (flag & IS_ROOT_NODE) { + dzp->z_id = obj; + } else { + dzp_pflags = dzp->z_pflags; + } + + /* + * If parent is an xattr, so am I. + */ + if (dzp_pflags & ZFS_XATTR) { + flag |= IS_XATTR; + } + + if (zfsvfs->z_use_fuids) + pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; + else + pflags = 0; + + if (vap->va_type == VDIR) { + size = 2; /* contents ("." and "..") */ + links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; + } else { + size = links = 0; + } + + if (vap->va_type == VBLK || vap->va_type == VCHR) { + rdev = zfs_expldev(vap->va_rdev); + } + + parent = dzp->z_id; + mode = acl_ids->z_mode; + if (flag & IS_XATTR) + pflags |= ZFS_XATTR; + + /* + * No execs denied will be deterimed when zfs_mode_compute() is called. 
+ */ + pflags |= acl_ids->z_aclp->z_hints & + (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| + ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); + + ZFS_TIME_ENCODE(&now, crtime); + ZFS_TIME_ENCODE(&now, ctime); + + if (vap->va_mask & AT_ATIME) { + ZFS_TIME_ENCODE(&vap->va_atime, atime); + } else { + ZFS_TIME_ENCODE(&now, atime); + } + + if (vap->va_mask & AT_MTIME) { + ZFS_TIME_ENCODE(&vap->va_mtime, mtime); + } else { + ZFS_TIME_ENCODE(&now, mtime); + } + + /* Now add in all of the "SA" attributes */ + VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, + &sa_hdl)); + + /* + * Setup the array of attributes to be replaced/set on the new file + * + * order for DMU_OT_ZNODE is critical since it needs to be constructed + * in the old znode_phys_t format. Don't change this ordering + */ + + if (obj_type == DMU_OT_ZNODE) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), + NULL, &atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), + NULL, &mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), + NULL, &crtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), + NULL, &gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), + NULL, &mode, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), + NULL, &size, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), + NULL, &parent, 8); + } else { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), + NULL, &mode, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), + NULL, &size, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), + NULL, &gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, + &acl_ids->z_fuid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, + &acl_ids->z_fgid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), + NULL, &parent, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), + NULL, 
&pflags, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), + NULL, &atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), + NULL, &mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), + NULL, &crtime, 16); + } + + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); + + if (obj_type == DMU_OT_ZNODE) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, + &empty_xattr, 8); + } + if (obj_type == DMU_OT_ZNODE || + (vap->va_type == VBLK || vap->va_type == VCHR)) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), + NULL, &rdev, 8); + + } + if (obj_type == DMU_OT_ZNODE) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), + NULL, &pflags, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, + &acl_ids->z_fuid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, + &acl_ids->z_fgid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, + sizeof (uint64_t) * 4); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + &acl_phys, sizeof (zfs_acl_phys_t)); + } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, + &acl_ids->z_aclp->z_acl_count, 8); + locate.cb_aclp = acl_ids->z_aclp; + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), + zfs_acl_data_locator, &locate, + acl_ids->z_aclp->z_acl_bytes); + mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, + acl_ids->z_fuid, acl_ids->z_fgid); + } + + VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); + + if (!(flag & IS_ROOT_NODE)) { + *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); + ASSERT(*zpp != NULL); + } else { + /* + * If we are creating the root node, the "parent" we + * passed in is the znode for the root. 
+ */ + *zpp = dzp; + + (*zpp)->z_sa_hdl = sa_hdl; + } + + (*zpp)->z_pflags = pflags; + (*zpp)->z_mode = mode; + + if (vap->va_mask & AT_XVATTR) + zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); + + if (obj_type == DMU_OT_ZNODE || + acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { + err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); + ASSERT3P(err, ==, 0); + } + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); +} + +/* + * zfs_xvattr_set only updates the in-core attributes + * it is assumed the caller will be doing an sa_bulk_update + * to push the changes out + */ +void +zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) +{ + xoptattr_t *xoap; + + xoap = xva_getxoptattr(xvap); + ASSERT(xoap); + + if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { + uint64_t times[2]; + ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); + (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), + &times, sizeof (times), tx); + XVA_SET_RTN(xvap, XAT_CREATETIME); + } + if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { + ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_READONLY); + } + if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { + ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_HIDDEN); + } + if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { + ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_SYSTEM); + } + if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { + ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_ARCHIVE); + } + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { + ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_IMMUTABLE); + } + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { + ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_NOUNLINK); + } + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { + ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, + zp->z_pflags, tx); + 
XVA_SET_RTN(xvap, XAT_APPENDONLY); + } + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { + ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_NODUMP); + } + if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { + ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_OPAQUE); + } + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { + ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, + xoap->xoa_av_quarantined, zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); + } + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { + ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_AV_MODIFIED); + } + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { + zfs_sa_set_scanstamp(zp, xvap, tx); + XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); + } + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { + ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_REPARSE); + } + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { + ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_OFFLINE); + } + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { + ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_SPARSE); + } +} + +int +zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) +{ + dmu_object_info_t doi; + dmu_buf_t *db; + znode_t *zp; + int err; + sa_handle_t *hdl; + + *zpp = NULL; + + ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); + + err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); + if (err) { + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (err); + } + + dmu_object_info_from_db(db, &doi); + if (doi.doi_bonus_type != DMU_OT_SA && + (doi.doi_bonus_type != DMU_OT_ZNODE || + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t)))) { + sa_buf_rele(db, NULL); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (EINVAL); + } + + hdl = dmu_buf_get_user(db); + if (hdl != NULL) { + zp = sa_get_userdata(hdl); + + + /* 
+ * Since "SA" does immediate eviction we + * should never find a sa handle that doesn't + * know about the znode. + */ + + ASSERT3P(zp, !=, NULL); + + mutex_enter(&zp->z_lock); + ASSERT3U(zp->z_id, ==, obj_num); + if (zp->z_unlinked) { + err = ENOENT; + } else { + VN_HOLD(ZTOV(zp)); + *zpp = zp; + err = 0; + } + sa_buf_rele(db, NULL); + mutex_exit(&zp->z_lock); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (err); + } + + /* + * Not found create new znode/vnode + * but only if file exists. + * + * There is a small window where zfs_vget() could + * find this object while a file create is still in + * progress. This is checked for in zfs_znode_alloc() + * + * if zfs_znode_alloc() fails it will drop the hold on the + * bonus buffer. + */ + zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, + doi.doi_bonus_type, NULL); + if (zp == NULL) { + err = ENOENT; + } else { + *zpp = zp; + } + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (err); +} + +int +zfs_rezget(znode_t *zp) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_object_info_t doi; + dmu_buf_t *db; + uint64_t obj_num = zp->z_id; + uint64_t mode; + sa_bulk_attr_t bulk[8]; + int err; + int count = 0; + uint64_t gen; + + ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); + + mutex_enter(&zp->z_acl_lock); + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + mutex_exit(&zp->z_acl_lock); + ASSERT(zp->z_sa_hdl == NULL); + err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); + if (err) { + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (err); + } + + dmu_object_info_from_db(db, &doi); + if (doi.doi_bonus_type != DMU_OT_SA && + (doi.doi_bonus_type != DMU_OT_ZNODE || + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t)))) { + sa_buf_rele(db, NULL); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (EINVAL); + } + + zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); + + /* reload cached values */ + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, + 
&gen, sizeof (gen)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &zp->z_size, sizeof (zp->z_size)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &zp->z_links, sizeof (zp->z_links)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + &zp->z_atime, sizeof (zp->z_atime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &zp->z_uid, sizeof (zp->z_uid)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, + &zp->z_gid, sizeof (zp->z_gid)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + &mode, sizeof (mode)); + + if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { + zfs_znode_dmu_fini(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (EIO); + } + + zp->z_mode = mode; + + if (gen != zp->z_gen) { + zfs_znode_dmu_fini(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (EIO); + } + + zp->z_unlinked = (zp->z_links == 0); + zp->z_blksz = doi.doi_data_block_size; + + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + + return (0); +} + +void +zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + objset_t *os = zfsvfs->z_os; + uint64_t obj = zp->z_id; + uint64_t acl_obj = zfs_external_acl(zp); + + ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); + if (acl_obj) { + VERIFY(!zp->z_is_sa); + VERIFY(0 == dmu_object_free(os, acl_obj, tx)); + } + VERIFY(0 == dmu_object_free(os, obj, tx)); + zfs_znode_dmu_fini(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); + zfs_znode_free(zp); +} + +void +zfs_zinactive(znode_t *zp) +{ + vnode_t *vp = ZTOV(zp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + uint64_t z_id = zp->z_id; + + ASSERT(zp->z_sa_hdl); + + /* + * Don't allow a zfs_zget() while were trying to release this znode + */ + ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); + + mutex_enter(&zp->z_lock); + mutex_enter(&vp->v_lock); + vp->v_count--; + if (vp->v_count > 0 || vn_has_cached_data(vp)) { + /* + * If the hold count is greater than zero, 
somebody has + * obtained a new reference on this znode while we were + * processing it here, so we are done. If we still have + * mapped pages then we are also done, since we don't + * want to inactivate the znode until the pages get pushed. + * + * XXX - if vn_has_cached_data(vp) is true, but count == 0, + * this seems like it would leave the znode hanging with + * no chance to go inactive... + */ + mutex_exit(&vp->v_lock); + mutex_exit(&zp->z_lock); + ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); + return; + } + mutex_exit(&vp->v_lock); + + /* + * If this was the last reference to a file with no links, + * remove the file from the file system. + */ + if (zp->z_unlinked) { + mutex_exit(&zp->z_lock); + ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); + zfs_rmnode(zp); + return; + } + + mutex_exit(&zp->z_lock); + zfs_znode_dmu_fini(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); + zfs_znode_free(zp); +} + +void +zfs_znode_free(znode_t *zp) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + vn_invalid(ZTOV(zp)); + + ASSERT(ZTOV(zp)->v_count == 0); + + mutex_enter(&zfsvfs->z_znodes_lock); + POINTER_INVALIDATE(&zp->z_zfsvfs); + list_remove(&zfsvfs->z_all_znodes, zp); + mutex_exit(&zfsvfs->z_znodes_lock); + + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + kmem_cache_free(znode_cache, zp); + + VFS_RELE(zfsvfs->z_vfs); +} + +void +zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], + uint64_t ctime[2], boolean_t have_tx) +{ + timestruc_t now; + + gethrestime(&now); + + if (have_tx) { /* will sa_bulk_update happen really soon? 
*/ + zp->z_atime_dirty = 0; + zp->z_seq++; + } else { + zp->z_atime_dirty = 1; + } + + if (flag & AT_ATIME) { + ZFS_TIME_ENCODE(&now, zp->z_atime); + } + + if (flag & AT_MTIME) { + ZFS_TIME_ENCODE(&now, mtime); + if (zp->z_zfsvfs->z_use_fuids) { + zp->z_pflags |= (ZFS_ARCHIVE | + ZFS_AV_MODIFIED); + } + } + + if (flag & AT_CTIME) { + ZFS_TIME_ENCODE(&now, ctime); + if (zp->z_zfsvfs->z_use_fuids) + zp->z_pflags |= ZFS_ARCHIVE; + } +} + +/* + * Grow the block size for a file. + * + * IN: zp - znode of file to free data in. + * size - requested block size + * tx - open transaction. + * + * NOTE: this function assumes that the znode is write locked. + */ +void +zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) +{ + int error; + u_longlong_t dummy; + + if (size <= zp->z_blksz) + return; + /* + * If the file size is already greater than the current blocksize, + * we will not grow. If there is more than one block in a file, + * the blocksize cannot change. + */ + if (zp->z_blksz && zp->z_size > zp->z_blksz) + return; + + error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, + size, 0, tx); + + if (error == ENOTSUP) + return; + ASSERT3U(error, ==, 0); + + /* What blocksize did we actually get? */ + dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); +} + +/* + * This is a dummy interface used when pvn_vplist_dirty() should *not* + * be calling back into the fs for a putpage(). E.g.: when truncating + * a file, the pages being "thrown away* don't need to be written out. + */ +/* ARGSUSED */ +static int +zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, + int flags, cred_t *cr) +{ + ASSERT(0); + return (0); +} + +/* + * Increase the file length + * + * IN: zp - znode of file to free data in. 
+ * end - new end-of-file + * + * RETURN: 0 if success + * error code if failure + */ +static int +zfs_extend(znode_t *zp, uint64_t end) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_tx_t *tx; + rl_t *rl; + uint64_t newblksz; + int error; + + /* + * We will change zp_size, lock the whole file. + */ + rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); + + /* + * Nothing to do if file already at desired length. + */ + if (end <= zp->z_size) { + zfs_range_unlock(rl); + return (0); + } +top: + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + if (end > zp->z_blksz && + (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { + /* + * We are growing the file past the current block size. + */ + if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { + ASSERT(!ISP2(zp->z_blksz)); + newblksz = MIN(end, SPA_MAXBLOCKSIZE); + } else { + newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); + } + dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); + } else { + newblksz = 0; + } + + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + zfs_range_unlock(rl); + return (error); + } + + if (newblksz) + zfs_grow_blocksize(zp, newblksz, tx); + + zp->z_size = end; + + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), + &zp->z_size, sizeof (zp->z_size), tx)); + + zfs_range_unlock(rl); + + dmu_tx_commit(tx); + + return (0); +} + +/* + * Free space in a file. + * + * IN: zp - znode of file to free data in. + * off - start of section to free. + * len - length of section to free. + * + * RETURN: 0 if success + * error code if failure + */ +static int +zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + rl_t *rl; + int error; + + /* + * Lock the range being freed. + */ + rl = zfs_range_lock(zp, off, len, RL_WRITER); + + /* + * Nothing to do if file already at desired length. 
+ */ + if (off >= zp->z_size) { + zfs_range_unlock(rl); + return (0); + } + + if (off + len > zp->z_size) + len = zp->z_size - off; + + error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); + + zfs_range_unlock(rl); + + return (error); +} + +/* + * Truncate a file + * + * IN: zp - znode of file to free data in. + * end - new end-of-file. + * + * RETURN: 0 if success + * error code if failure + */ +static int +zfs_trunc(znode_t *zp, uint64_t end) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + vnode_t *vp = ZTOV(zp); + dmu_tx_t *tx; + rl_t *rl; + int error; + sa_bulk_attr_t bulk[2]; + int count = 0; + + /* + * We will change zp_size, lock the whole file. + */ + rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); + + /* + * Nothing to do if file already at desired length. + */ + if (end >= zp->z_size) { + zfs_range_unlock(rl); + return (0); + } + + error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); + if (error) { + zfs_range_unlock(rl); + return (error); + } +top: + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + zfs_range_unlock(rl); + return (error); + } + + zp->z_size = end; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + NULL, &zp->z_size, sizeof (zp->z_size)); + + if (end == 0) { + zp->z_pflags &= ~ZFS_SPARSE; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, 8); + } + VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); + + dmu_tx_commit(tx); + + /* + * Clear any mapped pages in the truncated region. This has to + * happen outside of the transaction to avoid the possibility of + * a deadlock with someone trying to push a page that we are + * about to invalidate. 
+ */ + if (vn_has_cached_data(vp)) { + page_t *pp; + uint64_t start = end & PAGEMASK; + int poff = end & PAGEOFFSET; + + if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) { + /* + * We need to zero a partial page. + */ + pagezero(pp, poff, PAGESIZE - poff); + start += PAGESIZE; + page_unlock(pp); + } + error = pvn_vplist_dirty(vp, start, zfs_no_putpage, + B_INVAL | B_TRUNC, NULL); + ASSERT(error == 0); + } + + zfs_range_unlock(rl); + + return (0); +} + +/* + * Free space in a file + * + * IN: zp - znode of file to free data in. + * off - start of range + * len - end of range (0 => EOF) + * flag - current file open mode flags. + * log - TRUE if this action should be logged + * + * RETURN: 0 if success + * error code if failure + */ +int +zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) +{ + vnode_t *vp = ZTOV(zp); + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zilog_t *zilog = zfsvfs->z_log; + uint64_t mode; + uint64_t mtime[2], ctime[2]; + sa_bulk_attr_t bulk[3]; + int count = 0; + int error; + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, + sizeof (mode))) != 0) + return (error); + + if (off > zp->z_size) { + error = zfs_extend(zp, off+len); + if (error == 0 && log) + goto log; + else + return (error); + } + + /* + * Check for any locks in the region to be freed. + */ + + if (MANDLOCK(vp, (mode_t)mode)) { + uint64_t length = (len ? 
len : zp->z_size - off); + if (error = chklock(vp, FWRITE, off, length, flag, NULL)) + return (error); + } + + if (len == 0) { + error = zfs_trunc(zp, off); + } else { + if ((error = zfs_free_range(zp, off, len)) == 0 && + off + len > zp->z_size) + error = zfs_extend(zp, off+len); + } + if (error || !log) + return (error); +log: + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto log; + } + dmu_tx_abort(tx); + return (error); + } + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, 8); + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + + zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); + + dmu_tx_commit(tx); + return (0); +} + +void +zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) +{ + zfsvfs_t zfsvfs; + uint64_t moid, obj, sa_obj, version; + uint64_t sense = ZFS_CASE_SENSITIVE; + uint64_t norm = 0; + nvpair_t *elem; + int error; + int i; + znode_t *rootzp = NULL; + vnode_t *vp; + vattr_t vattr; + znode_t *zp; + zfs_acl_ids_t acl_ids; + + /* + * First attempt to create master node. + */ + /* + * In an empty objset, there are no blocks to read and thus + * there can be no i/o errors (which we assert below). + */ + moid = MASTER_NODE_OBJ; + error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + + /* + * Set starting attributes. 
+ */ + version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); + elem = NULL; + while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { + /* For the moment we expect all zpl props to be uint64_ts */ + uint64_t val; + char *name; + + ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); + VERIFY(nvpair_value_uint64(elem, &val) == 0); + name = nvpair_name(elem); + if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { + if (val < version) + version = val; + } else { + error = zap_update(os, moid, name, 8, 1, &val, tx); + } + ASSERT(error == 0); + if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) + norm = val; + else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) + sense = val; + } + ASSERT(version != 0); + error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); + + /* + * Create zap object used for SA attribute registration + */ + + if (version >= ZPL_VERSION_SA) { + sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT(error == 0); + } else { + sa_obj = 0; + } + /* + * Create a delete queue. + */ + obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); + + error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); + ASSERT(error == 0); + + /* + * Create root znode. Create minimal znode/vnode/zfsvfs + * to allow zfs_mknode to work. 
+ */ + vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; + vattr.va_type = VDIR; + vattr.va_mode = S_IFDIR|0755; + vattr.va_uid = crgetuid(cr); + vattr.va_gid = crgetgid(cr); + + rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); + ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); + rootzp->z_moved = 0; + rootzp->z_unlinked = 0; + rootzp->z_atime_dirty = 0; + rootzp->z_is_sa = USE_SA(version, os); + + vp = ZTOV(rootzp); + vn_reinit(vp); + vp->v_type = VDIR; + + bzero(&zfsvfs, sizeof (zfsvfs_t)); + + zfsvfs.z_os = os; + zfsvfs.z_parent = &zfsvfs; + zfsvfs.z_version = version; + zfsvfs.z_use_fuids = USE_FUIDS(version, os); + zfsvfs.z_use_sa = USE_SA(version, os); + zfsvfs.z_norm = norm; + + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zfsvfs.z_attr_table); + + ASSERT(error == 0); + + /* + * Fold case on file systems that are always or sometimes case + * insensitive. + */ + if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) + zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; + + mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), + offsetof(znode_t, z_link_node)); + + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); + + rootzp->z_zfsvfs = &zfsvfs; + VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, + cr, NULL, &acl_ids)); + zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); + ASSERT3P(zp, ==, rootzp); + ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */ + error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); + ASSERT(error == 0); + zfs_acl_ids_free(&acl_ids); + POINTER_INVALIDATE(&rootzp->z_zfsvfs); + + ZTOV(rootzp)->v_count = 0; + sa_handle_destroy(rootzp->z_sa_hdl); + kmem_cache_free(znode_cache, rootzp); + + /* + * Create shares directory + */ + + error = zfs_create_share_dir(&zfsvfs, tx); + + ASSERT(error == 0); + + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_destroy(&zfsvfs.z_hold_mtx[i]); +} + 
+#endif /* _KERNEL */ + +static int +zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) +{ + uint64_t sa_obj = 0; + int error; + + error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); + if (error != 0 && error != ENOENT) + return (error); + + error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); + return (error); +} + +static int +zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, + dmu_buf_t **db, void *tag) +{ + dmu_object_info_t doi; + int error; + + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) + return (error); + + dmu_object_info_from_db(*db, &doi); + if ((doi.doi_bonus_type != DMU_OT_SA && + doi.doi_bonus_type != DMU_OT_ZNODE) || + doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t)) { + sa_buf_rele(*db, tag); + return (ENOTSUP); + } + + error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); + if (error != 0) { + sa_buf_rele(*db, tag); + return (error); + } + + return (0); +} + +void +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) +{ + sa_handle_destroy(hdl); + sa_buf_rele(db, tag); +} + +/* + * Given an object number, return its parent object number and whether + * or not the object is an extended attribute directory. 
+ */ +static int +zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, + int *is_xattrdir) +{ + uint64_t parent; + uint64_t pflags; + uint64_t mode; + sa_bulk_attr_t bulk[3]; + int count = 0; + int error; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, + &parent, sizeof (parent)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, + &pflags, sizeof (pflags)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &mode, sizeof (mode)); + + if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) + return (error); + + *pobjp = parent; + *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); + + return (0); +} + +/* + * Given an object number, return some zpl level statistics + */ +static int +zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, + zfs_stat_t *sb) +{ + sa_bulk_attr_t bulk[4]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &sb->zs_mode, sizeof (sb->zs_mode)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, + &sb->zs_gen, sizeof (sb->zs_gen)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, + &sb->zs_links, sizeof (sb->zs_links)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, + &sb->zs_ctime, sizeof (sb->zs_ctime)); + + return (sa_bulk_lookup(hdl, bulk, count)); +} + +static int +zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, + sa_attr_type_t *sa_table, char *buf, int len) +{ + sa_handle_t *sa_hdl; + sa_handle_t *prevhdl = NULL; + dmu_buf_t *prevdb = NULL; + dmu_buf_t *sa_db = NULL; + char *path = buf + len - 1; + int error; + + *path = '\0'; + sa_hdl = hdl; + + for (;;) { + uint64_t pobj; + char component[MAXNAMELEN + 2]; + size_t complen; + int is_xattrdir; + + if (prevdb) + zfs_release_sa_handle(prevhdl, prevdb, FTAG); + + if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, + &is_xattrdir)) != 0) + break; + + if (pobj == obj) { + if (path[0] != '/') + *--path = '/'; + break; + } + + component[0] = 
'/'; + if (is_xattrdir) { + (void) sprintf(component + 1, "<xattrdir>"); + } else { + error = zap_value_search(osp, pobj, obj, + ZFS_DIRENT_OBJ(-1ULL), component + 1); + if (error != 0) + break; + } + + complen = strlen(component); + path -= complen; + ASSERT(path >= buf); + bcopy(component, path, complen); + obj = pobj; + + if (sa_hdl != hdl) { + prevhdl = sa_hdl; + prevdb = sa_db; + } + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); + if (error != 0) { + sa_hdl = prevhdl; + sa_db = prevdb; + break; + } + } + + if (sa_hdl != NULL && sa_hdl != hdl) { + ASSERT(sa_db != NULL); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + } + + if (error == 0) + (void) memmove(buf, path, buf + len - path); + + return (error); +} + +int +zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) +{ + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} + +int +zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, + char *buf, int len) +{ + char *path = buf + len - 1; + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + *path = '\0'; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_stats_impl(hdl, sa_table, sb); + if (error != 0) { + zfs_release_sa_handle(hdl, db, FTAG); + return (error); + } + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} diff --git a/uts/common/fs/zfs/zil.c b/uts/common/fs/zfs/zil.c new file mode 100644 index 000000000000..c66313ff6f85 --- /dev/null +++ 
b/uts/common/fs/zfs/zil.c @@ -0,0 +1,1992 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/dmu.h> +#include <sys/zap.h> +#include <sys/arc.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <sys/zil.h> +#include <sys/zil_impl.h> +#include <sys/dsl_dataset.h> +#include <sys/vdev_impl.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_pool.h> + +/* + * The zfs intent log (ZIL) saves transaction records of system calls + * that change the file system in memory with enough information + * to be able to replay them. These are stored in memory until + * either the DMU transaction group (txg) commits them to the stable pool + * and they can be discarded, or they are flushed to the stable log + * (also in the pool) due to a fsync, O_DSYNC or other synchronous + * requirement. In the event of a panic or power fail then those log + * records (transactions) are replayed. + * + * There is one ZIL per file system. 
Its on-disk (pool) format consists + * of 3 parts: + * + * - ZIL header + * - ZIL blocks + * - ZIL records + * + * A log record holds a system call transaction. Log blocks can + * hold many log records and the blocks are chained together. + * Each ZIL block contains a block pointer (blkptr_t) to the next + * ZIL block in the chain. The ZIL header points to the first + * block in the chain. Note there is not a fixed place in the pool + * to hold blocks. They are dynamically allocated and freed as + * needed from the blocks available. Figure X shows the ZIL structure: + */ + +/* + * This global ZIL switch affects all pools + */ +int zil_replay_disable = 0; /* disable intent logging replay */ + +/* + * Tunable parameter for debugging or performance analysis. Setting + * zfs_nocacheflush will cause corruption on power loss if a volatile + * out-of-order write cache is enabled. + */ +boolean_t zfs_nocacheflush = B_FALSE; + +static kmem_cache_t *zil_lwb_cache; + +static void zil_async_to_sync(zilog_t *zilog, uint64_t foid); + +#define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \ + sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused)) + + +/* + * ziltest is by and large an ugly hack, but very useful in + * checking replay without tedious work. + * When running ziltest we want to keep all itx's and so maintain + * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG + * We subtract TXG_CONCURRENT_STATES to allow for common code. 
+ */ +#define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES) + +static int +zil_bp_compare(const void *x1, const void *x2) +{ + const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva; + const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva; + + if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2)) + return (-1); + if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2)) + return (1); + + if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2)) + return (-1); + if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2)) + return (1); + + return (0); +} + +static void +zil_bp_tree_init(zilog_t *zilog) +{ + avl_create(&zilog->zl_bp_tree, zil_bp_compare, + sizeof (zil_bp_node_t), offsetof(zil_bp_node_t, zn_node)); +} + +static void +zil_bp_tree_fini(zilog_t *zilog) +{ + avl_tree_t *t = &zilog->zl_bp_tree; + zil_bp_node_t *zn; + void *cookie = NULL; + + while ((zn = avl_destroy_nodes(t, &cookie)) != NULL) + kmem_free(zn, sizeof (zil_bp_node_t)); + + avl_destroy(t); +} + +int +zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp) +{ + avl_tree_t *t = &zilog->zl_bp_tree; + const dva_t *dva = BP_IDENTITY(bp); + zil_bp_node_t *zn; + avl_index_t where; + + if (avl_find(t, dva, &where) != NULL) + return (EEXIST); + + zn = kmem_alloc(sizeof (zil_bp_node_t), KM_SLEEP); + zn->zn_dva = *dva; + avl_insert(t, zn, where); + + return (0); +} + +static zil_header_t * +zil_header_in_syncing_context(zilog_t *zilog) +{ + return ((zil_header_t *)zilog->zl_header); +} + +static void +zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) +{ + zio_cksum_t *zc = &bp->blk_cksum; + + zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); + zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); + zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); + zc->zc_word[ZIL_ZC_SEQ] = 1ULL; +} + +/* + * Read a log block and make sure it's valid. 
+ */ +static int +zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, + char **end) +{ + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf = NULL; + zbookmark_t zb; + int error; + + if (zilog->zl_header->zh_claim_txg == 0) + zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + + if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) + zio_flags |= ZIO_FLAG_SPECULATIVE; + + SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], + ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); + + error = dsl_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, + ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + + if (error == 0) { + zio_cksum_t cksum = bp->blk_cksum; + + /* + * Validate the checksummed log block. + * + * Sequence numbers should be... sequential. The checksum + * verifier for the next block should be bp's checksum plus 1. + * + * Also check the log chain linkage and size used. + */ + cksum.zc_word[ZIL_ZC_SEQ]++; + + if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { + zil_chain_t *zilc = abuf->b_data; + char *lr = (char *)(zilc + 1); + uint64_t len = zilc->zc_nused - sizeof (zil_chain_t); + + if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, + sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { + error = ECKSUM; + } else { + bcopy(lr, dst, len); + *end = (char *)dst + len; + *nbp = zilc->zc_next_blk; + } + } else { + char *lr = abuf->b_data; + uint64_t size = BP_GET_LSIZE(bp); + zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1; + + if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, + sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) || + (zilc->zc_nused > (size - sizeof (*zilc)))) { + error = ECKSUM; + } else { + bcopy(lr, dst, zilc->zc_nused); + *end = (char *)dst + zilc->zc_nused; + *nbp = zilc->zc_next_blk; + } + } + + VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + } + + return (error); +} + +/* + * Read a TX_WRITE log data block. 
+ */ +static int +zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) +{ + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + const blkptr_t *bp = &lr->lr_blkptr; + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf = NULL; + zbookmark_t zb; + int error; + + if (BP_IS_HOLE(bp)) { + if (wbuf != NULL) + bzero(wbuf, MAX(BP_GET_LSIZE(bp), lr->lr_length)); + return (0); + } + + if (zilog->zl_header->zh_claim_txg == 0) + zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + + SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, + ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); + + error = arc_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, + ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + + if (error == 0) { + if (wbuf != NULL) + bcopy(abuf->b_data, wbuf, arc_buf_size(abuf)); + (void) arc_buf_remove_ref(abuf, &abuf); + } + + return (error); +} + +/* + * Parse the intent log, and call parse_func for each valid record within. + */ +int +zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) +{ + const zil_header_t *zh = zilog->zl_header; + boolean_t claimed = !!zh->zh_claim_txg; + uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX; + uint64_t claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX; + uint64_t max_blk_seq = 0; + uint64_t max_lr_seq = 0; + uint64_t blk_count = 0; + uint64_t lr_count = 0; + blkptr_t blk, next_blk; + char *lrbuf, *lrp; + int error = 0; + + /* + * Old logs didn't record the maximum zh_claim_lr_seq. + */ + if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) + claim_lr_seq = UINT64_MAX; + + /* + * Starting at the block pointed to by zh_log we read the log chain. + * For each block in the chain we strongly check that block to + * ensure its validity. We stop when an invalid block is found. + * For each block pointer in the chain we call parse_blk_func(). + * For each record in each valid block we call parse_lr_func(). 
+ * If the log has been claimed, stop if we encounter a sequence + * number greater than the highest claimed sequence number. + */ + lrbuf = zio_buf_alloc(SPA_MAXBLOCKSIZE); + zil_bp_tree_init(zilog); + + for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { + uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; + int reclen; + char *end; + + if (blk_seq > claim_blk_seq) + break; + if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) + break; + ASSERT3U(max_blk_seq, <, blk_seq); + max_blk_seq = blk_seq; + blk_count++; + + if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) + break; + + error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); + if (error) + break; + + for (lrp = lrbuf; lrp < end; lrp += reclen) { + lr_t *lr = (lr_t *)lrp; + reclen = lr->lrc_reclen; + ASSERT3U(reclen, >=, sizeof (lr_t)); + if (lr->lrc_seq > claim_lr_seq) + goto done; + if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) + goto done; + ASSERT3U(max_lr_seq, <, lr->lrc_seq); + max_lr_seq = lr->lrc_seq; + lr_count++; + } + } +done: + zilog->zl_parse_error = error; + zilog->zl_parse_blk_seq = max_blk_seq; + zilog->zl_parse_lr_seq = max_lr_seq; + zilog->zl_parse_blk_count = blk_count; + zilog->zl_parse_lr_count = lr_count; + + ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || + (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); + + zil_bp_tree_fini(zilog); + zio_buf_free(lrbuf, SPA_MAXBLOCKSIZE); + + return (error); +} + +static int +zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg) +{ + /* + * Claim log block if not already committed and not already claimed. + * If tx == NULL, just verify that the block is claimable. + */ + if (bp->blk_birth < first_txg || zil_bp_tree_add(zilog, bp) != 0) + return (0); + + return (zio_wait(zio_claim(NULL, zilog->zl_spa, + tx == NULL ? 
0 : first_txg, bp, spa_claim_notify, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB))); +} + +static int +zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) +{ + lr_write_t *lr = (lr_write_t *)lrc; + int error; + + if (lrc->lrc_txtype != TX_WRITE) + return (0); + + /* + * If the block is not readable, don't claim it. This can happen + * in normal operation when a log block is written to disk before + * some of the dmu_sync() blocks it points to. In this case, the + * transaction cannot have been committed to anyone (we would have + * waited for all writes to be stable first), so it is semantically + * correct to declare this the end of the log. + */ + if (lr->lr_blkptr.blk_birth >= first_txg && + (error = zil_read_log_data(zilog, lr, NULL)) != 0) + return (error); + return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); +} + +/* ARGSUSED */ +static int +zil_free_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg) +{ + zio_free_zil(zilog->zl_spa, dmu_tx_get_txg(tx), bp); + + return (0); +} + +static int +zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg) +{ + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + + /* + * If we previously claimed it, we need to free it. 
+ */ + if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE && + bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0) + zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); + + return (0); +} + +static lwb_t * +zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg) +{ + lwb_t *lwb; + + lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); + lwb->lwb_zilog = zilog; + lwb->lwb_blk = *bp; + lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp)); + lwb->lwb_max_txg = txg; + lwb->lwb_zio = NULL; + lwb->lwb_tx = NULL; + if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { + lwb->lwb_nused = sizeof (zil_chain_t); + lwb->lwb_sz = BP_GET_LSIZE(bp); + } else { + lwb->lwb_nused = 0; + lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t); + } + + mutex_enter(&zilog->zl_lock); + list_insert_tail(&zilog->zl_lwb_list, lwb); + mutex_exit(&zilog->zl_lock); + + return (lwb); +} + +/* + * Create an on-disk intent log. + */ +static lwb_t * +zil_create(zilog_t *zilog) +{ + const zil_header_t *zh = zilog->zl_header; + lwb_t *lwb = NULL; + uint64_t txg = 0; + dmu_tx_t *tx = NULL; + blkptr_t blk; + int error = 0; + + /* + * Wait for any previous destroy to complete. + */ + txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); + + ASSERT(zh->zh_claim_txg == 0); + ASSERT(zh->zh_replay_seq == 0); + + blk = zh->zh_log; + + /* + * Allocate an initial log block if: + * - there isn't one already + * - the existing block is the wrong endianess + */ + if (BP_IS_HOLE(&blk) || BP_SHOULD_BYTESWAP(&blk)) { + tx = dmu_tx_create(zilog->zl_os); + VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + txg = dmu_tx_get_txg(tx); + + if (!BP_IS_HOLE(&blk)) { + zio_free_zil(zilog->zl_spa, txg, &blk); + BP_ZERO(&blk); + } + + error = zio_alloc_zil(zilog->zl_spa, txg, &blk, NULL, + ZIL_MIN_BLKSZ, zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + + if (error == 0) + zil_init_log_chain(zilog, &blk); + } + + /* + * Allocate a log write buffer (lwb) for the first log block. 
+ */ + if (error == 0) + lwb = zil_alloc_lwb(zilog, &blk, txg); + + /* + * If we just allocated the first log block, commit our transaction + * and wait for zil_sync() to stuff the block poiner into zh_log. + * (zh is part of the MOS, so we cannot modify it in open context.) + */ + if (tx != NULL) { + dmu_tx_commit(tx); + txg_wait_synced(zilog->zl_dmu_pool, txg); + } + + ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0); + + return (lwb); +} + +/* + * In one tx, free all log blocks and clear the log header. + * If keep_first is set, then we're replaying a log with no content. + * We want to keep the first block, however, so that the first + * synchronous transaction doesn't require a txg_wait_synced() + * in zil_create(). We don't need to txg_wait_synced() here either + * when keep_first is set, because both zil_create() and zil_destroy() + * will wait for any in-progress destroys to complete. + */ +void +zil_destroy(zilog_t *zilog, boolean_t keep_first) +{ + const zil_header_t *zh = zilog->zl_header; + lwb_t *lwb; + dmu_tx_t *tx; + uint64_t txg; + + /* + * Wait for any previous destroy to complete. 
+ */ + txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); + + zilog->zl_old_header = *zh; /* debugging aid */ + + if (BP_IS_HOLE(&zh->zh_log)) + return; + + tx = dmu_tx_create(zilog->zl_os); + VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + txg = dmu_tx_get_txg(tx); + + mutex_enter(&zilog->zl_lock); + + ASSERT3U(zilog->zl_destroy_txg, <, txg); + zilog->zl_destroy_txg = txg; + zilog->zl_keep_first = keep_first; + + if (!list_is_empty(&zilog->zl_lwb_list)) { + ASSERT(zh->zh_claim_txg == 0); + ASSERT(!keep_first); + while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { + list_remove(&zilog->zl_lwb_list, lwb); + if (lwb->lwb_buf != NULL) + zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk); + kmem_cache_free(zil_lwb_cache, lwb); + } + } else if (!keep_first) { + (void) zil_parse(zilog, zil_free_log_block, + zil_free_log_record, tx, zh->zh_claim_txg); + } + mutex_exit(&zilog->zl_lock); + + dmu_tx_commit(tx); +} + +int +zil_claim(const char *osname, void *txarg) +{ + dmu_tx_t *tx = txarg; + uint64_t first_txg = dmu_tx_get_txg(tx); + zilog_t *zilog; + zil_header_t *zh; + objset_t *os; + int error; + + error = dmu_objset_hold(osname, FTAG, &os); + if (error) { + cmn_err(CE_WARN, "can't open objset for %s", osname); + return (0); + } + + zilog = dmu_objset_zil(os); + zh = zil_header_in_syncing_context(zilog); + + if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) { + if (!BP_IS_HOLE(&zh->zh_log)) + zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); + BP_ZERO(&zh->zh_log); + dsl_dataset_dirty(dmu_objset_ds(os), tx); + dmu_objset_rele(os, FTAG); + return (0); + } + + /* + * Claim all log blocks if we haven't already done so, and remember + * the highest claimed sequence number. This ensures that if we can + * read only part of the log now (e.g. 
due to a missing device), + * but we can read the entire log later, we will not try to replay + * or destroy beyond the last block we successfully claimed. + */ + ASSERT3U(zh->zh_claim_txg, <=, first_txg); + if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { + (void) zil_parse(zilog, zil_claim_log_block, + zil_claim_log_record, tx, first_txg); + zh->zh_claim_txg = first_txg; + zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; + zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; + if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) + zh->zh_flags |= ZIL_REPLAY_NEEDED; + zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; + dsl_dataset_dirty(dmu_objset_ds(os), tx); + } + + ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); + dmu_objset_rele(os, FTAG); + return (0); +} + +/* + * Check the log by walking the log chain. + * Checksum errors are ok as they indicate the end of the chain. + * Any other error (no device or read failure) returns an error. + */ +int +zil_check_log_chain(const char *osname, void *tx) +{ + zilog_t *zilog; + objset_t *os; + blkptr_t *bp; + int error; + + ASSERT(tx == NULL); + + error = dmu_objset_hold(osname, FTAG, &os); + if (error) { + cmn_err(CE_WARN, "can't open objset for %s", osname); + return (0); + } + + zilog = dmu_objset_zil(os); + bp = (blkptr_t *)&zilog->zl_header->zh_log; + + /* + * Check the first block and determine if it's on a log device + * which may have been removed or faulted prior to loading this + * pool. If so, there's no point in checking the rest of the log + * as its content should have already been synced to the pool. 
+ */ + if (!BP_IS_HOLE(bp)) { + vdev_t *vd; + boolean_t valid = B_TRUE; + + spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER); + vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0])); + if (vd->vdev_islog && vdev_is_dead(vd)) + valid = vdev_log_state_valid(vd); + spa_config_exit(os->os_spa, SCL_STATE, FTAG); + + if (!valid) { + dmu_objset_rele(os, FTAG); + return (0); + } + } + + /* + * Because tx == NULL, zil_claim_log_block() will not actually claim + * any blocks, but just determine whether it is possible to do so. + * In addition to checking the log chain, zil_claim_log_block() + * will invoke zio_claim() with a done func of spa_claim_notify(), + * which will update spa_max_claim_txg. See spa_load() for details. + */ + error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, + zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); + + dmu_objset_rele(os, FTAG); + + return ((error == ECKSUM || error == ENOENT) ? 0 : error); +} + +static int +zil_vdev_compare(const void *x1, const void *x2) +{ + const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev; + const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev; + + if (v1 < v2) + return (-1); + if (v1 > v2) + return (1); + + return (0); +} + +void +zil_add_block(zilog_t *zilog, const blkptr_t *bp) +{ + avl_tree_t *t = &zilog->zl_vdev_tree; + avl_index_t where; + zil_vdev_node_t *zv, zvsearch; + int ndvas = BP_GET_NDVAS(bp); + int i; + + if (zfs_nocacheflush) + return; + + ASSERT(zilog->zl_writer); + + /* + * Even though we're zl_writer, we still need a lock because the + * zl_get_data() callbacks may have dmu_sync() done callbacks + * that will run concurrently. 
+ */ + mutex_enter(&zilog->zl_vdev_lock); + for (i = 0; i < ndvas; i++) { + zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); + if (avl_find(t, &zvsearch, &where) == NULL) { + zv = kmem_alloc(sizeof (*zv), KM_SLEEP); + zv->zv_vdev = zvsearch.zv_vdev; + avl_insert(t, zv, where); + } + } + mutex_exit(&zilog->zl_vdev_lock); +} + +static void +zil_flush_vdevs(zilog_t *zilog) +{ + spa_t *spa = zilog->zl_spa; + avl_tree_t *t = &zilog->zl_vdev_tree; + void *cookie = NULL; + zil_vdev_node_t *zv; + zio_t *zio; + + ASSERT(zilog->zl_writer); + + /* + * We don't need zl_vdev_lock here because we're the zl_writer, + * and all zl_get_data() callbacks are done. + */ + if (avl_numnodes(t) == 0) + return; + + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + + zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + + while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { + vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); + if (vd != NULL) + zio_flush(zio, vd); + kmem_free(zv, sizeof (*zv)); + } + + /* + * Wait for all the flushes to complete. Not all devices actually + * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails. + */ + (void) zio_wait(zio); + + spa_config_exit(spa, SCL_STATE, FTAG); +} + +/* + * Function called when a log block write completes + */ +static void +zil_lwb_write_done(zio_t *zio) +{ + lwb_t *lwb = zio->io_private; + zilog_t *zilog = lwb->lwb_zilog; + dmu_tx_t *tx = lwb->lwb_tx; + + ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); + ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG); + ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); + ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER); + ASSERT(!BP_IS_GANG(zio->io_bp)); + ASSERT(!BP_IS_HOLE(zio->io_bp)); + ASSERT(zio->io_bp->blk_fill == 0); + + /* + * Ensure the lwb buffer pointer is cleared before releasing + * the txg. 
If we have had an allocation failure and + * the txg is waiting to sync then we want want zil_sync() + * to remove the lwb so that it's not picked up as the next new + * one in zil_commit_writer(). zil_sync() will only remove + * the lwb if lwb_buf is null. + */ + zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + mutex_enter(&zilog->zl_lock); + lwb->lwb_buf = NULL; + lwb->lwb_tx = NULL; + mutex_exit(&zilog->zl_lock); + + /* + * Now that we've written this log block, we have a stable pointer + * to the next block in the chain, so it's OK to let the txg in + * which we allocated the next block sync. + */ + dmu_tx_commit(tx); +} + +/* + * Initialize the io for a log block. + */ +static void +zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) +{ + zbookmark_t zb; + + SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET], + ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, + lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); + + if (zilog->zl_root_zio == NULL) { + zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL, + ZIO_FLAG_CANFAIL); + } + if (lwb->lwb_zio == NULL) { + lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, + 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), + zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE, + ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb); + } +} + +/* + * Define a limited set of intent log block sizes. + * These must be a multiple of 4KB. Note only the amount used (again + * aligned to 4KB) actually gets written. However, we can't always just + * allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted. + */ +uint64_t zil_block_buckets[] = { + 4096, /* non TX_WRITE */ + 8192+4096, /* data base */ + 32*1024 + 4096, /* NFS writes */ + UINT64_MAX +}; + +/* + * Use the slog as long as the logbias is 'latency' and the current commit size + * is less than the limit or the total list size is less than 2X the limit. + * Limit checking is disabled by setting zil_slog_limit to UINT64_MAX. 
+ */ +uint64_t zil_slog_limit = 1024 * 1024; +#define USE_SLOG(zilog) (((zilog)->zl_logbias == ZFS_LOGBIAS_LATENCY) && \ + (((zilog)->zl_cur_used < zil_slog_limit) || \ + ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1)))) + +/* + * Start a log block write and advance to the next log block. + * Calls are serialized. + */ +static lwb_t * +zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) +{ + lwb_t *nlwb = NULL; + zil_chain_t *zilc; + spa_t *spa = zilog->zl_spa; + blkptr_t *bp; + dmu_tx_t *tx; + uint64_t txg; + uint64_t zil_blksz, wsz; + int i, error; + + if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { + zilc = (zil_chain_t *)lwb->lwb_buf; + bp = &zilc->zc_next_blk; + } else { + zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz); + bp = &zilc->zc_next_blk; + } + + ASSERT(lwb->lwb_nused <= lwb->lwb_sz); + + /* + * Allocate the next block and save its address in this block + * before writing it in order to establish the log chain. + * Note that if the allocation of nlwb synced before we wrote + * the block that points at it (lwb), we'd leak it if we crashed. + * Therefore, we don't do dmu_tx_commit() until zil_lwb_write_done(). + * We dirty the dataset to ensure that zil_sync() will be called + * to clean up in the event of allocation failure or I/O failure. + */ + tx = dmu_tx_create(zilog->zl_os); + VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + txg = dmu_tx_get_txg(tx); + + lwb->lwb_tx = tx; + + /* + * Log blocks are pre-allocated. Here we select the size of the next + * block, based on size used in the last block. + * - first find the smallest bucket that will fit the block from a + * limited set of block sizes. This is because it's faster to write + * blocks allocated from the same metaslab as they are adjacent or + * close. + * - next find the maximum from the new suggested size and an array of + * previous sizes. 
This lessens a picket fence effect of wrongly + * guesssing the size if we have a stream of say 2k, 64k, 2k, 64k + * requests. + * + * Note we only write what is used, but we can't just allocate + * the maximum block size because we can exhaust the available + * pool log space. + */ + zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); + for (i = 0; zil_blksz > zil_block_buckets[i]; i++) + continue; + zil_blksz = zil_block_buckets[i]; + if (zil_blksz == UINT64_MAX) + zil_blksz = SPA_MAXBLOCKSIZE; + zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; + for (i = 0; i < ZIL_PREV_BLKS; i++) + zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); + zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); + + BP_ZERO(bp); + /* pass the old blkptr in order to spread log blocks across devs */ + error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, + USE_SLOG(zilog)); + if (!error) { + ASSERT3U(bp->blk_birth, ==, txg); + bp->blk_cksum = lwb->lwb_blk.blk_cksum; + bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; + + /* + * Allocate a new log write buffer (lwb). + */ + nlwb = zil_alloc_lwb(zilog, bp, txg); + + /* Record the block for later vdev flushing */ + zil_add_block(zilog, &lwb->lwb_blk); + } + + if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { + /* For Slim ZIL only write what is used. */ + wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t); + ASSERT3U(wsz, <=, lwb->lwb_sz); + zio_shrink(lwb->lwb_zio, wsz); + + } else { + wsz = lwb->lwb_sz; + } + + zilc->zc_pad = 0; + zilc->zc_nused = lwb->lwb_nused; + zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum; + + /* + * clear unused data for security + */ + bzero(lwb->lwb_buf + lwb->lwb_nused, wsz - lwb->lwb_nused); + + zio_nowait(lwb->lwb_zio); /* Kick off the write for the old log block */ + + /* + * If there was an allocation failure then nlwb will be null which + * forces a txg_wait_synced(). 
+ */ + return (nlwb); +} + +static lwb_t * +zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) +{ + lr_t *lrc = &itx->itx_lr; /* common log record */ + lr_write_t *lrw = (lr_write_t *)lrc; + char *lr_buf; + uint64_t txg = lrc->lrc_txg; + uint64_t reclen = lrc->lrc_reclen; + uint64_t dlen = 0; + + if (lwb == NULL) + return (NULL); + + ASSERT(lwb->lwb_buf != NULL); + + if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) + dlen = P2ROUNDUP_TYPED( + lrw->lr_length, sizeof (uint64_t), uint64_t); + + zilog->zl_cur_used += (reclen + dlen); + + zil_lwb_write_init(zilog, lwb); + + /* + * If this record won't fit in the current log block, start a new one. + */ + if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) { + lwb = zil_lwb_write_start(zilog, lwb); + if (lwb == NULL) + return (NULL); + zil_lwb_write_init(zilog, lwb); + ASSERT(LWB_EMPTY(lwb)); + if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) { + txg_wait_synced(zilog->zl_dmu_pool, txg); + return (lwb); + } + } + + lr_buf = lwb->lwb_buf + lwb->lwb_nused; + bcopy(lrc, lr_buf, reclen); + lrc = (lr_t *)lr_buf; + lrw = (lr_write_t *)lrc; + + /* + * If it's a write, fetch the data or get its blkptr as appropriate. + */ + if (lrc->lrc_txtype == TX_WRITE) { + if (txg > spa_freeze_txg(zilog->zl_spa)) + txg_wait_synced(zilog->zl_dmu_pool, txg); + if (itx->itx_wr_state != WR_COPIED) { + char *dbuf; + int error; + + if (dlen) { + ASSERT(itx->itx_wr_state == WR_NEED_COPY); + dbuf = lr_buf + reclen; + lrw->lr_common.lrc_reclen += dlen; + } else { + ASSERT(itx->itx_wr_state == WR_INDIRECT); + dbuf = NULL; + } + error = zilog->zl_get_data( + itx->itx_private, lrw, dbuf, lwb->lwb_zio); + if (error == EIO) { + txg_wait_synced(zilog->zl_dmu_pool, txg); + return (lwb); + } + if (error) { + ASSERT(error == ENOENT || error == EEXIST || + error == EALREADY); + return (lwb); + } + } + } + + /* + * We're actually making an entry, so update lrc_seq to be the + * log record sequence number. 
Note that this is generally not + * equal to the itx sequence number because not all transactions + * are synchronous, and sometimes spa_sync() gets there first. + */ + lrc->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */ + lwb->lwb_nused += reclen + dlen; + lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); + ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz); + ASSERT3U(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)), ==, 0); + + return (lwb); +} + +itx_t * +zil_itx_create(uint64_t txtype, size_t lrsize) +{ + itx_t *itx; + + lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); + + itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP); + itx->itx_lr.lrc_txtype = txtype; + itx->itx_lr.lrc_reclen = lrsize; + itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */ + itx->itx_lr.lrc_seq = 0; /* defensive */ + itx->itx_sync = B_TRUE; /* default is synchronous */ + + return (itx); +} + +void +zil_itx_destroy(itx_t *itx) +{ + kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); +} + +/* + * Free up the sync and async itxs. The itxs_t has already been detached + * so no locks are needed. 
+ */ +static void +zil_itxg_clean(itxs_t *itxs) +{ + itx_t *itx; + list_t *list; + avl_tree_t *t; + void *cookie; + itx_async_node_t *ian; + + list = &itxs->i_sync_list; + while ((itx = list_head(list)) != NULL) { + list_remove(list, itx); + kmem_free(itx, offsetof(itx_t, itx_lr) + + itx->itx_lr.lrc_reclen); + } + + cookie = NULL; + t = &itxs->i_async_tree; + while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { + list = &ian->ia_list; + while ((itx = list_head(list)) != NULL) { + list_remove(list, itx); + kmem_free(itx, offsetof(itx_t, itx_lr) + + itx->itx_lr.lrc_reclen); + } + list_destroy(list); + kmem_free(ian, sizeof (itx_async_node_t)); + } + avl_destroy(t); + + kmem_free(itxs, sizeof (itxs_t)); +} + +static int +zil_aitx_compare(const void *x1, const void *x2) +{ + const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid; + const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid; + + if (o1 < o2) + return (-1); + if (o1 > o2) + return (1); + + return (0); +} + +/* + * Remove all async itx with the given oid. + */ +static void +zil_remove_async(zilog_t *zilog, uint64_t oid) +{ + uint64_t otxg, txg; + itx_async_node_t *ian; + avl_tree_t *t; + avl_index_t where; + list_t clean_list; + itx_t *itx; + + ASSERT(oid != 0); + list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node)); + + if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ + otxg = ZILTEST_TXG; + else + otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + + for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { + itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; + + mutex_enter(&itxg->itxg_lock); + if (itxg->itxg_txg != txg) { + mutex_exit(&itxg->itxg_lock); + continue; + } + + /* + * Locate the object node and append its list. 
+ */ + t = &itxg->itxg_itxs->i_async_tree; + ian = avl_find(t, &oid, &where); + if (ian != NULL) + list_move_tail(&clean_list, &ian->ia_list); + mutex_exit(&itxg->itxg_lock); + } + while ((itx = list_head(&clean_list)) != NULL) { + list_remove(&clean_list, itx); + kmem_free(itx, offsetof(itx_t, itx_lr) + + itx->itx_lr.lrc_reclen); + } + list_destroy(&clean_list); +} + +void +zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) +{ + uint64_t txg; + itxg_t *itxg; + itxs_t *itxs, *clean = NULL; + + /* + * Object ids can be re-instantiated in the next txg so + * remove any async transactions to avoid future leaks. + * This can happen if a fsync occurs on the re-instantiated + * object for a WR_INDIRECT or WR_NEED_COPY write, which gets + * the new file data and flushes a write record for the old object. + */ + if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) + zil_remove_async(zilog, itx->itx_oid); + + /* + * Ensure the data of a renamed file is committed before the rename. + */ + if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) + zil_async_to_sync(zilog, itx->itx_oid); + + if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) + txg = ZILTEST_TXG; + else + txg = dmu_tx_get_txg(tx); + + itxg = &zilog->zl_itxg[txg & TXG_MASK]; + mutex_enter(&itxg->itxg_lock); + itxs = itxg->itxg_itxs; + if (itxg->itxg_txg != txg) { + if (itxs != NULL) { + /* + * The zil_clean callback hasn't got around to cleaning + * this itxg. Save the itxs for release below. + * This should be rare. 
+ */ + atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod); + itxg->itxg_sod = 0; + clean = itxg->itxg_itxs; + } + ASSERT(itxg->itxg_sod == 0); + itxg->itxg_txg = txg; + itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t), KM_SLEEP); + + list_create(&itxs->i_sync_list, sizeof (itx_t), + offsetof(itx_t, itx_node)); + avl_create(&itxs->i_async_tree, zil_aitx_compare, + sizeof (itx_async_node_t), + offsetof(itx_async_node_t, ia_node)); + } + if (itx->itx_sync) { + list_insert_tail(&itxs->i_sync_list, itx); + atomic_add_64(&zilog->zl_itx_list_sz, itx->itx_sod); + itxg->itxg_sod += itx->itx_sod; + } else { + avl_tree_t *t = &itxs->i_async_tree; + uint64_t foid = ((lr_ooo_t *)&itx->itx_lr)->lr_foid; + itx_async_node_t *ian; + avl_index_t where; + + ian = avl_find(t, &foid, &where); + if (ian == NULL) { + ian = kmem_alloc(sizeof (itx_async_node_t), KM_SLEEP); + list_create(&ian->ia_list, sizeof (itx_t), + offsetof(itx_t, itx_node)); + ian->ia_foid = foid; + avl_insert(t, ian, where); + } + list_insert_tail(&ian->ia_list, itx); + } + + itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); + mutex_exit(&itxg->itxg_lock); + + /* Release the old itxs now we've dropped the lock */ + if (clean != NULL) + zil_itxg_clean(clean); +} + +/* + * If there are any in-memory intent log transactions which have now been + * synced then start up a taskq to free them. 
+ */ +void +zil_clean(zilog_t *zilog, uint64_t synced_txg) +{ + itxg_t *itxg = &zilog->zl_itxg[synced_txg & TXG_MASK]; + itxs_t *clean_me; + + mutex_enter(&itxg->itxg_lock); + if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) { + mutex_exit(&itxg->itxg_lock); + return; + } + ASSERT3U(itxg->itxg_txg, <=, synced_txg); + ASSERT(itxg->itxg_txg != 0); + ASSERT(zilog->zl_clean_taskq != NULL); + atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod); + itxg->itxg_sod = 0; + clean_me = itxg->itxg_itxs; + itxg->itxg_itxs = NULL; + itxg->itxg_txg = 0; + mutex_exit(&itxg->itxg_lock); + /* + * Preferably start a task queue to free up the old itxs but + * if taskq_dispatch can't allocate resources to do that then + * free it in-line. This should be rare. Note, using TQ_SLEEP + * created a bad performance problem. + */ + if (taskq_dispatch(zilog->zl_clean_taskq, + (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP) == NULL) + zil_itxg_clean(clean_me); +} + +/* + * Get the list of itxs to commit into zl_itx_commit_list. + */ +static void +zil_get_commit_list(zilog_t *zilog) +{ + uint64_t otxg, txg; + list_t *commit_list = &zilog->zl_itx_commit_list; + uint64_t push_sod = 0; + + if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ + otxg = ZILTEST_TXG; + else + otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + + for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { + itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; + + mutex_enter(&itxg->itxg_lock); + if (itxg->itxg_txg != txg) { + mutex_exit(&itxg->itxg_lock); + continue; + } + + list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list); + push_sod += itxg->itxg_sod; + itxg->itxg_sod = 0; + + mutex_exit(&itxg->itxg_lock); + } + atomic_add_64(&zilog->zl_itx_list_sz, -push_sod); +} + +/* + * Move the async itxs for a specified object to commit into sync lists. 
+ */ +static void +zil_async_to_sync(zilog_t *zilog, uint64_t foid) +{ + uint64_t otxg, txg; + itx_async_node_t *ian; + avl_tree_t *t; + avl_index_t where; + + if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ + otxg = ZILTEST_TXG; + else + otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + + for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { + itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; + + mutex_enter(&itxg->itxg_lock); + if (itxg->itxg_txg != txg) { + mutex_exit(&itxg->itxg_lock); + continue; + } + + /* + * If a foid is specified then find that node and append its + * list. Otherwise walk the tree appending all the lists + * to the sync list. We add to the end rather than the + * beginning to ensure the create has happened. + */ + t = &itxg->itxg_itxs->i_async_tree; + if (foid != 0) { + ian = avl_find(t, &foid, &where); + if (ian != NULL) { + list_move_tail(&itxg->itxg_itxs->i_sync_list, + &ian->ia_list); + } + } else { + void *cookie = NULL; + + while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { + list_move_tail(&itxg->itxg_itxs->i_sync_list, + &ian->ia_list); + list_destroy(&ian->ia_list); + kmem_free(ian, sizeof (itx_async_node_t)); + } + } + mutex_exit(&itxg->itxg_lock); + } +} + +static void +zil_commit_writer(zilog_t *zilog) +{ + uint64_t txg; + itx_t *itx; + lwb_t *lwb; + spa_t *spa = zilog->zl_spa; + int error = 0; + + ASSERT(zilog->zl_root_zio == NULL); + + mutex_exit(&zilog->zl_lock); + + zil_get_commit_list(zilog); + + /* + * Return if there's nothing to commit before we dirty the fs by + * calling zil_create(). 
+ */ + if (list_head(&zilog->zl_itx_commit_list) == NULL) { + mutex_enter(&zilog->zl_lock); + return; + } + + if (zilog->zl_suspend) { + lwb = NULL; + } else { + lwb = list_tail(&zilog->zl_lwb_list); + if (lwb == NULL) + lwb = zil_create(zilog); + } + + DTRACE_PROBE1(zil__cw1, zilog_t *, zilog); + while (itx = list_head(&zilog->zl_itx_commit_list)) { + txg = itx->itx_lr.lrc_txg; + ASSERT(txg); + + if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa)) + lwb = zil_lwb_commit(zilog, itx, lwb); + list_remove(&zilog->zl_itx_commit_list, itx); + kmem_free(itx, offsetof(itx_t, itx_lr) + + itx->itx_lr.lrc_reclen); + } + DTRACE_PROBE1(zil__cw2, zilog_t *, zilog); + + /* write the last block out */ + if (lwb != NULL && lwb->lwb_zio != NULL) + lwb = zil_lwb_write_start(zilog, lwb); + + zilog->zl_cur_used = 0; + + /* + * Wait if necessary for the log blocks to be on stable storage. + */ + if (zilog->zl_root_zio) { + error = zio_wait(zilog->zl_root_zio); + zilog->zl_root_zio = NULL; + zil_flush_vdevs(zilog); + } + + if (error || lwb == NULL) + txg_wait_synced(zilog->zl_dmu_pool, 0); + + mutex_enter(&zilog->zl_lock); + + /* + * Remember the highest committed log sequence number for ztest. + * We only update this value when all the log writes succeeded, + * because ztest wants to ASSERT that it got the whole log chain. + */ + if (error == 0 && lwb != NULL) + zilog->zl_commit_lr_seq = zilog->zl_lr_seq; +} + +/* + * Commit zfs transactions to stable storage. + * If foid is 0 push out all transactions, otherwise push only those + * for that object or might reference that object. + * + * itxs are committed in batches. In a heavily stressed zil there will be + * a commit writer thread who is writing out a bunch of itxs to the log + * for a set of committing threads (cthreads) in the same batch as the writer. + * Those cthreads are all waiting on the same cv for that batch. 
+ * + * There will also be a different and growing batch of threads that are + * waiting to commit (qthreads). When the committing batch completes + * a transition occurs such that the cthreads exit and the qthreads become + * cthreads. One of the new cthreads becomes the writer thread for the + * batch. Any new threads arriving become new qthreads. + * + * Only 2 condition variables are needed and there's no transition + * between the two cvs needed. They just flip-flop between qthreads + * and cthreads. + * + * Using this scheme we can efficiently wakeup up only those threads + * that have been committed. + */ +void +zil_commit(zilog_t *zilog, uint64_t foid) +{ + uint64_t mybatch; + + if (zilog->zl_sync == ZFS_SYNC_DISABLED) + return; + + /* move the async itxs for the foid to the sync queues */ + zil_async_to_sync(zilog, foid); + + mutex_enter(&zilog->zl_lock); + mybatch = zilog->zl_next_batch; + while (zilog->zl_writer) { + cv_wait(&zilog->zl_cv_batch[mybatch & 1], &zilog->zl_lock); + if (mybatch <= zilog->zl_com_batch) { + mutex_exit(&zilog->zl_lock); + return; + } + } + + zilog->zl_next_batch++; + zilog->zl_writer = B_TRUE; + zil_commit_writer(zilog); + zilog->zl_com_batch = mybatch; + zilog->zl_writer = B_FALSE; + mutex_exit(&zilog->zl_lock); + + /* wake up one thread to become the next writer */ + cv_signal(&zilog->zl_cv_batch[(mybatch+1) & 1]); + + /* wake up all threads waiting for this batch to be committed */ + cv_broadcast(&zilog->zl_cv_batch[mybatch & 1]); +} + +/* + * Called in syncing context to free committed log blocks and update log header. + */ +void +zil_sync(zilog_t *zilog, dmu_tx_t *tx) +{ + zil_header_t *zh = zil_header_in_syncing_context(zilog); + uint64_t txg = dmu_tx_get_txg(tx); + spa_t *spa = zilog->zl_spa; + uint64_t *replayed_seq = &zilog->zl_replayed_seq[txg & TXG_MASK]; + lwb_t *lwb; + + /* + * We don't zero out zl_destroy_txg, so make sure we don't try + * to destroy it twice. 
+ */ + if (spa_sync_pass(spa) != 1) + return; + + mutex_enter(&zilog->zl_lock); + + ASSERT(zilog->zl_stop_sync == 0); + /* + * If a replay sequence number was recorded for this txg, publish it + * in the log header and clear the per-txg slot. + */ + if (*replayed_seq != 0) { + ASSERT(zh->zh_replay_seq < *replayed_seq); + zh->zh_replay_seq = *replayed_seq; + *replayed_seq = 0; + } + /* zl_destroy_txg matches this txg: zero the header and the replayed_seq array. */ + if (zilog->zl_destroy_txg == txg) { + blkptr_t blk = zh->zh_log; + + ASSERT(list_head(&zilog->zl_lwb_list) == NULL); + + bzero(zh, sizeof (zil_header_t)); + bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq)); + + if (zilog->zl_keep_first) { + /* + * If this block was part of a log chain that couldn't + * be claimed because a device was missing during + * zil_claim(), but that device later returns, + * then this block could erroneously appear valid. + * To guard against this, assign a new GUID to the new + * log chain so it doesn't matter what blk points to. + */ + zil_init_log_chain(zilog, &blk); + zh->zh_log = blk; + } + } + /* + * Free lwbs from the head of the list, stopping at the first one + * that still has a buffer or whose lwb_max_txg is later than this + * txg. The header's log pointer tracks the current head. + */ + while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { + zh->zh_log = lwb->lwb_blk; + if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg) + break; + list_remove(&zilog->zl_lwb_list, lwb); + zio_free_zil(spa, txg, &lwb->lwb_blk); + kmem_cache_free(zil_lwb_cache, lwb); + + /* + * If we don't have anything left in the lwb list then + * we've had an allocation failure and we need to zero + * out the zil_header blkptr so that we don't end + * up freeing the same block twice. 
+ */ + if (list_head(&zilog->zl_lwb_list) == NULL) + BP_ZERO(&zh->zh_log); + } + mutex_exit(&zilog->zl_lock); +} +/* + * Create the kmem cache used for lwb structures. + */ +void +zil_init(void) +{ + zil_lwb_cache = kmem_cache_create("zil_lwb_cache", + sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0); +} +/* + * Destroy the lwb kmem cache created by zil_init(). + */ +void +zil_fini(void) +{ + kmem_cache_destroy(zil_lwb_cache); +} +/* + * Store a new sync setting in the zilog; zil_commit() and zil_replaying() + * compare it against ZFS_SYNC_DISABLED. + */ +void +zil_set_sync(zilog_t *zilog, uint64_t sync) +{ + zilog->zl_sync = sync; +} +/* + * Store a new logbias setting in the zilog. + */ +void +zil_set_logbias(zilog_t *zilog, uint64_t logbias) +{ + zilog->zl_logbias = logbias; +} +/* + * Allocate and initialize the in-core zilog_t for an objset. The structure + * is zero-filled, pointed at the objset, its spa, its dsl pool and the given + * header, and its locks, lists, vdev AVL tree and condition variables are + * created. Returns the new zilog (KM_SLEEP allocation). + */ +zilog_t * +zil_alloc(objset_t *os, zil_header_t *zh_phys) +{ + zilog_t *zilog; + + zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP); + + zilog->zl_header = zh_phys; + zilog->zl_os = os; + zilog->zl_spa = dmu_objset_spa(os); + zilog->zl_dmu_pool = dmu_objset_pool(os); + zilog->zl_destroy_txg = TXG_INITIAL - 1; + zilog->zl_logbias = dmu_objset_logbias(os); + zilog->zl_sync = dmu_objset_syncprop(os); + zilog->zl_next_batch = 1; + + mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); + /* one lock per in-flight txg slot */ + for (int i = 0; i < TXG_SIZE; i++) { + mutex_init(&zilog->zl_itxg[i].itxg_lock, NULL, + MUTEX_DEFAULT, NULL); + } + + list_create(&zilog->zl_lwb_list, sizeof (lwb_t), + offsetof(lwb_t, lwb_node)); + + list_create(&zilog->zl_itx_commit_list, sizeof (itx_t), + offsetof(itx_t, itx_node)); + + mutex_init(&zilog->zl_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + + avl_create(&zilog->zl_vdev_tree, zil_vdev_compare, + sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); + + cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL); + cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); + cv_init(&zilog->zl_cv_batch[0], NULL, CV_DEFAULT, NULL); + cv_init(&zilog->zl_cv_batch[1], NULL, CV_DEFAULT, NULL); + + return (zilog); +} +/* + * Tear down and free a zilog_t built by zil_alloc(). Sets zl_stop_sync, + * which zil_sync() asserts is clear. + */ +void +zil_free(zilog_t *zilog) +{ + lwb_t *head_lwb; + + zilog->zl_stop_sync = 1; + + /* + * After zil_close() there should only be one lwb with a buffer. 
+ */ + head_lwb = list_head(&zilog->zl_lwb_list); + if (head_lwb) { + ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list)); + list_remove(&zilog->zl_lwb_list, head_lwb); + zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz); + kmem_cache_free(zil_lwb_cache, head_lwb); + } + list_destroy(&zilog->zl_lwb_list); + /* Tear down the vdev tree and its lock. */ + avl_destroy(&zilog->zl_vdev_tree); + mutex_destroy(&zilog->zl_vdev_lock); + + ASSERT(list_is_empty(&zilog->zl_itx_commit_list)); + list_destroy(&zilog->zl_itx_commit_list); + + for (int i = 0; i < TXG_SIZE; i++) { + /* + * It's possible for an itx to be generated that doesn't dirty + * a txg (e.g. ztest TX_TRUNCATE). So there's no zil_clean() + * callback to remove the entry. We remove those here. + * + * Also free up the ziltest itxs. + */ + if (zilog->zl_itxg[i].itxg_itxs) + zil_itxg_clean(zilog->zl_itxg[i].itxg_itxs); + mutex_destroy(&zilog->zl_itxg[i].itxg_lock); + } + + mutex_destroy(&zilog->zl_lock); + /* Destroy the condition variables, then free the zilog itself. */ + cv_destroy(&zilog->zl_cv_writer); + cv_destroy(&zilog->zl_cv_suspend); + cv_destroy(&zilog->zl_cv_batch[0]); + cv_destroy(&zilog->zl_cv_batch[1]); + + kmem_free(zilog, sizeof (zilog_t)); +} + +/* + * Open an intent log. + * + * Looks up the objset's zilog, records the get_data callback and creates + * the "zil_clean" taskq. Returns the zilog. + */ +zilog_t * +zil_open(objset_t *os, zil_get_data_t *get_data) +{ + zilog_t *zilog = dmu_objset_zil(os); + + zilog->zl_get_data = get_data; + zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, + 2, 2, TASKQ_PREPOPULATE); + + return (zilog); +} + +/* + * Close an intent log. + * + * Commits all outstanding itxs, waits for the txg of the last lwb (if any) + * to sync, then destroys the clean taskq and clears the get_data callback. + */ +void +zil_close(zilog_t *zilog) +{ + lwb_t *tail_lwb; + uint64_t txg = 0; + + zil_commit(zilog, 0); /* commit all itx */ + + /* + * The lwb_max_txg for the stubby lwb will reflect the last activity + * for the zil. After a txg_wait_synced() on the txg we know all the + * callbacks have occurred that may clean the zil. Only then can we + * destroy the zl_clean_taskq. 
+ */ + mutex_enter(&zilog->zl_lock); + tail_lwb = list_tail(&zilog->zl_lwb_list); + if (tail_lwb != NULL) + txg = tail_lwb->lwb_max_txg; + mutex_exit(&zilog->zl_lock); + if (txg) + txg_wait_synced(zilog->zl_dmu_pool, txg); + + taskq_destroy(zilog->zl_clean_taskq); + zilog->zl_clean_taskq = NULL; + zilog->zl_get_data = NULL; +} + +/* + * Suspend an intent log. While in suspended mode, we still honor + * synchronous semantics, but we rely on txg_wait_synced() to do it. + * We suspend the log briefly when taking a snapshot so that the snapshot + * contains all the data it's supposed to, and has an empty intent log. + * + * Suspends nest: zl_suspend counts them and zil_resume() drops one. + * Returns EBUSY if the header has ZIL_REPLAY_NEEDED set, otherwise 0. + */ +int +zil_suspend(zilog_t *zilog) +{ + const zil_header_t *zh = zilog->zl_header; + + mutex_enter(&zilog->zl_lock); + if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */ + mutex_exit(&zilog->zl_lock); + return (EBUSY); + } + if (zilog->zl_suspend++ != 0) { + /* + * Someone else already began a suspend. + * Just wait for them to finish. + */ + while (zilog->zl_suspending) + cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); + mutex_exit(&zilog->zl_lock); + return (0); + } + zilog->zl_suspending = B_TRUE; + mutex_exit(&zilog->zl_lock); + /* First suspender: commit everything outstanding, then call zil_destroy(). */ + zil_commit(zilog, 0); + + zil_destroy(zilog, B_FALSE); + /* Done; wake any threads that joined the suspend while it was in progress. */ + mutex_enter(&zilog->zl_lock); + zilog->zl_suspending = B_FALSE; + cv_broadcast(&zilog->zl_cv_suspend); + mutex_exit(&zilog->zl_lock); + + return (0); +} +/* + * Undo one zil_suspend() by decrementing zl_suspend under zl_lock. + */ +void +zil_resume(zilog_t *zilog) +{ + mutex_enter(&zilog->zl_lock); + ASSERT(zilog->zl_suspend != 0); + zilog->zl_suspend--; + mutex_exit(&zilog->zl_lock); +} +/* + * Replay context handed through zil_parse() to zil_replay_log_record(). + */ +typedef struct zil_replay_arg { + zil_replay_func_t **zr_replay; /* replay vector, indexed by txtype */ + void *zr_arg; /* first argument to each replay function */ + boolean_t zr_byteswap; /* BP_SHOULD_BYTESWAP() of the log's bp */ + char *zr_lr; /* scratch copy of the record being replayed */ +} zil_replay_arg_t; +/* + * Report a failed replay: decrement zl_replaying_seq, log a warning naming + * the dataset, sequence number and transaction type, and return error + * unchanged. + */ +static int +zil_replay_error(zilog_t *zilog, lr_t *lr, int error) +{ + char name[MAXNAMELEN]; + + zilog->zl_replaying_seq--; /* didn't actually replay this one */ + + dmu_objset_name(zilog->zl_os, name); + + cmn_err(CE_WARN, "ZFS replay transaction error %d, " + "dataset %s, seq 0x%llx, txtype %llu 
%s\n", error, name, + (u_longlong_t)lr->lrc_seq, + (u_longlong_t)(lr->lrc_txtype & ~TX_CI), + (lr->lrc_txtype & TX_CI) ? "CI" : ""); + + return (error); +} +/* + * Record callback passed to zil_parse() by zil_replay(): replay one log + * record. zra is the zil_replay_arg_t set up by zil_replay(). Records + * that were already replayed or already committed are skipped. Returns 0 + * on success or skip, otherwise the value returned by zil_replay_error(). + */ +static int +zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) +{ + zil_replay_arg_t *zr = zra; + const zil_header_t *zh = zilog->zl_header; + uint64_t reclen = lr->lrc_reclen; + uint64_t txtype = lr->lrc_txtype; + int error = 0; + + zilog->zl_replaying_seq = lr->lrc_seq; + + if (lr->lrc_seq <= zh->zh_replay_seq) /* already replayed */ + return (0); + + if (lr->lrc_txg < claim_txg) /* already committed */ + return (0); + + /* Strip case-insensitive bit, still present in log record */ + txtype &= ~TX_CI; + /* txtype indexes the replay vector, so reject anything out of range */ + if (txtype == 0 || txtype >= TX_MAX_TYPE) + return (zil_replay_error(zilog, lr, EINVAL)); + + /* + * If this record type can be logged out of order, the object + * (lr_foid) may no longer exist. That's legitimate, not an error. + */ + if (TX_OOO(txtype)) { + error = dmu_object_info(zilog->zl_os, + ((lr_ooo_t *)lr)->lr_foid, NULL); + if (error == ENOENT || error == EEXIST) + return (0); + } + + /* + * Make a copy of the data so we can revise and extend it. + */ + bcopy(lr, zr->zr_lr, reclen); + + /* + * If this is a TX_WRITE with a blkptr, suck in the data. + */ + if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) { + error = zil_read_log_data(zilog, (lr_write_t *)lr, + zr->zr_lr + reclen); + if (error) + return (zil_replay_error(zilog, lr, error)); + } + + /* + * The log block containing this lr may have been byteswapped + * so that we can easily examine common fields like lrc_txtype. + * However, the log is a mix of different record types, and only the + * replay vectors know how to byteswap their records. Therefore, if + * the lr was byteswapped, undo it before invoking the replay vector. 
+ */ + if (zr->zr_byteswap) + byteswap_uint64_array(zr->zr_lr, reclen); + + /* + * We must now do two things atomically: replay this log record, + * and update the log header sequence number to reflect the fact that + * we did so. At the end of each replay function the sequence number + * is updated if we are in replay mode. + */ + error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); + if (error) { + /* + * The DMU's dnode layer doesn't see removes until the txg + * commits, so a subsequent claim can spuriously fail with + * EEXIST. So if we receive any error we try syncing out + * any removes then retry the transaction. Note that we + * specify B_FALSE for byteswap now, so we don't do it twice. + */ + txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); + error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); + if (error) + return (zil_replay_error(zilog, lr, error)); + } + return (0); +} +/* + * Block callback passed to zil_parse() by zil_replay(): counts the log + * blocks visited in zl_replay_blks. Always returns 0. + */ +/* ARGSUSED */ +static int +zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) +{ + zilog->zl_replay_blks++; + + return (0); +} + +/* + * If this dataset has a non-empty intent log, replay it and destroy it. + * + * replay_func is the vector of per-transaction-type replay functions and + * arg is passed to each of them as the first argument. + */ +void +zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) +{ + zilog_t *zilog = dmu_objset_zil(os); + const zil_header_t *zh = zilog->zl_header; + zil_replay_arg_t zr; + + if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { + zil_destroy(zilog, B_TRUE); + return; + } + /* + * zr_lr is a scratch buffer of 2 * SPA_MAXBLOCKSIZE bytes: + * zil_replay_log_record() copies each record into it and, for a + * TX_WRITE with a blkptr, reads the data in right after the record. + */ + zr.zr_replay = replay_func; + zr.zr_arg = arg; + zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); + zr.zr_lr = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); + + /* + * Wait for in-progress removes to sync before starting replay. 
+ */ + txg_wait_synced(zilog->zl_dmu_pool, 0); + + zilog->zl_replay = B_TRUE; + zilog->zl_replay_time = ddi_get_lbolt(); + ASSERT(zilog->zl_replay_blks == 0); + (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, + zh->zh_claim_txg); + kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); + /* Replay is finished: destroy the log and wait for that txg to sync. */ + zil_destroy(zilog, B_FALSE); + txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); + zilog->zl_replay = B_FALSE; +} +/* + * Returns B_TRUE when sync is disabled for this zilog or while the log is + * being replayed, B_FALSE otherwise. In the replay case the dataset is + * also dirtied in tx and zl_replaying_seq is recorded in the zl_replayed_seq + * slot for tx's txg, where zil_sync() picks it up. + */ +boolean_t +zil_replaying(zilog_t *zilog, dmu_tx_t *tx) +{ + if (zilog->zl_sync == ZFS_SYNC_DISABLED) + return (B_TRUE); + + if (zilog->zl_replay) { + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = + zilog->zl_replaying_seq; + return (B_TRUE); + } + + return (B_FALSE); +} +/* + * Hold the named objset, suspend and immediately resume its zilog, then + * release the hold. Returns the hold error if the hold fails, EEXIST if + * zil_suspend() fails, otherwise 0. + */ +/* ARGSUSED */ +int +zil_vdev_offline(const char *osname, void *arg) +{ + objset_t *os; + zilog_t *zilog; + int error; + + error = dmu_objset_hold(osname, FTAG, &os); + if (error) + return (error); + + zilog = dmu_objset_zil(os); + if (zil_suspend(zilog) != 0) + error = EEXIST; + else + zil_resume(zilog); + dmu_objset_rele(os, FTAG); + return (error); +} diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c new file mode 100644 index 000000000000..eb509c5911f7 --- /dev/null +++ b/uts/common/fs/zfs/zio.c @@ -0,0 +1,2952 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/fm/fs/zfs.h> +#include <sys/spa.h> +#include <sys/txg.h> +#include <sys/spa_impl.h> +#include <sys/vdev_impl.h> +#include <sys/zio_impl.h> +#include <sys/zio_compress.h> +#include <sys/zio_checksum.h> +#include <sys/dmu_objset.h> +#include <sys/arc.h> +#include <sys/ddt.h> + +/* + * ========================================================================== + * I/O priority table + * ========================================================================== + */ +uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = { + 0, /* ZIO_PRIORITY_NOW */ + 0, /* ZIO_PRIORITY_SYNC_READ */ + 0, /* ZIO_PRIORITY_SYNC_WRITE */ + 0, /* ZIO_PRIORITY_LOG_WRITE */ + 1, /* ZIO_PRIORITY_CACHE_FILL */ + 1, /* ZIO_PRIORITY_AGG */ + 4, /* ZIO_PRIORITY_FREE */ + 4, /* ZIO_PRIORITY_ASYNC_WRITE */ + 6, /* ZIO_PRIORITY_ASYNC_READ */ + 10, /* ZIO_PRIORITY_RESILVER */ + 20, /* ZIO_PRIORITY_SCRUB */ + 2, /* ZIO_PRIORITY_DDT_PREFETCH */ +}; + +/* + * ========================================================================== + * I/O type descriptions + * ========================================================================== + */ +char *zio_type_name[ZIO_TYPES] = { + "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim", + "zio_ioctl" +}; + +/* + * ========================================================================== + * I/O kmem caches + * ========================================================================== + */ +kmem_cache_t *zio_cache; /* zio_t structures */ +kmem_cache_t *zio_link_cache; /* zio_link_t structures */ +kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; +kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> 
SPA_MINBLOCKSHIFT]; + +#ifdef _KERNEL +extern vmem_t *zio_alloc_arena; +#endif + +/* + * An allocating zio is one that either currently has the DVA allocate + * stage set or will have it later in its lifetime. + */ +#define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) + +boolean_t zio_requeue_io_start_cut_in_line = B_TRUE; + +#ifdef ZFS_DEBUG +int zio_buf_debug_limit = 16384; +#else +int zio_buf_debug_limit = 0; +#endif +/* + * Create the zio and zio_link kmem caches and the per-size zio_buf and + * zio_data_buf caches, then call zio_inject_init(). + */ +void +zio_init(void) +{ + size_t c; + vmem_t *data_alloc_arena = NULL; + +#ifdef _KERNEL + data_alloc_arena = zio_alloc_arena; +#endif + zio_cache = kmem_cache_create("zio_cache", + sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + zio_link_cache = kmem_cache_create("zio_link_cache", + sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + + /* + * For small buffers, we want a cache for each multiple of + * SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache + * for each quarter-power of 2. For large buffers, we want + * a cache for each multiple of PAGESIZE. + */ + for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { + size_t size = (c + 1) << SPA_MINBLOCKSHIFT; + size_t p2 = size; + size_t align = 0; + size_t cflags = (size > zio_buf_debug_limit) ? KMC_NODEBUG : 0; + /* clear low set bits until p2 is the largest power of 2 <= size */ + while (p2 & (p2 - 1)) + p2 &= p2 - 1; + /* align stays 0 for sizes that get no cache of their own */ + if (size <= 4 * SPA_MINBLOCKSIZE) { + align = SPA_MINBLOCKSIZE; + } else if (P2PHASE(size, PAGESIZE) == 0) { + align = PAGESIZE; + } else if (P2PHASE(size, p2 >> 2) == 0) { + align = p2 >> 2; + } + + if (align != 0) { + char name[36]; + (void) sprintf(name, "zio_buf_%lu", (ulong_t)size); + zio_buf_cache[c] = kmem_cache_create(name, size, + align, NULL, NULL, NULL, NULL, NULL, cflags); + + /* + * Since zio_data bufs do not appear in crash dumps, we + * pass KMC_NOTOUCH so that no allocator metadata is + * stored with the buffers. 
+ */ + (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); + zio_data_buf_cache[c] = kmem_cache_create(name, size, + align, NULL, NULL, NULL, NULL, data_alloc_arena, + cflags | KMC_NOTOUCH); + } + } + /* + * Sizes that got no cache of their own share the cache of the next + * larger size: walk down from the top, copying each entry into an + * empty slot below it. + */ + while (--c != 0) { + ASSERT(zio_buf_cache[c] != NULL); + if (zio_buf_cache[c - 1] == NULL) + zio_buf_cache[c - 1] = zio_buf_cache[c]; + + ASSERT(zio_data_buf_cache[c] != NULL); + if (zio_data_buf_cache[c - 1] == NULL) + zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; + } + + zio_inject_init(); +} +/* + * Destroy every cache created by zio_init(). Neighbouring slots may share + * one cache, so a cache is destroyed only when its pointer differs from the + * previous slot's. + */ +void +zio_fini(void) +{ + size_t c; + kmem_cache_t *last_cache = NULL; + kmem_cache_t *last_data_cache = NULL; + + for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { + if (zio_buf_cache[c] != last_cache) { + last_cache = zio_buf_cache[c]; + kmem_cache_destroy(zio_buf_cache[c]); + } + zio_buf_cache[c] = NULL; + + if (zio_data_buf_cache[c] != last_data_cache) { + last_data_cache = zio_data_buf_cache[c]; + kmem_cache_destroy(zio_data_buf_cache[c]); + } + zio_data_buf_cache[c] = NULL; + } + + kmem_cache_destroy(zio_link_cache); + kmem_cache_destroy(zio_cache); + + zio_inject_fini(); +} + +/* + * ========================================================================== + * Allocate and free I/O buffers + * ========================================================================== + */ + +/* + * Use zio_buf_alloc to allocate ZFS metadata. This data will appear in a + * crashdump if the kernel panics, so use it judiciously. Obviously, it's + * useful to inspect ZFS metadata, but if possible, we should avoid keeping + * excess / transient data in-core during a crashdump. + * + * The cache is selected by (size - 1) >> SPA_MINBLOCKSHIFT, so size must be + * nonzero and no larger than SPA_MAXBLOCKSIZE (asserted). + */ +void * +zio_buf_alloc(size_t size) +{ + size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; + + ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); +} + +/* + * Use zio_data_buf_alloc to allocate data. The data will not appear in a + * crashdump if the kernel panics. 
This exists so that we will limit the amount + * of ZFS data that shows up in a kernel crashdump. (Thus reducing the amount + * of kernel heap dumped to disk when the kernel panics) + */ +void * +zio_data_buf_alloc(size_t size) +{ + size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; + + ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE)); +} +/* + * Return buf to the zio_buf cache selected by size; size must map to the + * cache the buffer was allocated from. + */ +void +zio_buf_free(void *buf, size_t size) +{ + size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; + + ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + kmem_cache_free(zio_buf_cache[c], buf); +} +/* + * Return buf to the zio_data_buf cache selected by size; size must map to + * the cache the buffer was allocated from. + */ +void +zio_data_buf_free(void *buf, size_t size) +{ + size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; + + ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + kmem_cache_free(zio_data_buf_cache[c], buf); +} + +/* + * ========================================================================== + * Push and pop I/O transform buffers + * ========================================================================== + */ +static void +zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, + zio_transform_func_t *transform) +{ + zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); + /* save the current data/size so zio_pop_transforms() can restore them */ + zt->zt_orig_data = zio->io_data; + zt->zt_orig_size = zio->io_size; + zt->zt_bufsize = bufsize; + zt->zt_transform = transform; + /* push onto the zio's transform stack */ + zt->zt_next = zio->io_transform_stack; + zio->io_transform_stack = zt; + /* point the zio at the replacement buffer */ + zio->io_data = data; + zio->io_size = size; +} +/* + * Unwind the whole transform stack. For each entry: run its callback (if + * any) with the saved data and size, free the current buffer with + * zio_buf_free() if the entry recorded a nonzero bufsize, restore the saved + * data and size, and free the entry. + */ +static void +zio_pop_transforms(zio_t *zio) +{ + zio_transform_t *zt; + + while ((zt = zio->io_transform_stack) != NULL) { + if (zt->zt_transform != NULL) + zt->zt_transform(zio, + zt->zt_orig_data, zt->zt_orig_size); + + if (zt->zt_bufsize != 0) + zio_buf_free(zio->io_data, zt->zt_bufsize); + + zio->io_data = zt->zt_orig_data; + zio->io_size = zt->zt_orig_size; + zio->io_transform_stack = zt->zt_next; + + kmem_free(zt, sizeof (zio_transform_t)); + } +} + +/* + * 
========================================================================== + * I/O transform callbacks for subblocks and decompression + * ========================================================================== + */ +static void +zio_subblock(zio_t *zio, void *data, uint64_t size) +{ + ASSERT(zio->io_size > size); + /* on a read, copy the leading size bytes of the I/O buffer into data */ + if (zio->io_type == ZIO_TYPE_READ) + bcopy(zio->io_data, data, size); +} +/* + * Transform callback: decompress io_data (io_size bytes) into data (size + * bytes) using the bp's compression type. Does nothing if the zio already + * has an error; a decompression failure is recorded as EIO. + */ +static void +zio_decompress(zio_t *zio, void *data, uint64_t size) +{ + if (zio->io_error == 0 && + zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), + zio->io_data, data, zio->io_size, size) != 0) + zio->io_error = EIO; +} + +/* + * ========================================================================== + * I/O parent/child relationships and pipeline interlocks + * ========================================================================== + */ +/* + * NOTE - Callers to zio_walk_parents() and zio_walk_children() must + * continue calling these functions until they return NULL. + * Otherwise, the next caller will pick up the list walk in + * some indeterminate state. (Otherwise every caller would + * have to pass in a cookie to keep the state represented by + * io_walk_link, which gets annoying.) + */ +zio_t * +zio_walk_parents(zio_t *cio) +{ + zio_link_t *zl = cio->io_walk_link; + list_t *pl = &cio->io_parent_list; + /* a NULL io_walk_link starts a new walk at the head of the list */ + zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl); + cio->io_walk_link = zl; + + if (zl == NULL) + return (NULL); + + ASSERT(zl->zl_child == cio); + return (zl->zl_parent); +} +/* + * Return the next child of pio, or NULL when the walk is complete. + */ +zio_t * +zio_walk_children(zio_t *pio) +{ + zio_link_t *zl = pio->io_walk_link; + list_t *cl = &pio->io_child_list; + + zl = (zl == NULL) ? 
list_head(cl) : list_next(cl, zl); + pio->io_walk_link = zl; + + if (zl == NULL) + return (NULL); + + ASSERT(zl->zl_parent == pio); + return (zl->zl_child); +} +/* + * Return cio's only parent, or NULL if it has none. VERIFY fails if cio + * has more than one parent. + */ +zio_t * +zio_unique_parent(zio_t *cio) +{ + zio_t *pio = zio_walk_parents(cio); + + VERIFY(zio_walk_parents(cio) == NULL); + return (pio); +} +/* + * Link cio under pio with a newly allocated zio_link_t. The child's lock + * is taken before the parent's. + */ +void +zio_add_child(zio_t *pio, zio_t *cio) +{ + zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); + + /* + * Logical I/Os can have logical, gang, or vdev children. + * Gang I/Os can have gang or vdev children. + * Vdev I/Os can only have vdev children. + * The following ASSERT captures all of these constraints. + */ + ASSERT(cio->io_child_type <= pio->io_child_type); + + zl->zl_parent = pio; + zl->zl_child = cio; + + mutex_enter(&cio->io_lock); + mutex_enter(&pio->io_lock); + + ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0); + /* count the child as outstanding for each wait type it has not reached */ + for (int w = 0; w < ZIO_WAIT_TYPES; w++) + pio->io_children[cio->io_child_type][w] += !cio->io_state[w]; + + list_insert_head(&pio->io_child_list, zl); + list_insert_head(&cio->io_parent_list, zl); + + pio->io_child_count++; + cio->io_parent_count++; + + mutex_exit(&pio->io_lock); + mutex_exit(&cio->io_lock); +} +/* + * Unlink zl from both pio's child list and cio's parent list and free it. + * Uses the same lock order as zio_add_child(). + */ +static void +zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl) +{ + ASSERT(zl->zl_parent == pio); + ASSERT(zl->zl_child == cio); + + mutex_enter(&cio->io_lock); + mutex_enter(&pio->io_lock); + + list_remove(&pio->io_child_list, zl); + list_remove(&cio->io_parent_list, zl); + + pio->io_child_count--; + cio->io_parent_count--; + + mutex_exit(&pio->io_lock); + mutex_exit(&cio->io_lock); + + kmem_cache_free(zio_link_cache, zl); +} +/* + * If zio still has outstanding children of the given type for the given + * wait, shift io_stage right by one (presumably so the current stage is + * run again - TODO confirm), record the counter in io_stall and return + * B_TRUE. zio_notify_parent() restarts the zio once that counter reaches + * zero. Returns B_FALSE if there is nothing to wait for. + */ +static boolean_t +zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait) +{ + uint64_t *countp = &zio->io_children[child][wait]; + boolean_t waiting = B_FALSE; + + mutex_enter(&zio->io_lock); + ASSERT(zio->io_stall == NULL); + if (*countp != 0) { + zio->io_stage >>= 1; + zio->io_stall = countp; + waiting = B_TRUE; + } + mutex_exit(&zio->io_lock); + + return 
(waiting); +} +/* + * Tell pio that child zio has reached the given wait point. The child's + * error (unless ZIO_FLAG_DONT_PROPAGATE is set) and reexecute bits are + * folded into pio and the outstanding count is decremented. If the count + * reaches zero and it is the counter pio is stalled on, pio is resumed with + * zio_execute() after its lock has been dropped. + */ +static void +zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait) +{ + uint64_t *countp = &pio->io_children[zio->io_child_type][wait]; + int *errorp = &pio->io_child_error[zio->io_child_type]; + + mutex_enter(&pio->io_lock); + if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) + *errorp = zio_worst_error(*errorp, zio->io_error); + pio->io_reexecute |= zio->io_reexecute; + ASSERT3U(*countp, >, 0); + if (--*countp == 0 && pio->io_stall == countp) { + pio->io_stall = NULL; + mutex_exit(&pio->io_lock); + zio_execute(pio); + } else { + mutex_exit(&pio->io_lock); + } +} +/* + * Adopt the error recorded for child type c if this zio has no error of + * its own. + */ +static void +zio_inherit_child_errors(zio_t *zio, enum zio_child c) +{ + if (zio->io_child_error[c] != 0 && zio->io_error == 0) + zio->io_error = zio->io_child_error[c]; +} + +/* + * ========================================================================== + * Create the various types of I/O (read, write, free, etc) + * ========================================================================== + */ +static zio_t * +zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, + void *data, uint64_t size, zio_done_func_t *done, void *private, + zio_type_t type, int priority, enum zio_flag flags, + vdev_t *vd, uint64_t offset, const zbookmark_t *zb, + enum zio_stage stage, enum zio_stage pipeline) +{ + zio_t *zio; + /* size: at most SPA_MAXBLOCKSIZE; size and offset: multiples of SPA_MINBLOCKSIZE */ + ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); + ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0); + ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0); + + ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER)); + ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); + ASSERT(vd || stage == ZIO_STAGE_OPEN); + /* allocate and zero the zio, then set up its lock, cv and link lists */ + zio = kmem_cache_alloc(zio_cache, KM_SLEEP); + bzero(zio, sizeof (zio_t)); + + mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL); + + list_create(&zio->io_parent_list, sizeof (zio_link_t), + offsetof(zio_link_t, zl_parent_node)); + list_create(&zio->io_child_list, sizeof (zio_link_t), + 
offsetof(zio_link_t, zl_child_node)); + /* classify the zio: a vdev wins, then the gang and DDT child flags, else logical */ + if (vd != NULL) + zio->io_child_type = ZIO_CHILD_VDEV; + else if (flags & ZIO_FLAG_GANG_CHILD) + zio->io_child_type = ZIO_CHILD_GANG; + else if (flags & ZIO_FLAG_DDT_CHILD) + zio->io_child_type = ZIO_CHILD_DDT; + else + zio->io_child_type = ZIO_CHILD_LOGICAL; + + if (bp != NULL) { + zio->io_bp = (blkptr_t *)bp; + zio->io_bp_copy = *bp; + zio->io_bp_orig = *bp; + if (type != ZIO_TYPE_WRITE || + zio->io_child_type == ZIO_CHILD_DDT) + zio->io_bp = &zio->io_bp_copy; /* so caller can free */ + if (zio->io_child_type == ZIO_CHILD_LOGICAL) + zio->io_logical = zio; + if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp)) + pipeline |= ZIO_GANG_STAGES; + } + /* record the arguments; the io_orig_* fields keep the initial values */ + zio->io_spa = spa; + zio->io_txg = txg; + zio->io_done = done; + zio->io_private = private; + zio->io_type = type; + zio->io_priority = priority; + zio->io_vd = vd; + zio->io_offset = offset; + zio->io_orig_data = zio->io_data = data; + zio->io_orig_size = zio->io_size = size; + zio->io_orig_flags = zio->io_flags = flags; + zio->io_orig_stage = zio->io_stage = stage; + zio->io_orig_pipeline = zio->io_pipeline = pipeline; + /* a zio created at or past READY/DONE has already reached that wait point */ + zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); + zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); + + if (zb != NULL) + zio->io_bookmark = *zb; + /* inherit io_logical (if unset) and the gang leader from the parent, then link */ + if (pio != NULL) { + if (zio->io_logical == NULL) + zio->io_logical = pio->io_logical; + if (zio->io_child_type == ZIO_CHILD_GANG) + zio->io_gang_leader = pio->io_gang_leader; + zio_add_child(pio, zio); + } + + return (zio); +} +/* + * Release what zio_create() set up (lists, lock, cv) and return the zio to + * zio_cache. + */ +static void +zio_destroy(zio_t *zio) +{ + list_destroy(&zio->io_parent_list); + list_destroy(&zio->io_child_list); + mutex_destroy(&zio->io_lock); + cv_destroy(&zio->io_cv); + kmem_cache_free(zio_cache, zio); +} +/* + * Create a ZIO_TYPE_NULL zio: no block pointer, no data, txg 0, created at + * ZIO_STAGE_OPEN with priority ZIO_PRIORITY_NOW. + */ +zio_t * +zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, + void *private, enum zio_flag flags) +{ + zio_t *zio; + + zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, + ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, 
NULL, + ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE); + + return (zio); +} +/* + * Create a null zio with no parent and no vdev. + */ +zio_t * +zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) +{ + return (zio_null(NULL, spa, NULL, done, private, flags)); +} +/* + * Create a read of bp into data. The zio's txg is the bp's physical birth + * txg. DDT children (ZIO_FLAG_DDT_CHILD) use the DDT child read pipeline, + * everything else the normal read pipeline. + */ +zio_t * +zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, + void *data, uint64_t size, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, const zbookmark_t *zb) +{ + zio_t *zio; + + zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, + data, size, done, private, + ZIO_TYPE_READ, priority, flags, NULL, 0, zb, + ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? + ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE); + + return (zio); +} +/* + * Create a write of data to bp in the given txg. zp is copied into the + * zio's io_prop and ready is stored as its io_ready callback. The write + * properties are range-checked by the ASSERT below. + */ +zio_t * +zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, + void *data, uint64_t size, const zio_prop_t *zp, + zio_done_func_t *ready, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, const zbookmark_t *zb) +{ + zio_t *zio; + + ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF && + zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && + zp->zp_compress >= ZIO_COMPRESS_OFF && + zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && + zp->zp_type < DMU_OT_NUMTYPES && + zp->zp_level < 32 && + zp->zp_copies > 0 && + zp->zp_copies <= spa_max_replication(spa) && + zp->zp_dedup <= 1 && + zp->zp_dedup_verify <= 1); + + zio = zio_create(pio, spa, txg, bp, data, size, done, private, + ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, + ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? 
+ ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); + + zio->io_ready = ready; + zio->io_prop = *zp; + + return (zio); +} + +/* + * Create a write zio that uses the rewrite pipeline. + */ +zio_t * +zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, + uint64_t size, zio_done_func_t *done, void *private, int priority, + enum zio_flag flags, zbookmark_t *zb) +{ + zio_t *zio; + + zio = zio_create(pio, spa, txg, bp, data, size, done, private, + ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, + ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE); + + return (zio); +} + +/* + * Record an override block pointer and copy count on a logical write that + * is still in ZIO_STAGE_OPEN and belongs to the syncing txg (all asserted). + */ +void +zio_write_override(zio_t *zio, blkptr_t *bp, int copies) +{ + ASSERT(zio->io_type == ZIO_TYPE_WRITE); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(zio->io_stage == ZIO_STAGE_OPEN); + ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa)); + + zio->io_prop.zp_copies = copies; + zio->io_bp_override = bp; +} + +/* + * Queue bp on the spa's free bplist for txg; no zio is created here. + */ +void +zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) +{ + bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); +} + +/* + * Create a ZIO_TYPE_FREE zio for bp. txg must be the currently syncing + * txg, the sync pass must not be later than SYNC_PASS_DEFERRED_FREE, and + * bp must not be a hole (all asserted). + */ +zio_t * +zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, + enum zio_flag flags) +{ + zio_t *zio; + + /* %llu takes an unsigned long long, so cast to u_longlong_t */ + dprintf_bp(bp, "freeing in txg %llu, pass %u", + (u_longlong_t)txg, spa->spa_sync_pass); + + ASSERT(!BP_IS_HOLE(bp)); + ASSERT(spa_syncing_txg(spa) == txg); + ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE); + + zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), + NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, + NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); + + return (zio); +} + +zio_t * +zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, + zio_done_func_t *done, void *private, enum zio_flag flags) +{ + zio_t *zio; + + /* + * A claim is an allocation of a specific block. Claims are needed + * to support immediate writes in the intent log. The issue is that + * immediate writes contain committed data, but in a txg that was + * *not* committed. 
Upon opening the pool after an unclean shutdown, + * the intent log claims all blocks that contain immediate write data + * so that the SPA knows they're in use. + * + * All claims *must* be resolved in the first txg -- before the SPA + * starts allocating blocks -- so that nothing is allocated twice. + * If txg == 0 we just verify that the block is claimable. + */ + ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa)); + ASSERT(txg == spa_first_txg(spa) || txg == 0); + ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */ + + zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), + done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags, + NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE); + + return (zio); +} + +/* + * Issue ioctl cmd to vd. For a vdev with no children the ioctl zio is + * created directly. Otherwise a null zio is created under pio and the + * ioctl is issued to every child vdev with zio_nowait(); the null zio is + * returned. + */ +zio_t * +zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, + zio_done_func_t *done, void *private, int priority, enum zio_flag flags) +{ + zio_t *zio; + int c; + + if (vd->vdev_children == 0) { + zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, + ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL, + ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE); + + zio->io_cmd = cmd; + } else { + zio = zio_null(pio, spa, NULL, NULL, NULL, flags); + + for (c = 0; c < vd->vdev_children; c++) + zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd, + done, private, priority, flags)); + } + + return (zio); +} + +/* + * Create a read of size bytes at offset on vd, without a block pointer. + * vd must have no children and the range must lie within vdev_psize. If + * labels is set the range must fall within the first VDEV_LABEL_START_SIZE + * bytes or the last VDEV_LABEL_END_SIZE bytes of the vdev (all asserted). + * checksum is stored in the zio's io_prop.zp_checksum. + */ +zio_t * +zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, + void *data, int checksum, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, boolean_t labels) +{ + zio_t *zio; + + ASSERT(vd->vdev_children == 0); + ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE || + offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE); + ASSERT3U(offset + size, <=, vd->vdev_psize); + + zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private, + ZIO_TYPE_READ, priority, flags, vd, offset, NULL, + ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE); + + zio->io_prop.zp_checksum = checksum; + + return 
(zio); +} + +zio_t * +zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, + void *data, int checksum, zio_done_func_t *done, void *private, + int priority, enum zio_flag flags, boolean_t labels) +{ + zio_t *zio; + + ASSERT(vd->vdev_children == 0); + ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE || + offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE); + ASSERT3U(offset + size, <=, vd->vdev_psize); + + zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private, + ZIO_TYPE_WRITE, priority, flags, vd, offset, NULL, + ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE); + + zio->io_prop.zp_checksum = checksum; + + if (zio_checksum_table[checksum].ci_eck) { + /* + * zec checksums are necessarily destructive -- they modify + * the end of the write buffer to hold the verifier/checksum. + * Therefore, we must make a local copy in case the data is + * being written to multiple places in parallel. + */ + void *wbuf = zio_buf_alloc(size); + bcopy(data, wbuf, size); + zio_push_transform(zio, wbuf, size, size, NULL); + } + + return (zio); +} + +/* + * Create a child I/O to do some work for us. + */ +zio_t * +zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, + void *data, uint64_t size, int type, int priority, enum zio_flag flags, + zio_done_func_t *done, void *private) +{ + enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE; + zio_t *zio; + + ASSERT(vd->vdev_parent == + (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev)); + + if (type == ZIO_TYPE_READ && bp != NULL) { + /* + * If we have the bp, then the child should perform the + * checksum and the parent need not. This pushes error + * detection as close to the leaves as possible and + * eliminates redundant checksums in the interior nodes. 
+ */ + pipeline |= ZIO_STAGE_CHECKSUM_VERIFY; + pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; + } + + if (vd->vdev_children == 0) + offset += VDEV_LABEL_START_SIZE; + + flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE; + + /* + * If we've decided to do a repair, the write is not speculative -- + * even if the original read was. + */ + if (flags & ZIO_FLAG_IO_REPAIR) + flags &= ~ZIO_FLAG_SPECULATIVE; + + zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, + done, private, type, priority, flags, vd, offset, &pio->io_bookmark, + ZIO_STAGE_VDEV_IO_START >> 1, pipeline); + + return (zio); +} + +zio_t * +zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, + int type, int priority, enum zio_flag flags, + zio_done_func_t *done, void *private) +{ + zio_t *zio; + + ASSERT(vd->vdev_ops->vdev_op_leaf); + + zio = zio_create(NULL, vd->vdev_spa, 0, NULL, + data, size, done, private, type, priority, + flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY, + vd, offset, NULL, + ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE); + + return (zio); +} + +void +zio_flush(zio_t *zio, vdev_t *vd) +{ + zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE, + NULL, NULL, ZIO_PRIORITY_NOW, + ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY)); +} + +void +zio_shrink(zio_t *zio, uint64_t size) +{ + ASSERT(zio->io_executor == NULL); + ASSERT(zio->io_orig_size == zio->io_size); + ASSERT(size <= zio->io_size); + + /* + * We don't shrink for raidz because of problems with the + * reconstruction when reading back less than the block size. + * Note, BP_IS_RAIDZ() assumes no compression. 
+ */ + ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); + if (!BP_IS_RAIDZ(zio->io_bp)) + zio->io_orig_size = zio->io_size = size; +} + +/* + * ========================================================================== + * Prepare to read and write logical blocks + * ========================================================================== + */ + +static int +zio_read_bp_init(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && + zio->io_child_type == ZIO_CHILD_LOGICAL && + !(zio->io_flags & ZIO_FLAG_RAW)) { + uint64_t psize = BP_GET_PSIZE(bp); + void *cbuf = zio_buf_alloc(psize); + + zio_push_transform(zio, cbuf, psize, psize, zio_decompress); + } + + if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) + zio->io_flags |= ZIO_FLAG_DONT_CACHE; + + if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) + zio->io_flags |= ZIO_FLAG_DONT_CACHE; + + if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL) + zio->io_pipeline = ZIO_DDT_READ_PIPELINE; + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_write_bp_init(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + zio_prop_t *zp = &zio->io_prop; + enum zio_compress compress = zp->zp_compress; + blkptr_t *bp = zio->io_bp; + uint64_t lsize = zio->io_size; + uint64_t psize = lsize; + int pass = 1; + + /* + * If our children haven't all reached the ready stage, + * wait for them and then repeat this pipeline stage. 
+ */ + if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || + zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY)) + return (ZIO_PIPELINE_STOP); + + if (!IO_IS_ALLOCATING(zio)) + return (ZIO_PIPELINE_CONTINUE); + + ASSERT(zio->io_child_type != ZIO_CHILD_DDT); + + if (zio->io_bp_override) { + ASSERT(bp->blk_birth != zio->io_txg); + ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); + + *bp = *zio->io_bp_override; + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + + if (BP_IS_HOLE(bp) || !zp->zp_dedup) + return (ZIO_PIPELINE_CONTINUE); + + ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup || + zp->zp_dedup_verify); + + if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { + BP_SET_DEDUP(bp, 1); + zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; + return (ZIO_PIPELINE_CONTINUE); + } + zio->io_bp_override = NULL; + BP_ZERO(bp); + } + + if (bp->blk_birth == zio->io_txg) { + /* + * We're rewriting an existing block, which means we're + * working on behalf of spa_sync(). For spa_sync() to + * converge, it must eventually be the case that we don't + * have to allocate new blocks. But compression changes + * the blocksize, which forces a reallocate, and makes + * convergence take longer. Therefore, after the first + * few passes, stop compressing to ensure convergence. 
+ */ + pass = spa_sync_pass(spa); + + ASSERT(zio->io_txg == spa_syncing_txg(spa)); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(!BP_GET_DEDUP(bp)); + + if (pass > SYNC_PASS_DONT_COMPRESS) + compress = ZIO_COMPRESS_OFF; + + /* Make sure someone doesn't change their mind on overwrites */ + ASSERT(MIN(zp->zp_copies + BP_IS_GANG(bp), + spa_max_replication(spa)) == BP_GET_NDVAS(bp)); + } + + if (compress != ZIO_COMPRESS_OFF) { + void *cbuf = zio_buf_alloc(lsize); + psize = zio_compress_data(compress, zio->io_data, cbuf, lsize); + if (psize == 0 || psize == lsize) { + compress = ZIO_COMPRESS_OFF; + zio_buf_free(cbuf, lsize); + } else { + ASSERT(psize < lsize); + zio_push_transform(zio, cbuf, psize, lsize, NULL); + } + } + + /* + * The final pass of spa_sync() must be all rewrites, but the first + * few passes offer a trade-off: allocating blocks defers convergence, + * but newly allocated blocks are sequential, so they can be written + * to disk faster. Therefore, we allow the first few passes of + * spa_sync() to allocate new blocks, but force rewrites after that. + * There should only be a handful of blocks after pass 1 in any case. 
+ */ + if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && + pass > SYNC_PASS_REWRITE) { + ASSERT(psize != 0); + enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; + zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; + zio->io_flags |= ZIO_FLAG_IO_REWRITE; + } else { + BP_ZERO(bp); + zio->io_pipeline = ZIO_WRITE_PIPELINE; + } + + if (psize == 0) { + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + } else { + ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER); + BP_SET_LSIZE(bp, lsize); + BP_SET_PSIZE(bp, psize); + BP_SET_COMPRESS(bp, compress); + BP_SET_CHECKSUM(bp, zp->zp_checksum); + BP_SET_TYPE(bp, zp->zp_type); + BP_SET_LEVEL(bp, zp->zp_level); + BP_SET_DEDUP(bp, zp->zp_dedup); + BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); + if (zp->zp_dedup) { + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); + zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_free_bp_init(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + if (zio->io_child_type == ZIO_CHILD_LOGICAL) { + if (BP_GET_DEDUP(bp)) + zio->io_pipeline = ZIO_DDT_FREE_PIPELINE; + } + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * ========================================================================== + * Execute the I/O pipeline + * ========================================================================== + */ + +static void +zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) +{ + spa_t *spa = zio->io_spa; + zio_type_t t = zio->io_type; + int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0); + + /* + * If we're a config writer or a probe, the normal issue and + * interrupt threads may all be blocked waiting for the config lock. + * In this case, select the otherwise-unused taskq for ZIO_TYPE_NULL. + */ + if (zio->io_flags & (ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_PROBE)) + t = ZIO_TYPE_NULL; + + /* + * A similar issue exists for the L2ARC write thread until L2ARC 2.0. 
+ */ + if (t == ZIO_TYPE_WRITE && zio->io_vd && zio->io_vd->vdev_aux) + t = ZIO_TYPE_NULL; + + /* + * If this is a high priority I/O, then use the high priority taskq. + */ + if (zio->io_priority == ZIO_PRIORITY_NOW && + spa->spa_zio_taskq[t][q + 1] != NULL) + q++; + + ASSERT3U(q, <, ZIO_TASKQ_TYPES); + (void) taskq_dispatch(spa->spa_zio_taskq[t][q], + (task_func_t *)zio_execute, zio, flags); +} + +static boolean_t +zio_taskq_member(zio_t *zio, enum zio_taskq_type q) +{ + kthread_t *executor = zio->io_executor; + spa_t *spa = zio->io_spa; + + for (zio_type_t t = 0; t < ZIO_TYPES; t++) + if (taskq_member(spa->spa_zio_taskq[t][q], executor)) + return (B_TRUE); + + return (B_FALSE); +} + +static int +zio_issue_async(zio_t *zio) +{ + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); + + return (ZIO_PIPELINE_STOP); +} + +void +zio_interrupt(zio_t *zio) +{ + zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE); +} + +/* + * Execute the I/O pipeline until one of the following occurs: + * (1) the I/O completes; (2) the pipeline stalls waiting for + * dependent child I/Os; (3) the I/O issues, so we're waiting + * for an I/O completion interrupt; (4) the I/O is delegated by + * vdev-level caching or aggregation; (5) the I/O is deferred + * due to vdev-level queueing; (6) the I/O is handed off to + * another thread. In all cases, the pipeline stops whenever + * there's no CPU work; it never burns a thread in cv_wait(). + * + * There's no locking on io_stage because there's no legitimate way + * for multiple threads to be attempting to process the same I/O. 
+ */ +static zio_pipe_stage_t *zio_pipeline[]; + +void +zio_execute(zio_t *zio) +{ + zio->io_executor = curthread; + + while (zio->io_stage < ZIO_STAGE_DONE) { + enum zio_stage pipeline = zio->io_pipeline; + enum zio_stage stage = zio->io_stage; + int rv; + + ASSERT(!MUTEX_HELD(&zio->io_lock)); + ASSERT(ISP2(stage)); + ASSERT(zio->io_stall == NULL); + + do { + stage <<= 1; + } while ((stage & pipeline) == 0); + + ASSERT(stage <= ZIO_STAGE_DONE); + + /* + * If we are in interrupt context and this pipeline stage + * will grab a config lock that is held across I/O, + * or may wait for an I/O that needs an interrupt thread + * to complete, issue async to avoid deadlock. + * + * For VDEV_IO_START, we cut in line so that the io will + * be sent to disk promptly. + */ + if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && + zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { + boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ? + zio_requeue_io_start_cut_in_line : B_FALSE; + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); + return; + } + + zio->io_stage = stage; + rv = zio_pipeline[highbit(stage) - 1](zio); + + if (rv == ZIO_PIPELINE_STOP) + return; + + ASSERT(rv == ZIO_PIPELINE_CONTINUE); + } +} + +/* + * ========================================================================== + * Initiate I/O, either sync or async + * ========================================================================== + */ +int +zio_wait(zio_t *zio) +{ + int error; + + ASSERT(zio->io_stage == ZIO_STAGE_OPEN); + ASSERT(zio->io_executor == NULL); + + zio->io_waiter = curthread; + + zio_execute(zio); + + mutex_enter(&zio->io_lock); + while (zio->io_executor != NULL) + cv_wait(&zio->io_cv, &zio->io_lock); + mutex_exit(&zio->io_lock); + + error = zio->io_error; + zio_destroy(zio); + + return (error); +} + +void +zio_nowait(zio_t *zio) +{ + ASSERT(zio->io_executor == NULL); + + if (zio->io_child_type == ZIO_CHILD_LOGICAL && + zio_unique_parent(zio) == NULL) { + /* + * This is a logical async I/O with 
no parent to wait for it. + * We add it to the spa_async_zio_root "Godfather" I/O which + * will ensure it completes prior to unloading the pool. + */ + spa_t *spa = zio->io_spa; + + zio_add_child(spa->spa_async_zio_root, zio); + } + + zio_execute(zio); +} + +/* + * ========================================================================== + * Reexecute or suspend/resume failed I/O + * ========================================================================== + */ + +static void +zio_reexecute(zio_t *pio) +{ + zio_t *cio, *cio_next; + + ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN); + ASSERT(pio->io_gang_leader == NULL); + ASSERT(pio->io_gang_tree == NULL); + + pio->io_flags = pio->io_orig_flags; + pio->io_stage = pio->io_orig_stage; + pio->io_pipeline = pio->io_orig_pipeline; + pio->io_reexecute = 0; + pio->io_error = 0; + for (int w = 0; w < ZIO_WAIT_TYPES; w++) + pio->io_state[w] = 0; + for (int c = 0; c < ZIO_CHILD_TYPES; c++) + pio->io_child_error[c] = 0; + + if (IO_IS_ALLOCATING(pio)) + BP_ZERO(pio->io_bp); + + /* + * As we reexecute pio's children, new children could be created. + * New children go to the head of pio's io_child_list, however, + * so we will (correctly) not reexecute them. The key is that + * the remainder of pio's io_child_list, from 'cio_next' onward, + * cannot be affected by any side effects of reexecuting 'cio'. + */ + for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) { + cio_next = zio_walk_children(pio); + mutex_enter(&pio->io_lock); + for (int w = 0; w < ZIO_WAIT_TYPES; w++) + pio->io_children[cio->io_child_type][w]++; + mutex_exit(&pio->io_lock); + zio_reexecute(cio); + } + + /* + * Now that all children have been reexecuted, execute the parent. + * We don't reexecute "The Godfather" I/O here as it's the + * responsibility of the caller to wait on him. 
+ */ + if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) + zio_execute(pio); +} + +void +zio_suspend(spa_t *spa, zio_t *zio) +{ + if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) + fm_panic("Pool '%s' has encountered an uncorrectable I/O " + "failure and the failure mode property for this pool " + "is set to panic.", spa_name(spa)); + + zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); + + mutex_enter(&spa->spa_suspend_lock); + + if (spa->spa_suspend_zio_root == NULL) + spa->spa_suspend_zio_root = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + + spa->spa_suspended = B_TRUE; + + if (zio != NULL) { + ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); + ASSERT(zio != spa->spa_suspend_zio_root); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + ASSERT(zio_unique_parent(zio) == NULL); + ASSERT(zio->io_stage == ZIO_STAGE_DONE); + zio_add_child(spa->spa_suspend_zio_root, zio); + } + + mutex_exit(&spa->spa_suspend_lock); +} + +int +zio_resume(spa_t *spa) +{ + zio_t *pio; + + /* + * Reexecute all previously suspended i/o. + */ + mutex_enter(&spa->spa_suspend_lock); + spa->spa_suspended = B_FALSE; + cv_broadcast(&spa->spa_suspend_cv); + pio = spa->spa_suspend_zio_root; + spa->spa_suspend_zio_root = NULL; + mutex_exit(&spa->spa_suspend_lock); + + if (pio == NULL) + return (0); + + zio_reexecute(pio); + return (zio_wait(pio)); +} + +void +zio_resume_wait(spa_t *spa) +{ + mutex_enter(&spa->spa_suspend_lock); + while (spa_suspended(spa)) + cv_wait(&spa->spa_suspend_cv, &spa->spa_suspend_lock); + mutex_exit(&spa->spa_suspend_lock); +} + +/* + * ========================================================================== + * Gang blocks. + * + * A gang block is a collection of small blocks that looks to the DMU + * like one large block. 
When zio_dva_allocate() cannot find a block + * of the requested size, due to either severe fragmentation or the pool + * being nearly full, it calls zio_write_gang_block() to construct the + * block from smaller fragments. + * + * A gang block consists of a gang header (zio_gbh_phys_t) and up to + * three (SPA_GBH_NBLKPTRS) gang members. The gang header is just like + * an indirect block: it's an array of block pointers. It consumes + * only one sector and hence is allocatable regardless of fragmentation. + * The gang header's bps point to its gang members, which hold the data. + * + * Gang blocks are self-checksumming, using the bp's <vdev, offset, txg> + * as the verifier to ensure uniqueness of the SHA256 checksum. + * Critically, the gang block bp's blk_cksum is the checksum of the data, + * not the gang header. This ensures that data block signatures (needed for + * deduplication) are independent of how the block is physically stored. + * + * Gang blocks can be nested: a gang member may itself be a gang block. + * Thus every gang block is a tree in which root and all interior nodes are + * gang headers, and the leaves are normal blocks that contain user data. + * The root of the gang tree is called the gang leader. + * + * To perform any operation (read, rewrite, free, claim) on a gang block, + * zio_gang_assemble() first assembles the gang tree (minus data leaves) + * in the io_gang_tree field of the original logical i/o by recursively + * reading the gang leader and all gang headers below it. This yields + * an in-core tree containing the contents of every gang header and the + * bps for every constituent of the gang block. + * + * With the gang tree now assembled, zio_gang_issue() just walks the gang tree + * and invokes a callback on each bp. To free a gang block, zio_gang_issue() + * calls zio_free_gang() -- a trivial wrapper around zio_free_sync() -- for + * each bp. zio_claim_gang() provides a similarly trivial wrapper for + * zio_claim(). 
+ * zio_read_gang() is a wrapper around zio_read() that omits reading gang + * headers, since we already have those in io_gang_tree. zio_rewrite_gang() + * performs a zio_rewrite() of the data or, for gang headers, a zio_rewrite() + * of the gang header plus zio_checksum_compute() of the data to update the + * gang header's blk_cksum as described above. + * + * The two-phase assemble/issue model solves the problem of partial failure -- + * what if you'd freed part of a gang block but then couldn't read the + * gang header for another part? Assembling the entire gang tree first + * ensures that all the necessary gang header I/O has succeeded before + * starting the actual work of free, claim, or write. Once the gang tree + * is assembled, free and claim are in-memory operations that cannot fail. + * + * In the event that a gang write fails, zio_dva_unallocate() walks the + * gang tree to immediately free (i.e. insert back into the space map) + * everything we've allocated. This ensures that we don't get ENOSPC + * errors during repeated suspend/resume cycles due to a flaky device. + * + * Gang rewrites only happen during sync-to-convergence. If we can't assemble + * the gang tree, we won't modify the block, so we can safely defer the free + * (knowing that the block is still intact). If we *can* assemble the gang + * tree, then even if some of the rewrites fail, zio_dva_unallocate() will free + * each constituent bp and we can allocate a new block on the next sync pass. + * + * In all cases, the gang tree allows complete recovery from partial failure. 
+ * ========================================================================== + */ + +static zio_t * +zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) +{ + if (gn != NULL) + return (pio); + + return (zio_read(pio, pio->io_spa, bp, data, BP_GET_PSIZE(bp), + NULL, NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), + &pio->io_bookmark)); +} + +zio_t * +zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) +{ + zio_t *zio; + + if (gn != NULL) { + zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, + gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, pio->io_priority, + ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); + /* + * As we rewrite each gang header, the pipeline will compute + * a new gang block header checksum for it; but no one will + * compute a new data checksum, so we do that here. The one + * exception is the gang leader: the pipeline already computed + * its data checksum because that stage precedes gang assembly. + * (Presently, nothing actually uses interior data checksums; + * this is just good hygiene.) + */ + if (gn != pio->io_gang_leader->io_gang_tree) { + zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), + data, BP_GET_PSIZE(bp)); + } + /* + * If we are here to damage data for testing purposes, + * leave the GBH alone so that we can detect the damage. 
+ */ + if (pio->io_gang_leader->io_flags & ZIO_FLAG_INDUCE_DAMAGE) + zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; + } else { + zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, + data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, + ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); + } + + return (zio); +} + +/* ARGSUSED */ +zio_t * +zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) +{ + return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp, + ZIO_GANG_CHILD_FLAGS(pio))); +} + +/* ARGSUSED */ +zio_t * +zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) +{ + return (zio_claim(pio, pio->io_spa, pio->io_txg, bp, + NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio))); +} + +static zio_gang_issue_func_t *zio_gang_issue_func[ZIO_TYPES] = { + NULL, + zio_read_gang, + zio_rewrite_gang, + zio_free_gang, + zio_claim_gang, + NULL +}; + +static void zio_gang_tree_assemble_done(zio_t *zio); + +static zio_gang_node_t * +zio_gang_node_alloc(zio_gang_node_t **gnpp) +{ + zio_gang_node_t *gn; + + ASSERT(*gnpp == NULL); + + gn = kmem_zalloc(sizeof (*gn), KM_SLEEP); + gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE); + *gnpp = gn; + + return (gn); +} + +static void +zio_gang_node_free(zio_gang_node_t **gnpp) +{ + zio_gang_node_t *gn = *gnpp; + + for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) + ASSERT(gn->gn_child[g] == NULL); + + zio_buf_free(gn->gn_gbh, SPA_GANGBLOCKSIZE); + kmem_free(gn, sizeof (*gn)); + *gnpp = NULL; +} + +static void +zio_gang_tree_free(zio_gang_node_t **gnpp) +{ + zio_gang_node_t *gn = *gnpp; + + if (gn == NULL) + return; + + for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) + zio_gang_tree_free(&gn->gn_child[g]); + + zio_gang_node_free(gnpp); +} + +static void +zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp) +{ + zio_gang_node_t *gn = zio_gang_node_alloc(gnpp); + + ASSERT(gio->io_gang_leader == gio); + ASSERT(BP_IS_GANG(bp)); + + zio_nowait(zio_read(gio, gio->io_spa, bp, gn->gn_gbh, + SPA_GANGBLOCKSIZE, 
zio_gang_tree_assemble_done, gn, + gio->io_priority, ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); +} + +static void +zio_gang_tree_assemble_done(zio_t *zio) +{ + zio_t *gio = zio->io_gang_leader; + zio_gang_node_t *gn = zio->io_private; + blkptr_t *bp = zio->io_bp; + + ASSERT(gio == zio_unique_parent(zio)); + ASSERT(zio->io_child_count == 0); + + if (zio->io_error) + return; + + if (BP_SHOULD_BYTESWAP(bp)) + byteswap_uint64_array(zio->io_data, zio->io_size); + + ASSERT(zio->io_data == gn->gn_gbh); + ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); + ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); + + for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { + blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; + if (!BP_IS_GANG(gbp)) + continue; + zio_gang_tree_assemble(gio, gbp, &gn->gn_child[g]); + } +} + +static void +zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) +{ + zio_t *gio = pio->io_gang_leader; + zio_t *zio; + + ASSERT(BP_IS_GANG(bp) == !!gn); + ASSERT(BP_GET_CHECKSUM(bp) == BP_GET_CHECKSUM(gio->io_bp)); + ASSERT(BP_GET_LSIZE(bp) == BP_GET_PSIZE(bp) || gn == gio->io_gang_tree); + + /* + * If you're a gang header, your data is in gn->gn_gbh. + * If you're a gang member, your data is in 'data' and gn == NULL. 
+ */ + zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data); + + if (gn != NULL) { + ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); + + for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { + blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; + if (BP_IS_HOLE(gbp)) + continue; + zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data); + data = (char *)data + BP_GET_PSIZE(gbp); + } + } + + if (gn == gio->io_gang_tree) + ASSERT3P((char *)gio->io_data + gio->io_size, ==, data); + + if (zio != pio) + zio_nowait(zio); +} + +static int +zio_gang_assemble(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == NULL); + ASSERT(zio->io_child_type > ZIO_CHILD_GANG); + + zio->io_gang_leader = zio; + + zio_gang_tree_assemble(zio, bp, &zio->io_gang_tree); + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_gang_issue(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE)) + return (ZIO_PIPELINE_STOP); + + ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == zio); + ASSERT(zio->io_child_type > ZIO_CHILD_GANG); + + if (zio->io_child_error[ZIO_CHILD_GANG] == 0) + zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_data); + else + zio_gang_tree_free(&zio->io_gang_tree); + + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + + return (ZIO_PIPELINE_CONTINUE); +} + +static void +zio_write_gang_member_ready(zio_t *zio) +{ + zio_t *pio = zio_unique_parent(zio); + zio_t *gio = zio->io_gang_leader; + dva_t *cdva = zio->io_bp->blk_dva; + dva_t *pdva = pio->io_bp->blk_dva; + uint64_t asize; + + if (BP_IS_HOLE(zio->io_bp)) + return; + + ASSERT(BP_IS_HOLE(&zio->io_bp_orig)); + + ASSERT(zio->io_child_type == ZIO_CHILD_GANG); + ASSERT3U(zio->io_prop.zp_copies, ==, gio->io_prop.zp_copies); + ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); + ASSERT3U(pio->io_prop.zp_copies, <=, BP_GET_NDVAS(pio->io_bp)); + ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); + + 
mutex_enter(&pio->io_lock); + for (int d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) { + ASSERT(DVA_GET_GANG(&pdva[d])); + asize = DVA_GET_ASIZE(&pdva[d]); + asize += DVA_GET_ASIZE(&cdva[d]); + DVA_SET_ASIZE(&pdva[d], asize); + } + mutex_exit(&pio->io_lock); +} + +static int +zio_write_gang_block(zio_t *pio) +{ + spa_t *spa = pio->io_spa; + blkptr_t *bp = pio->io_bp; + zio_t *gio = pio->io_gang_leader; + zio_t *zio; + zio_gang_node_t *gn, **gnpp; + zio_gbh_phys_t *gbh; + uint64_t txg = pio->io_txg; + uint64_t resid = pio->io_size; + uint64_t lsize; + int copies = gio->io_prop.zp_copies; + int gbh_copies = MIN(copies + 1, spa_max_replication(spa)); + zio_prop_t zp; + int error; + + error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE, + bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, + METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER); + if (error) { + pio->io_error = error; + return (ZIO_PIPELINE_CONTINUE); + } + + if (pio == gio) { + gnpp = &gio->io_gang_tree; + } else { + gnpp = pio->io_private; + ASSERT(pio->io_ready == zio_write_gang_member_ready); + } + + gn = zio_gang_node_alloc(gnpp); + gbh = gn->gn_gbh; + bzero(gbh, SPA_GANGBLOCKSIZE); + + /* + * Create the gang header. + */ + zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL, + pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); + + /* + * Create and nowait the gang children. 
+ */ + for (int g = 0; resid != 0; resid -= lsize, g++) { + lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g), + SPA_MINBLOCKSIZE); + ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid); + + zp.zp_checksum = gio->io_prop.zp_checksum; + zp.zp_compress = ZIO_COMPRESS_OFF; + zp.zp_type = DMU_OT_NONE; + zp.zp_level = 0; + zp.zp_copies = gio->io_prop.zp_copies; + zp.zp_dedup = 0; + zp.zp_dedup_verify = 0; + + zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g], + (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, + zio_write_gang_member_ready, NULL, &gn->gn_child[g], + pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), + &pio->io_bookmark)); + } + + /* + * Set pio's pipeline to just wait for zio to finish. + */ + pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + + zio_nowait(zio); + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * ========================================================================== + * Dedup + * ========================================================================== + */ +static void +zio_ddt_child_read_done(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp; + zio_t *pio = zio_unique_parent(zio); + + mutex_enter(&pio->io_lock); + ddp = ddt_phys_select(dde, bp); + if (zio->io_error == 0) + ddt_phys_clear(ddp); /* this ddp doesn't need repair */ + if (zio->io_error == 0 && dde->dde_repair_data == NULL) + dde->dde_repair_data = zio->io_data; + else + zio_buf_free(zio->io_data, zio->io_size); + mutex_exit(&pio->io_lock); +} + +static int +zio_ddt_read_start(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(BP_GET_PSIZE(bp) == zio->io_size); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + if (zio->io_child_error[ZIO_CHILD_DDT]) { + ddt_t *ddt = ddt_select(zio->io_spa, bp); + ddt_entry_t *dde = ddt_repair_start(ddt, bp); + ddt_phys_t *ddp = dde->dde_phys; + ddt_phys_t *ddp_self = ddt_phys_select(dde, bp); + blkptr_t blk; + + ASSERT(zio->io_vsd == NULL); + 
zio->io_vsd = dde; + + if (ddp_self == NULL) + return (ZIO_PIPELINE_CONTINUE); + + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + if (ddp->ddp_phys_birth == 0 || ddp == ddp_self) + continue; + ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp, + &blk); + zio_nowait(zio_read(zio, zio->io_spa, &blk, + zio_buf_alloc(zio->io_size), zio->io_size, + zio_ddt_child_read_done, dde, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio) | ZIO_FLAG_DONT_PROPAGATE, + &zio->io_bookmark)); + } + return (ZIO_PIPELINE_CONTINUE); + } + + zio_nowait(zio_read(zio, zio->io_spa, bp, + zio->io_data, zio->io_size, NULL, NULL, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark)); + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_ddt_read_done(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + + if (zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE)) + return (ZIO_PIPELINE_STOP); + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(BP_GET_PSIZE(bp) == zio->io_size); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + if (zio->io_child_error[ZIO_CHILD_DDT]) { + ddt_t *ddt = ddt_select(zio->io_spa, bp); + ddt_entry_t *dde = zio->io_vsd; + if (ddt == NULL) { + ASSERT(spa_load_state(zio->io_spa) != SPA_LOAD_NONE); + return (ZIO_PIPELINE_CONTINUE); + } + if (dde == NULL) { + zio->io_stage = ZIO_STAGE_DDT_READ_START >> 1; + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); + return (ZIO_PIPELINE_STOP); + } + if (dde->dde_repair_data != NULL) { + bcopy(dde->dde_repair_data, zio->io_data, zio->io_size); + zio->io_child_error[ZIO_CHILD_DDT] = 0; + } + ddt_repair_done(ddt, dde); + zio->io_vsd = NULL; + } + + ASSERT(zio->io_vsd == NULL); + + return (ZIO_PIPELINE_CONTINUE); +} + +static boolean_t +zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) +{ + spa_t *spa = zio->io_spa; + + /* + * Note: we compare the original data, not the transformed data, + * because when zio->io_bp is an override bp, we will not have + * pushed the I/O transforms. 
That's an important optimization + * because otherwise we'd compress/encrypt all dmu_sync() data twice. + */ + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + zio_t *lio = dde->dde_lead_zio[p]; + + if (lio != NULL) { + return (lio->io_orig_size != zio->io_orig_size || + bcmp(zio->io_orig_data, lio->io_orig_data, + zio->io_orig_size) != 0); + } + } + + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; + + if (ddp->ddp_phys_birth != 0) { + arc_buf_t *abuf = NULL; + uint32_t aflags = ARC_WAIT; + blkptr_t blk = *zio->io_bp; + int error; + + ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); + + ddt_exit(ddt); + + error = arc_read_nolock(NULL, spa, &blk, + arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &aflags, &zio->io_bookmark); + + if (error == 0) { + if (arc_buf_size(abuf) != zio->io_orig_size || + bcmp(abuf->b_data, zio->io_orig_data, + zio->io_orig_size) != 0) + error = EEXIST; + VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + } + + ddt_enter(ddt); + return (error != 0); + } + } + + return (B_FALSE); +} + +static void +zio_ddt_child_write_ready(zio_t *zio) +{ + int p = zio->io_prop.zp_copies; + ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp = &dde->dde_phys[p]; + zio_t *pio; + + if (zio->io_error) + return; + + ddt_enter(ddt); + + ASSERT(dde->dde_lead_zio[p] == zio); + + ddt_phys_fill(ddp, zio->io_bp); + + while ((pio = zio_walk_parents(zio)) != NULL) + ddt_bp_fill(ddp, pio->io_bp, zio->io_txg); + + ddt_exit(ddt); +} + +static void +zio_ddt_child_write_done(zio_t *zio) +{ + int p = zio->io_prop.zp_copies; + ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp = &dde->dde_phys[p]; + + ddt_enter(ddt); + + ASSERT(ddp->ddp_refcnt == 0); + ASSERT(dde->dde_lead_zio[p] == zio); + dde->dde_lead_zio[p] = NULL; + + if (zio->io_error == 0) { + while 
(zio_walk_parents(zio) != NULL) + ddt_phys_addref(ddp); + } else { + ddt_phys_clear(ddp); + } + + ddt_exit(ddt); +} + +static void +zio_ddt_ditto_write_done(zio_t *zio) +{ + int p = DDT_PHYS_DITTO; + zio_prop_t *zp = &zio->io_prop; + blkptr_t *bp = zio->io_bp; + ddt_t *ddt = ddt_select(zio->io_spa, bp); + ddt_entry_t *dde = zio->io_private; + ddt_phys_t *ddp = &dde->dde_phys[p]; + ddt_key_t *ddk = &dde->dde_key; + + ddt_enter(ddt); + + ASSERT(ddp->ddp_refcnt == 0); + ASSERT(dde->dde_lead_zio[p] == zio); + dde->dde_lead_zio[p] = NULL; + + if (zio->io_error == 0) { + ASSERT(ZIO_CHECKSUM_EQUAL(bp->blk_cksum, ddk->ddk_cksum)); + ASSERT(zp->zp_copies < SPA_DVAS_PER_BP); + ASSERT(zp->zp_copies == BP_GET_NDVAS(bp) - BP_IS_GANG(bp)); + if (ddp->ddp_phys_birth != 0) + ddt_phys_free(ddt, ddk, ddp, zio->io_txg); + ddt_phys_fill(ddp, bp); + } + + ddt_exit(ddt); +} + +static int +zio_ddt_write(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + uint64_t txg = zio->io_txg; + zio_prop_t *zp = &zio->io_prop; + int p = zp->zp_copies; + int ditto_copies; + zio_t *cio = NULL; + zio_t *dio = NULL; + ddt_t *ddt = ddt_select(spa, bp); + ddt_entry_t *dde; + ddt_phys_t *ddp; + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum); + ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override); + + ddt_enter(ddt); + dde = ddt_lookup(ddt, bp, B_TRUE); + ddp = &dde->dde_phys[p]; + + if (zp->zp_dedup_verify && zio_ddt_collision(zio, ddt, dde)) { + /* + * If we're using a weak checksum, upgrade to a strong checksum + * and try again. If we're already using a strong checksum, + * we can't resolve it, so just convert to an ordinary write. + * (And automatically e-mail a paper to Nature?) 
+ */ + if (!zio_checksum_table[zp->zp_checksum].ci_dedup) { + zp->zp_checksum = spa_dedup_checksum(spa); + zio_pop_transforms(zio); + zio->io_stage = ZIO_STAGE_OPEN; + BP_ZERO(bp); + } else { + zp->zp_dedup = 0; + } + zio->io_pipeline = ZIO_WRITE_PIPELINE; + ddt_exit(ddt); + return (ZIO_PIPELINE_CONTINUE); + } + + ditto_copies = ddt_ditto_copies_needed(ddt, dde, ddp); + ASSERT(ditto_copies < SPA_DVAS_PER_BP); + + if (ditto_copies > ddt_ditto_copies_present(dde) && + dde->dde_lead_zio[DDT_PHYS_DITTO] == NULL) { + zio_prop_t czp = *zp; + + czp.zp_copies = ditto_copies; + + /* + * If we arrived here with an override bp, we won't have run + * the transform stack, so we won't have the data we need to + * generate a child i/o. So, toss the override bp and restart. + * This is safe, because using the override bp is just an + * optimization; and it's rare, so the cost doesn't matter. + */ + if (zio->io_bp_override) { + zio_pop_transforms(zio); + zio->io_stage = ZIO_STAGE_OPEN; + zio->io_pipeline = ZIO_WRITE_PIPELINE; + zio->io_bp_override = NULL; + BP_ZERO(bp); + ddt_exit(ddt); + return (ZIO_PIPELINE_CONTINUE); + } + + dio = zio_write(zio, spa, txg, bp, zio->io_orig_data, + zio->io_orig_size, &czp, NULL, + zio_ddt_ditto_write_done, dde, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); + + zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL); + dde->dde_lead_zio[DDT_PHYS_DITTO] = dio; + } + + if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) { + if (ddp->ddp_phys_birth != 0) + ddt_bp_fill(ddp, bp, txg); + if (dde->dde_lead_zio[p] != NULL) + zio_add_child(zio, dde->dde_lead_zio[p]); + else + ddt_phys_addref(ddp); + } else if (zio->io_bp_override) { + ASSERT(bp->blk_birth == txg); + ASSERT(BP_EQUAL(bp, zio->io_bp_override)); + ddt_phys_fill(ddp, bp); + ddt_phys_addref(ddp); + } else { + cio = zio_write(zio, spa, txg, bp, zio->io_orig_data, + zio->io_orig_size, zp, zio_ddt_child_write_ready, + zio_ddt_child_write_done, dde, 
zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); + + zio_push_transform(cio, zio->io_data, zio->io_size, 0, NULL); + dde->dde_lead_zio[p] = cio; + } + + ddt_exit(ddt); + + if (cio) + zio_nowait(cio); + if (dio) + zio_nowait(dio); + + return (ZIO_PIPELINE_CONTINUE); +} + +ddt_entry_t *freedde; /* for debugging */ + +/* + * Pipeline stage for freeing a dedup'd block. Looks up bp's DDT entry and, + * while holding the DDT lock, drops one reference from the phys entry that + * ddt_phys_select() picks for bp. Always returns ZIO_PIPELINE_CONTINUE. + */ +static int +zio_ddt_free(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + ddt_t *ddt = ddt_select(spa, bp); + ddt_entry_t *dde; + ddt_phys_t *ddp; + + ASSERT(BP_GET_DEDUP(bp)); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + ddt_enter(ddt); + freedde = dde = ddt_lookup(ddt, bp, B_TRUE); + ddp = ddt_phys_select(dde, bp); + /* + * ddt_phys_select() can return NULL (zio_ddt_read_start() checks + * for exactly that), so only drop the reference when a phys entry + * was actually found instead of handing NULL to ddt_phys_decref(). + */ + if (ddp != NULL) + ddt_phys_decref(ddp); + ddt_exit(ddt); + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * ========================================================================== + * Allocate and free blocks + * ========================================================================== + */ +static int +zio_dva_allocate(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + metaslab_class_t *mc = spa_normal_class(spa); + blkptr_t *bp = zio->io_bp; + int error; + + if (zio->io_gang_leader == NULL) { + ASSERT(zio->io_child_type > ZIO_CHILD_GANG); + zio->io_gang_leader = zio; + } + + ASSERT(BP_IS_HOLE(bp)); + ASSERT3U(BP_GET_NDVAS(bp), ==, 0); + ASSERT3U(zio->io_prop.zp_copies, >, 0); + ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); + ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); + + error = metaslab_alloc(spa, mc, zio->io_size, bp, + zio->io_prop.zp_copies, zio->io_txg, NULL, 0); + + if (error) { + if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) + return (zio_write_gang_block(zio)); + zio->io_error = error; + } + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_dva_free(zio_t *zio) +{ + metaslab_free(zio->io_spa, zio->io_bp, zio->io_txg, B_FALSE); + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_dva_claim(zio_t *zio) +{ + int error; + + error = metaslab_claim(zio->io_spa, 
zio->io_bp, zio->io_txg); + if (error) + zio->io_error = error; + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * Undo an allocation. This is used by zio_done() when an I/O fails + * and we want to give back the block we just allocated. + * This handles both normal blocks and gang blocks. + */ +static void +zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) +{ + ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); + ASSERT(zio->io_bp_override == NULL); + + if (!BP_IS_HOLE(bp)) + metaslab_free(zio->io_spa, bp, bp->blk_birth, B_TRUE); + + if (gn != NULL) { + for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { + zio_dva_unallocate(zio, gn->gn_child[g], + &gn->gn_gbh->zg_blkptr[g]); + } + } +} + +/* + * Try to allocate an intent log block. Return 0 on success, errno on failure. + */ +int +zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, + uint64_t size, boolean_t use_slog) +{ + int error = 1; + + ASSERT(txg > spa_syncing_txg(spa)); + + if (use_slog) + error = metaslab_alloc(spa, spa_log_class(spa), size, + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + + if (error) + error = metaslab_alloc(spa, spa_normal_class(spa), size, + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + + if (error == 0) { + BP_SET_LSIZE(new_bp, size); + BP_SET_PSIZE(new_bp, size); + BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); + BP_SET_CHECKSUM(new_bp, + spa_version(spa) >= SPA_VERSION_SLIM_ZIL + ? ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); + BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG); + BP_SET_LEVEL(new_bp, 0); + BP_SET_DEDUP(new_bp, 0); + BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); + } + + return (error); +} + +/* + * Free an intent log block. 
+ */ +void +zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp) +{ + ASSERT(BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG); + ASSERT(!BP_IS_GANG(bp)); + + zio_free(spa, txg, bp); +} + +/* + * ========================================================================== + * Read and write to physical devices + * ========================================================================== + */ +static int +zio_vdev_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + uint64_t align; + spa_t *spa = zio->io_spa; + + ASSERT(zio->io_error == 0); + ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0); + + if (vd == NULL) { + if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) + spa_config_enter(spa, SCL_ZIO, zio, RW_READER); + + /* + * The mirror_ops handle multiple DVAs in a single BP. + */ + return (vdev_mirror_ops.vdev_op_io_start(zio)); + } + + /* + * We keep track of time-sensitive I/Os so that the scan thread + * can quickly react to certain workloads. In particular, we care + * about non-scrubbing, top-level reads and writes with the following + * characteristics: + * - synchronous writes of user data to non-slog devices + * - any reads of user data + * When these conditions are met, adjust the timestamp of spa_last_io + * which allows the scan thread to adjust its workload accordingly. 
+ */ + if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL && + vd == vd->vdev_top && !vd->vdev_islog && + zio->io_bookmark.zb_objset != DMU_META_OBJSET && + zio->io_txg != spa_syncing_txg(spa)) { + uint64_t old = spa->spa_last_io; + uint64_t new = ddi_get_lbolt64(); + if (old != new) + (void) atomic_cas_64(&spa->spa_last_io, old, new); + } + + align = 1ULL << vd->vdev_top->vdev_ashift; + + if (P2PHASE(zio->io_size, align) != 0) { + uint64_t asize = P2ROUNDUP(zio->io_size, align); + char *abuf = zio_buf_alloc(asize); + ASSERT(vd == vd->vdev_top); + if (zio->io_type == ZIO_TYPE_WRITE) { + bcopy(zio->io_data, abuf, zio->io_size); + bzero(abuf + zio->io_size, asize - zio->io_size); + } + zio_push_transform(zio, abuf, asize, asize, zio_subblock); + } + + ASSERT(P2PHASE(zio->io_offset, align) == 0); + ASSERT(P2PHASE(zio->io_size, align) == 0); + VERIFY(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); + + /* + * If this is a repair I/O, and there's no self-healing involved -- + * that is, we're just resilvering what we expect to resilver -- + * then don't do the I/O unless zio's txg is actually in vd's DTL. + * This prevents spurious resilvering with nested replication. + * For example, given a mirror of mirrors, (A+B)+(C+D), if only + * A is out of date, we'll read from C+D, then use the data to + * resilver A+B -- but we don't actually want to resilver B, just A. + * The top-level mirror has no way to know this, so instead we just + * discard unnecessary repairs as we work our way down the vdev tree. + * The same logic applies to any form of nested replication: + * ditto + mirror, RAID-Z + replacing, etc. This covers them all. 
+ */ + if ((zio->io_flags & ZIO_FLAG_IO_REPAIR) && + !(zio->io_flags & ZIO_FLAG_SELF_HEAL) && + zio->io_txg != 0 && /* not a delegated i/o */ + !vdev_dtl_contains(vd, DTL_PARTIAL, zio->io_txg, 1)) { + ASSERT(zio->io_type == ZIO_TYPE_WRITE); + zio_vdev_io_bypass(zio); + return (ZIO_PIPELINE_CONTINUE); + } + + if (vd->vdev_ops->vdev_op_leaf && + (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) { + + if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) + return (ZIO_PIPELINE_CONTINUE); + + if ((zio = vdev_queue_io(zio)) == NULL) + return (ZIO_PIPELINE_STOP); + + if (!vdev_accessible(vd, zio)) { + zio->io_error = ENXIO; + zio_interrupt(zio); + return (ZIO_PIPELINE_STOP); + } + } + + return (vd->vdev_ops->vdev_op_io_start(zio)); +} + +static int +zio_vdev_io_done(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_ops_t *ops = vd ? vd->vdev_ops : &vdev_mirror_ops; + boolean_t unexpected_error = B_FALSE; + + if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE)) + return (ZIO_PIPELINE_STOP); + + ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + + if (vd != NULL && vd->vdev_ops->vdev_op_leaf) { + + vdev_queue_io_done(zio); + + if (zio->io_type == ZIO_TYPE_WRITE) + vdev_cache_write(zio); + + if (zio_injection_enabled && zio->io_error == 0) + zio->io_error = zio_handle_device_injection(vd, + zio, EIO); + + if (zio_injection_enabled && zio->io_error == 0) + zio->io_error = zio_handle_label_injection(zio, EIO); + + if (zio->io_error) { + if (!vdev_accessible(vd, zio)) { + zio->io_error = ENXIO; + } else { + unexpected_error = B_TRUE; + } + } + } + + ops->vdev_op_io_done(zio); + + if (unexpected_error) + VERIFY(vdev_probe(vd, zio) == NULL); + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * For non-raidz ZIOs, we can just copy aside the bad data read from the + * disk, and use that to finish the checksum ereport later. 
+ */ +static void +zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, + const void *good_buf) +{ + /* no processing needed */ + zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); +} + +/*ARGSUSED*/ +void +zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) +{ + void *buf = zio_buf_alloc(zio->io_size); + + bcopy(zio->io_data, buf, zio->io_size); + + zcr->zcr_cbinfo = zio->io_size; + zcr->zcr_cbdata = buf; + zcr->zcr_finish = zio_vsd_default_cksum_finish; + zcr->zcr_free = zio_buf_free; +} + +static int +zio_vdev_io_assess(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + + if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE)) + return (ZIO_PIPELINE_STOP); + + if (vd == NULL && !(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) + spa_config_exit(zio->io_spa, SCL_ZIO, zio); + + if (zio->io_vsd != NULL) { + zio->io_vsd_ops->vsd_free(zio); + zio->io_vsd = NULL; + } + + if (zio_injection_enabled && zio->io_error == 0) + zio->io_error = zio_handle_fault_injection(zio, EIO); + + /* + * If the I/O failed, determine whether we should attempt to retry it. + * + * On retry, we cut in line in the issue queue, since we don't want + * compression/checksumming/etc. work to prevent our (cheap) IO reissue. + */ + if (zio->io_error && vd == NULL && + !(zio->io_flags & (ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_RETRY))) { + ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */ + ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */ + zio->io_error = 0; + zio->io_flags |= ZIO_FLAG_IO_RETRY | + ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE; + zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1; + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, + zio_requeue_io_start_cut_in_line); + return (ZIO_PIPELINE_STOP); + } + + /* + * If we got an error on a leaf device, convert it to ENXIO + * if the device is not accessible at all. 
+ */ + if (zio->io_error && vd != NULL && vd->vdev_ops->vdev_op_leaf && + !vdev_accessible(vd, zio)) + zio->io_error = ENXIO; + + /* + * If we can't write to an interior vdev (mirror or RAID-Z), + * set vdev_cant_write so that we stop trying to allocate from it. + */ + if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE && + vd != NULL && !vd->vdev_ops->vdev_op_leaf) + vd->vdev_cant_write = B_TRUE; + + if (zio->io_error) + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + + return (ZIO_PIPELINE_CONTINUE); +} + +void +zio_vdev_io_reissue(zio_t *zio) +{ + ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); + ASSERT(zio->io_error == 0); + + zio->io_stage >>= 1; +} + +void +zio_vdev_io_redone(zio_t *zio) +{ + ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_DONE); + + zio->io_stage >>= 1; +} + +void +zio_vdev_io_bypass(zio_t *zio) +{ + ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); + ASSERT(zio->io_error == 0); + + zio->io_flags |= ZIO_FLAG_IO_BYPASS; + zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; +} + +/* + * ========================================================================== + * Generate and verify checksums + * ========================================================================== + */ +static int +zio_checksum_generate(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + enum zio_checksum checksum; + + if (bp == NULL) { + /* + * This is zio_write_phys(). + * We're either generating a label checksum, or none at all. 
+ */ + checksum = zio->io_prop.zp_checksum; + + if (checksum == ZIO_CHECKSUM_OFF) + return (ZIO_PIPELINE_CONTINUE); + + ASSERT(checksum == ZIO_CHECKSUM_LABEL); + } else { + if (BP_IS_GANG(bp) && zio->io_child_type == ZIO_CHILD_GANG) { + ASSERT(!IO_IS_ALLOCATING(zio)); + checksum = ZIO_CHECKSUM_GANG_HEADER; + } else { + checksum = BP_GET_CHECKSUM(bp); + } + } + + zio_checksum_compute(zio, checksum, zio->io_data, zio->io_size); + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_checksum_verify(zio_t *zio) +{ + zio_bad_cksum_t info; + blkptr_t *bp = zio->io_bp; + int error; + + ASSERT(zio->io_vd != NULL); + + if (bp == NULL) { + /* + * This is zio_read_phys(). + * We're either verifying a label checksum, or nothing at all. + */ + if (zio->io_prop.zp_checksum == ZIO_CHECKSUM_OFF) + return (ZIO_PIPELINE_CONTINUE); + + ASSERT(zio->io_prop.zp_checksum == ZIO_CHECKSUM_LABEL); + } + + if ((error = zio_checksum_error(zio, &info)) != 0) { + zio->io_error = error; + if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + zfs_ereport_start_checksum(zio->io_spa, + zio->io_vd, zio, zio->io_offset, + zio->io_size, NULL, &info); + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * Called by RAID-Z to ensure we don't compute the checksum twice. + */ +void +zio_checksum_verified(zio_t *zio) +{ + zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; +} + +/* + * ========================================================================== + * Error rank. Errors are ranked in the order 0, ENXIO, ECKSUM, EIO, other. + * An error of 0 indicates success. ENXIO indicates whole-device failure, + * which may be transient (e.g. unplugged) or permanent. ECKSUM and EIO + * indicate errors that are specific to one I/O, and most likely permanent. + * Any other error is presumed to be worse because we weren't expecting it. 
+ * ========================================================================== + */ +int +zio_worst_error(int e1, int e2) +{ + static int zio_error_rank[] = { 0, ENXIO, ECKSUM, EIO }; + int r1, r2; + + for (r1 = 0; r1 < sizeof (zio_error_rank) / sizeof (int); r1++) + if (e1 == zio_error_rank[r1]) + break; + + for (r2 = 0; r2 < sizeof (zio_error_rank) / sizeof (int); r2++) + if (e2 == zio_error_rank[r2]) + break; + + return (r1 > r2 ? e1 : e2); +} + +/* + * ========================================================================== + * I/O completion + * ========================================================================== + */ +static int +zio_ready(zio_t *zio) +{ + blkptr_t *bp = zio->io_bp; + zio_t *pio, *pio_next; + + if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || + zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY)) + return (ZIO_PIPELINE_STOP); + + if (zio->io_ready) { + ASSERT(IO_IS_ALLOCATING(zio)); + ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); + ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); + + zio->io_ready(zio); + } + + if (bp != NULL && bp != &zio->io_bp_copy) + zio->io_bp_copy = *bp; + + if (zio->io_error) + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + + mutex_enter(&zio->io_lock); + zio->io_state[ZIO_WAIT_READY] = 1; + pio = zio_walk_parents(zio); + mutex_exit(&zio->io_lock); + + /* + * As we notify zio's parents, new parents could be added. + * New parents go to the head of zio's io_parent_list, however, + * so we will (correctly) not notify them. The remainder of zio's + * io_parent_list, from 'pio_next' onward, cannot change because + * all parents must wait for us to be done before they can be done. 
+ */ + for (; pio != NULL; pio = pio_next) { + pio_next = zio_walk_parents(zio); + zio_notify_parent(pio, zio, ZIO_WAIT_READY); + } + + if (zio->io_flags & ZIO_FLAG_NODATA) { + if (BP_IS_GANG(bp)) { + zio->io_flags &= ~ZIO_FLAG_NODATA; + } else { + ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); + zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; + } + } + + if (zio_injection_enabled && + zio->io_spa->spa_syncing_txg == zio->io_txg) + zio_handle_ignored_writes(zio); + + return (ZIO_PIPELINE_CONTINUE); +} + +static int +zio_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + zio_t *lio = zio->io_logical; + blkptr_t *bp = zio->io_bp; + vdev_t *vd = zio->io_vd; + uint64_t psize = zio->io_size; + zio_t *pio, *pio_next; + + /* + * If our children haven't all completed, + * wait for them and then repeat this pipeline stage. + */ + if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || + zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || + zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || + zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) + return (ZIO_PIPELINE_STOP); + + for (int c = 0; c < ZIO_CHILD_TYPES; c++) + for (int w = 0; w < ZIO_WAIT_TYPES; w++) + ASSERT(zio->io_children[c][w] == 0); + + if (bp != NULL) { + ASSERT(bp->blk_pad[0] == 0); + ASSERT(bp->blk_pad[1] == 0); + ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || + (bp == zio_unique_parent(zio)->io_bp)); + if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && + zio->io_bp_override == NULL && + !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { + ASSERT(!BP_SHOULD_BYTESWAP(bp)); + ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp)); + ASSERT(BP_COUNT_GANG(bp) == 0 || + (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); + } + } + + /* + * If there were child vdev/gang/ddt errors, they apply to us now. 
+ */ + zio_inherit_child_errors(zio, ZIO_CHILD_VDEV); + zio_inherit_child_errors(zio, ZIO_CHILD_GANG); + zio_inherit_child_errors(zio, ZIO_CHILD_DDT); + + /* + * If the I/O on the transformed data was successful, generate any + * checksum reports now while we still have the transformed data. + */ + if (zio->io_error == 0) { + while (zio->io_cksum_report != NULL) { + zio_cksum_report_t *zcr = zio->io_cksum_report; + uint64_t align = zcr->zcr_align; + uint64_t asize = P2ROUNDUP(psize, align); + char *abuf = zio->io_data; + + if (asize != psize) { + abuf = zio_buf_alloc(asize); + bcopy(zio->io_data, abuf, psize); + bzero(abuf + psize, asize - psize); + } + + zio->io_cksum_report = zcr->zcr_next; + zcr->zcr_next = NULL; + zcr->zcr_finish(zcr, abuf); + zfs_ereport_free_checksum(zcr); + + if (asize != psize) + zio_buf_free(abuf, asize); + } + } + + zio_pop_transforms(zio); /* note: may set zio->io_error */ + + vdev_stat_update(zio, psize); + + if (zio->io_error) { + /* + * If this I/O is attached to a particular vdev, + * generate an error message describing the I/O failure + * at the block level. We ignore these errors if the + * device is currently unavailable. + */ + if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) + zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0); + + if ((zio->io_error == EIO || !(zio->io_flags & + (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && + zio == lio) { + /* + * For logical I/O requests, tell the SPA to log the + * error and generate a logical data ereport. + */ + spa_log_error(spa, zio); + zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio, + 0, 0); + } + } + + if (zio->io_error && zio == lio) { + /* + * Determine whether zio should be reexecuted. This will + * propagate all the way to the root via zio_notify_parent(). 
+ */ + ASSERT(vd == NULL && bp != NULL); + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + if (IO_IS_ALLOCATING(zio) && + !(zio->io_flags & ZIO_FLAG_CANFAIL)) { + if (zio->io_error != ENOSPC) + zio->io_reexecute |= ZIO_REEXECUTE_NOW; + else + zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + } + + if ((zio->io_type == ZIO_TYPE_READ || + zio->io_type == ZIO_TYPE_FREE) && + !(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && + zio->io_error == ENXIO && + spa_load_state(spa) == SPA_LOAD_NONE && + spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE) + zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + + if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) + zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; + + /* + * Here is a possibly good place to attempt to do + * either combinatorial reconstruction or error correction + * based on checksums. It also might be a good place + * to send out preliminary ereports before we suspend + * processing. + */ + } + + /* + * If there were logical child errors, they apply to us now. + * We defer this until now to avoid conflating logical child + * errors with errors that happened to the zio itself when + * updating vdev stats and reporting FMA events above. + */ + zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL); + + if ((zio->io_error || zio->io_reexecute) && + IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio && + !(zio->io_flags & ZIO_FLAG_IO_REWRITE)) + zio_dva_unallocate(zio, zio->io_gang_tree, bp); + + zio_gang_tree_free(&zio->io_gang_tree); + + /* + * Godfather I/Os should never suspend. + */ + if ((zio->io_flags & ZIO_FLAG_GODFATHER) && + (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) + zio->io_reexecute = 0; + + if (zio->io_reexecute) { + /* + * This is a logical I/O that wants to reexecute. + * + * Reexecute is top-down. When an i/o fails, if it's not + * the root, it simply notifies its parent and sticks around. + * The parent, seeing that it still has children in zio_done(), + * does the same. 
This percolates all the way up to the root. + * The root i/o will reexecute or suspend the entire tree. + * + * This approach ensures that zio_reexecute() honors + * all the original i/o dependency relationships, e.g. + * parents not executing until children are ready. + */ + ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); + + zio->io_gang_leader = NULL; + + mutex_enter(&zio->io_lock); + zio->io_state[ZIO_WAIT_DONE] = 1; + mutex_exit(&zio->io_lock); + + /* + * "The Godfather" I/O monitors its children but is + * not a true parent to them. It will track them through + * the pipeline but severs its ties whenever they get into + * trouble (e.g. suspended). This allows "The Godfather" + * I/O to return status without blocking. + */ + for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { + zio_link_t *zl = zio->io_walk_link; + pio_next = zio_walk_parents(zio); + + if ((pio->io_flags & ZIO_FLAG_GODFATHER) && + (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) { + zio_remove_child(pio, zio, zl); + zio_notify_parent(pio, zio, ZIO_WAIT_DONE); + } + } + + if ((pio = zio_unique_parent(zio)) != NULL) { + /* + * We're not a root i/o, so there's nothing to do + * but notify our parent. Don't propagate errors + * upward since we haven't permanently failed yet. + */ + ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); + zio->io_flags |= ZIO_FLAG_DONT_PROPAGATE; + zio_notify_parent(pio, zio, ZIO_WAIT_DONE); + } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) { + /* + * We'd fail again if we reexecuted now, so suspend + * until conditions improve (e.g. device comes online). + */ + zio_suspend(spa, zio); + } else { + /* + * Reexecution is potentially a huge amount of work. + * Hand it off to the otherwise-unused claim taskq. 
+ */ + (void) taskq_dispatch( + spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], + (task_func_t *)zio_reexecute, zio, TQ_SLEEP); + } + return (ZIO_PIPELINE_STOP); + } + + ASSERT(zio->io_child_count == 0); + ASSERT(zio->io_reexecute == 0); + ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); + + /* + * Report any checksum errors, since the I/O is complete. + */ + while (zio->io_cksum_report != NULL) { + zio_cksum_report_t *zcr = zio->io_cksum_report; + zio->io_cksum_report = zcr->zcr_next; + zcr->zcr_next = NULL; + zcr->zcr_finish(zcr, NULL); + zfs_ereport_free_checksum(zcr); + } + + /* + * It is the responsibility of the done callback to ensure that this + * particular zio is no longer discoverable for adoption, and as + * such, cannot acquire any new parents. + */ + if (zio->io_done) + zio->io_done(zio); + + mutex_enter(&zio->io_lock); + zio->io_state[ZIO_WAIT_DONE] = 1; + mutex_exit(&zio->io_lock); + + for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { + zio_link_t *zl = zio->io_walk_link; + pio_next = zio_walk_parents(zio); + zio_remove_child(pio, zio, zl); + zio_notify_parent(pio, zio, ZIO_WAIT_DONE); + } + + if (zio->io_waiter != NULL) { + mutex_enter(&zio->io_lock); + zio->io_executor = NULL; + cv_broadcast(&zio->io_cv); + mutex_exit(&zio->io_lock); + } else { + zio_destroy(zio); + } + + return (ZIO_PIPELINE_STOP); +} + +/* + * ========================================================================== + * I/O pipeline definition + * ========================================================================== + */ +static zio_pipe_stage_t *zio_pipeline[] = { + NULL, + zio_read_bp_init, + zio_free_bp_init, + zio_issue_async, + zio_write_bp_init, + zio_checksum_generate, + zio_ddt_read_start, + zio_ddt_read_done, + zio_ddt_write, + zio_ddt_free, + zio_gang_assemble, + zio_gang_issue, + zio_dva_allocate, + zio_dva_free, + zio_dva_claim, + zio_ready, + zio_vdev_io_start, + zio_vdev_io_done, + zio_vdev_io_assess, + 
zio_checksum_verify, + zio_done +}; diff --git a/uts/common/fs/zfs/zio_checksum.c b/uts/common/fs/zfs/zio_checksum.c new file mode 100644 index 000000000000..c8fe20f2eb4e --- /dev/null +++ b/uts/common/fs/zfs/zio_checksum.c @@ -0,0 +1,274 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/zio_checksum.h> +#include <sys/zil.h> +#include <zfs_fletcher.h> + +/* + * Checksum vectors. + * + * In the SPA, everything is checksummed. We support checksum vectors + * for three distinct reasons: + * + * 1. Different kinds of data need different levels of protection. + * For SPA metadata, we always want a very strong checksum. + * For user data, we let users make the trade-off between speed + * and checksum strength. + * + * 2. Cryptographic hash and MAC algorithms are an area of active research. + * It is likely that in future hash functions will be at least as strong + * as current best-of-breed, and may be substantially faster as well. 
+ * We want the ability to take advantage of these new hashes as soon as + * they become available. + * + * 3. If someone develops hardware that can compute a strong hash quickly, + * we want the ability to take advantage of that hardware. + * + * Of course, we don't want a checksum upgrade to invalidate existing + * data, so we store the checksum *function* in eight bits of the bp. + * This gives us room for up to 256 different checksum functions. + * + * When writing a block, we always checksum it with the latest-and-greatest + * checksum function of the appropriate strength. When reading a block, + * we compare the expected checksum against the actual checksum, which we + * compute via the checksum function specified by BP_GET_CHECKSUM(bp). + */ +/* + * Checksum function for the off setting: buf and size are ignored and all + * four checksum words of *zcp are set to zero. + */ +/*ARGSUSED*/ +static void +zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); +} +/* + * Checksum table, indexed by checksum type (see the + * zio_checksum_table[checksum] lookups in zio_checksum_compute() and + * zio_checksum_error()). Each entry starts with the function pair + * ci_func[0] and ci_func[1] (the latter is the one called when byteswap is + * set) and ends with the name reported as ci_name. + * NOTE(review): the three integers in between presumably initialise, in + * order, a field not referenced in this chunk, ci_eck and ci_dedup -- + * confirm against the zio_checksum_info_t declaration. + */ +zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { + {{NULL, NULL}, 0, 0, 0, "inherit"}, + {{NULL, NULL}, 0, 0, 0, "on"}, + {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, + {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, + {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, + {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, + {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, +}; +/* + * Resolve a child checksum setting against its parent: INHERIT yields the + * parent value, ON yields ZIO_CHECKSUM_ON_VALUE, and any other value is + * returned unchanged. The parent must already be resolved, i.e. be neither + * INHERIT nor ON. + */ +enum zio_checksum +zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) +{ + ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); + ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); + ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); + + if (child == ZIO_CHECKSUM_INHERIT) + return (parent); + + if (child == ZIO_CHECKSUM_ON) + return (ZIO_CHECKSUM_ON_VALUE); + + return (child); +} +/* + * Variant of zio_checksum_select() for values that may carry the + * ZIO_CHECKSUM_VERIFY flag: INHERIT yields the parent value, ON yields + * spa_dedup_checksum(spa) with the VERIFY flag preserved, and any other + * value is returned unchanged. + */ +enum zio_checksum
+zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, + enum zio_checksum parent) +{ + ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); + ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); + ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); + + if (child == ZIO_CHECKSUM_INHERIT) + return (parent); + + if (child == ZIO_CHECKSUM_ON) + return (spa_dedup_checksum(spa)); + + if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) + return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); + + ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || + (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); + + return (child); +} + +/* + * Set the external verifier for a gang block based on <vdev, offset, txg>, + * a tuple which is guaranteed to be unique for the life of the pool. + */ +static void +zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) +{ + dva_t *dva = BP_IDENTITY(bp); + uint64_t txg = BP_PHYSICAL_BIRTH(bp); + + ASSERT(BP_IS_GANG(bp)); + + ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); +} + +/* + * Set the external verifier for a label block based on its offset. + * The vdev is implicit, and the txg is unknowable at pool open time -- + * hence the logic in vdev_uberblock_load() to find the most recent copy. + */ +static void +zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) +{ + ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); +} + +/* + * Generate the checksum.
+ */ +void +zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, + void *data, uint64_t size) +{ + blkptr_t *bp = zio->io_bp; + uint64_t offset = zio->io_offset; + zio_checksum_info_t *ci = &zio_checksum_table[checksum]; + zio_cksum_t cksum; + + ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); + ASSERT(ci->ci_func[0] != NULL); +/* + * Embedded-checksum (ci_eck) formats keep the checksum in a zio_eck_t + * inside the buffer: zc_eck of the zil_chain_t for ZILOG2, otherwise the + * last zio_eck_t-sized slot of data. For gang headers and labels a + * verifier is first written into zec_cksum; for the remaining embedded + * formats the value already in zec_cksum is copied to bp->blk_cksum. + * The buffer is then checksummed and zec_cksum overwritten with the + * result. All other formats store the result directly in bp->blk_cksum. + */ + if (ci->ci_eck) { + zio_eck_t *eck; + + if (checksum == ZIO_CHECKSUM_ZILOG2) { + zil_chain_t *zilc = data; + + size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, + uint64_t); + eck = &zilc->zc_eck; + } else { + eck = (zio_eck_t *)((char *)data + size) - 1; + } + if (checksum == ZIO_CHECKSUM_GANG_HEADER) + zio_checksum_gang_verifier(&eck->zec_cksum, bp); + else if (checksum == ZIO_CHECKSUM_LABEL) + zio_checksum_label_verifier(&eck->zec_cksum, offset); + else + bp->blk_cksum = eck->zec_cksum; + eck->zec_magic = ZEC_MAGIC; + ci->ci_func[0](data, size, &cksum); + eck->zec_cksum = cksum; + } else { + ci->ci_func[0](data, size, &bp->blk_cksum); + } +} +/* + * Verify the checksum of the data attached to zio. Returns EINVAL if the + * checksum type is out of range or has no function, ECKSUM if a ZILOG2 + * header has a bad magic or an oversized zc_nused or if the computed and + * expected checksums differ, the error chosen by + * zio_handle_fault_injection() when one is injected, and 0 otherwise. + * Once both checksums have been computed, *info is filled in with the + * expected and actual values, the checksum name and the byteswap state. + */ +int +zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) +{ + blkptr_t *bp = zio->io_bp; + uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : + (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); + int byteswap; + int error; + uint64_t size = (bp == NULL ? zio->io_size : + (BP_IS_GANG(bp) ?
SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); + uint64_t offset = zio->io_offset; + void *data = zio->io_data; + zio_checksum_info_t *ci = &zio_checksum_table[checksum]; + zio_cksum_t actual_cksum, expected_cksum, verifier; + + if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) + return (EINVAL); + + if (ci->ci_eck) { + zio_eck_t *eck; + + if (checksum == ZIO_CHECKSUM_ZILOG2) { + zil_chain_t *zilc = data; + uint64_t nused; + + eck = &zilc->zc_eck; + if (eck->zec_magic == ZEC_MAGIC) + nused = zilc->zc_nused; + else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) + nused = BSWAP_64(zilc->zc_nused); + else + return (ECKSUM); + + if (nused > size) + return (ECKSUM); + + size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); + } else { + eck = (zio_eck_t *)((char *)data + size) - 1; + } + + if (checksum == ZIO_CHECKSUM_GANG_HEADER) + zio_checksum_gang_verifier(&verifier, bp); + else if (checksum == ZIO_CHECKSUM_LABEL) + zio_checksum_label_verifier(&verifier, offset); + else + verifier = bp->blk_cksum; + + byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); + + if (byteswap) + byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); +/* + * Save the stored checksum, put the verifier in its place while the + * buffer is checksummed, then restore the stored value so the buffer is + * left as it was found. + */ + expected_cksum = eck->zec_cksum; + eck->zec_cksum = verifier; + ci->ci_func[byteswap](data, size, &actual_cksum); + eck->zec_cksum = expected_cksum; + + if (byteswap) + byteswap_uint64_array(&expected_cksum, + sizeof (zio_cksum_t)); + } else { + ASSERT(!BP_IS_GANG(bp)); + byteswap = BP_SHOULD_BYTESWAP(bp); + expected_cksum = bp->blk_cksum; + ci->ci_func[byteswap](data, size, &actual_cksum); + } + + info->zbc_expected = expected_cksum; + info->zbc_actual = actual_cksum; + info->zbc_checksum_name = ci->ci_name; + info->zbc_byteswapped = byteswap; + info->zbc_injected = 0; + info->zbc_has_cksum = 1; + + if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) + return (ECKSUM); + + if (zio_injection_enabled && !zio->io_error && + (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { + + info->zbc_injected = 1; + return (error); + } + +
return (0); +} diff --git a/uts/common/fs/zfs/zio_compress.c b/uts/common/fs/zfs/zio_compress.c new file mode 100644 index 000000000000..f148977c4468 --- /dev/null +++ b/uts/common/fs/zfs/zio_compress.c @@ -0,0 +1,132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> +#include <sys/compress.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/zio_compress.h> + +/* + * Compression vectors.
+ */ +/* + * Compression table, indexed by compression type. Each entry holds the + * compress function (ci_compress), the decompress function + * (ci_decompress), the level handed to both (ci_level) and a name string. + */ +zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { + {NULL, NULL, 0, "inherit"}, + {NULL, NULL, 0, "on"}, + {NULL, NULL, 0, "uncompressed"}, + {lzjb_compress, lzjb_decompress, 0, "lzjb"}, + {NULL, NULL, 0, "empty"}, + {gzip_compress, gzip_decompress, 1, "gzip-1"}, + {gzip_compress, gzip_decompress, 2, "gzip-2"}, + {gzip_compress, gzip_decompress, 3, "gzip-3"}, + {gzip_compress, gzip_decompress, 4, "gzip-4"}, + {gzip_compress, gzip_decompress, 5, "gzip-5"}, + {gzip_compress, gzip_decompress, 6, "gzip-6"}, + {gzip_compress, gzip_decompress, 7, "gzip-7"}, + {gzip_compress, gzip_decompress, 8, "gzip-8"}, + {gzip_compress, gzip_decompress, 9, "gzip-9"}, + {zle_compress, zle_decompress, 64, "zle"}, +}; +/* + * Resolve a child compression setting against its parent: INHERIT yields + * the parent value, ON yields ZIO_COMPRESS_ON_VALUE, and any other value + * is returned unchanged. The parent must be neither INHERIT nor ON. + */ +enum zio_compress +zio_compress_select(enum zio_compress child, enum zio_compress parent) +{ + ASSERT(child < ZIO_COMPRESS_FUNCTIONS); + ASSERT(parent < ZIO_COMPRESS_FUNCTIONS); + ASSERT(parent != ZIO_COMPRESS_INHERIT && parent != ZIO_COMPRESS_ON); + + if (child == ZIO_COMPRESS_INHERIT) + return (parent); + + if (child == ZIO_COMPRESS_ON) + return (ZIO_COMPRESS_ON_VALUE); + + return (child); +} +/* + * Compress s_len bytes from src into dst with algorithm c. Returns 0 if + * src is entirely zero, s_len if the data is to be stored uncompressed + * (ZIO_COMPRESS_EMPTY, or the result does not fit in d_len, which is 7/8 + * of s_len aligned down to SPA_MINBLOCKSIZE), and otherwise the compressed + * length rounded up to a multiple of SPA_MINBLOCKSIZE with the padding + * zero-filled. The zero scan reads src as 64-bit words. + */ +size_t +zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) +{ + uint64_t *word, *word_end; + size_t c_len, d_len, r_len; + zio_compress_info_t *ci = &zio_compress_table[c]; + + ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS); + ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL); + + /* + * If the data is all zeroes, we don't even need to allocate + * a block for it. We indicate this by returning zero size.
+ */ + word_end = (uint64_t *)((char *)src + s_len); + for (word = src; word < word_end; word++) + if (*word != 0) + break; + + if (word == word_end) + return (0); + + if (c == ZIO_COMPRESS_EMPTY) + return (s_len); + + /* Compress at least 12.5%: d_len is the largest result worth keeping */ + d_len = P2ALIGN(s_len - (s_len >> 3), (size_t)SPA_MINBLOCKSIZE); + if (d_len == 0) + return (s_len); + + c_len = ci->ci_compress(src, dst, s_len, d_len, ci->ci_level); + + if (c_len > d_len) + return (s_len); + + /* + * Cool. We compressed at least as much as we were hoping to. + * For both security and repeatability, pad out the last sector. + */ + r_len = P2ROUNDUP(c_len, (size_t)SPA_MINBLOCKSIZE); + if (r_len > c_len) { + bzero((char *)dst + c_len, r_len - c_len); + c_len = r_len; + } + + ASSERT3U(c_len, <=, d_len); + ASSERT(P2PHASE(c_len, (size_t)SPA_MINBLOCKSIZE) == 0); + + return (c_len); +} +/* + * Decompress s_len bytes from src into d_len bytes at dst with algorithm + * c. Returns EINVAL if c is out of range or has no decompress function, + * otherwise whatever the ci_decompress function returns. + */ +int +zio_decompress_data(enum zio_compress c, void *src, void *dst, + size_t s_len, size_t d_len) +{ + zio_compress_info_t *ci = &zio_compress_table[c]; + + if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL) + return (EINVAL); + + return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level)); +} diff --git a/uts/common/fs/zfs/zio_inject.c b/uts/common/fs/zfs/zio_inject.c new file mode 100644 index 000000000000..9ae7d1f697fd --- /dev/null +++ b/uts/common/fs/zfs/zio_inject.c @@ -0,0 +1,515 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * ZFS fault injection + * + * To handle fault injection, we keep track of a series of zinject_record_t + * structures which describe which logical block(s) should be injected with a + * fault. These are kept in a global list. Each record corresponds to a given + * spa_t and maintains a special hold on the spa_t so that it cannot be deleted + * or exported while the injection record exists. + * + * Device level injection is done using the 'zi_guid' field. If this is set, it + * means that the error is destined for a particular device, not a piece of + * data. + * + * This is a rather poor data structure and algorithm, but we don't expect more + * than a few faults at any one time, so it should be sufficient for our needs. + */ + +#include <sys/arc.h> +#include <sys/zio_impl.h> +#include <sys/zfs_ioctl.h> +#include <sys/vdev_impl.h> +#include <sys/dmu_objset.h> +#include <sys/fs/zfs.h> + +uint32_t zio_injection_enabled; /* bumped by zio_inject_fault(), dropped by zio_clear_fault() */ + +typedef struct inject_handler { + int zi_id; /* identifier handed out from inject_next_id */ + spa_t *zi_spa; /* pool returned by spa_inject_addref() */ + zinject_record_t zi_record; /* copy of the record given to zio_inject_fault() */ + list_node_t zi_link; /* linkage on inject_handlers */ +} inject_handler_t; + +static list_t inject_handlers; /* every registered handler */ +static krwlock_t inject_lock; /* reader to walk inject_handlers, writer to change it */ +static int inject_next_id = 1; /* identifier for the next handler */ + +/* + * Returns true if the given record matches the I/O in progress.
+ */ +static boolean_t +zio_match_handler(zbookmark_t *zb, uint64_t type, + zinject_record_t *record, int error) +{ + /* + * Check for a match against the MOS, which is based on type. + * A zi_freq of 0 always matches; otherwise the match is taken only + * when spa_get_random(100) is below zi_freq. + */ + if (zb->zb_objset == DMU_META_OBJSET && + record->zi_objset == DMU_META_OBJSET && + record->zi_object == DMU_META_DNODE_OBJECT) { + if (record->zi_type == DMU_OT_NONE || + type == record->zi_type) + return (record->zi_freq == 0 || + spa_get_random(100) < record->zi_freq); + else + return (B_FALSE); + } + + /* + * Check for an exact match: same objset, object and level, a block + * id within [zi_start, zi_end], and the same error. + */ + if (zb->zb_objset == record->zi_objset && + zb->zb_object == record->zi_object && + zb->zb_level == record->zi_level && + zb->zb_blkid >= record->zi_start && + zb->zb_blkid <= record->zi_end && + error == record->zi_error) + return (record->zi_freq == 0 || + spa_get_random(100) < record->zi_freq); + + return (B_FALSE); +} + +/* + * Panic the system when a config change happens in the function + * specified by tag. + */ +void +zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type) +{ + inject_handler_t *handler; + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + if (spa != handler->zi_spa) + continue; + + if (handler->zi_record.zi_type == type && + strcmp(tag, handler->zi_record.zi_func) == 0) + panic("Panic requested in function %s\n", tag); + } + + rw_exit(&inject_lock); +} + +/* + * Determine if the I/O in question should return failure. Returns the errno + * to be returned to the caller. + */ +int +zio_handle_fault_injection(zio_t *zio, int error) +{ + int ret = 0; + inject_handler_t *handler; + + /* + * Ignore I/O not associated with any logical data. + */ + if (zio->io_logical == NULL) + return (0); + + /* + * Currently, we only support fault injection on reads.
+ */ + if (zio->io_type != ZIO_TYPE_READ) + return (0); + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + /* Ignore errors not destined for this pool */ + if (zio->io_spa != handler->zi_spa) + continue; + + /* Ignore device errors and panic injection */ + if (handler->zi_record.zi_guid != 0 || + handler->zi_record.zi_func[0] != '\0' || + handler->zi_record.zi_duration != 0) + continue; + + /* If this handler matches, return the error that was asked about */ + if (zio_match_handler(&zio->io_logical->io_bookmark, + zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, + &handler->zi_record, error)) { + ret = error; + break; + } + } + + rw_exit(&inject_lock); + + return (ret); +} + +/* + * Determine if the zio is part of a label update and has an injection + * handler associated with that portion of the label. Currently, we + * allow error injection in either the nvlist or the uberblock region + * of the vdev label. + */ +int +zio_handle_label_injection(zio_t *zio, int error) +{ + inject_handler_t *handler; + vdev_t *vd = zio->io_vd; + uint64_t offset = zio->io_offset; + int label; + int ret = 0; + + if (offset >= VDEV_LABEL_START_SIZE && + offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) + return (0); + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + uint64_t start = handler->zi_record.zi_start; + uint64_t end = handler->zi_record.zi_end; + + /* Ignore device only faults or panic injection */ + if (handler->zi_record.zi_start == 0 || + handler->zi_record.zi_func[0] != '\0' || + handler->zi_record.zi_duration != 0) + continue; + + /* + * The injection region is the relative offsets within a + * vdev label. We must determine the label which is being + * updated and adjust our region accordingly.
+ */ + label = vdev_label_number(vd->vdev_psize, offset); + start = vdev_label_offset(vd->vdev_psize, label, start); + end = vdev_label_offset(vd->vdev_psize, label, end); + + if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && + (offset >= start && offset <= end)) { + ret = error; + break; + } + } + rw_exit(&inject_lock); + return (ret); +} + +/* + * Determine whether a device-level fault should be injected for vd; zio + * is NULL when no I/O is involved (device open, per the comment below). + * Returns error when a handler for this vdev guid requests exactly that + * error, EIO when the handler requests ENXIO but a different error was + * asked about, and 0 otherwise. + */ +int +zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) +{ + inject_handler_t *handler; + int ret = 0; + + /* + * We skip over faults in the labels unless it's during + * device open (i.e. zio == NULL). + */ + if (zio != NULL) { + uint64_t offset = zio->io_offset; + + if (offset < VDEV_LABEL_START_SIZE || + offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) + return (0); + } + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + /* + * Ignore label specific faults, panic injection + * or fake writes + */ + if (handler->zi_record.zi_start != 0 || + handler->zi_record.zi_func[0] != '\0' || + handler->zi_record.zi_duration != 0) + continue; + + if (vd->vdev_guid == handler->zi_record.zi_guid) { + if (handler->zi_record.zi_failfast && + (zio == NULL || (zio->io_flags & + (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { + continue; + } + + /* Handle type specific I/O failures */ + if (zio != NULL && + handler->zi_record.zi_iotype != ZIO_TYPES && + handler->zi_record.zi_iotype != zio->io_type) + continue; + + if (handler->zi_record.zi_error == error) { + /* + * For a failed open, pretend like the device + * has gone away. + */ + if (error == ENXIO) + vd->vdev_stat.vs_aux = + VDEV_AUX_OPEN_FAILED; + + /* + * Treat these errors as if they had been + * retried so that all the appropriate stats + * and FMA events are generated.
+ */ + if (!handler->zi_record.zi_failfast && + zio != NULL) + zio->io_flags |= ZIO_FLAG_IO_RETRY; + + ret = error; + break; + } + if (handler->zi_record.zi_error == ENXIO) { + ret = EIO; + break; + } + } + } + + rw_exit(&inject_lock); + + return (ret); +} + +/* + * Simulate hardware that ignores cache flushes. For requested number + * of seconds nix the actual writing to disk. + */ +void +zio_handle_ignored_writes(zio_t *zio) +{ + inject_handler_t *handler; + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + /* Ignore errors not destined for this pool */ + if (zio->io_spa != handler->zi_spa) + continue; + + if (handler->zi_record.zi_duration == 0) + continue; + + /* + * Positive duration implies # of seconds, negative + * a number of txgs + */ + if (handler->zi_record.zi_timer == 0) { + if (handler->zi_record.zi_duration > 0) + handler->zi_record.zi_timer = ddi_get_lbolt64(); + else + handler->zi_record.zi_timer = zio->io_txg; + } + + /* Have a "problem" writing 60% of the time */ + if (spa_get_random(100) < 60) + zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; + break; + } + + rw_exit(&inject_lock); +} +/* + * VERIFY that no ignored-writes handler for this pool has outlived its + * duration: either its timer has not been started yet, or the start value + * plus the duration (in ticks for positive durations, in txgs for negative + * ones) has not been passed. Does nothing when injection is disabled. + */ +void +spa_handle_ignored_writes(spa_t *spa) +{ + inject_handler_t *handler; + + if (zio_injection_enabled == 0) + return; + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + /* Ignore errors not destined for this pool */ + if (spa != handler->zi_spa) + continue; + + if (handler->zi_record.zi_duration == 0) + continue; + + if (handler->zi_record.zi_duration > 0) { + VERIFY(handler->zi_record.zi_timer == 0 || + handler->zi_record.zi_timer + + handler->zi_record.zi_duration * hz > + ddi_get_lbolt64()); + } else { + /* duration is negative so the subtraction here adds */ + VERIFY(handler->zi_record.zi_timer == 0 || + handler->zi_record.zi_timer - +
handler->zi_record.zi_duration >= + spa_syncing_txg(spa)); + } + } + + rw_exit(&inject_lock); +} + +/* + * Create a new handler for the given record. We add it to the list, adding + * a reference to the spa_t in the process. We increment zio_injection_enabled, + * which is the switch to trigger all fault injection. + * + * With ZINJECT_NULL set no handler is registered and *id is left untouched. + * Returns 0 on success, the spa_reset() error when ZINJECT_UNLOAD_SPA fails, + * or ENOENT when spa_inject_addref() cannot find the pool. + */ +int +zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) +{ + inject_handler_t *handler; + int error; + spa_t *spa; + + /* + * If this is pool-wide metadata, make sure we unload the corresponding + * spa_t, so that the next attempt to load it will trigger the fault. + * We call spa_reset() to unload the pool appropriately. + */ + if (flags & ZINJECT_UNLOAD_SPA) + if ((error = spa_reset(name)) != 0) + return (error); + + if (!(flags & ZINJECT_NULL)) { + /* + * spa_inject_addref() will add an injection reference, which will + * prevent the pool from being removed from the namespace while + * still allowing it to be unloaded. + */ + if ((spa = spa_inject_addref(name)) == NULL) + return (ENOENT); + + handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); + + rw_enter(&inject_lock, RW_WRITER); + + *id = handler->zi_id = inject_next_id++; + handler->zi_spa = spa; + handler->zi_record = *record; + list_insert_tail(&inject_handlers, handler); + atomic_add_32(&zio_injection_enabled, 1); + + rw_exit(&inject_lock); + } + + /* + * Flush the ARC, so that any attempts to read this data will end up + * going to the ZIO layer. Note that this is a little overkill, but + * we don't have the necessary ARC interfaces to do anything else, and + * fault injection isn't a performance critical path. + */ + if (flags & ZINJECT_FLUSH_ARC) + arc_flush(NULL); + + return (0); +} + +/* + * Returns the next record with an ID greater than that supplied to the + * function. Used to iterate over all handlers in the system.
+ */ +int +zio_inject_list_next(int *id, char *name, size_t buflen, + zinject_record_t *record) +{ + inject_handler_t *handler; + int ret; + + /* + * On success *id, *record and name are filled in from the first + * handler whose id is greater than *id and 0 is returned; ENOENT + * means there is no such handler and the outputs are untouched. + */ + mutex_enter(&spa_namespace_lock); + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) + if (handler->zi_id > *id) + break; + + if (handler) { + *record = handler->zi_record; + *id = handler->zi_id; + /* + * strlcpy() always NUL-terminates name, even when the pool + * name has to be truncated to fit in buflen bytes; strncpy() + * left the buffer unterminated in that case. + */ + (void) strlcpy(name, spa_name(handler->zi_spa), buflen); + ret = 0; + } else { + ret = ENOENT; + } + + rw_exit(&inject_lock); + mutex_exit(&spa_namespace_lock); + + return (ret); +} + +/* + * Clear the fault handler with the given identifier, or return ENOENT if none + * exists. The handler is unlinked under the writer lock; its pool reference + * and memory are released after the lock has been dropped. + */ +int +zio_clear_fault(int id) +{ + inject_handler_t *handler; + + rw_enter(&inject_lock, RW_WRITER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) + if (handler->zi_id == id) + break; + + if (handler == NULL) { + rw_exit(&inject_lock); + return (ENOENT); + } + + list_remove(&inject_handlers, handler); + rw_exit(&inject_lock); + + spa_inject_delref(handler->zi_spa); + kmem_free(handler, sizeof (inject_handler_t)); + atomic_add_32(&zio_injection_enabled, -1); + + return (0); +} + +/* + * Set up the handler list and the lock that protects it. + */ +void +zio_inject_init(void) +{ + rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); + list_create(&inject_handlers, sizeof (inject_handler_t), + offsetof(inject_handler_t, zi_link)); +} + +/* + * Tear down what zio_inject_init() created. + */ +void +zio_inject_fini(void) +{ + list_destroy(&inject_handlers); + rw_destroy(&inject_lock); +} diff --git a/uts/common/fs/zfs/zle.c b/uts/common/fs/zfs/zle.c new file mode 100644 index 000000000000..13c5673fbe26 --- /dev/null +++ b/uts/common/fs/zfs/zle.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
/*
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Zero-length encoding. This is a fast and simple algorithm to eliminate
 * runs of zeroes. Each chunk of compressed data begins with a length byte, b.
 * If b < n (where n is the compression parameter) then the next b + 1 bytes
 * are literal values. If b >= n then the next (b - n + 1) bytes are zero.
 */
#include <sys/types.h>
#include <sys/sysmacros.h>

/*
 * Compress s_len bytes at s_start into at most d_len bytes at d_start.
 *
 * Returns the number of bytes written to d_start when the whole source was
 * consumed, or s_len when the output buffer ran out first (the caller then
 * treats the data as incompressible).
 *
 * A literal chunk is ended early as soon as two consecutive zero bytes are
 * seen, so an isolated zero is carried inside the literal run.
 */
size_t
zle_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint8_t *src = s_start;
	uint8_t *dst = d_start;
	uint8_t *s_end = src + s_len;
	uint8_t *d_end = dst + d_len;

	/* Every chunk needs room for its length byte plus at least one more. */
	while (src < s_end && d_end - dst > 1) {
		uint8_t *first = src;
		uint8_t *len = dst++;
		/*
		 * A zero run encodes at most 256 - n bytes and a literal run
		 * at most n bytes; neither may go past the end of the source.
		 * The bound is computed once instead of on every iteration.
		 */
		ssize_t avail = s_end - src;
		ssize_t span = (src[0] == 0) ? 256 - n : n;
		uint8_t *limit = src + (span < avail ? span : avail);

		if (src[0] == 0) {
			while (src < limit && src[0] == 0)
				src++;
			*len = src - first - 1 + n;
		} else {
			/* Give up unless a full literal run is sure to fit. */
			if (d_end - dst < n)
				break;
			while (src < limit - 1 && (src[0] | src[1]))
				*dst++ = *src++;
			if (src[0])
				*dst++ = *src++;
			*len = src - first - 1;
		}
	}
	return (src == s_end ? dst - (uint8_t *)d_start : s_len);
}

/*
 * Expand the s_len bytes at s_start into exactly d_len bytes at d_start.
 *
 * Returns 0 when the output was filled exactly and -1 otherwise. Every
 * chunk is checked against the bytes remaining in the source and in the
 * destination before it is expanded, so a truncated or corrupt stream is
 * rejected with -1 instead of reading or writing outside the buffers.
 */
int
zle_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint8_t *src = s_start;
	uint8_t *dst = d_start;
	uint8_t *s_end = src + s_len;
	uint8_t *d_end = dst + d_len;

	while (src < s_end && dst < d_end) {
		int len = 1 + *src++;
		if (len <= n) {
			/* Literal run: len bytes are copied from the source. */
			if (len > s_end - src || len > d_end - dst)
				return (-1);
			while (len-- != 0)
				*dst++ = *src++;
		} else {
			/* Zero run of len - n bytes. */
			len -= n;
			if (len > d_end - dst)
				return (-1);
			while (len-- != 0)
				*dst++ = 0;
		}
	}
	return (dst == d_end ? 0 : -1);
}

/*
 * Patch header of the next file, preserved from the original diff text:
 *
 * diff --git a/uts/common/fs/zfs/zrlock.c b/uts/common/fs/zfs/zrlock.c
 * new file mode 100644
 * index 000000000000..ec94b08555be
 * --- /dev/null
 * +++ b/uts/common/fs/zfs/zrlock.c
 * @@ -0,0 +1,194 @@
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * A Zero Reference Lock (ZRL) is a reference count that can lock out new
 * references only when the count is zero and only without waiting if the count
 * is not already zero.
 */
It is similar to a read-write lock in that it allows + * multiple readers and only a single writer, but it does not allow a writer to + * block while waiting for readers to exit, and therefore the question of + * reader/writer priority is moot (no WRWANT bit). Since the equivalent of + * rw_enter(&lock, RW_WRITER) is disallowed and only tryenter() is allowed, it + * is perfectly safe for the same reader to acquire the same lock multiple + * times. The fact that a ZRL is reentrant for readers (through multiple calls + * to zrl_add()) makes it convenient for determining whether something is + * actively referenced without the fuss of flagging lock ownership across + * function calls. + */ +#include <sys/zrlock.h> + +/* + * A ZRL can be locked only while there are zero references, so ZRL_LOCKED is + * treated as zero references. + */ +#define ZRL_LOCKED ((uint32_t)-1) +#define ZRL_DESTROYED (-2) + +void +zrl_init(zrlock_t *zrl) +{ + mutex_init(&zrl->zr_mtx, NULL, MUTEX_DEFAULT, NULL); + zrl->zr_refcount = 0; + cv_init(&zrl->zr_cv, NULL, CV_DEFAULT, NULL); +#ifdef ZFS_DEBUG + zrl->zr_owner = NULL; + zrl->zr_caller = NULL; +#endif +} + +void +zrl_destroy(zrlock_t *zrl) +{ + ASSERT(zrl->zr_refcount == 0); + + mutex_destroy(&zrl->zr_mtx); + zrl->zr_refcount = ZRL_DESTROYED; + cv_destroy(&zrl->zr_cv); +} + +/* + * Take a reference, waiting while the ZRL is locked. + * + * The count is only ever changed with atomic operations. When the lock is + * held we sleep on zr_cv until zrl_exit() releases it and then retry the + * compare-and-swap, instead of incrementing non-atomically under zr_mtx: + * the mutex does not exclude the lock-free zrl_add(), zrl_remove() and + * zrl_tryenter() paths, so a plain increment could lose their updates. + */ +void +#ifdef ZFS_DEBUG +zrl_add_debug(zrlock_t *zrl, const char *zc) +#else +zrl_add(zrlock_t *zrl) +#endif +{ + for (;;) { + uint32_t n = (uint32_t)zrl->zr_refcount; + + while (n != ZRL_LOCKED) { + uint32_t cas = atomic_cas_32( + (uint32_t *)&zrl->zr_refcount, n, n + 1); + if (cas == n) { + ASSERT((int32_t)n >= 0); +#ifdef ZFS_DEBUG + if (zrl->zr_owner == curthread) { + DTRACE_PROBE2(zrlock__reentry, + zrlock_t *, zrl, uint32_t, n); + } + zrl->zr_owner = curthread; + zrl->zr_caller = zc; +#endif + return; + } + n = cas; + } + + mutex_enter(&zrl->zr_mtx); + while (zrl->zr_refcount == ZRL_LOCKED) { + cv_wait(&zrl->zr_cv, &zrl->zr_mtx); + } + mutex_exit(&zrl->zr_mtx); + } +} +
+/* + * Drop a reference taken with zrl_add(). + */ +void +zrl_remove(zrlock_t *zrl) +{ + uint32_t n; + + n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount); + ASSERT((int32_t)n >= 0); +#ifdef ZFS_DEBUG + if (zrl->zr_owner == curthread) { + zrl->zr_owner = NULL; + zrl->zr_caller = NULL; + } +#endif +} + +/* + * Try to lock the ZRL without waiting. Returns 1 if the count was zero and + * has been switched to ZRL_LOCKED, 0 otherwise. + */ +int +zrl_tryenter(zrlock_t *zrl) +{ + uint32_t n = (uint32_t)zrl->zr_refcount; + + if (n == 0) { + uint32_t cas = atomic_cas_32( + (uint32_t *)&zrl->zr_refcount, 0, ZRL_LOCKED); + if (cas == 0) { +#ifdef ZFS_DEBUG + ASSERT(zrl->zr_owner == NULL); + zrl->zr_owner = curthread; +#endif + return (1); + } + } + + ASSERT((int32_t)n > ZRL_DESTROYED); + + return (0); +} + +/* + * Release a lock obtained with zrl_tryenter() and wake every thread that + * is waiting in zrl_add(). + */ +void +zrl_exit(zrlock_t *zrl) +{ + ASSERT(zrl->zr_refcount == ZRL_LOCKED); + + mutex_enter(&zrl->zr_mtx); +#ifdef ZFS_DEBUG + ASSERT(zrl->zr_owner == curthread); + zrl->zr_owner = NULL; + membar_producer(); /* make sure the owner store happens first */ +#endif + zrl->zr_refcount = 0; + cv_broadcast(&zrl->zr_cv); + mutex_exit(&zrl->zr_mtx); +} + +/* + * Current number of references; a locked ZRL reports zero. + */ +int +zrl_refcount(zrlock_t *zrl) +{ + ASSERT(zrl->zr_refcount > ZRL_DESTROYED); + + int n = (int)zrl->zr_refcount; + return (n <= 0 ? 0 : n); +} + +/* + * True when there are no references, which includes the locked state. + */ +int +zrl_is_zero(zrlock_t *zrl) +{ + ASSERT(zrl->zr_refcount > ZRL_DESTROYED); + + return (zrl->zr_refcount <= 0); +} + +/* + * True when the ZRL is currently locked. + */ +int +zrl_is_locked(zrlock_t *zrl) +{ + ASSERT(zrl->zr_refcount > ZRL_DESTROYED); + + return (zrl->zr_refcount == ZRL_LOCKED); +} + +#ifdef ZFS_DEBUG +kthread_t * +zrl_owner(zrlock_t *zrl) +{ + return (zrl->zr_owner); +} +#endif diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c new file mode 100644 index 000000000000..47b6c5a87a52 --- /dev/null +++ b/uts/common/fs/zfs/zvol.c @@ -0,0 +1,1894 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +/* + * ZFS volume emulation driver. + * + * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. + * Volumes are accessed through the symbolic links named: + * + * /dev/zvol/dsk/<pool_name>/<dataset_name> + * /dev/zvol/rdsk/<pool_name>/<dataset_name> + * + * These links are created by the /dev filesystem (sdev_zvolops.c). + * Volumes are persistent through reboot. No user command needs to be + * run before opening and using a device. 
+ */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/uio.h> +#include <sys/buf.h> +#include <sys/modctl.h> +#include <sys/open.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/cmn_err.h> +#include <sys/stat.h> +#include <sys/zap.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/dmu_traverse.h> +#include <sys/dnode.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_prop.h> +#include <sys/dkio.h> +#include <sys/efi_partition.h> +#include <sys/byteorder.h> +#include <sys/pathname.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/crc32.h> +#include <sys/dirent.h> +#include <sys/policy.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_ioctl.h> +#include <sys/mkdev.h> +#include <sys/zil.h> +#include <sys/refcount.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_rlock.h> +#include <sys/vdev_disk.h> +#include <sys/vdev_impl.h> +#include <sys/zvol.h> +#include <sys/dumphdr.h> +#include <sys/zil_impl.h> + +#include "zfs_namecheck.h" + +void *zfsdev_state; +static char *zvol_tag = "zvol_tag"; + +#define ZVOL_DUMPSIZE "dumpsize" + +/* + * This lock protects the zfsdev_state structure from being modified + * while it's being used, e.g. an open that comes in before a create + * finishes. It also protects temporary opens of the dataset so that, + * e.g., an open doesn't get a spurious EBUSY. + */ +kmutex_t zfsdev_state_lock; +static uint32_t zvol_minors; + +typedef struct zvol_extent { + list_node_t ze_node; + dva_t ze_dva; /* dva associated with this extent */ + uint64_t ze_nblks; /* number of blocks in extent */ +} zvol_extent_t; + +/* + * The in-core state of each volume. 
+ */ +typedef struct zvol_state { + char zv_name[MAXPATHLEN]; /* pool/dd name */ + uint64_t zv_volsize; /* amount of space we advertise */ + uint64_t zv_volblocksize; /* volume block size */ + minor_t zv_minor; /* minor number */ + uint8_t zv_min_bs; /* minimum addressable block shift */ + uint8_t zv_flags; /* readonly, dumpified, etc. */ + objset_t *zv_objset; /* objset handle */ + uint32_t zv_open_count[OTYPCNT]; /* open counts */ + uint32_t zv_total_opens; /* total open count */ + zilog_t *zv_zilog; /* ZIL handle */ + list_t zv_extents; /* List of extents for dump */ + znode_t zv_znode; /* for range locking */ + dmu_buf_t *zv_dbuf; /* bonus handle */ +} zvol_state_t; + +/* + * zvol specific flags + */ +#define ZVOL_RDONLY 0x1 +#define ZVOL_DUMPIFIED 0x2 +#define ZVOL_EXCL 0x4 +#define ZVOL_WCE 0x8 + +/* + * zvol maximum transfer in one DMU tx. + */ +int zvol_maxphys = DMU_MAX_ACCESS/2; + +extern int zfs_set_prop_nvlist(const char *, zprop_source_t, + nvlist_t *, nvlist_t **); +static int zvol_remove_zv(zvol_state_t *); +static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); +static int zvol_dumpify(zvol_state_t *zv); +static int zvol_dump_fini(zvol_state_t *zv); +static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); + +static void +zvol_size_changed(uint64_t volsize, major_t maj, minor_t min) +{ + dev_t dev = makedevice(maj, min); + + VERIFY(ddi_prop_update_int64(dev, zfs_dip, + "Size", volsize) == DDI_SUCCESS); + VERIFY(ddi_prop_update_int64(dev, zfs_dip, + "Nblocks", lbtodb(volsize)) == DDI_SUCCESS); + + /* Notify specfs to invalidate the cached size */ + spec_size_invalidate(dev, VBLK); + spec_size_invalidate(dev, VCHR); +} + +int +zvol_check_volsize(uint64_t volsize, uint64_t blocksize) +{ + if (volsize == 0) + return (EINVAL); + + if (volsize % blocksize != 0) + return (EINVAL); + +#ifdef _ILP32 + if (volsize - 1 > SPEC_MAXOFFSET_T) + return (EOVERFLOW); +#endif + return (0); +} + +int +zvol_check_volblocksize(uint64_t 
volblocksize) +{ + if (volblocksize < SPA_MINBLOCKSIZE || + volblocksize > SPA_MAXBLOCKSIZE || + !ISP2(volblocksize)) + return (EDOM); + + return (0); +} + +int +zvol_get_stats(objset_t *os, nvlist_t *nv) +{ + int error; + dmu_object_info_t doi; + uint64_t val; + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); + if (error) + return (error); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); + + error = dmu_object_info(os, ZVOL_OBJ, &doi); + + if (error == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, + doi.doi_data_block_size); + } + + return (error); +} + +static zvol_state_t * +zvol_minor_lookup(const char *name) +{ + minor_t minor; + zvol_state_t *zv; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) + continue; + if (strcmp(zv->zv_name, name) == 0) + return (zv); + } + + return (NULL); +} + +/* extent mapping arg */ +struct maparg { + zvol_state_t *ma_zv; + uint64_t ma_blks; +}; + +/*ARGSUSED*/ +static int +zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + struct maparg *ma = arg; + zvol_extent_t *ze; + int bs = ma->ma_zv->zv_volblocksize; + + if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) + return (0); + + VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); + ma->ma_blks++; + + /* Abort immediately if we have encountered gang blocks */ + if (BP_IS_GANG(bp)) + return (EFRAGS); + + /* + * See if the block is at the end of the previous extent. 
+ */ + ze = list_tail(&ma->ma_zv->zv_extents); + if (ze && + DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && + DVA_GET_OFFSET(BP_IDENTITY(bp)) == + DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { + ze->ze_nblks++; + return (0); + } + + dprintf_bp(bp, "%s", "next blkptr:"); + + /* start a new extent */ + ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); + ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ + ze->ze_nblks = 1; + list_insert_tail(&ma->ma_zv->zv_extents, ze); + return (0); +} + +static void +zvol_free_extents(zvol_state_t *zv) +{ + zvol_extent_t *ze; + + while (ze = list_head(&zv->zv_extents)) { + list_remove(&zv->zv_extents, ze); + kmem_free(ze, sizeof (zvol_extent_t)); + } +} + +static int +zvol_get_lbas(zvol_state_t *zv) +{ + objset_t *os = zv->zv_objset; + struct maparg ma; + int err; + + ma.ma_zv = zv; + ma.ma_blks = 0; + zvol_free_extents(zv); + + /* commit any in-flight changes before traversing the dataset */ + txg_wait_synced(dmu_objset_pool(os), 0); + err = traverse_dataset(dmu_objset_ds(os), 0, + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); + if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { + zvol_free_extents(zv); + return (err ? err : EIO); + } + + return (0); +} + +/* ARGSUSED */ +void +zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) +{ + zfs_creat_t *zct = arg; + nvlist_t *nvprops = zct->zct_props; + int error; + uint64_t volblocksize, volsize; + + VERIFY(nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) + volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + + /* + * These properties must be removed from the list so the generic + * property setting step won't apply to them. 
+ */ + VERIFY(nvlist_remove_all(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); + (void) nvlist_remove_all(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); + + error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + + error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + + error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); + ASSERT(error == 0); +} + +/* + * Replay a TX_WRITE ZIL transaction that didn't get committed + * after a system failure + */ +static int +zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) +{ + objset_t *os = zv->zv_objset; + char *data = (char *)(lr + 1); /* data follows lr_write_t */ + uint64_t offset, length; + dmu_tx_t *tx; + int error; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + offset = lr->lr_offset; + length = lr->lr_length; + + /* If it's a dmu_sync() block, write the whole block */ + if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { + uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); + if (length < blocksize) { + offset -= offset % blocksize; + length = blocksize; + } + } + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + dmu_write(os, ZVOL_OBJ, offset, length, data, tx); + dmu_tx_commit(tx); + } + + return (error); +} + +/* ARGSUSED */ +static int +zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) +{ + return (ENOTSUP); +} + +/* + * Callback vectors for replaying records. + * Only TX_WRITE is needed for zvol. 
+ */ +zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { + zvol_replay_err, /* 0 no such transaction type */ + zvol_replay_err, /* TX_CREATE */ + zvol_replay_err, /* TX_MKDIR */ + zvol_replay_err, /* TX_MKXATTR */ + zvol_replay_err, /* TX_SYMLINK */ + zvol_replay_err, /* TX_REMOVE */ + zvol_replay_err, /* TX_RMDIR */ + zvol_replay_err, /* TX_LINK */ + zvol_replay_err, /* TX_RENAME */ + zvol_replay_write, /* TX_WRITE */ + zvol_replay_err, /* TX_TRUNCATE */ + zvol_replay_err, /* TX_SETATTR */ + zvol_replay_err, /* TX_ACL */ + zvol_replay_err, /* TX_CREATE_ACL */ + zvol_replay_err, /* TX_CREATE_ATTR */ + zvol_replay_err, /* TX_CREATE_ACL_ATTR */ + zvol_replay_err, /* TX_MKDIR_ACL */ + zvol_replay_err, /* TX_MKDIR_ATTR */ + zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ + zvol_replay_err, /* TX_WRITE2 */ +}; + +int +zvol_name2minor(const char *name, minor_t *minor) +{ + zvol_state_t *zv; + + mutex_enter(&zfsdev_state_lock); + zv = zvol_minor_lookup(name); + if (minor && zv) + *minor = zv->zv_minor; + mutex_exit(&zfsdev_state_lock); + return (zv ? 0 : -1); +} + +/* + * Create a minor node (plus a whole lot more) for the specified volume. 
+ */ +int +zvol_create_minor(const char *name) +{ + zfs_soft_state_t *zs; + zvol_state_t *zv; + objset_t *os; + dmu_object_info_t doi; + minor_t minor = 0; + char chrbuf[30], blkbuf[30]; + int error; + + mutex_enter(&zfsdev_state_lock); + + if (zvol_minor_lookup(name) != NULL) { + mutex_exit(&zfsdev_state_lock); + return (EEXIST); + } + + /* lie and say we're read-only */ + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); + + if (error) { + mutex_exit(&zfsdev_state_lock); + return (error); + } + + if ((minor = zfsdev_minor_alloc()) == 0) { + dmu_objset_disown(os, FTAG); + mutex_exit(&zfsdev_state_lock); + return (ENXIO); + } + + if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { + dmu_objset_disown(os, FTAG); + mutex_exit(&zfsdev_state_lock); + return (EAGAIN); + } + (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, + (char *)name); + + (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); + + if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, + minor, DDI_PSEUDO, 0) == DDI_FAILURE) { + ddi_soft_state_free(zfsdev_state, minor); + dmu_objset_disown(os, FTAG); + mutex_exit(&zfsdev_state_lock); + return (EAGAIN); + } + + (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); + + if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, + minor, DDI_PSEUDO, 0) == DDI_FAILURE) { + ddi_remove_minor_node(zfs_dip, chrbuf); + ddi_soft_state_free(zfsdev_state, minor); + dmu_objset_disown(os, FTAG); + mutex_exit(&zfsdev_state_lock); + return (EAGAIN); + } + + zs = ddi_get_soft_state(zfsdev_state, minor); + zs->zss_type = ZSST_ZVOL; + zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); + (void) strlcpy(zv->zv_name, name, MAXPATHLEN); + zv->zv_min_bs = DEV_BSHIFT; + zv->zv_minor = minor; + zv->zv_objset = os; + if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) + zv->zv_flags |= ZVOL_RDONLY; + mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); + avl_create(&zv->zv_znode.z_range_avl, 
zfs_range_compare, + sizeof (rl_t), offsetof(rl_t, r_node)); + list_create(&zv->zv_extents, sizeof (zvol_extent_t), + offsetof(zvol_extent_t, ze_node)); + /* get and cache the blocksize */ + error = dmu_object_info(os, ZVOL_OBJ, &doi); + ASSERT(error == 0); + zv->zv_volblocksize = doi.doi_data_block_size; + + if (spa_writeable(dmu_objset_spa(os))) { + if (zil_replay_disable) + zil_destroy(dmu_objset_zil(os), B_FALSE); + else + zil_replay(os, zv, zvol_replay_vector); + } + dmu_objset_disown(os, FTAG); + zv->zv_objset = NULL; + + zvol_minors++; + + mutex_exit(&zfsdev_state_lock); + + return (0); +} + +/* + * Remove minor node for the specified volume. + */ +static int +zvol_remove_zv(zvol_state_t *zv) +{ + char nmbuf[20]; + minor_t minor = zv->zv_minor; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + if (zv->zv_total_opens != 0) + return (EBUSY); + + (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); + ddi_remove_minor_node(zfs_dip, nmbuf); + + (void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor); + ddi_remove_minor_node(zfs_dip, nmbuf); + + avl_destroy(&zv->zv_znode.z_range_avl); + mutex_destroy(&zv->zv_znode.z_range_lock); + + kmem_free(zv, sizeof (zvol_state_t)); + + ddi_soft_state_free(zfsdev_state, minor); + + zvol_minors--; + return (0); +} + +int +zvol_remove_minor(const char *name) +{ + zvol_state_t *zv; + int rc; + + mutex_enter(&zfsdev_state_lock); + if ((zv = zvol_minor_lookup(name)) == NULL) { + mutex_exit(&zfsdev_state_lock); + return (ENXIO); + } + rc = zvol_remove_zv(zv); + mutex_exit(&zfsdev_state_lock); + return (rc); +} + +int +zvol_first_open(zvol_state_t *zv) +{ + objset_t *os; + uint64_t volsize; + int error; + uint64_t readonly; + + /* lie and say we're read-only */ + error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, + zvol_tag, &os); + if (error) + return (error); + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); + if (error) { + ASSERT(error == 0); + dmu_objset_disown(os, zvol_tag); + return (error); + } + 
zv->zv_objset = os; + error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); + if (error) { + dmu_objset_disown(os, zvol_tag); + return (error); + } + zv->zv_volsize = volsize; + zv->zv_zilog = zil_open(os, zvol_get_data); + zvol_size_changed(zv->zv_volsize, ddi_driver_major(zfs_dip), + zv->zv_minor); + + VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, + NULL) == 0); + if (readonly || dmu_objset_is_snapshot(os) || + !spa_writeable(dmu_objset_spa(os))) + zv->zv_flags |= ZVOL_RDONLY; + else + zv->zv_flags &= ~ZVOL_RDONLY; + return (error); +} + +void +zvol_last_close(zvol_state_t *zv) +{ + zil_close(zv->zv_zilog); + zv->zv_zilog = NULL; + dmu_buf_rele(zv->zv_dbuf, zvol_tag); + zv->zv_dbuf = NULL; + dmu_objset_disown(zv->zv_objset, zvol_tag); + zv->zv_objset = NULL; +} + +int +zvol_prealloc(zvol_state_t *zv) +{ + objset_t *os = zv->zv_objset; + dmu_tx_t *tx; + uint64_t refd, avail, usedobjs, availobjs; + uint64_t resid = zv->zv_volsize; + uint64_t off = 0; + + /* Check the space usage before attempting to allocate the space */ + dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); + if (avail < zv->zv_volsize) + return (ENOSPC); + + /* Free old extents if they exist */ + zvol_free_extents(zv); + + while (resid != 0) { + int error; + uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); + return (error); + } + dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); + dmu_tx_commit(tx); + off += bytes; + resid -= bytes; + } + txg_wait_synced(dmu_objset_pool(os), 0); + + return (0); +} + +int +zvol_update_volsize(objset_t *os, uint64_t volsize) +{ + dmu_tx_t *tx; + int error; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + 
dmu_tx_abort(tx); + return (error); + } + + error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, + &volsize, tx); + dmu_tx_commit(tx); + + if (error == 0) + error = dmu_free_long_range(os, + ZVOL_OBJ, volsize, DMU_OBJECT_END); + return (error); +} + +void +zvol_remove_minors(const char *name) +{ + zvol_state_t *zv; + char *namebuf; + minor_t minor; + + namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP); + (void) strncpy(namebuf, name, strlen(name)); + (void) strcat(namebuf, "/"); + mutex_enter(&zfsdev_state_lock); + for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { + + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) + continue; + if (strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0) + (void) zvol_remove_zv(zv); + } + kmem_free(namebuf, strlen(name) + 2); + + mutex_exit(&zfsdev_state_lock); +} + +int +zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) +{ + zvol_state_t *zv = NULL; + objset_t *os; + int error; + dmu_object_info_t doi; + uint64_t old_volsize = 0ULL; + uint64_t readonly; + + mutex_enter(&zfsdev_state_lock); + zv = zvol_minor_lookup(name); + if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { + mutex_exit(&zfsdev_state_lock); + return (error); + } + + if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || + (error = zvol_check_volsize(volsize, + doi.doi_data_block_size)) != 0) + goto out; + + VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, + NULL) == 0); + if (readonly) { + error = EROFS; + goto out; + } + + error = zvol_update_volsize(os, volsize); + /* + * Reinitialize the dump area to the new size. If we + * failed to resize the dump area then restore it back to + * its original size. 
+ */ + if (zv && error == 0) { + if (zv->zv_flags & ZVOL_DUMPIFIED) { + old_volsize = zv->zv_volsize; + zv->zv_volsize = volsize; + if ((error = zvol_dumpify(zv)) != 0 || + (error = dumpvp_resize()) != 0) { + (void) zvol_update_volsize(os, old_volsize); + zv->zv_volsize = old_volsize; + error = zvol_dumpify(zv); + } + } + if (error == 0) { + zv->zv_volsize = volsize; + zvol_size_changed(volsize, maj, zv->zv_minor); + } + } + + /* + * Generate a LUN expansion event. + */ + if (zv && error == 0) { + sysevent_id_t eid; + nvlist_t *attr; + char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + + (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV, + zv->zv_minor); + + VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); + + (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, + ESC_DEV_DLE, attr, &eid, DDI_SLEEP); + + nvlist_free(attr); + kmem_free(physpath, MAXPATHLEN); + } + +out: + dmu_objset_rele(os, FTAG); + + mutex_exit(&zfsdev_state_lock); + + return (error); +} + +/*ARGSUSED*/ +int +zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) +{ + zvol_state_t *zv; + int err = 0; + + mutex_enter(&zfsdev_state_lock); + + zv = zfsdev_get_soft_state(getminor(*devp), ZSST_ZVOL); + if (zv == NULL) { + mutex_exit(&zfsdev_state_lock); + return (ENXIO); + } + + if (zv->zv_total_opens == 0) + err = zvol_first_open(zv); + if (err) { + mutex_exit(&zfsdev_state_lock); + return (err); + } + if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { + err = EROFS; + goto out; + } + if (zv->zv_flags & ZVOL_EXCL) { + err = EBUSY; + goto out; + } + if (flag & FEXCL) { + if (zv->zv_total_opens != 0) { + err = EBUSY; + goto out; + } + zv->zv_flags |= ZVOL_EXCL; + } + + if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) { + zv->zv_open_count[otyp]++; + zv->zv_total_opens++; + } + mutex_exit(&zfsdev_state_lock); + + return (err); +out: + if (zv->zv_total_opens == 0) + zvol_last_close(zv); + 
mutex_exit(&zfsdev_state_lock); + return (err); +} + +/*ARGSUSED*/ +int +zvol_close(dev_t dev, int flag, int otyp, cred_t *cr) +{ + minor_t minor = getminor(dev); + zvol_state_t *zv; + int error = 0; + + mutex_enter(&zfsdev_state_lock); + + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) { + mutex_exit(&zfsdev_state_lock); + return (ENXIO); + } + + if (zv->zv_flags & ZVOL_EXCL) { + ASSERT(zv->zv_total_opens == 1); + zv->zv_flags &= ~ZVOL_EXCL; + } + + /* + * If the open count is zero, this is a spurious close. + * That indicates a bug in the kernel / DDI framework. + */ + ASSERT(zv->zv_open_count[otyp] != 0); + ASSERT(zv->zv_total_opens != 0); + + /* + * You may get multiple opens, but only one close. + */ + zv->zv_open_count[otyp]--; + zv->zv_total_opens--; + + if (zv->zv_total_opens == 0) + zvol_last_close(zv); + + mutex_exit(&zfsdev_state_lock); + return (error); +} + +static void +zvol_get_done(zgd_t *zgd, int error) +{ + if (zgd->zgd_db) + dmu_buf_rele(zgd->zgd_db, zgd); + + zfs_range_unlock(zgd->zgd_rl); + + if (error == 0 && zgd->zgd_bp) + zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); + + kmem_free(zgd, sizeof (zgd_t)); +} + +/* + * Get data to generate a TX_WRITE intent log record. + */ +static int +zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +{ + zvol_state_t *zv = arg; + objset_t *os = zv->zv_objset; + uint64_t object = ZVOL_OBJ; + uint64_t offset = lr->lr_offset; + uint64_t size = lr->lr_length; /* length of user data */ + blkptr_t *bp = &lr->lr_blkptr; + dmu_buf_t *db; + zgd_t *zgd; + int error; + + ASSERT(zio != NULL); + ASSERT(size != 0); + + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd->zgd_zilog = zv->zv_zilog; + zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); + + /* + * Write records come in two flavors: immediate and indirect. 
+ * For small writes it's cheaper to store the data with the + * log record (immediate); for large writes it's cheaper to + * sync the data and get a pointer to it (indirect) so that + * we don't have to write the data twice. + */ + if (buf != NULL) { /* immediate write */ + error = dmu_read(os, object, offset, size, buf, + DMU_READ_NO_PREFETCH); + } else { + size = zv->zv_volblocksize; + offset = P2ALIGN(offset, size); + error = dmu_buf_hold(os, object, offset, zgd, &db, + DMU_READ_NO_PREFETCH); + if (error == 0) { + zgd->zgd_db = db; + zgd->zgd_bp = bp; + + ASSERT(db->db_offset == offset); + ASSERT(db->db_size == size); + + error = dmu_sync(zio, lr->lr_common.lrc_txg, + zvol_get_done, zgd); + + if (error == 0) + return (0); + } + } + + zvol_get_done(zgd, error); + + return (error); +} + +/* + * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. + * + * We store data in the log buffers if it's small enough. + * Otherwise we will later flush the data out via dmu_sync(). + */ +ssize_t zvol_immediate_write_sz = 32768; + +static void +zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, + boolean_t sync) +{ + uint32_t blocksize = zv->zv_volblocksize; + zilog_t *zilog = zv->zv_zilog; + boolean_t slogging; + ssize_t immediate_write_sz; + + if (zil_replaying(zilog, tx)) + return; + + immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + ? 0 : zvol_immediate_write_sz; + + slogging = spa_has_slogs(zilog->zl_spa) && + (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + + while (resid) { + itx_t *itx; + lr_write_t *lr; + ssize_t len; + itx_wr_state_t write_state; + + /* + * Unlike zfs_log_write() we can be called with + * upto DMU_MAX_ACCESS/2 (5MB) writes. 
+ */ + if (blocksize > immediate_write_sz && !slogging && + resid >= blocksize && off % blocksize == 0) { + write_state = WR_INDIRECT; /* uses dmu_sync */ + len = blocksize; + } else if (sync) { + write_state = WR_COPIED; + len = MIN(ZIL_MAX_LOG_DATA, resid); + } else { + write_state = WR_NEED_COPY; + len = MIN(ZIL_MAX_LOG_DATA, resid); + } + + itx = zil_itx_create(TX_WRITE, sizeof (*lr) + + (write_state == WR_COPIED ? len : 0)); + lr = (lr_write_t *)&itx->itx_lr; + if (write_state == WR_COPIED && dmu_read(zv->zv_objset, + ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { + zil_itx_destroy(itx); + itx = zil_itx_create(TX_WRITE, sizeof (*lr)); + lr = (lr_write_t *)&itx->itx_lr; + write_state = WR_NEED_COPY; + } + + itx->itx_wr_state = write_state; + if (write_state == WR_NEED_COPY) + itx->itx_sod += len; + lr->lr_foid = ZVOL_OBJ; + lr->lr_offset = off; + lr->lr_length = len; + lr->lr_blkoff = 0; + BP_ZERO(&lr->lr_blkptr); + + itx->itx_private = zv; + itx->itx_sync = sync; + + zil_itx_assign(zilog, itx, tx); + + off += len; + resid -= len; + } +} + +static int +zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, + boolean_t doread, boolean_t isdump) +{ + vdev_disk_t *dvd; + int c; + int numerrors = 0; + + for (c = 0; c < vd->vdev_children; c++) { + ASSERT(vd->vdev_ops == &vdev_mirror_ops || + vd->vdev_ops == &vdev_replacing_ops || + vd->vdev_ops == &vdev_spare_ops); + int err = zvol_dumpio_vdev(vd->vdev_child[c], + addr, offset, size, doread, isdump); + if (err != 0) { + numerrors++; + } else if (doread) { + break; + } + } + + if (!vd->vdev_ops->vdev_op_leaf) + return (numerrors < vd->vdev_children ? 
0 : EIO); + + if (doread && !vdev_readable(vd)) + return (EIO); + else if (!doread && !vdev_writeable(vd)) + return (EIO); + + dvd = vd->vdev_tsd; + ASSERT3P(dvd, !=, NULL); + offset += VDEV_LABEL_START_SIZE; + + if (ddi_in_panic() || isdump) { + ASSERT(!doread); + if (doread) + return (EIO); + return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), + lbtodb(size))); + } else { + return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, + doread ? B_READ : B_WRITE)); + } +} + +static int +zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, + boolean_t doread, boolean_t isdump) +{ + vdev_t *vd; + int error; + zvol_extent_t *ze; + spa_t *spa = dmu_objset_spa(zv->zv_objset); + + /* Must be sector aligned, and not stradle a block boundary. */ + if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || + P2BOUNDARY(offset, size, zv->zv_volblocksize)) { + return (EINVAL); + } + ASSERT(size <= zv->zv_volblocksize); + + /* Locate the extent this belongs to */ + ze = list_head(&zv->zv_extents); + while (offset >= ze->ze_nblks * zv->zv_volblocksize) { + offset -= ze->ze_nblks * zv->zv_volblocksize; + ze = list_next(&zv->zv_extents, ze); + } + + if (!ddi_in_panic()) + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + + vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); + offset += DVA_GET_OFFSET(&ze->ze_dva); + error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump); + + if (!ddi_in_panic()) + spa_config_exit(spa, SCL_STATE, FTAG); + + return (error); +} + +int +zvol_strategy(buf_t *bp) +{ + zfs_soft_state_t *zs = NULL; + zvol_state_t *zv; + uint64_t off, volsize; + size_t resid; + char *addr; + objset_t *os; + rl_t *rl; + int error = 0; + boolean_t doread = bp->b_flags & B_READ; + boolean_t is_dump; + boolean_t sync; + + if (getminor(bp->b_edev) == 0) { + error = EINVAL; + } else { + zs = ddi_get_soft_state(zfsdev_state, getminor(bp->b_edev)); + if (zs == NULL) + error = ENXIO; + else if (zs->zss_type != ZSST_ZVOL) + error = EINVAL; + } + + 
if (error) { + bioerror(bp, error); + biodone(bp); + return (0); + } + + zv = zs->zss_data; + + if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) { + bioerror(bp, EROFS); + biodone(bp); + return (0); + } + + off = ldbtob(bp->b_blkno); + volsize = zv->zv_volsize; + + os = zv->zv_objset; + ASSERT(os != NULL); + + bp_mapin(bp); + addr = bp->b_un.b_addr; + resid = bp->b_bcount; + + if (resid > 0 && (off < 0 || off >= volsize)) { + bioerror(bp, EIO); + biodone(bp); + return (0); + } + + is_dump = zv->zv_flags & ZVOL_DUMPIFIED; + sync = ((!(bp->b_flags & B_ASYNC) && + !(zv->zv_flags & ZVOL_WCE)) || + (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) && + !doread && !is_dump; + + /* + * There must be no buffer changes when doing a dmu_sync() because + * we can't change the data whilst calculating the checksum. + */ + rl = zfs_range_lock(&zv->zv_znode, off, resid, + doread ? RL_READER : RL_WRITER); + + while (resid != 0 && off < volsize) { + size_t size = MIN(resid, zvol_maxphys); + if (is_dump) { + size = MIN(size, P2END(off, zv->zv_volblocksize) - off); + error = zvol_dumpio(zv, addr, off, size, + doread, B_FALSE); + } else if (doread) { + error = dmu_read(os, ZVOL_OBJ, off, size, addr, + DMU_READ_PREFETCH); + } else { + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + dmu_write(os, ZVOL_OBJ, off, size, addr, tx); + zvol_log_write(zv, tx, off, size, sync); + dmu_tx_commit(tx); + } + } + if (error) { + /* convert checksum errors into IO errors */ + if (error == ECKSUM) + error = EIO; + break; + } + off += size; + addr += size; + resid -= size; + } + zfs_range_unlock(rl); + + if ((bp->b_resid = resid) == bp->b_bcount) + bioerror(bp, off > volsize ? EINVAL : error); + + if (sync) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + biodone(bp); + + return (0); +} + +/* + * Set the buffer count to the zvol maximum transfer. 
+ * Using our own routine instead of the default minphys() + * means that for larger writes we write bigger buffers on X86 + * (128K instead of 56K) and flush the disk write cache less often + * (every zvol_maxphys - currently 1MB) instead of minphys (currently + * 56K on X86 and 128K on sparc). + */ +void +zvol_minphys(struct buf *bp) +{ + if (bp->b_bcount > zvol_maxphys) + bp->b_bcount = zvol_maxphys; +} + +int +zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) +{ + minor_t minor = getminor(dev); + zvol_state_t *zv; + int error = 0; + uint64_t size; + uint64_t boff; + uint64_t resid; + + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) + return (ENXIO); + + boff = ldbtob(blkno); + resid = ldbtob(nblocks); + + VERIFY3U(boff + resid, <=, zv->zv_volsize); + + while (resid) { + size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); + error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); + if (error) + break; + boff += size; + addr += size; + resid -= size; + } + + return (error); +} + +/*ARGSUSED*/ +int +zvol_read(dev_t dev, uio_t *uio, cred_t *cr) +{ + minor_t minor = getminor(dev); + zvol_state_t *zv; + uint64_t volsize; + rl_t *rl; + int error = 0; + + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) + return (ENXIO); + + volsize = zv->zv_volsize; + if (uio->uio_resid > 0 && + (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) + return (EIO); + + if (zv->zv_flags & ZVOL_DUMPIFIED) { + error = physio(zvol_strategy, NULL, dev, B_READ, + zvol_minphys, uio); + return (error); + } + + rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, + RL_READER); + while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { + uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); + + /* don't read past the end */ + if (bytes > volsize - uio->uio_loffset) + bytes = volsize - uio->uio_loffset; + + error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); + if (error) { + /* convert checksum errors into IO 
errors */ + if (error == ECKSUM) + error = EIO; + break; + } + } + zfs_range_unlock(rl); + return (error); +} + +/*ARGSUSED*/ +int +zvol_write(dev_t dev, uio_t *uio, cred_t *cr) +{ + minor_t minor = getminor(dev); + zvol_state_t *zv; + uint64_t volsize; + rl_t *rl; + int error = 0; + boolean_t sync; + + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) + return (ENXIO); + + volsize = zv->zv_volsize; + if (uio->uio_resid > 0 && + (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) + return (EIO); + + if (zv->zv_flags & ZVOL_DUMPIFIED) { + error = physio(zvol_strategy, NULL, dev, B_WRITE, + zvol_minphys, uio); + return (error); + } + + sync = !(zv->zv_flags & ZVOL_WCE) || + (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); + + rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, + RL_WRITER); + while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { + uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); + uint64_t off = uio->uio_loffset; + dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); + + if (bytes > volsize - off) /* don't write past the end */ + bytes = volsize - off; + + dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + break; + } + error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx); + if (error == 0) + zvol_log_write(zv, tx, off, bytes, sync); + dmu_tx_commit(tx); + + if (error) + break; + } + zfs_range_unlock(rl); + if (sync) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + return (error); +} + +int +zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) +{ + struct uuid uuid = EFI_RESERVED; + efi_gpe_t gpe = { 0 }; + uint32_t crc; + dk_efi_t efi; + int length; + char *ptr; + + if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) + return (EFAULT); + ptr = (char *)(uintptr_t)efi.dki_data_64; + length = efi.dki_length; + /* + * Some clients may attempt to request a PMBR for the + * zvol. Currently this interface will return EINVAL to + * such requests. 
These requests could be supported by + * adding a check for lba == 0 and consing up an appropriate + * PMBR. + */ + if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) + return (EINVAL); + + gpe.efi_gpe_StartingLBA = LE_64(34ULL); + gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); + UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); + + if (efi.dki_lba == 1) { + efi_gpt_t gpt = { 0 }; + + gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); + gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); + gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); + gpt.efi_gpt_MyLBA = LE_64(1ULL); + gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); + gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); + gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); + gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); + gpt.efi_gpt_SizeOfPartitionEntry = + LE_32(sizeof (efi_gpe_t)); + CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); + gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); + CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); + gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); + if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), + flag)) + return (EFAULT); + ptr += sizeof (gpt); + length -= sizeof (gpt); + } + if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), + length), flag)) + return (EFAULT); + return (0); +} + +/* + * BEGIN entry points to allow external callers access to the volume. + */ +/* + * Return the volume parameters needed for access from an external caller. + * These values are invariant as long as the volume is held open. 
+ */ +int +zvol_get_volume_params(minor_t minor, uint64_t *blksize, + uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl, + void **rl_hdl, void **bonus_hdl) +{ + zvol_state_t *zv; + + zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); + if (zv == NULL) + return (ENXIO); + if (zv->zv_flags & ZVOL_DUMPIFIED) + return (ENXIO); + + ASSERT(blksize && max_xfer_len && minor_hdl && + objset_hdl && zil_hdl && rl_hdl && bonus_hdl); + + *blksize = zv->zv_volblocksize; + *max_xfer_len = (uint64_t)zvol_maxphys; + *minor_hdl = zv; + *objset_hdl = zv->zv_objset; + *zil_hdl = zv->zv_zilog; + *rl_hdl = &zv->zv_znode; + *bonus_hdl = zv->zv_dbuf; + return (0); +} + +/* + * Return the current volume size to an external caller. + * The size can change while the volume is open. + */ +uint64_t +zvol_get_volume_size(void *minor_hdl) +{ + zvol_state_t *zv = minor_hdl; + + return (zv->zv_volsize); +} + +/* + * Return the current WCE setting to an external caller. + * The WCE setting can change while the volume is open. + */ +int +zvol_get_volume_wce(void *minor_hdl) +{ + zvol_state_t *zv = minor_hdl; + + return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0); +} + +/* + * Entry point for external callers to zvol_log_write + */ +void +zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, + boolean_t sync) +{ + zvol_state_t *zv = minor_hdl; + + zvol_log_write(zv, tx, off, resid, sync); +} +/* + * END entry points to allow external callers access to the volume. + */ + +/* + * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). 
+ */ +/*ARGSUSED*/ +int +zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) +{ + zvol_state_t *zv; + struct dk_cinfo dki; + struct dk_minfo dkm; + struct dk_callback *dkc; + int error = 0; + rl_t *rl; + + mutex_enter(&zfsdev_state_lock); + + zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL); + + if (zv == NULL) { + mutex_exit(&zfsdev_state_lock); + return (ENXIO); + } + ASSERT(zv->zv_total_opens > 0); + + switch (cmd) { + + case DKIOCINFO: + bzero(&dki, sizeof (dki)); + (void) strcpy(dki.dki_cname, "zvol"); + (void) strcpy(dki.dki_dname, "zvol"); + dki.dki_ctype = DKC_UNKNOWN; + dki.dki_unit = getminor(dev); + dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); + mutex_exit(&zfsdev_state_lock); + if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) + error = EFAULT; + return (error); + + case DKIOCGMEDIAINFO: + bzero(&dkm, sizeof (dkm)); + dkm.dki_lbsize = 1U << zv->zv_min_bs; + dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; + dkm.dki_media_type = DK_UNKNOWN; + mutex_exit(&zfsdev_state_lock); + if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) + error = EFAULT; + return (error); + + case DKIOCGETEFI: + { + uint64_t vs = zv->zv_volsize; + uint8_t bs = zv->zv_min_bs; + + mutex_exit(&zfsdev_state_lock); + error = zvol_getefi((void *)arg, flag, vs, bs); + return (error); + } + + case DKIOCFLUSHWRITECACHE: + dkc = (struct dk_callback *)arg; + mutex_exit(&zfsdev_state_lock); + zil_commit(zv->zv_zilog, ZVOL_OBJ); + if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { + (*dkc->dkc_callback)(dkc->dkc_cookie, error); + error = 0; + } + return (error); + + case DKIOCGETWCE: + { + int wce = (zv->zv_flags & ZVOL_WCE) ? 
1 : 0; + if (ddi_copyout(&wce, (void *)arg, sizeof (int), + flag)) + error = EFAULT; + break; + } + case DKIOCSETWCE: + { + int wce; + if (ddi_copyin((void *)arg, &wce, sizeof (int), + flag)) { + error = EFAULT; + break; + } + if (wce) { + zv->zv_flags |= ZVOL_WCE; + mutex_exit(&zfsdev_state_lock); + } else { + zv->zv_flags &= ~ZVOL_WCE; + mutex_exit(&zfsdev_state_lock); + zil_commit(zv->zv_zilog, ZVOL_OBJ); + } + return (0); + } + + case DKIOCGGEOM: + case DKIOCGVTOC: + /* + * Commands using these (like prtvtoc) expect ENOTSUP + * since we're emulating an EFI label. + */ + error = ENOTSUP; + break; + + case DKIOCDUMPINIT: + rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, + RL_WRITER); + error = zvol_dumpify(zv); + zfs_range_unlock(rl); + break; + + case DKIOCDUMPFINI: + if (!(zv->zv_flags & ZVOL_DUMPIFIED)) + break; + rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, + RL_WRITER); + error = zvol_dump_fini(zv); + zfs_range_unlock(rl); + break; + + default: + error = ENOTTY; + break; + + } + mutex_exit(&zfsdev_state_lock); + return (error); +} + +int +zvol_busy(void) +{ + return (zvol_minors != 0); +} + +void +zvol_init(void) +{ + VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t), + 1) == 0); + mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +zvol_fini(void) +{ + mutex_destroy(&zfsdev_state_lock); + ddi_soft_state_fini(&zfsdev_state); +} + +static int +zvol_dump_init(zvol_state_t *zv, boolean_t resize) +{ + dmu_tx_t *tx; + int error = 0; + objset_t *os = zv->zv_objset; + nvlist_t *nv = NULL; + uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, + DMU_OBJECT_END); + /* + * Wait for dmu_free_long_range to actually free the blocks. + * NOTE(review): the error returned by dmu_free_long_range() above + * is overwritten by the dmu_tx_assign() result below without being + * checked -- confirm that ignoring it is intentional. + */ + txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + dmu_tx_hold_bonus(tx, ZVOL_OBJ); + error = 
dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + /* + * If we are resizing the dump device then we only need to + * update the refreservation to match the newly updated + * zvolsize. Otherwise, we save off the original state of the + * zvol so that we can restore them if the zvol is ever undumpified. + */ + if (resize) { + error = zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, + &zv->zv_volsize, tx); + } else { + uint64_t checksum, compress, refresrv, vbs, dedup; + + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); + error = error ? error : dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); + error = error ? error : dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); + error = error ? error : dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); + if (version >= SPA_VERSION_DEDUP) { + error = error ? error : + dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL); + } + + error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, + &compress, tx); + error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); + error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, + &refresrv, tx); + error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, + &vbs, tx); + error = error ? error : dmu_object_set_blocksize( + os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx); + if (version >= SPA_VERSION_DEDUP) { + error = error ? 
error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, + &dedup, tx); + } + if (error == 0) + zv->zv_volblocksize = SPA_MAXBLOCKSIZE; + } + dmu_tx_commit(tx); + + /* + * We only need to update the zvol's properties if we are + * initializing the dump area for the first time: refreservation is + * set to 0, compression to ZIO_COMPRESS_OFF, checksum to + * ZIO_CHECKSUM_OFF and, on pools at or above SPA_VERSION_DEDUP, + * dedup to ZIO_CHECKSUM_OFF. + */ + if (!resize) { + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), + ZIO_COMPRESS_OFF) == 0); + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), + ZIO_CHECKSUM_OFF) == 0); + if (version >= SPA_VERSION_DEDUP) { + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_DEDUP), + ZIO_CHECKSUM_OFF) == 0); + } + + error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, + nv, NULL); + nvlist_free(nv); + + if (error) + return (error); + } + + /* + * Allocate the space for the dump. + * NOTE(review): `error` from the property/ZAP updates in the + * transaction above is never checked after dmu_tx_commit(); it is + * overwritten by zfs_set_prop_nvlist() or by zvol_prealloc() here. + * Confirm whether an early return on error is needed. + */ + error = zvol_prealloc(zv); + return (error); +} + +static int +zvol_dumpify(zvol_state_t *zv) +{ + int error = 0; + uint64_t dumpsize = 0; + dmu_tx_t *tx; + objset_t *os = zv->zv_objset; + + if (zv->zv_flags & ZVOL_RDONLY) + return (EROFS); + + if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, + 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { + boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; + + if ((error = zvol_dump_init(zv, resize)) != 0) { + (void) zvol_dump_fini(zv); + return (error); + } + } + + /* + * Build up our lba mapping. 
+ */ + error = zvol_get_lbas(zv); + if (error) { + (void) zvol_dump_fini(zv); + return (error); + } + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + (void) zvol_dump_fini(zv); + return (error); + } + + zv->zv_flags |= ZVOL_DUMPIFIED; + error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, + &zv->zv_volsize, tx); + dmu_tx_commit(tx); + + if (error) { + (void) zvol_dump_fini(zv); + return (error); + } + + txg_wait_synced(dmu_objset_pool(os), 0); + return (0); +} + +static int +zvol_dump_fini(zvol_state_t *zv) +{ + dmu_tx_t *tx; + objset_t *os = zv->zv_objset; + nvlist_t *nv; + int error = 0; + uint64_t checksum, compress, refresrv, vbs, dedup; + uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); + + /* + * Attempt to restore the zvol back to its pre-dumpified state. + * This is a best-effort attempt as it's possible that not all + * of these properties were initialized during the dumpify process + * (i.e. error during zvol_dump_init). 
+ */ + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); + dmu_tx_commit(tx); + + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); + + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); + if (version >= SPA_VERSION_DEDUP && + zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) { + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_DEDUP), dedup); + } + (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, + nv, NULL); + nvlist_free(nv); + + zvol_free_extents(zv); + zv->zv_flags &= ~ZVOL_DUMPIFIED; + (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); + /* + * Wait for dmu_free_long_range to actually free the blocks. + * NOTE(review): the zap_lookup() results above are discarded, so + * checksum, compress, refresrv and vbs (declared without + * initializers) may be used uninitialized if a lookup fails -- + * vbs is passed to dmu_object_set_blocksize() below. Confirm. + */ + txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, ZVOL_OBJ); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0) + zv->zv_volblocksize = vbs; + dmu_tx_commit(tx); + + return (0); +} diff --git a/uts/common/os/callb.c b/uts/common/os/callb.c new file mode 100644 index 000000000000..5c98caac907f --- /dev/null +++ 
b/uts/common/os/callb.c @@ -0,0 +1,410 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/cpuvar.h> +#include <sys/user.h> +#include <sys/proc.h> +#include <sys/callb.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/swap.h> +#include <sys/vmsystm.h> +#include <sys/class.h> +#include <sys/debug.h> +#include <sys/thread.h> +#include <sys/kobj.h> +#include <sys/ddi.h> /* for delay() */ +#include <sys/taskq.h> /* For TASKQ_NAMELEN */ + +#define CB_MAXNAME TASKQ_NAMELEN + +/* + * The callb mechanism provides generic event scheduling/echoing. + * A callb function is registered and called on behalf of the event. 
+ */ +typedef struct callb { + struct callb *c_next; /* next in class or on freelist */ + kthread_id_t c_thread; /* ptr to caller's thread struct */ + char c_flag; /* info about the callb state */ + uchar_t c_class; /* this callb's class */ + kcondvar_t c_done_cv; /* signal callb completion */ + boolean_t (*c_func)(); /* cb function: returns true if ok */ + void *c_arg; /* arg to c_func */ + char c_name[CB_MAXNAME+1]; /* debug:max func name length */ +} callb_t; + +/* + * callb c_flag bitmap definitions + */ +#define CALLB_FREE 0x0 +#define CALLB_TAKEN 0x1 +#define CALLB_EXECUTING 0x2 + +/* + * Basic structure for a callb table. + * All callbs are organized into different class groups described + * by ct_class array. + * The callbs within a class are single-linked and normally run by a + * serial execution. + */ +typedef struct callb_table { + kmutex_t ct_lock; /* protect all callb states */ + callb_t *ct_freelist; /* free callb structures */ + int ct_busy; /* != 0 prevents additions */ + kcondvar_t ct_busy_cv; /* to wait for not busy */ + int ct_ncallb; /* num of callbs allocated */ + callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */ +} callb_table_t; + +int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC; + +static callb_id_t callb_add_common(boolean_t (*)(void *, int), + void *, int, char *, kthread_id_t); + +static callb_table_t callb_table; /* system level callback table */ +static callb_table_t *ct = &callb_table; +static kmutex_t callb_safe_mutex; +callb_cpr_t callb_cprinfo_safe = { + &callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, 0, 0 }; + +/* + * Init all callb tables in the system. + */ +void +callb_init() +{ + callb_table.ct_busy = 0; /* mark table open for additions */ + mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL); +} + +/* + * callout_add() is called to register func() be called later. 
+ */ +static callb_id_t +callb_add_common(boolean_t (*func)(void *arg, int code), + void *arg, int class, char *name, kthread_id_t t) +{ + callb_t *cp; + + ASSERT(class < NCBCLASS); + + mutex_enter(&ct->ct_lock); + while (ct->ct_busy) + cv_wait(&ct->ct_busy_cv, &ct->ct_lock); + if ((cp = ct->ct_freelist) == NULL) { + ct->ct_ncallb++; + cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP); + } + ct->ct_freelist = cp->c_next; + cp->c_thread = t; + cp->c_func = func; + cp->c_arg = arg; + cp->c_class = (uchar_t)class; + cp->c_flag |= CALLB_TAKEN; +#ifdef DEBUG + if (strlen(name) > CB_MAXNAME) + cmn_err(CE_WARN, "callb_add: name of callback function '%s' " + "too long -- truncated to %d chars", + name, CB_MAXNAME); +#endif + (void) strncpy(cp->c_name, name, CB_MAXNAME); + cp->c_name[CB_MAXNAME] = '\0'; + + /* + * Insert the new callb at the head of its class list. + */ + cp->c_next = ct->ct_first_cb[class]; + ct->ct_first_cb[class] = cp; + + mutex_exit(&ct->ct_lock); + return ((callb_id_t)cp); +} + +/* + * The default function to add an entry to the callback table. Since + * it uses curthread as the thread identifier to store in the table, + * it should be used for the normal case of a thread which is calling + * to add ITSELF to the table. + */ +callb_id_t +callb_add(boolean_t (*func)(void *arg, int code), + void *arg, int class, char *name) +{ + return (callb_add_common(func, arg, class, name, curthread)); +} + +/* + * A special version of callb_add() above for use by threads which + * might be adding an entry to the table on behalf of some other + * thread (for example, one which is constructed but not yet running). + * In this version the thread id is an argument. 
+ */ +callb_id_t +callb_add_thread(boolean_t (*func)(void *arg, int code), + void *arg, int class, char *name, kthread_id_t t) +{ + return (callb_add_common(func, arg, class, name, t)); +} + +/* + * callb_delete() is called to remove an entry identified by id + * that was originally placed there by a call to callb_add() or + * callb_add_thread(). It waits for the callb to finish if it is + * currently executing. Returns 0 on success; in DEBUG builds it + * returns -1 (after logging a warning) if the entry is not found on + * its class list. + */ +int +callb_delete(callb_id_t id) +{ + callb_t **pp; + callb_t *me = (callb_t *)id; + + mutex_enter(&ct->ct_lock); + + for (;;) { + pp = &ct->ct_first_cb[me->c_class]; + while (*pp != NULL && *pp != me) + pp = &(*pp)->c_next; + +#ifdef DEBUG + if (*pp != me) { + cmn_err(CE_WARN, "callb delete bogus entry 0x%p", + (void *)me); + mutex_exit(&ct->ct_lock); + return (-1); + } +#endif /* DEBUG */ + + /* + * It is not allowed to delete a callb in the middle of + * executing; otherwise callb_execute_class() will be confused. + * Wait on c_done_cv and then rescan the class list. + */ + if (!(me->c_flag & CALLB_EXECUTING)) + break; + + cv_wait(&me->c_done_cv, &ct->ct_lock); + } + /* relink the class list */ + *pp = me->c_next; + + /* clean up myself and return the free callb to the head of freelist */ + me->c_flag = CALLB_FREE; + me->c_next = ct->ct_freelist; + ct->ct_freelist = me; + + mutex_exit(&ct->ct_lock); + return (0); +} + +/* + * class: indicates to execute all callbs in the same class; + * code: optional argument for the callb functions. 
+ * return: = 0: success + * != 0: ptr to string supplied when callback was registered + */ +void * +callb_execute_class(int class, int code) +{ + callb_t *cp; + void *ret = NULL; + + ASSERT(class < NCBCLASS); + + mutex_enter(&ct->ct_lock); + + for (cp = ct->ct_first_cb[class]; + cp != NULL && ret == 0; cp = cp->c_next) { + while (cp->c_flag & CALLB_EXECUTING) + cv_wait(&cp->c_done_cv, &ct->ct_lock); + /* + * cont if the callb is deleted while we're sleeping + */ + if (cp->c_flag == CALLB_FREE) + continue; + cp->c_flag |= CALLB_EXECUTING; + +#ifdef CALLB_DEBUG + printf("callb_execute: name=%s func=%p arg=%p\n", + cp->c_name, (void *)cp->c_func, (void *)cp->c_arg); +#endif /* CALLB_DEBUG */ + + mutex_exit(&ct->ct_lock); + /* If callback function fails, pass back client's name */ + if (!(*cp->c_func)(cp->c_arg, code)) + ret = cp->c_name; + mutex_enter(&ct->ct_lock); + + cp->c_flag &= ~CALLB_EXECUTING; + cv_broadcast(&cp->c_done_cv); + } + mutex_exit(&ct->ct_lock); + return (ret); +} + +/* + * callers make sure no recursive entries to this func. + * dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure. + * + * When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we + * use a cv_timedwait() in case the kernel thread is blocked. + * + * Note that this is a generic callback handler for daemon CPR and + * should NOT be changed to accommodate any specific requirement in a daemon. + * Individual daemons that require changes to the handler shall write + * callback routines in their own daemon modules. + */ +boolean_t +callb_generic_cpr(void *arg, int code) +{ + callb_cpr_t *cp = (callb_cpr_t *)arg; + clock_t ret = 0; /* assume success */ + + mutex_enter(cp->cc_lockp); + + switch (code) { + case CB_CODE_CPR_CHKPT: + cp->cc_events |= CALLB_CPR_START; +#ifdef CPR_NOT_THREAD_SAFE + while (!(cp->cc_events & CALLB_CPR_SAFE)) + /* cv_timedwait() returns -1 if it times out. 
*/ + if ((ret = cv_reltimedwait(&cp->cc_callb_cv, + cp->cc_lockp, (callb_timeout_sec * hz), + TR_CLOCK_TICK)) == -1) + break; +#endif + break; + + case CB_CODE_CPR_RESUME: + cp->cc_events &= ~CALLB_CPR_START; + cv_signal(&cp->cc_stop_cv); + break; + } + mutex_exit(cp->cc_lockp); + return (ret != -1); +} + +/* + * The generic callback function associated with kernel threads which + * are always considered safe. + */ +/* ARGSUSED */ +boolean_t +callb_generic_cpr_safe(void *arg, int code) +{ + return (B_TRUE); +} +/* + * Prevent additions to callback table. + */ +void +callb_lock_table(void) +{ + mutex_enter(&ct->ct_lock); + ASSERT(ct->ct_busy == 0); + ct->ct_busy = 1; + mutex_exit(&ct->ct_lock); +} + +/* + * Allow additions to callback table. + */ +void +callb_unlock_table(void) +{ + mutex_enter(&ct->ct_lock); + ASSERT(ct->ct_busy != 0); + ct->ct_busy = 0; + cv_broadcast(&ct->ct_busy_cv); + mutex_exit(&ct->ct_lock); +} + +/* + * Return a boolean value indicating whether a particular kernel thread is + * stopped in accordance with the cpr callback protocol. If returning + * false, also return a pointer to the thread name via the 2nd argument. + */ +boolean_t +callb_is_stopped(kthread_id_t tp, caddr_t *thread_name) +{ + callb_t *cp; + boolean_t ret_val; + + mutex_enter(&ct->ct_lock); + + for (cp = ct->ct_first_cb[CB_CL_CPR_DAEMON]; + cp != NULL && tp != cp->c_thread; cp = cp->c_next) + ; + + ret_val = (cp != NULL); + if (ret_val) { + /* + * We found the thread in the callback table and have + * provisionally set the return value to true. Now + * see if it is marked "safe" and is sleeping or stopped. 
+ */ + callb_cpr_t *ccp = (callb_cpr_t *)cp->c_arg; + + *thread_name = cp->c_name; /* in case not stopped */ + mutex_enter(ccp->cc_lockp); + + if (ccp->cc_events & CALLB_CPR_SAFE) { + int retry; + + mutex_exit(ccp->cc_lockp); + for (retry = 0; retry < CALLB_MAX_RETRY; retry++) { + thread_lock(tp); + if (tp->t_state & (TS_SLEEP | TS_STOPPED)) { + thread_unlock(tp); + break; + } + thread_unlock(tp); + delay(CALLB_THREAD_DELAY); + } + ret_val = retry < CALLB_MAX_RETRY; + } else { + ret_val = + (ccp->cc_events & CALLB_CPR_ALWAYS_SAFE) != 0; + mutex_exit(ccp->cc_lockp); + } + } else { + /* + * Thread not found in callback table. Make the best + * attempt to identify the thread in the error message. + */ + ulong_t offset; + char *sym = kobj_getsymname((uintptr_t)tp->t_startpc, + &offset); + + *thread_name = sym ? sym : "*unknown*"; + } + + mutex_exit(&ct->ct_lock); + return (ret_val); +} diff --git a/uts/common/os/fm.c b/uts/common/os/fm.c new file mode 100644 index 000000000000..4efcff4f464a --- /dev/null +++ b/uts/common/os/fm.c @@ -0,0 +1,1386 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +/* + * Fault Management Architecture (FMA) Resource and Protocol Support + * + * The routines contained herein provide services to support kernel subsystems + * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089). + * + * Name-Value Pair Lists + * + * The embodiment of an FMA protocol element (event, fmri or authority) is a + * name-value pair list (nvlist_t). FMA-specific nvlist constructor and + * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used + * to create an nvpair list using custom allocators. Callers may choose to + * allocate either from the kernel memory allocator, or from a preallocated + * buffer, useful in constrained contexts like high-level interrupt routines. + * + * Protocol Event and FMRI Construction + * + * Convenience routines are provided to construct nvlist events according to + * the FMA Event Protocol and Naming Schema specification for ereports and + * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes. + * + * ENA Manipulation + * + * Routines to generate ENA formats 0, 1 and 2 are available as well as + * routines to increment formats 1 and 2. Individual fields within the + * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(), + * fm_ena_format_get() and fm_ena_gen_get(). + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/sysevent.h> +#include <sys/sysevent_impl.h> +#include <sys/nvpair.h> +#include <sys/cmn_err.h> +#include <sys/cpuvar.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/ddifm.h> +#include <sys/ddifm_impl.h> +#include <sys/spl.h> +#include <sys/dumphdr.h> +#include <sys/compress.h> +#include <sys/cpuvar.h> +#include <sys/console.h> +#include <sys/panic.h> +#include <sys/kobj.h> +#include <sys/sunddi.h> +#include <sys/systeminfo.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/fm/util.h> +#include <sys/fm/protocol.h> + +/* + * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. 
These + * values must be kept in sync with the FMA source code in usr/src/cmd/fm. + */ +static const char *fm_url = "http://www.sun.com/msg"; +static const char *fm_msgid = "SUNOS-8000-0G"; +static char *volatile fm_panicstr = NULL; + +errorq_t *ereport_errorq; +void *ereport_dumpbuf; +size_t ereport_dumplen; + +static uint_t ereport_chanlen = ERPT_EVCH_MAX; +static evchan_t *ereport_chan = NULL; +static ulong_t ereport_qlen = 0; +static size_t ereport_size = 0; +static int ereport_cols = 80; + +extern void fastreboot_disable_highpil(void); + +/* + * Common fault management kstats to record ereport generation + * failures + */ + +struct erpt_kstat { + kstat_named_t erpt_dropped; /* num erpts dropped on post */ + kstat_named_t erpt_set_failed; /* num erpt set failures */ + kstat_named_t fmri_set_failed; /* num fmri set failures */ + kstat_named_t payload_set_failed; /* num payload set failures */ +}; + +static struct erpt_kstat erpt_kstat_data = { + { "erpt-dropped", KSTAT_DATA_UINT64 }, + { "erpt-set-failed", KSTAT_DATA_UINT64 }, + { "fmri-set-failed", KSTAT_DATA_UINT64 }, + { "payload-set-failed", KSTAT_DATA_UINT64 } +}; + +/*ARGSUSED*/ +static void +fm_drain(void *private, void *data, errorq_elem_t *eep) +{ + nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep); + + if (!panicstr) + (void) fm_ereport_post(nvl, EVCH_TRYHARD); + else + fm_nvprint(nvl); +} + +void +fm_init(void) +{ + kstat_t *ksp; + + (void) sysevent_evc_bind(FM_ERROR_CHAN, + &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND); + + (void) sysevent_evc_control(ereport_chan, + EVCH_SET_CHAN_LEN, &ereport_chanlen); + + if (ereport_qlen == 0) + ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4); + + if (ereport_size == 0) + ereport_size = ERPT_DATA_SZ; + + ereport_errorq = errorq_nvcreate("fm_ereport_queue", + (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size, + FM_ERR_PIL, ERRORQ_VITAL); + if (ereport_errorq == NULL) + panic("failed to create required ereport error queue"); + + ereport_dumpbuf = 
kmem_alloc(ereport_size, KM_SLEEP); + ereport_dumplen = ereport_size; + + /* Initialize ereport allocation and generation kstats */ + ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED, + sizeof (struct erpt_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (ksp != NULL) { + ksp->ks_data = &erpt_kstat_data; + kstat_install(ksp); + } else { + cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); + + } +} + +/* + * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of + * output so they aren't split across console lines, and return the end column. + */ +/*PRINTFLIKE4*/ +static int +fm_printf(int depth, int c, int cols, const char *format, ...) +{ + va_list ap; + int width; + char c1; + + va_start(ap, format); + width = vsnprintf(&c1, sizeof (c1), format, ap); + va_end(ap); + + if (c + width >= cols) { + console_printf("\n\r"); + c = 0; + if (format[0] != ' ' && depth > 0) { + console_printf(" "); + c++; + } + } + + va_start(ap, format); + console_vprintf(format, ap); + va_end(ap); + + return ((c + width) % cols); +} + +/* + * Recursively print a nvlist in the specified column width and return the + * column we end up in. This function is called recursively by fm_nvprint(), + * below. We generically format the entire nvpair using hexadecimal + * integers and strings, and elide any integer arrays. Arrays are basically + * used for cache dumps right now, so we suppress them so as not to overwhelm + * the amount of console output we produce at panic time. This can be further + * enhanced as FMA technology grows based upon the needs of consumers. All + * FMA telemetry is logged using the dump device transport, so the console + * output serves only as a fallback in case this procedure is unsuccessful. 
+ */ +static int +fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) +{ + nvpair_t *nvp; + + for (nvp = nvlist_next_nvpair(nvl, NULL); + nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { + + data_type_t type = nvpair_type(nvp); + const char *name = nvpair_name(nvp); + + boolean_t b; + uint8_t i8; + uint16_t i16; + uint32_t i32; + uint64_t i64; + char *str; + nvlist_t *cnv; + + if (strcmp(name, FM_CLASS) == 0) + continue; /* already printed by caller */ + + c = fm_printf(d, c, cols, " %s=", name); + + switch (type) { + case DATA_TYPE_BOOLEAN: + c = fm_printf(d + 1, c, cols, " 1"); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(nvp, &b); + c = fm_printf(d + 1, c, cols, b ? "1" : "0"); + break; + + case DATA_TYPE_BYTE: + (void) nvpair_value_byte(nvp, &i8); + c = fm_printf(d + 1, c, cols, "%x", i8); + break; + + case DATA_TYPE_INT8: + (void) nvpair_value_int8(nvp, (void *)&i8); + c = fm_printf(d + 1, c, cols, "%x", i8); + break; + + case DATA_TYPE_UINT8: + (void) nvpair_value_uint8(nvp, &i8); + c = fm_printf(d + 1, c, cols, "%x", i8); + break; + + case DATA_TYPE_INT16: + (void) nvpair_value_int16(nvp, (void *)&i16); + c = fm_printf(d + 1, c, cols, "%x", i16); + break; + + case DATA_TYPE_UINT16: + (void) nvpair_value_uint16(nvp, &i16); + c = fm_printf(d + 1, c, cols, "%x", i16); + break; + + case DATA_TYPE_INT32: + (void) nvpair_value_int32(nvp, (void *)&i32); + c = fm_printf(d + 1, c, cols, "%x", i32); + break; + + case DATA_TYPE_UINT32: + (void) nvpair_value_uint32(nvp, &i32); + c = fm_printf(d + 1, c, cols, "%x", i32); + break; + + case DATA_TYPE_INT64: + (void) nvpair_value_int64(nvp, (void *)&i64); + c = fm_printf(d + 1, c, cols, "%llx", + (u_longlong_t)i64); + break; + + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &i64); + c = fm_printf(d + 1, c, cols, "%llx", + (u_longlong_t)i64); + break; + + case DATA_TYPE_HRTIME: + (void) nvpair_value_hrtime(nvp, (void *)&i64); + c = fm_printf(d + 1, c, cols, "%llx", + 
(u_longlong_t)i64); + break; + + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &str); + c = fm_printf(d + 1, c, cols, "\"%s\"", + str ? str : "<NULL>"); + break; + + case DATA_TYPE_NVLIST: + c = fm_printf(d + 1, c, cols, "["); + (void) nvpair_value_nvlist(nvp, &cnv); + c = fm_nvprintr(cnv, d + 1, c, cols); + c = fm_printf(d + 1, c, cols, " ]"); + break; + + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "["); + (void) nvpair_value_nvlist_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) { + c = fm_nvprintr(val[i], d + 1, c, cols); + } + c = fm_printf(d + 1, c, cols, " ]"); + } + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + c = fm_printf(d + 1, c, cols, "[...]"); + break; + case DATA_TYPE_UNKNOWN: + c = fm_printf(d + 1, c, cols, "<unknown>"); + break; + } + } + + return (c); +} + +void +fm_nvprint(nvlist_t *nvl) +{ + char *class; + int c = 0; + + console_printf("\r"); + + if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) + c = fm_printf(0, c, ereport_cols, "%s", class); + + if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0) + console_printf("\n"); + + console_printf("\n"); +} + +/* + * Wrapper for panic() that first produces an FMA-style message for admins. + * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this + * is the one exception to that rule and the only error that gets messaged. + * This function is intended for use by subsystems that have detected a fatal + * error and enqueued appropriate ereports and wish to then force a panic. + */ +/*PRINTFLIKE1*/ +void +fm_panic(const char *format, ...) 
+{ + va_list ap; + + (void) casptr((void *)&fm_panicstr, NULL, (void *)format); +#if defined(__i386) || defined(__amd64) + fastreboot_disable_highpil(); +#endif /* __i386 || __amd64 */ + va_start(ap, format); + vpanic(format, ap); + va_end(ap); +} + +/* + * Simply tell the caller if fm_panicstr is set, ie. an fma event has + * caused the panic. If so, something other than the default panic + * diagnosis method will diagnose the cause of the panic. + */ +int +is_fm_panic() +{ + if (fm_panicstr) + return (1); + else + return (0); +} + +/* + * Print any appropriate FMA banner message before the panic message. This + * function is called by panicsys() and prints the message for fm_panic(). + * We print the message here so that it comes after the system is quiesced. + * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix). + * The rest of the message is for the console only and not needed in the log, + * so it is printed using console_printf(). We break it up into multiple + * chunks so as to avoid overflowing any small legacy prom_printf() buffers. + */ +void +fm_banner(void) +{ + timespec_t tod; + hrtime_t now; + + if (!fm_panicstr) + return; /* panic was not initiated by fm_panic(); do nothing */ + + if (panicstr) { + tod = panic_hrestime; + now = panic_hrtime; + } else { + gethrestime(&tod); + now = gethrtime_waitfree(); + } + + cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, " + "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid); + + console_printf( +"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n" +"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n", + fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now); + + console_printf( +"PLATFORM: %s, CSN: -, HOSTNAME: %s\n" +"SOURCE: %s, REV: %s %s\n", + platform, utsname.nodename, utsname.sysname, + utsname.release, utsname.version); + + console_printf( +"DESC: Errors have been detected that require a reboot to ensure system\n" +"integrity. 
See %s/%s for more information.\n", + fm_url, fm_msgid); + + console_printf( +"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n" +"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n" +"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n"); + + console_printf("\n"); +} + +/* + * Utility function to write all of the pending ereports to the dump device. + * This function is called at either normal reboot or panic time, and simply + * iterates over the in-transit messages in the ereport sysevent channel. + */ +void +fm_ereport_dump(void) +{ + evchanq_t *chq; + sysevent_t *sep; + erpt_dump_t ed; + + timespec_t tod; + hrtime_t now; + char *buf; + size_t len; + + if (panicstr) { + tod = panic_hrestime; + now = panic_hrtime; + } else { + if (ereport_errorq != NULL) + errorq_drain(ereport_errorq); + gethrestime(&tod); + now = gethrtime_waitfree(); + } + + /* + * In the panic case, sysevent_evc_walk_init() will return NULL. + */ + if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL && + !panicstr) + return; /* event channel isn't initialized yet */ + + while ((sep = sysevent_evc_walk_step(chq)) != NULL) { + if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL) + break; + + ed.ed_magic = ERPT_MAGIC; + ed.ed_chksum = checksum32(buf, len); + ed.ed_size = (uint32_t)len; + ed.ed_pad = 0; + ed.ed_hrt_nsec = SE_TIME(sep); + ed.ed_hrt_base = now; + ed.ed_tod_base.sec = tod.tv_sec; + ed.ed_tod_base.nsec = tod.tv_nsec; + + dumpvp_write(&ed, sizeof (ed)); + dumpvp_write(buf, len); + } + + sysevent_evc_walk_fini(chq); +} + +/* + * Post an error report (ereport) to the sysevent error channel. The error + * channel must be established with a prior call to sysevent_evc_create() + * before publication may occur. 
+ */ +void +fm_ereport_post(nvlist_t *ereport, int evc_flag) +{ + size_t nvl_size = 0; + evchan_t *error_chan; + + (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE); + if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; + } + + if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan, + EVCH_CREAT|EVCH_HOLD_PEND) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; + } + + if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR, + SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + (void) sysevent_evc_unbind(error_chan); + return; + } + (void) sysevent_evc_unbind(error_chan); +} + +/* + * Wrapppers for FM nvlist allocators + */ +/* ARGSUSED */ +static void * +i_fm_alloc(nv_alloc_t *nva, size_t size) +{ + return (kmem_zalloc(size, KM_SLEEP)); +} + +/* ARGSUSED */ +static void +i_fm_free(nv_alloc_t *nva, void *buf, size_t size) +{ + kmem_free(buf, size); +} + +const nv_alloc_ops_t fm_mem_alloc_ops = { + NULL, + NULL, + i_fm_alloc, + i_fm_free, + NULL +}; + +/* + * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer + * to the newly allocated nv_alloc_t structure is returned upon success or NULL + * is returned to indicate that the nv_alloc structure could not be created. + */ +nv_alloc_t * +fm_nva_xcreate(char *buf, size_t bufsz) +{ + nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); + + if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) { + kmem_free(nvhdl, sizeof (nv_alloc_t)); + return (NULL); + } + + return (nvhdl); +} + +/* + * Destroy a previously allocated nv_alloc structure. The fixed buffer + * associated with nva must be freed by the caller. + */ +void +fm_nva_xdestroy(nv_alloc_t *nva) +{ + nv_alloc_fini(nva); + kmem_free(nva, sizeof (nv_alloc_t)); +} + +/* + * Create a new nv list. 
A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created. The newly created nv list is created and managed by the
 * operations installed in nva. If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring. Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	nv_alloc_t *nvhdl = nva;
	nvlist_t *nvl;

	/* No allocator supplied: build one on the default FMA ops. */
	if (nvhdl == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) == 0)
		return (nvl);

	/* Only tear down the handle if it was allocated here. */
	if (nva == NULL) {
		nv_alloc_fini(nvhdl);
		kmem_free(nvhdl, sizeof (nv_alloc_t));
	}

	return (NULL);
}

/*
 * Destroy a previously allocated nvlist structure. flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
+ */ +void +fm_nvlist_destroy(nvlist_t *nvl, int flag) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl); + + nvlist_free(nvl); + + if (nva != NULL) { + if (flag == FM_NVA_FREE) + fm_nva_xdestroy(nva); + } +} + +int +i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap) +{ + int nelem, ret = 0; + data_type_t type; + + while (ret == 0 && name != NULL) { + type = va_arg(ap, data_type_t); + switch (type) { + case DATA_TYPE_BYTE: + ret = nvlist_add_byte(payload, name, + va_arg(ap, uint_t)); + break; + case DATA_TYPE_BYTE_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_byte_array(payload, name, + va_arg(ap, uchar_t *), nelem); + break; + case DATA_TYPE_BOOLEAN_VALUE: + ret = nvlist_add_boolean_value(payload, name, + va_arg(ap, boolean_t)); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_boolean_array(payload, name, + va_arg(ap, boolean_t *), nelem); + break; + case DATA_TYPE_INT8: + ret = nvlist_add_int8(payload, name, + va_arg(ap, int)); + break; + case DATA_TYPE_INT8_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int8_array(payload, name, + va_arg(ap, int8_t *), nelem); + break; + case DATA_TYPE_UINT8: + ret = nvlist_add_uint8(payload, name, + va_arg(ap, uint_t)); + break; + case DATA_TYPE_UINT8_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint8_array(payload, name, + va_arg(ap, uint8_t *), nelem); + break; + case DATA_TYPE_INT16: + ret = nvlist_add_int16(payload, name, + va_arg(ap, int)); + break; + case DATA_TYPE_INT16_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int16_array(payload, name, + va_arg(ap, int16_t *), nelem); + break; + case DATA_TYPE_UINT16: + ret = nvlist_add_uint16(payload, name, + va_arg(ap, uint_t)); + break; + case DATA_TYPE_UINT16_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint16_array(payload, name, + va_arg(ap, uint16_t *), nelem); + break; + case DATA_TYPE_INT32: + ret = nvlist_add_int32(payload, name, + va_arg(ap, int32_t)); + break; + case 
DATA_TYPE_INT32_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int32_array(payload, name, + va_arg(ap, int32_t *), nelem); + break; + case DATA_TYPE_UINT32: + ret = nvlist_add_uint32(payload, name, + va_arg(ap, uint32_t)); + break; + case DATA_TYPE_UINT32_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint32_array(payload, name, + va_arg(ap, uint32_t *), nelem); + break; + case DATA_TYPE_INT64: + ret = nvlist_add_int64(payload, name, + va_arg(ap, int64_t)); + break; + case DATA_TYPE_INT64_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_int64_array(payload, name, + va_arg(ap, int64_t *), nelem); + break; + case DATA_TYPE_UINT64: + ret = nvlist_add_uint64(payload, name, + va_arg(ap, uint64_t)); + break; + case DATA_TYPE_UINT64_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_uint64_array(payload, name, + va_arg(ap, uint64_t *), nelem); + break; + case DATA_TYPE_STRING: + ret = nvlist_add_string(payload, name, + va_arg(ap, char *)); + break; + case DATA_TYPE_STRING_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_string_array(payload, name, + va_arg(ap, char **), nelem); + break; + case DATA_TYPE_NVLIST: + ret = nvlist_add_nvlist(payload, name, + va_arg(ap, nvlist_t *)); + break; + case DATA_TYPE_NVLIST_ARRAY: + nelem = va_arg(ap, int); + ret = nvlist_add_nvlist_array(payload, name, + va_arg(ap, nvlist_t **), nelem); + break; + default: + ret = EINVAL; + } + + name = va_arg(ap, char *); + } + return (ret); +} + +void +fm_payload_set(nvlist_t *payload, ...) 
+{ + int ret; + const char *name; + va_list ap; + + va_start(ap, payload); + name = va_arg(ap, char *); + ret = i_fm_payload_set(payload, name, ap); + va_end(ap); + + if (ret) + atomic_add_64( + &erpt_kstat_data.payload_set_failed.value.ui64, 1); +} + +/* + * Set-up and validate the members of an ereport event according to: + * + * Member name Type Value + * ==================================================== + * class string ereport + * version uint8_t 0 + * ena uint64_t <ena> + * detector nvlist_t <detector> + * ereport-payload nvlist_t <var args> + * + * We don't actually add a 'version' member to the payload. Really, + * the version quoted to us by our caller is that of the category 1 + * "ereport" event class (and we require FM_EREPORT_VERS0) but + * the payload version of the actual leaf class event under construction + * may be something else. Callers should supply a version in the varargs, + * or (better) we could take two version arguments - one for the + * ereport category 1 classification (expect FM_EREPORT_VERS0) and one + * for the leaf class. + */ +void +fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, + uint64_t ena, const nvlist_t *detector, ...) 
+{ + char ereport_class[FM_MAX_CLASS]; + const char *name; + va_list ap; + int ret; + + if (version != FM_EREPORT_VERS0) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } + + (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s", + FM_EREPORT_CLASS, erpt_class); + if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + } + + if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR, + (nvlist_t *)detector) != 0) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + } + + va_start(ap, detector); + name = va_arg(ap, const char *); + ret = i_fm_payload_set(ereport, name, ap); + va_end(ap); + + if (ret) + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); +} + +/* + * Set-up and validate the members of an hc fmri according to; + * + * Member name Type Value + * =================================================== + * version uint8_t 0 + * auth nvlist_t <auth> + * hc-name string <name> + * hc-id string <id> + * + * Note that auth and hc-id are optional members. 
+ */ + +#define HC_MAXPAIRS 20 +#define HC_MAXNAMELEN 50 + +static int +fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth) +{ + if (version != FM_HC_SCHEME_VERSION) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return (0); + } + + if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 || + nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return (0); + } + + if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return (0); + } + + return (1); +} + +void +fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, + nvlist_t *snvl, int npairs, ...) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); + nvlist_t *pairs[HC_MAXPAIRS]; + va_list ap; + int i; + + if (!fm_fmri_hc_set_common(fmri, version, auth)) + return; + + npairs = MIN(npairs, HC_MAXPAIRS); + + va_start(ap, npairs); + for (i = 0; i < npairs; i++) { + const char *name = va_arg(ap, const char *); + uint32_t id = va_arg(ap, uint32_t); + char idstr[11]; + + (void) snprintf(idstr, sizeof (idstr), "%u", id); + + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } + va_end(ap); + + if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0) + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + + for (i = 0; i < npairs; i++) + fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); + + if (snvl != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } +} + +/* + * Set-up and validate the members of an dev fmri according to: + * + * Member name Type Value + * 
====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth, devid and target-port-l0id are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int failed;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
		return;
	}

	/*
	 * Every member is attempted even after an earlier add has failed;
	 * the results are OR-ed together and counted once at the end.
	 */
	failed = nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	failed |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		failed |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	failed |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		failed |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL) {
		failed |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0,
		    tpl0);
	}

	if (failed != 0)
		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
}

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			string		<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
+ * + */ +void +fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth, + uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp) +{ + uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64; + + if (version < CPU_SCHEME_VERSION1) { + atomic_add_64(failedp, 1); + return; + } + + if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) { + atomic_add_64(failedp, 1); + return; + } + + if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME, + FM_FMRI_SCHEME_CPU) != 0) { + atomic_add_64(failedp, 1); + return; + } + + if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) + atomic_add_64(failedp, 1); + + if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0) + atomic_add_64(failedp, 1); + + if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK, + *cpu_maskp) != 0) + atomic_add_64(failedp, 1); + + if (serial_idp == NULL || nvlist_add_string(fmri_cpu, + FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0) + atomic_add_64(failedp, 1); +} + +/* + * Set-up and validate the members of a mem according to: + * + * Member name Type Value + * ==================================================== + * version uint8_t 0 + * auth nvlist_t <auth> [optional] + * unum string <unum> + * serial string <serial> [optional*] + * offset uint64_t <offset> [optional] + * + * * serial is required if offset is present + */ +void +fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth, + const char *unum, const char *serial, uint64_t offset) +{ + if (version != MEM_SCHEME_VERSION0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (!serial && (offset != (uint64_t)-1)) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) { + 
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (auth != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, + (nvlist_t *)auth) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } + + if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + + if (serial != NULL) { + if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID, + (char **)&serial, 1) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + if (offset != (uint64_t)-1) { + if (nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET, + offset) != 0) { + atomic_add_64(&erpt_kstat_data. + fmri_set_failed.value.ui64, 1); + } + } + } +} + +void +fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid, + uint64_t vdev_guid) +{ + if (version != ZFS_SCHEME_VERSION0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + + if (vdev_guid != 0) { + if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } + } +} + +uint64_t +fm_ena_increment(uint64_t ena) +{ + uint64_t new_ena; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + new_ena = ena + (1 << ENA_FMT1_GEN_SHFT); + break; + case FM_ENA_FMT2: + new_ena = ena + (1 << ENA_FMT2_GEN_SHFT); + break; + default: + new_ena = 0; + } + + return (new_ena); +} + +uint64_t +fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) +{ + uint64_t ena = 0; + + 
switch (format) { + case FM_ENA_FMT1: + if (timestamp) { + ena = (uint64_t)((format & ENA_FORMAT_MASK) | + ((cpuid << ENA_FMT1_CPUID_SHFT) & + ENA_FMT1_CPUID_MASK) | + ((timestamp << ENA_FMT1_TIME_SHFT) & + ENA_FMT1_TIME_MASK)); + } else { + ena = (uint64_t)((format & ENA_FORMAT_MASK) | + ((cpuid << ENA_FMT1_CPUID_SHFT) & + ENA_FMT1_CPUID_MASK) | + ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) & + ENA_FMT1_TIME_MASK)); + } + break; + case FM_ENA_FMT2: + ena = (uint64_t)((format & ENA_FORMAT_MASK) | + ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK)); + break; + default: + break; + } + + return (ena); +} + +uint64_t +fm_ena_generate(uint64_t timestamp, uchar_t format) +{ + return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format)); +} + +uint64_t +fm_ena_generation_get(uint64_t ena) +{ + uint64_t gen; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT; + break; + case FM_ENA_FMT2: + gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT; + break; + default: + gen = 0; + break; + } + + return (gen); +} + +uchar_t +fm_ena_format_get(uint64_t ena) +{ + + return (ENA_FORMAT(ena)); +} + +uint64_t +fm_ena_id_get(uint64_t ena) +{ + uint64_t id; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT; + break; + case FM_ENA_FMT2: + id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT; + break; + default: + id = 0; + } + + return (id); +} + +uint64_t +fm_ena_time_get(uint64_t ena) +{ + uint64_t time; + + switch (ENA_FORMAT(ena)) { + case FM_ENA_FMT1: + time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT; + break; + case FM_ENA_FMT2: + time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT; + break; + default: + time = 0; + } + + return (time); +} + +/* + * Convert a getpcstack() trace to symbolic name+offset, and add the resulting + * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK. 
+ */ +void +fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth) +{ + int i; + char *sym; + ulong_t off; + char *stkpp[FM_STK_DEPTH]; + char buf[FM_STK_DEPTH * FM_SYM_SZ]; + char *stkp = buf; + + for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) { + if ((sym = kobj_getsymname(stack[i], &off)) != NULL) + (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off); + else + (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]); + stkpp[i] = stkp; + } + + fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK, + DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL); +} + +void +print_msg_hwerr(ctid_t ct_id, proc_t *p) +{ + uprintf("Killed process %d (%s) in contract id %d " + "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id); +} + +void +fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, + nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); + nvlist_t *pairs[HC_MAXPAIRS]; + nvlist_t **hcl; + uint_t n; + int i, j; + va_list ap; + char *hcname, *hcid; + + if (!fm_fmri_hc_set_common(fmri, version, auth)) + return; + + /* + * copy the bboard nvpairs to the pairs array + */ + if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) + != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + for (i = 0; i < n; i++) { + if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, + &hcname) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { + for (j = 0; j <= i; j++) { + if (pairs[j] != NULL) + fm_nvlist_destroy(pairs[j], + FM_NVA_RETAIN); + } + atomic_add_64( + 
&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } + + /* + * create the pairs from passed in pairs + */ + npairs = MIN(npairs, HC_MAXPAIRS); + + va_start(ap, npairs); + for (i = n; i < npairs + n; i++) { + const char *name = va_arg(ap, const char *); + uint32_t id = va_arg(ap, uint32_t); + char idstr[11]; + (void) snprintf(idstr, sizeof (idstr), "%u", id); + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { + for (j = 0; j <= i; j++) { + if (pairs[j] != NULL) + fm_nvlist_destroy(pairs[j], + FM_NVA_RETAIN); + } + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } + va_end(ap); + + /* + * Create the fmri hc list + */ + if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, + npairs + n) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + for (i = 0; i < npairs + n; i++) { + fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); + } + + if (snvl != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } +} diff --git a/uts/common/os/nvpair_alloc_system.c b/uts/common/os/nvpair_alloc_system.c new file mode 100644 index 000000000000..e7655779590b --- /dev/null +++ b/uts/common/os/nvpair_alloc_system.c @@ -0,0 +1,64 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <sys/nvpair.h> + +static void * +nv_alloc_sys(nv_alloc_t *nva, size_t size) +{ + return (kmem_alloc(size, (int)(uintptr_t)nva->nva_arg)); +} + +/*ARGSUSED*/ +static void +nv_free_sys(nv_alloc_t *nva, void *buf, size_t size) +{ + kmem_free(buf, size); +} + +static const nv_alloc_ops_t system_ops = { + NULL, /* nv_ao_init() */ + NULL, /* nv_ao_fini() */ + nv_alloc_sys, /* nv_ao_alloc() */ + nv_free_sys, /* nv_ao_free() */ + NULL /* nv_ao_reset() */ +}; + +nv_alloc_t nv_alloc_sleep_def = { + &system_ops, + (void *)KM_SLEEP +}; + +nv_alloc_t nv_alloc_nosleep_def = { + &system_ops, + (void *)KM_NOSLEEP +}; + +nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def; +nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def; diff --git a/uts/common/sys/acl.h b/uts/common/sys/acl.h new file mode 100644 index 000000000000..35c9772b8e9b --- /dev/null +++ b/uts/common/sys/acl.h @@ -0,0 +1,302 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ACL_H +#define _SYS_ACL_H + +#include <sys/types.h> +#include <sys/acl_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_ACL_ENTRIES (1024) /* max entries of each type */ +typedef struct acl { + int a_type; /* the type of ACL entry */ + uid_t a_id; /* the entry in -uid or gid */ + o_mode_t a_perm; /* the permission field */ +} aclent_t; + +typedef struct ace { + uid_t a_who; /* uid or gid */ + uint32_t a_access_mask; /* read,write,... */ + uint16_t a_flags; /* see below */ + uint16_t a_type; /* allow or deny */ +} ace_t; + +typedef struct acl_info acl_t; + +/* + * The following are Defined types for an aclent_t. 
+ */ +#define USER_OBJ (0x01) /* object owner */ +#define USER (0x02) /* additional users */ +#define GROUP_OBJ (0x04) /* owning group of the object */ +#define GROUP (0x08) /* additional groups */ +#define CLASS_OBJ (0x10) /* file group class and mask entry */ +#define OTHER_OBJ (0x20) /* other entry for the object */ +#define ACL_DEFAULT (0x1000) /* default flag */ +/* default object owner */ +#define DEF_USER_OBJ (ACL_DEFAULT | USER_OBJ) +/* default additional users */ +#define DEF_USER (ACL_DEFAULT | USER) +/* default owning group */ +#define DEF_GROUP_OBJ (ACL_DEFAULT | GROUP_OBJ) +/* default additional groups */ +#define DEF_GROUP (ACL_DEFAULT | GROUP) +/* default mask entry */ +#define DEF_CLASS_OBJ (ACL_DEFAULT | CLASS_OBJ) +/* default other entry */ +#define DEF_OTHER_OBJ (ACL_DEFAULT | OTHER_OBJ) + +/* + * The following are defined for ace_t. + */ +#define ACE_READ_DATA 0x00000001 +#define ACE_LIST_DIRECTORY 0x00000001 +#define ACE_WRITE_DATA 0x00000002 +#define ACE_ADD_FILE 0x00000002 +#define ACE_APPEND_DATA 0x00000004 +#define ACE_ADD_SUBDIRECTORY 0x00000004 +#define ACE_READ_NAMED_ATTRS 0x00000008 +#define ACE_WRITE_NAMED_ATTRS 0x00000010 +#define ACE_EXECUTE 0x00000020 +#define ACE_DELETE_CHILD 0x00000040 +#define ACE_READ_ATTRIBUTES 0x00000080 +#define ACE_WRITE_ATTRIBUTES 0x00000100 +#define ACE_DELETE 0x00010000 +#define ACE_READ_ACL 0x00020000 +#define ACE_WRITE_ACL 0x00040000 +#define ACE_WRITE_OWNER 0x00080000 +#define ACE_SYNCHRONIZE 0x00100000 + +#define ACE_FILE_INHERIT_ACE 0x0001 +#define ACE_DIRECTORY_INHERIT_ACE 0x0002 +#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004 +#define ACE_INHERIT_ONLY_ACE 0x0008 +#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010 +#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020 +#define ACE_IDENTIFIER_GROUP 0x0040 +#define ACE_INHERITED_ACE 0x0080 +#define ACE_OWNER 0x1000 +#define ACE_GROUP 0x2000 +#define ACE_EVERYONE 0x4000 + +#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000 +#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001 +#define 
ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002 +#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003 + +#define ACL_AUTO_INHERIT 0x0001 +#define ACL_PROTECTED 0x0002 +#define ACL_DEFAULTED 0x0004 +#define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED| \ + ACL_DEFAULTED) + +#ifdef _KERNEL + +/* + * These are only applicable in a CIFS context. + */ +#define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 +#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 +#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 +#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 +#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 +#define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 +#define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A +#define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B +#define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C +#define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D +#define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E +#define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F +#define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 + +#define ACE_ALL_TYPES 0x001F + +typedef struct ace_object { + uid_t a_who; /* uid or gid */ + uint32_t a_access_mask; /* read,write,... 
*/ + uint16_t a_flags; /* see below */ + uint16_t a_type; /* allow or deny */ + uint8_t a_obj_type[16]; /* obj type */ + uint8_t a_inherit_obj_type[16]; /* inherit obj */ +} ace_object_t; + +#endif + +#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \ + ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \ + ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \ + ACE_WRITE_OWNER|ACE_SYNCHRONIZE) + +#define ACE_ALL_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA| \ + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS|ACE_WRITE_ACL| \ + ACE_WRITE_OWNER|ACE_DELETE|ACE_DELETE_CHILD) + +#define ACE_READ_PERMS (ACE_READ_DATA|ACE_READ_ACL|ACE_READ_ATTRIBUTES| \ + ACE_READ_NAMED_ATTRS) + +#define ACE_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES| \ + ACE_WRITE_NAMED_ATTRS) + +#define ACE_MODIFY_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \ + ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \ + ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_SYNCHRONIZE) +/* + * The following flags are supported by both NFSv4 ACLs and ace_t. + */ +#define ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \ + ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE | \ + ACE_INHERIT_ONLY_ACE | \ + ACE_IDENTIFIER_GROUP) + +#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE| \ + ACE_IDENTIFIER_GROUP) +#define ACE_INHERIT_FLAGS (ACE_FILE_INHERIT_ACE| \ + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE) + +/* cmd args to acl(2) for aclent_t */ +#define GETACL 1 +#define SETACL 2 +#define GETACLCNT 3 + +/* cmd's to manipulate ace acls. 
*/ +#define ACE_GETACL 4 +#define ACE_SETACL 5 +#define ACE_GETACLCNT 6 + +/* minimal acl entries from GETACLCNT */ +#define MIN_ACL_ENTRIES 4 + +#if !defined(_KERNEL) + +/* acl check errors */ +#define GRP_ERROR 1 +#define USER_ERROR 2 +#define OTHER_ERROR 3 +#define CLASS_ERROR 4 +#define DUPLICATE_ERROR 5 +#define MISS_ERROR 6 +#define MEM_ERROR 7 +#define ENTRY_ERROR 8 + + +/* + * similar to ufs_acl.h: changed to char type for user commands (tar, cpio) + * Attribute types + */ +#define UFSD_FREE ('0') /* Free entry */ +#define UFSD_ACL ('1') /* Access Control Lists */ +#define UFSD_DFACL ('2') /* reserved for future use */ +#define ACE_ACL ('3') /* ace_t style acls */ + +/* + * flag to [f]acl_get() + * controls whether a trivial acl should be returned. + */ +#define ACL_NO_TRIVIAL 0x2 + + +/* + * Flags to control acl_totext() + */ + +#define ACL_APPEND_ID 0x1 /* append uid/gid to user/group entries */ +#define ACL_COMPACT_FMT 0x2 /* build ACL in ls -V format */ +#define ACL_NORESOLVE 0x4 /* don't do name service lookups */ +#define ACL_SID_FMT 0x8 /* use usersid/groupsid when appropriate */ + +/* + * Legacy aclcheck errors for aclent_t ACLs + */ +#define EACL_GRP_ERROR GRP_ERROR +#define EACL_USER_ERROR USER_ERROR +#define EACL_OTHER_ERROR OTHER_ERROR +#define EACL_CLASS_ERROR CLASS_ERROR +#define EACL_DUPLICATE_ERROR DUPLICATE_ERROR +#define EACL_MISS_ERROR MISS_ERROR +#define EACL_MEM_ERROR MEM_ERROR +#define EACL_ENTRY_ERROR ENTRY_ERROR + +#define EACL_INHERIT_ERROR 9 /* invalid inherit flags */ +#define EACL_FLAGS_ERROR 10 /* unknown flag value */ +#define EACL_PERM_MASK_ERROR 11 /* unknown permission */ +#define EACL_COUNT_ERROR 12 /* invalid acl count */ + +#define EACL_INVALID_SLOT 13 /* invalid acl slot */ +#define EACL_NO_ACL_ENTRY 14 /* Entry doesn't exist */ +#define EACL_DIFF_TYPE 15 /* acls aren't same type */ + +#define EACL_INVALID_USER_GROUP 16 /* need user/group name */ +#define EACL_INVALID_STR 17 /* invalid acl string */ +#define 
EACL_FIELD_NOT_BLANK 18 /* can't have blank field */ +#define EACL_INVALID_ACCESS_TYPE 19 /* invalid access type */ +#define EACL_UNKNOWN_DATA 20 /* Unrecognized data in ACL */ +#define EACL_MISSING_FIELDS 21 /* missing fields in acl */ + +#define EACL_INHERIT_NOTDIR 22 /* Need dir for inheritance */ + +extern int aclcheck(aclent_t *, int, int *); +extern int acltomode(aclent_t *, int, mode_t *); +extern int aclfrommode(aclent_t *, int, mode_t *); +extern int aclsort(int, int, aclent_t *); +extern char *acltotext(aclent_t *, int); +extern aclent_t *aclfromtext(char *, int *); +extern void acl_free(acl_t *); +extern int acl_get(const char *, int, acl_t **); +extern int facl_get(int, int, acl_t **); +extern int acl_set(const char *, acl_t *acl); +extern int facl_set(int, acl_t *acl); +extern int acl_strip(const char *, uid_t, gid_t, mode_t); +extern int acl_trivial(const char *); +extern char *acl_totext(acl_t *, int); +extern int acl_fromtext(const char *, acl_t **); +extern int acl_check(acl_t *, int); + +#else /* !defined(_KERNEL) */ + +extern void ksort(caddr_t, int, int, int (*)(void *, void *)); +extern int cmp2acls(void *, void *); + +#endif /* !defined(_KERNEL) */ + +#if defined(__STDC__) +extern int acl(const char *path, int cmd, int cnt, void *buf); +extern int facl(int fd, int cmd, int cnt, void *buf); +#else /* !__STDC__ */ +extern int acl(); +extern int facl(); +#endif /* defined(__STDC__) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ACL_H */ diff --git a/uts/common/sys/acl_impl.h b/uts/common/sys/acl_impl.h new file mode 100644 index 000000000000..b82f259143ac --- /dev/null +++ b/uts/common/sys/acl_impl.h @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ACL_IMPL_H +#define _SYS_ACL_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * acl flags + * + * ACL_AUTO_INHERIT, ACL_PROTECTED and ACL_DEFAULTED + * flags can also be stored in this field. + */ +#define ACL_IS_TRIVIAL 0x10000 +#define ACL_IS_DIR 0x20000 + +typedef enum acl_type { + ACLENT_T = 0, + ACE_T = 1 +} acl_type_t; + +struct acl_info { + acl_type_t acl_type; /* style of acl */ + int acl_cnt; /* number of acl entries */ + int acl_entry_size; /* sizeof acl entry */ + int acl_flags; /* special flags about acl */ + void *acl_aclp; /* the acl */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ACL_IMPL_H */ diff --git a/uts/common/sys/avl.h b/uts/common/sys/avl.h new file mode 100644 index 000000000000..ba305c908239 --- /dev/null +++ b/uts/common/sys/avl.h @@ -0,0 +1,309 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _AVL_H +#define _AVL_H + +/* + * This is a private header file. Applications should not directly include + * this file. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/avl_impl.h> + +/* + * This is a generic implementation of AVL trees for use in the Solaris kernel. + * The interfaces provide an efficient way of implementing an ordered set of + * data structures. + * + * AVL trees provide an alternative to using an ordered linked list. Using AVL + * trees will usually be faster, however they require more storage. An ordered + * linked list in general requires 2 pointers in each data structure. The + * AVL tree implementation uses 3 pointers. The following chart gives the + * approximate performance of operations with the different approaches: + * + * Operation Link List AVL tree + * --------- -------- -------- + * lookup O(n) O(log(n)) + * + * insert 1 node constant constant + * + * delete 1 node constant between constant and O(log(n)) + * + * delete all nodes O(n) O(n) + * + * visit the next + * or prev node constant between constant and O(log(n)) + * + * + * The data structure nodes are anchored at an "avl_tree_t" (the equivalent + * of a list header) and the individual nodes will have a field of + * type "avl_node_t" (corresponding to list pointers).
+ * + * The type "avl_index_t" is used to indicate a position in the list for + * certain calls. + * + * The usage scenario is generally: + * + * 1. Create the list/tree with: avl_create() + * + * followed by any mixture of: + * + * 2a. Insert nodes with: avl_add(), or avl_find() and avl_insert() + * + * 2b. Visit elements with: + * avl_first() - returns the lowest valued node + * avl_last() - returns the highest valued node + * AVL_NEXT() - given a node go to next higher one + * AVL_PREV() - given a node go to previous lower one + * + * 2c. Find the node with the closest value either less than or greater + * than a given value with avl_nearest(). + * + * 2d. Remove individual nodes from the list/tree with avl_remove(). + * + * and finally when the list is being destroyed + * + * 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes. + * Note that once you use avl_destroy_nodes(), you can no longer + * use any routine except avl_destroy_nodes() and avl_destroy(). + * + * 4. Use avl_destroy() to destroy the AVL tree itself. + * + * Any locking for multiple thread access is up to the user to provide, just + * as is needed for any linked list implementation. + */ + + +/* + * Type used for the root of the AVL tree. + */ +typedef struct avl_tree avl_tree_t; + +/* + * The data nodes in the AVL tree must have a field of this type. + */ +typedef struct avl_node avl_node_t; + +/* + * An opaque type used to locate a position in the tree where a node + * would be inserted. + */ +typedef uintptr_t avl_index_t; + + +/* + * Direction constants used for avl_nearest(). + */ +#define AVL_BEFORE (0) +#define AVL_AFTER (1) + + +/* + * Prototypes + * + * Where not otherwise mentioned, "void *" arguments are a pointer to the + * user data structure which must contain a field of type avl_node_t. + * + * Also assume the user data structure looks like: + * struct my_type { + * ... + * avl_node_t my_link; + * ... + * }; + */ + +/* + * Initialize an AVL tree.
Arguments are: + * + * tree - the tree to be initialized + * compar - function to compare two nodes, it must return exactly: -1, 0, or +1 + * -1 for <, 0 for ==, and +1 for > + * size - the value of sizeof(struct my_type) + * offset - the value of OFFSETOF(struct my_type, my_link) + */ +extern void avl_create(avl_tree_t *tree, + int (*compar) (const void *, const void *), size_t size, size_t offset); + + +/* + * Find a node with a matching value in the tree. Returns the matching node + * found. If not found, it returns NULL and then if "where" is not NULL it sets + * "where" for use with avl_insert() or avl_nearest(). + * + * node - node that has the value being looked for + * where - position for use with avl_nearest() or avl_insert(), may be NULL + */ +extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where); + +/* + * Insert a node into the tree. + * + * node - the node to insert + * where - position as returned from avl_find() + */ +extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where); + +/* + * Insert "new_data" in "tree" in the given "direction" either after + * or before the data "here". + * + * This might be useful for avl clients caching recently accessed + * data to avoid doing avl_find() again for insertion. + * + * new_data - new data to insert + * here - existing node in "tree" + * direction - either AVL_AFTER or AVL_BEFORE the data "here". + */ +extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here, + int direction); + + +/* + * Return the first or last valued node in the tree. Will return NULL + * if the tree is empty. + * + */ +extern void *avl_first(avl_tree_t *tree); +extern void *avl_last(avl_tree_t *tree); + + +/* + * Return the next or previous valued node in the tree. + * AVL_NEXT() will return NULL if at the last node. + * AVL_PREV() will return NULL if at the first node.
+ * + * node - the node from which the next or previous node is found + */ +#define AVL_NEXT(tree, node) avl_walk(tree, node, AVL_AFTER) +#define AVL_PREV(tree, node) avl_walk(tree, node, AVL_BEFORE) + + +/* + * Find the node with the nearest value either greater or less than + * the value from a previous avl_find(). Returns the node or NULL if + * there isn't a matching one. + * + * where - position as returned from avl_find() + * direction - either AVL_BEFORE or AVL_AFTER + * + * EXAMPLE get the greatest node that is less than a given value: + * + * avl_tree_t *tree; + * struct my_data look_for_value = {....}; + * struct my_data *node; + * struct my_data *less; + * avl_index_t where; + * + * node = avl_find(tree, &look_for_value, &where); + * if (node != NULL) + * less = AVL_PREV(tree, node); + * else + * less = avl_nearest(tree, where, AVL_BEFORE); + */ +extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction); + + +/* + * Add a single node to the tree. + * The node must not be in the tree, and it must not + * compare equal to any other node already in the tree. + * + * node - the node to add + */ +extern void avl_add(avl_tree_t *tree, void *node); + + +/* + * Remove a single node from the tree. The node must be in the tree. + * + * node - the node to remove + */ +extern void avl_remove(avl_tree_t *tree, void *node); + +/* + * Reinsert a node only if its order has changed relative to its nearest + * neighbors. To optimize performance avl_update_lt() checks only the previous + * node and avl_update_gt() checks only the next node. Use avl_update_lt() and + * avl_update_gt() only if you know the direction in which the order of the + * node may change. 
+ */ +extern boolean_t avl_update(avl_tree_t *, void *); +extern boolean_t avl_update_lt(avl_tree_t *, void *); +extern boolean_t avl_update_gt(avl_tree_t *, void *); + +/* + * Return the number of nodes in the tree + */ +extern ulong_t avl_numnodes(avl_tree_t *tree); + +/* + * Return B_TRUE if there are zero nodes in the tree, B_FALSE otherwise. + */ +extern boolean_t avl_is_empty(avl_tree_t *tree); + +/* + * Used to destroy any remaining nodes in a tree. The cookie argument should + * be initialized to NULL before the first call. Returns a node that has been + * removed from the tree and may be free()'d. Returns NULL when the tree is + * empty. + * + * Once you call avl_destroy_nodes(), you can only continuing calling it and + * finally avl_destroy(). No other AVL routines will be valid. + * + * cookie - a "void *" used to save state between calls to avl_destroy_nodes() + * + * EXAMPLE: + * avl_tree_t *tree; + * struct my_data *node; + * void *cookie; + * + * cookie = NULL; + * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) + * free(node); + * avl_destroy(tree); + */ +extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie); + + +/* + * Final destroy of an AVL tree. Arguments are: + * + * tree - the empty tree to destroy + */ +extern void avl_destroy(avl_tree_t *tree); + + + +#ifdef __cplusplus +} +#endif + +#endif /* _AVL_H */ diff --git a/uts/common/sys/avl_impl.h b/uts/common/sys/avl_impl.h new file mode 100644 index 000000000000..620685f370d4 --- /dev/null +++ b/uts/common/sys/avl_impl.h @@ -0,0 +1,164 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _AVL_IMPL_H +#define _AVL_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This is a private header file. Applications should not directly include + * this file. + */ + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * generic AVL tree implementation for kernel use + * + * There are 5 pieces of information stored for each node in an AVL tree + * + * pointer to less than child + * pointer to greater than child + * a pointer to the parent of this node + * an indication [0/1] of which child I am of my parent + * a "balance" (-1, 0, +1) indicating which child tree is taller + * + * Since they only need 3 bits, the last two fields are packed into the + * bottom bits of the parent pointer on 64 bit machines to save on space. 
+ */ + +#ifndef _LP64 + +struct avl_node { + struct avl_node *avl_child[2]; /* left/right children */ + struct avl_node *avl_parent; /* this node's parent */ + unsigned short avl_child_index; /* my index in parent's avl_child[] */ + short avl_balance; /* balance value: -1, 0, +1 */ +}; + +#define AVL_XPARENT(n) ((n)->avl_parent) +#define AVL_SETPARENT(n, p) ((n)->avl_parent = (p)) + +#define AVL_XCHILD(n) ((n)->avl_child_index) +#define AVL_SETCHILD(n, c) ((n)->avl_child_index = (unsigned short)(c)) + +#define AVL_XBALANCE(n) ((n)->avl_balance) +#define AVL_SETBALANCE(n, b) ((n)->avl_balance = (short)(b)) + +#else /* _LP64 */ + +/* + * for 64 bit machines, avl_pcb contains parent pointer, balance and child_index + * values packed in the following manner: + * + * |63 3| 2 |1 0 | + * |-------------------------------------|-----------------|-------------| + * | avl_parent hi order bits | avl_child_index | avl_balance | + * | | | + 1 | + * |-------------------------------------|-----------------|-------------| + * + */ +struct avl_node { + struct avl_node *avl_child[2]; /* left/right children nodes */ + uintptr_t avl_pcb; /* parent, child_index, balance */ +}; + +/* + * macros to extract/set fields in avl_pcb + * + * pointer to the parent of the current node is the high order bits + */ +#define AVL_XPARENT(n) ((struct avl_node *)((n)->avl_pcb & ~7)) +#define AVL_SETPARENT(n, p) \ + ((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p))) + +/* + * index of this node in its parent's avl_child[]: bit #2 + */ +#define AVL_XCHILD(n) (((n)->avl_pcb >> 2) & 1) +#define AVL_SETCHILD(n, c) \ + ((n)->avl_pcb = (uintptr_t)(((n)->avl_pcb & ~4) | ((c) << 2))) + +/* + * balance indication for a node, lowest 2 bits. A valid balance is + * -1, 0, or +1, and is encoded by adding 1 to the value to get the + * unsigned values of 0, 1, 2. 
+ */ +#define AVL_XBALANCE(n) ((int)(((n)->avl_pcb & 3) - 1)) +#define AVL_SETBALANCE(n, b) \ + ((n)->avl_pcb = (uintptr_t)((((n)->avl_pcb & ~3) | ((b) + 1)))) + +#endif /* _LP64 */ + + + +/* + * switch between a node and data pointer for a given tree + * the value of "o" is tree->avl_offset + */ +#define AVL_NODE2DATA(n, o) ((void *)((uintptr_t)(n) - (o))) +#define AVL_DATA2NODE(d, o) ((struct avl_node *)((uintptr_t)(d) + (o))) + + + +/* + * macros used to create/access an avl_index_t + */ +#define AVL_INDEX2NODE(x) ((avl_node_t *)((x) & ~1)) +#define AVL_INDEX2CHILD(x) ((x) & 1) +#define AVL_MKINDEX(n, c) ((avl_index_t)(n) | (c)) + + +/* + * The tree structure. The fields avl_root, avl_compar, and avl_offset come + * first since they are needed for avl_find(). We want them to fit into + * a single 64 byte cache line to make avl_find() as fast as possible. + */ +struct avl_tree { + struct avl_node *avl_root; /* root node in tree */ + int (*avl_compar)(const void *, const void *); + size_t avl_offset; /* offsetof(type, avl_link_t field) */ + ulong_t avl_numnodes; /* number of nodes in the tree */ + size_t avl_size; /* sizeof user type struct */ +}; + + +/* + * This will only by used via AVL_NEXT() or AVL_PREV() + */ +extern void *avl_walk(struct avl_tree *, void *, int); + +#ifdef __cplusplus +} +#endif + +#endif /* _AVL_IMPL_H */ diff --git a/uts/common/sys/bitmap.h b/uts/common/sys/bitmap.h new file mode 100644 index 000000000000..d0dd12b68318 --- /dev/null +++ b/uts/common/sys/bitmap.h @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +#ifndef _SYS_BITMAP_H +#define _SYS_BITMAP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/feature_tests.h> +#if defined(__GNUC__) && defined(_ASM_INLINES) && \ + (defined(__i386) || defined(__amd64)) +#include <asm/bitmap.h> +#endif + +/* + * Operations on bitmaps of arbitrary size + * A bitmap is a vector of 1 or more ulong_t's. + * The user of the package is responsible for range checks and keeping + * track of sizes. + */ + +#ifdef _LP64 +#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */ +#define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL, to extract word index */ +#else +#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */ +#endif + +#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */ +#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */ + +#ifdef _LP64 +#define BT_NBIPUL32 (1 << BT_ULSHIFT32) /* n bits per ulong_t */ +#define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */ +#define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */ +#else +#define BT_ULMAXMASK 0xffffffff +#endif + +/* + * bitmap is a ulong_t *, bitindex an index_t + * + * The macros BT_WIM and BT_BIW internal; there is no need + * for users of this package to use them. 
+ */ + +/* + * word in map + */ +#define BT_WIM(bitmap, bitindex) \ + ((bitmap)[(bitindex) >> BT_ULSHIFT]) +/* + * bit in word + */ +#define BT_BIW(bitindex) \ + (1UL << ((bitindex) & BT_ULMASK)) + +#ifdef _LP64 +#define BT_WIM32(bitmap, bitindex) \ + ((bitmap)[(bitindex) >> BT_ULSHIFT32]) + +#define BT_BIW32(bitindex) \ + (1UL << ((bitindex) & BT_ULMASK32)) +#endif + +/* + * These are public macros + * + * BT_BITOUL == n bits to n ulong_t's + */ +#define BT_BITOUL(nbits) \ + (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL) +#define BT_SIZEOFMAP(nbits) \ + (BT_BITOUL(nbits) * sizeof (ulong_t)) +#define BT_TEST(bitmap, bitindex) \ + ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0) +#define BT_SET(bitmap, bitindex) \ + { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); } +#define BT_CLEAR(bitmap, bitindex) \ + { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); } + +#ifdef _LP64 +#define BT_BITOUL32(nbits) \ + (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32) +#define BT_SIZEOFMAP32(nbits) \ + (BT_BITOUL32(nbits) * sizeof (uint_t)) +#define BT_TEST32(bitmap, bitindex) \ + ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0) +#define BT_SET32(bitmap, bitindex) \ + { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); } +#define BT_CLEAR32(bitmap, bitindex) \ + { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); } +#endif /* _LP64 */ + + +/* + * BIT_ONLYONESET is a private macro not designed for bitmaps of + * arbitrary size. u must be an unsigned integer/long. It returns + * true if one and only one bit is set in u. + */ +#define BIT_ONLYONESET(u) \ + ((((u) == 0) ? 
0 : ((u) & ((u) - 1)) == 0)) + +#if defined(_KERNEL) && !defined(_ASM) +#include <sys/atomic.h> + +/* + * return next available bit index from map with specified number of bits + */ +extern index_t bt_availbit(ulong_t *bitmap, size_t nbits); +/* + * find the highest order bit that is on, and is within or below + * the word specified by wx + */ +extern int bt_gethighbit(ulong_t *mapp, int wx); +extern int bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2, + size_t end_pos); +/* + * Find highest and lowest one bit set. + * Returns bit number + 1 of bit that is set, otherwise returns 0. + * Low order bit is 0, high order bit is 31. + */ +extern int highbit(ulong_t); +extern int lowbit(ulong_t); +extern int bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop); +extern void bt_copy(ulong_t *, ulong_t *, ulong_t); + +/* + * find the parity + */ +extern int odd_parity(ulong_t); + +/* + * Atomically set/clear bits + * Atomic exclusive operations will set "result" to "-1" + * if the bit is already set/cleared. "result" will be set + * to 0 otherwise. + */ +#define BT_ATOMIC_SET(bitmap, bitindex) \ + { atomic_or_long(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); } +#define BT_ATOMIC_CLEAR(bitmap, bitindex) \ + { atomic_and_long(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); } + +#define BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \ + { result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)), \ + (bitindex) % BT_NBIPUL); } +#define BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \ + { result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)), \ + (bitindex) % BT_NBIPUL); } + +/* + * Extracts bits between index h (high, inclusive) and l (low, inclusive) from + * u, which must be an unsigned integer.
+ */ +#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU)) + +#endif /* _KERNEL && !_ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_BITMAP_H */ diff --git a/uts/common/sys/callb.h b/uts/common/sys/callb.h new file mode 100644 index 000000000000..302f314b800a --- /dev/null +++ b/uts/common/sys/callb.h @@ -0,0 +1,213 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_CALLB_H +#define _SYS_CALLB_H + +#include <sys/t_lock.h> +#include <sys/thread.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * definitions of callback classes (c_class) + * + * Callbacks belong in the same class if (1) their callback routines + * do the same kind of processing (ideally, using the same callback function) + * and (2) they can/should be executed at the same time in a cpr + * suspend/resume operation. + * + * Note: The DAEMON class, in particular, is for stopping kernel threads + * and nothing else. The CALLB_* macros below should be used to deal + * with kernel threads, and the callback function should be callb_generic_cpr. 
+ * Another idiosyncrasy of the DAEMON class is that if a suspend operation + * fails, some of the callback functions may be called with the RESUME + * code which were never called with SUSPEND. Not a problem currently, + * but see bug 4201851. + */ +#define CB_CL_CPR_DAEMON 0 +#define CB_CL_CPR_VM 1 +#define CB_CL_CPR_CALLOUT 2 +#define CB_CL_CPR_OBP 3 +#define CB_CL_CPR_FB 4 +#define CB_CL_PANIC 5 +#define CB_CL_CPR_RPC 6 +#define CB_CL_CPR_PROMPRINTF 7 +#define CB_CL_UADMIN 8 +#define CB_CL_CPR_PM 9 +#define CB_CL_HALT 10 +#define CB_CL_CPR_DMA 11 +#define CB_CL_CPR_POST_USER 12 +#define CB_CL_UADMIN_PRE_VFS 13 +#define CB_CL_MDBOOT CB_CL_UADMIN +#define CB_CL_ENTER_DEBUGGER 14 +#define CB_CL_CPR_POST_KERNEL 15 +#define CB_CL_CPU_DEEP_IDLE 16 +#define NCBCLASS 17 /* CHANGE ME if classes are added/removed */ + +/* + * CB_CL_CPR_DAEMON class specific definitions are given below: + */ + +/* + * code for CPR callb_execute_class + */ +#define CB_CODE_CPR_CHKPT 0 +#define CB_CODE_CPR_RESUME 1 + +typedef void * callb_id_t; +/* + * Per kernel thread structure for CPR daemon callbacks. + * Must be protected by either an existing lock in the daemon or + * a new lock created for such a purpose. + */ +typedef struct callb_cpr { + kmutex_t *cc_lockp; /* lock to protect this struct */ + char cc_events; /* various events for CPR */ + callb_id_t cc_id; /* callb id address */ + kcondvar_t cc_callb_cv; /* cv for callback waiting */ + kcondvar_t cc_stop_cv; /* cv to checkpoint block */ +} callb_cpr_t; + +/* + * cc_events definitions + */ +#define CALLB_CPR_START 1 /* a checkpoint request's started */ +#define CALLB_CPR_SAFE 2 /* thread is safe for CPR */ +#define CALLB_CPR_ALWAYS_SAFE 4 /* thread is ALWAYS safe for CPR */ + +/* + * Used when checking that all kernel threads are stopped. 
+ */ +#define CALLB_MAX_RETRY 3 /* when waiting for kthread to sleep */ +#define CALLB_THREAD_DELAY 10 /* ticks allowed to reach sleep */ +#define CPR_KTHREAD_TIMEOUT_SEC 90 /* secs before callback times out -- */ + /* due to pwr mgmt of disks, make -- */ + /* big enough for worst spinup time */ + +#ifdef _KERNEL +/* + * + * CALLB_CPR_INIT macro is used by kernel threads to add their entry to + * the callback table and perform other initialization. It automatically + * adds the thread as being in the callback class CB_CL_CPR_DAEMON. + * + * cp - ptr to the callb_cpr_t structure for this kernel thread + * + * lockp - pointer to mutex protecting the callb_cpr_t struct + * + * func - pointer to the callback function for this kernel thread. + * It has the prototype boolean_t <func>(void *arg, int code) + * where: arg - ptr to the callb_cpr_t structure + * code - not used for this type of callback + * returns: B_TRUE if successful; B_FALSE if unsuccessful. + * + * name - a string giving the name of the kernel thread + * + * Note: lockp is the lock to protect the callb_cpr_t (cp) structure + * later on. No lock needs to be held for this initialization. + */ +#define CALLB_CPR_INIT(cp, lockp, func, name) { \ + bzero((caddr_t)(cp), sizeof (callb_cpr_t)); \ + (cp)->cc_lockp = lockp; \ + (cp)->cc_id = callb_add(func, (void *)(cp), \ + CB_CL_CPR_DAEMON, name); \ + } + +#ifndef __lock_lint +#define CALLB_CPR_ASSERT(cp) ASSERT(MUTEX_HELD((cp)->cc_lockp)); +#else +#define CALLB_CPR_ASSERT(cp) +#endif +/* + * Some threads (like the idle threads) do not adhere to the callback + * protocol and are always considered safe. Such threads must never exit. + * They register their presence by calling this macro during their + * initialization. 
+ * + * Args: + * t - thread pointer of the client kernel thread + * name - a string giving the name of the kernel thread + */ +#define CALLB_CPR_INIT_SAFE(t, name) { \ + (void) callb_add_thread(callb_generic_cpr_safe, \ + (void *) &callb_cprinfo_safe, CB_CL_CPR_DAEMON, \ + name, t); \ + } +/* + * The lock to protect cp's content must be held before + * calling the following two macros. + * + * Any code region between CALLB_CPR_SAFE_BEGIN and CALLB_CPR_SAFE_END + * is safe for checkpoint/resume. + */ +#define CALLB_CPR_SAFE_BEGIN(cp) { \ + CALLB_CPR_ASSERT(cp) \ + (cp)->cc_events |= CALLB_CPR_SAFE; \ + if ((cp)->cc_events & CALLB_CPR_START) \ + cv_signal(&(cp)->cc_callb_cv); \ + } +#define CALLB_CPR_SAFE_END(cp, lockp) { \ + CALLB_CPR_ASSERT(cp) \ + while ((cp)->cc_events & CALLB_CPR_START) \ + cv_wait(&(cp)->cc_stop_cv, lockp); \ + (cp)->cc_events &= ~CALLB_CPR_SAFE; \ + } +/* + * cv_destroy is nop right now but may be needed in the future. + */ +#define CALLB_CPR_EXIT(cp) { \ + CALLB_CPR_ASSERT(cp) \ + (cp)->cc_events |= CALLB_CPR_SAFE; \ + if ((cp)->cc_events & CALLB_CPR_START) \ + cv_signal(&(cp)->cc_callb_cv); \ + mutex_exit((cp)->cc_lockp); \ + (void) callb_delete((cp)->cc_id); \ + cv_destroy(&(cp)->cc_callb_cv); \ + cv_destroy(&(cp)->cc_stop_cv); \ + } + +extern callb_cpr_t callb_cprinfo_safe; +extern void callb_init(void); +extern callb_id_t callb_add(boolean_t (*)(void *, int), void *, int, char *); +extern callb_id_t callb_add_thread(boolean_t (*)(void *, int), + void *, int, char *, kthread_id_t); +extern int callb_delete(callb_id_t); +extern void callb_execute(callb_id_t, int); +extern void *callb_execute_class(int, int); +extern boolean_t callb_generic_cpr(void *, int); +extern boolean_t callb_generic_cpr_safe(void *, int); +extern boolean_t callb_is_stopped(kthread_id_t, caddr_t *); +extern void callb_lock_table(void); +extern void callb_unlock_table(void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CALLB_H */ diff --git 
a/uts/common/sys/ccompile.h b/uts/common/sys/ccompile.h new file mode 100644 index 000000000000..c9857b086575 --- /dev/null +++ b/uts/common/sys/ccompile.h @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_CCOMPILE_H +#define _SYS_CCOMPILE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This file contains definitions designed to enable different compilers + * to be used harmoniously on Solaris systems. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Allow for version tests for compiler bugs and features. 
+ */ +#if defined(__GNUC__) +#define __GNUC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#else +#define __GNUC_VERSION 0 +#endif + +#if defined(__ATTRIBUTE_IMPLEMENTED) || defined(__GNUC__) + +/* + * analogous to lint's PRINTFLIKEn + */ +#define __sun_attr___PRINTFLIKE__(__n) \ + __attribute__((__format__(printf, __n, (__n)+1))) +#define __sun_attr___VPRINTFLIKE__(__n) \ + __attribute__((__format__(printf, __n, 0))) + +/* + * Handle the kernel printf routines that can take '%b' too + */ +#if __GNUC_VERSION < 30402 +/* + * XX64 at least this doesn't work correctly yet with 3.4.1 anyway! + */ +#define __sun_attr___KPRINTFLIKE__ __sun_attr___PRINTFLIKE__ +#define __sun_attr___KVPRINTFLIKE__ __sun_attr___VPRINTFLIKE__ +#else +#define __sun_attr___KPRINTFLIKE__(__n) \ + __attribute__((__format__(cmn_err, __n, (__n)+1))) +#define __sun_attr___KVPRINTFLIKE__(__n) \ + __attribute__((__format__(cmn_err, __n, 0))) +#endif + +/* + * This one's pretty obvious -- the function never returns + */ +#define __sun_attr___noreturn__ __attribute__((__noreturn__)) + + +/* + * This is an appropriate label for functions that do not + * modify their arguments, e.g. strlen() + */ +#define __sun_attr___pure__ __attribute__((__pure__)) + +/* + * This is a stronger form of __pure__. Can be used for functions + * that do not modify their arguments and don't depend on global + * memory. 
+ */ +#define __sun_attr___const__ __attribute__((__const__)) + +/* + * structure packing like #pragma pack(1) + */ +#define __sun_attr___packed__ __attribute__((__packed__)) + +#define ___sun_attr_inner(__a) __sun_attr_##__a +#define __sun_attr__(__a) ___sun_attr_inner __a + +#else /* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */ + +#define __sun_attr__(__a) + +#endif /* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */ + +/* + * Shorthand versions for readability + */ + +#define __PRINTFLIKE(__n) __sun_attr__((__PRINTFLIKE__(__n))) +#define __VPRINTFLIKE(__n) __sun_attr__((__VPRINTFLIKE__(__n))) +#define __KPRINTFLIKE(__n) __sun_attr__((__KPRINTFLIKE__(__n))) +#define __KVPRINTFLIKE(__n) __sun_attr__((__KVPRINTFLIKE__(__n))) +#define __NORETURN __sun_attr__((__noreturn__)) +#define __CONST __sun_attr__((__const__)) +#define __PURE __sun_attr__((__pure__)) + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CCOMPILE_H */ diff --git a/uts/common/sys/compress.h b/uts/common/sys/compress.h new file mode 100644 index 000000000000..3d79d9511092 --- /dev/null +++ b/uts/common/sys/compress.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1998 by Sun Microsystems, Inc. + * All rights reserved. + */ + +#ifndef _SYS_COMPRESS_H +#define _SYS_COMPRESS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern size_t compress(void *, void *, size_t); +extern size_t decompress(void *, void *, size_t, size_t); +extern uint32_t checksum32(void *, size_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_COMPRESS_H */ diff --git a/uts/common/sys/cpupart.h b/uts/common/sys/cpupart.h index b9e0da4e1993..b44dda5e8418 100644 --- a/uts/common/sys/cpupart.h +++ b/uts/common/sys/cpupart.h @@ -19,15 +19,12 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef _SYS_CPUPART_H #define _SYS_CPUPART_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/processor.h> #include <sys/cpuvar.h> @@ -58,16 +55,6 @@ typedef int cpupartid_t; #define CP_ALL 0 /* return all cpu partitions */ #define CP_NONEMPTY 1 /* return only non-empty ones */ -#if defined(_MACHDEP) -struct mach_cpupart { - cpuset_t mc_haltset; -}; - -extern struct mach_cpupart cp_default_mach; -#else -struct mach_cpupart; -#endif - typedef struct cpupart { disp_t cp_kp_queue; /* partition-wide kpreempt queue */ cpupartid_t cp_id; /* partition ID */ @@ -103,8 +90,7 @@ typedef struct cpupart { lgrp_gen_t cp_gen; /* generation number */ lgrp_id_t cp_lgrp_hint; /* last home lgroup chosen */ bitset_t cp_cmt_pgs; /* CMT PGs represented */ - - struct mach_cpupart *cp_mach; /* mach-specific */ + bitset_t cp_haltset; /* halted CPUs */ } cpupart_t; typedef struct cpupart_kstat { @@ -138,6 +124,15 @@ extern cpupart_t *cp_list_head; extern uint_t cp_numparts; extern uint_t cp_numparts_nonempty; +/* + * Each partition contains a bitset that indicates which CPUs are halted and + * which ones are running. Given the growing number of CPUs in current and + * future platforms, it's important to fanout each CPU within its partition's + * haltset to prevent contention due to false sharing. The fanout factor + * is platform specific, and declared accordingly. + */ +extern uint_t cp_haltset_fanout; + extern void cpupart_initialize_default(); extern cpupart_t *cpupart_find(psetid_t); extern int cpupart_create(psetid_t *); diff --git a/uts/common/sys/cpuvar.h b/uts/common/sys/cpuvar.h index cd0d0278662c..d4075d580be7 100644 --- a/uts/common/sys/cpuvar.h +++ b/uts/common/sys/cpuvar.h @@ -20,15 +20,12 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef _SYS_CPUVAR_H #define _SYS_CPUVAR_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/thread.h> #include <sys/sysinfo.h> /* has cpu_stat_t definition */ #include <sys/disp.h> @@ -170,7 +167,7 @@ typedef struct cpu { ftrace_data_t cpu_ftrace; /* per cpu ftrace data */ - clock_t cpu_deadman_lbolt; /* used by deadman() */ + clock_t cpu_deadman_counter; /* used by deadman() */ uint_t cpu_deadman_countdown; /* used by deadman() */ kmutex_t cpu_cpc_ctxlock; /* protects context for idle thread */ @@ -213,12 +210,27 @@ typedef struct cpu { uint64_t cpu_curr_clock; /* current clock freq in Hz */ char *cpu_supp_freqs; /* supported freqs in Hz */ + uintptr_t cpu_cpcprofile_pc; /* kernel PC in cpc interrupt */ + uintptr_t cpu_cpcprofile_upc; /* user PC in cpc interrupt */ + /* * Interrupt load factor used by dispatcher & softcall */ hrtime_t cpu_intrlast; /* total interrupt time (nsec) */ int cpu_intrload; /* interrupt load factor (0-99%) */ + uint_t cpu_rotor; /* for cheap pseudo-random numbers */ + + struct cu_cpu_info *cpu_cu_info; /* capacity & util. info */ + + /* + * cpu_generation is updated whenever CPU goes on-line or off-line. + * Updates to cpu_generation are protected by cpu_lock. + * + * See CPU_NEW_GENERATION() macro below. + */ + volatile uint_t cpu_generation; /* tracking on/off-line */ + /* * New members must be added /before/ this member, as the CTF tools * rely on this being the last field before cpu_m, so they can @@ -240,12 +252,13 @@ typedef struct cpu { * is up to the platform to assure that this is performed properly. Note that * the structure is sized to avoid false sharing. 
*/ -#define CPUC_SIZE (sizeof (uint16_t) + sizeof (uintptr_t) + \ - sizeof (kmutex_t)) +#define CPUC_SIZE (sizeof (uint16_t) + sizeof (uint8_t) + \ + sizeof (uintptr_t) + sizeof (kmutex_t)) #define CPUC_PADSIZE CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE typedef struct cpu_core { uint16_t cpuc_dtrace_flags; /* DTrace flags */ + uint8_t cpuc_dcpc_intr_state; /* DCPC provider intr state */ uint8_t cpuc_pad[CPUC_PADSIZE]; /* padding */ uintptr_t cpuc_dtrace_illval; /* DTrace illegal value */ kmutex_t cpuc_pid_lock; /* DTrace pid provider lock */ @@ -263,6 +276,28 @@ extern cpu_core_t cpu_core[]; */ #define CPU_ON_INTR(cpup) ((cpup)->cpu_intr_actv >> (LOCK_LEVEL + 1)) +/* + * Check to see if an interrupt thread might be active at a given ipl. + * If so return true. + * We must be conservative--it is ok to give a false yes, but a false no + * will cause disaster. (But if the situation changes after we check it is + * ok--the caller is trying to ensure that an interrupt routine has been + * exited). + * This is used when trying to remove an interrupt handler from an autovector + * list in avintr.c. + */ +#define INTR_ACTIVE(cpup, level) \ + ((level) <= LOCK_LEVEL ? \ + ((cpup)->cpu_intr_actv & (1 << (level))) : (CPU_ON_INTR(cpup))) + +/* + * CPU_PSEUDO_RANDOM() returns a per CPU value that changes each time one + * looks at it. It's meant as a cheap mechanism to be incorporated in routines + * wanting to avoid biasing, but where true randomness isn't needed (just + * something that changes). 
+ */ +#define CPU_PSEUDO_RANDOM() (CPU->cpu_rotor++) + #if defined(_KERNEL) || defined(_KMEMUSER) #define INTR_STACK_SIZE MAX(DEFAULTSTKSZ, PAGESIZE) @@ -354,7 +389,6 @@ extern cpu_core_t cpu_core[]; #define CPU_DISP_DONTSTEAL 0x01 /* CPU undergoing context swtch */ #define CPU_DISP_HALTED 0x02 /* CPU halted waiting for interrupt */ - #endif /* _KERNEL || _KMEMUSER */ #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) @@ -518,15 +552,18 @@ extern cpuset_t cpu_seqid_inuse; #if defined(_KERNEL) || defined(_KMEMUSER) extern struct cpu *cpu[]; /* indexed by CPU number */ +extern struct cpu **cpu_seq; /* indexed by sequential CPU id */ extern cpu_t *cpu_list; /* list of CPUs */ extern cpu_t *cpu_active; /* list of active CPUs */ extern int ncpus; /* number of CPUs present */ extern int ncpus_online; /* number of CPUs not quiesced */ extern int max_ncpus; /* max present before ncpus is known */ extern int boot_max_ncpus; /* like max_ncpus but for real */ +extern int boot_ncpus; /* # cpus present @ boot */ extern processorid_t max_cpuid; /* maximum CPU number */ extern struct cpu *cpu_inmotion; /* offline or partition move target */ extern cpu_t *clock_cpu_list; +extern processorid_t max_cpu_seqid_ever; /* maximum seqid ever given */ #if defined(__i386) || defined(__amd64) extern struct cpu *curcpup(void); @@ -570,6 +607,13 @@ extern struct cpu *curcpup(void); #define CPU_STATS(cp, stat) \ ((cp)->cpu_stats.stat) +/* + * Increment CPU generation value. + * This macro should be called whenever CPU goes on-line or off-line. + * Updates to cpu_generation should be protected by cpu_lock. 
+ */ +#define CPU_NEW_GENERATION(cp) ((cp)->cpu_generation++) + #endif /* _KERNEL || _KMEMUSER */ /* @@ -659,6 +703,7 @@ int cpu_get_state(cpu_t *); /* get current cpu state */ const char *cpu_get_state_str(cpu_t *); /* get current cpu state as string */ +void cpu_set_curr_clock(uint64_t); /* indicate the current CPU's freq */ void cpu_set_supp_freqs(cpu_t *, const char *); /* set the CPU supported */ /* frequencies */ @@ -698,6 +743,49 @@ void cpu_enable_intr(struct cpu *cp); /* start issuing interrupts to cpu */ */ extern kmutex_t cpu_lock; /* lock protecting CPU data */ +/* + * CPU state change events + * + * Various subsystems need to know when CPUs change their state. They get this + * information by registering CPU state change callbacks using + * register_cpu_setup_func(). Whenever any CPU changes its state, the callback + * function is called. The callback function is passed three arguments: + * + * Event, described by cpu_setup_t + * CPU ID + * Transparent pointer passed when registering the callback + * + * The callback function is called with cpu_lock held. The return value from the + * callback function is usually ignored, except for CPU_CONFIG and CPU_UNCONFIG + * events. For these two events, non-zero return value indicates a failure and + * prevents successful completion of the operation. + * + * New events may be added in the future. Callback functions should ignore any + * events that they do not understand. 
+ * + * The following events provide notification callbacks: + * + * CPU_INIT A new CPU is started and added to the list of active CPUs + * This event is only used during boot + * + * CPU_CONFIG A newly inserted CPU is prepared for starting running code + * This event is called by DR code + * + * CPU_UNCONFIG CPU has been powered off and needs cleanup + * This event is called by DR code + * + * CPU_ON CPU is enabled but does not run anything yet + * + * CPU_INTR_ON CPU is enabled and has interrupts enabled + * + * CPU_OFF CPU is going offline but can still run threads + * + * CPU_CPUPART_OUT CPU is going to move out of its partition + * + * CPU_CPUPART_IN CPU is going to move to a new partition + * + * CPU_SETUP CPU is set up during boot and can run threads + */ typedef enum { CPU_INIT, CPU_CONFIG, @@ -705,7 +793,9 @@ typedef enum { CPU_ON, CPU_OFF, CPU_CPUPART_IN, - CPU_CPUPART_OUT + CPU_CPUPART_OUT, + CPU_SETUP, + CPU_INTR_ON } cpu_setup_t; typedef int cpu_setup_func_t(cpu_setup_t, int, void *); @@ -719,12 +809,20 @@ extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *); extern void cpu_state_change_notify(int, cpu_setup_t); /* + * Call specified function on the given CPU + */ +typedef void (*cpu_call_func_t)(uintptr_t, uintptr_t); +extern void cpu_call(cpu_t *, cpu_call_func_t, uintptr_t, uintptr_t); + + +/* * Create various strings that describe the given CPU for the * processor_info system call and configuration-related kstats. 
*/ #define CPU_IDSTRLEN 100 extern void init_cpu_info(struct cpu *); +extern void populate_idstr(struct cpu *); extern void cpu_vm_data_init(struct cpu *); extern void cpu_vm_data_destroy(struct cpu *); diff --git a/uts/common/sys/cred.h b/uts/common/sys/cred.h new file mode 100644 index 000000000000..5056f9a51105 --- /dev/null +++ b/uts/common/sys/cred.h @@ -0,0 +1,193 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#ifndef _SYS_CRED_H +#define _SYS_CRED_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The credential is an opaque kernel private data structure defined in + * <sys/cred_impl.h>. 
+ */ + +typedef struct cred cred_t; + +#ifdef _KERNEL + +#define CRED() curthread->t_cred + +struct proc; /* cred.h is included in proc.h */ +struct prcred; +struct ksid; +struct ksidlist; +struct credklpd; +struct credgrp; + +struct auditinfo_addr; /* cred.h is included in audit.h */ + +extern int ngroups_max; +/* + * kcred is used when you need all privileges. + */ +extern struct cred *kcred; + +extern void cred_init(void); +extern void crhold(cred_t *); +extern void crfree(cred_t *); +extern cred_t *cralloc(void); /* all but ref uninitialized */ +extern cred_t *cralloc_ksid(void); /* cralloc() + ksid alloc'ed */ +extern cred_t *crget(void); /* initialized */ +extern cred_t *crcopy(cred_t *); +extern void crcopy_to(cred_t *, cred_t *); +extern cred_t *crdup(cred_t *); +extern void crdup_to(cred_t *, cred_t *); +extern cred_t *crgetcred(void); +extern void crset(struct proc *, cred_t *); +extern void crset_zone_privall(cred_t *); +extern int groupmember(gid_t, const cred_t *); +extern int supgroupmember(gid_t, const cred_t *); +extern int hasprocperm(const cred_t *, const cred_t *); +extern int prochasprocperm(struct proc *, struct proc *, const cred_t *); +extern int crcmp(const cred_t *, const cred_t *); +extern cred_t *zone_kcred(void); + +extern uid_t crgetuid(const cred_t *); +extern uid_t crgetruid(const cred_t *); +extern uid_t crgetsuid(const cred_t *); +extern gid_t crgetgid(const cred_t *); +extern gid_t crgetrgid(const cred_t *); +extern gid_t crgetsgid(const cred_t *); +extern zoneid_t crgetzoneid(const cred_t *); +extern projid_t crgetprojid(const cred_t *); + +extern cred_t *crgetmapped(const cred_t *); + + +extern const struct auditinfo_addr *crgetauinfo(const cred_t *); +extern struct auditinfo_addr *crgetauinfo_modifiable(cred_t *); + +extern uint_t crgetref(const cred_t *); + +extern const gid_t *crgetgroups(const cred_t *); +extern const gid_t *crgetggroups(const struct credgrp *); + +extern int crgetngroups(const cred_t *); + +/* + * Sets real, 
effective and/or saved uid/gid; + * -1 argument accepted as "no change". + */ +extern int crsetresuid(cred_t *, uid_t, uid_t, uid_t); +extern int crsetresgid(cred_t *, gid_t, gid_t, gid_t); + +/* + * Sets real, effective and saved uids/gids all to the same + * values. Both values must be non-negative and <= MAXUID + */ +extern int crsetugid(cred_t *, uid_t, gid_t); + +/* + * Functions to handle the supplemental group list. + */ +extern int crsetgroups(cred_t *, int, gid_t *); +extern struct credgrp *crgrpcopyin(int, gid_t *); +extern void crgrprele(struct credgrp *); +extern void crsetcredgrp(cred_t *, struct credgrp *); + +/* + * Private interface for setting zone association of credential. + */ +struct zone; +extern void crsetzone(cred_t *, struct zone *); +extern struct zone *crgetzone(const cred_t *); + +/* + * Private interface for setting project id in credential. + */ +extern void crsetprojid(cred_t *, projid_t); + +/* + * Private interface for nfs. + */ +extern cred_t *crnetadjust(cred_t *); + +/* + * Private interface for procfs. + */ +extern void cred2prcred(const cred_t *, struct prcred *); + +/* + * Private interfaces for Rampart Trusted Solaris. + */ +struct ts_label_s; +extern struct ts_label_s *crgetlabel(const cred_t *); +extern boolean_t crisremote(const cred_t *); + +/* + * Private interfaces for ephemeral uids. 
+ */ +#define VALID_UID(id, zn) \ + ((id) <= MAXUID || valid_ephemeral_uid((zn), (id))) + +#define VALID_GID(id, zn) \ + ((id) <= MAXUID || valid_ephemeral_gid((zn), (id))) + +extern boolean_t valid_ephemeral_uid(struct zone *, uid_t); +extern boolean_t valid_ephemeral_gid(struct zone *, gid_t); + +extern int eph_uid_alloc(struct zone *, int, uid_t *, int); +extern int eph_gid_alloc(struct zone *, int, gid_t *, int); + +extern void crsetsid(cred_t *, struct ksid *, int); +extern void crsetsidlist(cred_t *, struct ksidlist *); + +extern struct ksid *crgetsid(const cred_t *, int); +extern struct ksidlist *crgetsidlist(const cred_t *); + +extern int crsetpriv(cred_t *, ...); + +extern struct credklpd *crgetcrklpd(const cred_t *); +extern void crsetcrklpd(cred_t *, struct credklpd *); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CRED_H */ diff --git a/uts/common/sys/debug.h b/uts/common/sys/debug.h new file mode 100644 index 000000000000..4de39d255e71 --- /dev/null +++ b/uts/common/sys/debug.h @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#ifndef _SYS_DEBUG_H +#define _SYS_DEBUG_H + +#include <sys/isa_defs.h> +#include <sys/types.h> +#include <sys/note.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * ASSERT(ex) causes a panic or debugger entry if expression ex is not + * true. ASSERT() is included only for debugging, and is a no-op in + * production kernels. VERIFY(ex), on the other hand, behaves like + * ASSERT and is evaluated on both debug and non-debug kernels. + */ + +#if defined(__STDC__) +extern int assfail(const char *, const char *, int); +#define VERIFY(EX) ((void)((EX) || assfail(#EX, __FILE__, __LINE__))) +#if DEBUG +#define ASSERT(EX) ((void)((EX) || assfail(#EX, __FILE__, __LINE__))) +#else +#define ASSERT(x) ((void)0) +#endif +#else /* defined(__STDC__) */ +extern int assfail(); +#define VERIFY(EX) ((void)((EX) || assfail("EX", __FILE__, __LINE__))) +#if DEBUG +#define ASSERT(EX) ((void)((EX) || assfail("EX", __FILE__, __LINE__))) +#else +#define ASSERT(x) ((void)0) +#endif +#endif /* defined(__STDC__) */ + +/* + * Assertion variants sensitive to the compilation data model + */ +#if defined(_LP64) +#define ASSERT64(x) ASSERT(x) +#define ASSERT32(x) +#else +#define ASSERT64(x) +#define ASSERT32(x) ASSERT(x) +#endif + +/* + * IMPLY and EQUIV are assertions of the form: + * + * if (a) then (b) + * and + * if (a) then (b) *AND* if (b) then (a) + */ +#if DEBUG +#define IMPLY(A, B) \ + ((void)(((!(A)) || (B)) || \ + assfail("(" #A ") implies (" #B ")", __FILE__, __LINE__))) +#define EQUIV(A, B) \ + ((void)((!!(A) == !!(B)) || \ + assfail("(" #A ") is equivalent to (" #B ")", __FILE__, __LINE__))) +#else +#define IMPLY(A, B) ((void)0) +#define EQUIV(A, B) ((void)0) +#endif + +/* + * ASSERT3() behaves like ASSERT() except that it is an explicit conditional, + * and prints out the values of the left and right hand expressions as part of + * the panic message to ease debugging. 
The three variants imply the type + * of their arguments. ASSERT3S() is for signed data types, ASSERT3U() is + * for unsigned, and ASSERT3P() is for pointers. The VERIFY3*() macros + * have the same relationship as above. + */ +extern void assfail3(const char *, uintmax_t, const char *, uintmax_t, + const char *, int); +#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \ + const TYPE __left = (TYPE)(LEFT); \ + const TYPE __right = (TYPE)(RIGHT); \ + if (!(__left OP __right)) \ + assfail3(#LEFT " " #OP " " #RIGHT, \ + (uintmax_t)__left, #OP, (uintmax_t)__right, \ + __FILE__, __LINE__); \ +_NOTE(CONSTCOND) } while (0) + +#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t) +#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t) +#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t) +#if DEBUG +#define ASSERT3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t) +#define ASSERT3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t) +#define ASSERT3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t) +#else +#define ASSERT3S(x, y, z) ((void)0) +#define ASSERT3U(x, y, z) ((void)0) +#define ASSERT3P(x, y, z) ((void)0) +#endif + +#ifdef _KERNEL + +extern void abort_sequence_enter(char *); +extern void debug_enter(char *); + +#endif /* _KERNEL */ + +#if defined(DEBUG) && !defined(__sun) +/* CSTYLED */ +#define STATIC +#else +/* CSTYLED */ +#define STATIC static +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DEBUG_H */ diff --git a/uts/common/sys/dtrace.h b/uts/common/sys/dtrace.h index b6e52ec1c4da..007502d7d856 100644 --- a/uts/common/sys/dtrace.h +++ b/uts/common/sys/dtrace.h @@ -20,15 +20,13 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -664,6 +662,20 @@ typedef struct dof_sec { #define DOF_SECF_LOAD 1 /* section should be loaded */ +#define DOF_SEC_ISLOADABLE(x) \ + (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ + ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ + ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ + ((x) == DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ + ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ + ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ + ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ + ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ + ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ + ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ + ((x) == DOF_SECT_XLIMPORT) || ((x) == DOF_SECT_XLEXPORT) || \ + ((x) == DOF_SECT_PREXPORT) || ((x) == DOF_SECT_PRENOFFS)) + typedef struct dof_ecbdesc { dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ @@ -1382,7 +1394,7 @@ typedef struct dof_helper { * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), * below. * - * 1.4 void dtps_enable(void *arg, dtrace_id_t id, void *parg) + * 1.4 int dtps_enable(void *arg, dtrace_id_t id, void *parg) * * 1.4.1 Overview * @@ -1403,7 +1415,8 @@ typedef struct dof_helper { * * 1.4.3 Return value * - * None. + * On success, dtps_enable() should return 0. On failure, -1 should be + * returned. 
* * 1.4.4 Caller's context * @@ -1957,7 +1970,7 @@ typedef struct dof_helper { typedef struct dtrace_pops { void (*dtps_provide)(void *arg, const dtrace_probedesc_t *spec); void (*dtps_provide_module)(void *arg, struct modctl *mp); - void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); + int (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg); void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg); diff --git a/uts/common/sys/errorq.h b/uts/common/sys/errorq.h new file mode 100644 index 000000000000..971b19e6ccd8 --- /dev/null +++ b/uts/common/sys/errorq.h @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _ERRORQ_H +#define _ERRORQ_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/nvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct errorq errorq_t; +typedef struct errorq_elem errorq_elem_t; +typedef void (*errorq_func_t)(void *, const void *, const errorq_elem_t *); + +/* + * Public flags for errorq_create(): bit range 0-15 + */ +#define ERRORQ_VITAL 0x0001 /* drain queue automatically on system reset */ + +/* + * Public flags for errorq_dispatch(): + */ +#define ERRORQ_ASYNC 0 /* schedule async queue drain for caller */ +#define ERRORQ_SYNC 1 /* do not schedule drain; caller will drain */ + +#ifdef _KERNEL + +extern errorq_t *errorq_create(const char *, errorq_func_t, void *, + ulong_t, size_t, uint_t, uint_t); + +extern errorq_t *errorq_nvcreate(const char *, errorq_func_t, void *, + ulong_t, size_t, uint_t, uint_t); + +extern void errorq_destroy(errorq_t *); +extern void errorq_dispatch(errorq_t *, const void *, size_t, uint_t); +extern void errorq_drain(errorq_t *); +extern void errorq_init(void); +extern void errorq_panic(void); +extern errorq_elem_t *errorq_reserve(errorq_t *); +extern void errorq_commit(errorq_t *, errorq_elem_t *, uint_t); +extern void errorq_cancel(errorq_t *, errorq_elem_t *); +extern nvlist_t *errorq_elem_nvl(errorq_t *, const errorq_elem_t *); +extern nv_alloc_t *errorq_elem_nva(errorq_t *, const errorq_elem_t *); +extern void *errorq_elem_dup(errorq_t *, const errorq_elem_t *, + errorq_elem_t **); +extern void errorq_dump(); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ERRORQ_H */ diff --git a/uts/common/sys/extdirent.h b/uts/common/sys/extdirent.h new file mode 100644 index 000000000000..3f9a665f0076 --- /dev/null +++ b/uts/common/sys/extdirent.h @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_EXTDIRENT_H +#define _SYS_EXTDIRENT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +#if defined(_KERNEL) + +/* + * Extended file-system independent directory entry. This style of + * dirent provides additional informational flag bits for each + * directory entry. This dirent will be returned instead of the + * standard dirent if a VOP_READDIR() requests dirent flags via + * V_RDDIR_ENTFLAGS, and if the file system supports the flags. + */ +typedef struct edirent { + ino64_t ed_ino; /* "inode number" of entry */ + off64_t ed_off; /* offset of disk directory entry */ + uint32_t ed_eflags; /* per-entry flags */ + unsigned short ed_reclen; /* length of this record */ + char ed_name[1]; /* name of file */ +} edirent_t; + +#define EDIRENT_RECLEN(namelen) \ + ((offsetof(edirent_t, ed_name[0]) + 1 + (namelen) + 7) & ~ 7) +#define EDIRENT_NAMELEN(reclen) \ + ((reclen) - (offsetof(edirent_t, ed_name[0]))) + +/* + * Extended entry flags + * Extended entries include a bitfield of extra information + * regarding that entry. 
+ */ +#define ED_CASE_CONFLICT 0x10 /* Disregarding case, entry is not unique */ + +/* + * Extended flags accessor function + */ +#define ED_CASE_CONFLICTS(x) ((x)->ed_eflags & ED_CASE_CONFLICT) + +#endif /* defined(_KERNEL) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_EXTDIRENT_H */ diff --git a/uts/common/sys/feature_tests.h b/uts/common/sys/feature_tests.h new file mode 100644 index 000000000000..43339a83cd7f --- /dev/null +++ b/uts/common/sys/feature_tests.h @@ -0,0 +1,396 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FEATURE_TESTS_H +#define _SYS_FEATURE_TESTS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ccompile.h> +#include <sys/isa_defs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Values of _POSIX_C_SOURCE + * + * undefined not a POSIX compilation + * 1 POSIX.1-1990 compilation + * 2 POSIX.2-1992 compilation + * 199309L POSIX.1b-1993 compilation (Real Time) + * 199506L POSIX.1c-1995 compilation (POSIX Threads) + * 200112L POSIX.1-2001 compilation (Austin Group Revision) + */ +#if defined(_POSIX_SOURCE) && !defined(_POSIX_C_SOURCE) +#define _POSIX_C_SOURCE 1 +#endif + +/* + * The feature test macros __XOPEN_OR_POSIX, _STRICT_STDC, and _STDC_C99 + * are Sun implementation specific macros created in order to compress + * common standards specified feature test macros for easier reading. + * These macros should not be used by the application developer as + * unexpected results may occur. Instead, the user should reference + * standards(5) for correct usage of the standards feature test macros. + * + * __XOPEN_OR_POSIX Used in cases where a symbol is defined by either + * X/Open or POSIX or, in the negative, when neither + * X/Open nor POSIX defines a symbol. + * + * _STRICT_STDC __STDC__ is specified by the C Standards and defined + * by the compiler. For Sun compilers the value of + * __STDC__ is either 1, 0, or not defined based on the + * compilation mode (see cc(1)). When the value of + * __STDC__ is 1 and in the absence of any other feature + * test macros, the namespace available to the application + * is limited to only those symbols defined by the C + * Standard. _STRICT_STDC provides a more readable means + * of identifying symbols defined by the standard, or in + * the negative, symbols that are extensions to the C + * Standard. See additional comments for GNU C differences. + * + * _STDC_C99 __STDC_VERSION__ is specified by the C standards and + * defined by the compiler and indicates the version of + * the C standard. 
A value of 199901L indicates a + * compiler that complies with ISO/IEC 9899:1999, other- + * wise known as the C99 standard. + */ + +#if defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE) +#define __XOPEN_OR_POSIX +#endif + +/* + * ISO/IEC 9899:1990 and its revision, ISO/IEC 9899:1999 specify the + * following predefined macro name: + * + * __STDC__ The integer constant 1, intended to indicate a conforming + * implementation. + * + * Furthermore, a strictly conforming program shall use only those features + * of the language and library specified in these standards. A conforming + * implementation shall accept any strictly conforming program. + * + * Based on these requirements, Sun's C compiler defines __STDC__ to 1 for + * strictly conforming environments and __STDC__ to 0 for environments that + * use ANSI C semantics but allow extensions to the C standard. For non-ANSI + * C semantics, Sun's C compiler does not define __STDC__. + * + * The GNU C project interpretation is that __STDC__ should always be defined + * to 1 for compilation modes that accept ANSI C syntax regardless of whether + * or not extensions to the C standard are used. Violations of conforming + * behavior are conditionally flagged as warnings via the use of the + * -pedantic option. In addition to defining __STDC__ to 1, the GNU C + * compiler also defines __STRICT_ANSI__ as a means of specifying strictly + * conforming environments using the -ansi or -std=<standard> options. + * + * In the absence of any other compiler options, Sun and GNU set the value + * of __STDC__ as follows when using the following options: + * + * Value of __STDC__ __STRICT_ANSI__ + * + * cc -Xa (default) 0 undefined + * cc -Xt (transitional) 0 undefined + * cc -Xc (strictly conforming) 1 undefined + * cc -Xs (K&R C) undefined undefined + * + * gcc (default) 1 undefined + * gcc -ansi, -std={c89, c99,...} 
1 defined + * gcc -traditional (K&R) undefined undefined + * + * The default compilation modes for Sun C compilers versus GNU C compilers + * result in a differing value for __STDC__ which results in a more + * restricted namespace when using Sun compilers. To allow both GNU and Sun + * interpretations to peacefully co-exist, we use the following Sun + * implementation _STRICT_STDC macro: + */ + +#if (__STDC__ - 0 == 1 && !defined(__GNUC__)) || \ + (defined(__GNUC__) && defined(__STRICT_ANSI__)) +#define _STRICT_STDC +#else +#undef _STRICT_STDC +#endif + +/* + * Compiler complies with ISO/IEC 9899:1999 + */ + +#if __STDC_VERSION__ - 0 >= 199901L +#define _STDC_C99 +#endif + +/* + * Large file interfaces: + * + * _LARGEFILE_SOURCE + * 1 large file-related additions to POSIX + * interfaces requested (fseeko, etc.) + * _LARGEFILE64_SOURCE + * 1 transitional large-file-related interfaces + * requested (seek64, stat64, etc.) + * + * The corresponding announcement macros are respectively: + * _LFS_LARGEFILE + * _LFS64_LARGEFILE + * (These are set in <unistd.h>.) + * + * Requesting _LARGEFILE64_SOURCE implies requesting _LARGEFILE_SOURCE as + * well. + * + * The large file interfaces are made visible regardless of the initial values + * of the feature test macros under certain circumstances: + * - If no explicit standards-conforming environment is requested (neither + * of _POSIX_SOURCE nor _XOPEN_SOURCE is defined and the value of + * __STDC__ does not imply standards conformance). + * - Extended system interfaces are explicitly requested (__EXTENSIONS__ + * is defined). + * - Access to in-kernel interfaces is requested (_KERNEL or _KMEMUSER is + * defined). (Note that this dependency is an artifact of the current + * kernel implementation and may change in future releases.) 
+ */ +#if (!defined(_STRICT_STDC) && !defined(__XOPEN_OR_POSIX)) || \ + defined(_KERNEL) || defined(_KMEMUSER) || \ + defined(__EXTENSIONS__) +#undef _LARGEFILE64_SOURCE +#define _LARGEFILE64_SOURCE 1 +#endif +#if _LARGEFILE64_SOURCE - 0 == 1 +#undef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE 1 +#endif + +/* + * Large file compilation environment control: + * + * The setting of _FILE_OFFSET_BITS controls the size of various file-related + * types and governs the mapping between file-related source function symbol + * names and the corresponding binary entry points. + * + * In the 32-bit environment, the default value is 32; if not set, set it to + * the default here, to simplify tests in other headers. + * + * In the 64-bit compilation environment, the only value allowed is 64. + */ +#if defined(_LP64) +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif +#if _FILE_OFFSET_BITS - 0 != 64 +#error "invalid _FILE_OFFSET_BITS value specified" +#endif +#else /* _LP64 */ +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 32 +#endif +#if _FILE_OFFSET_BITS - 0 != 32 && _FILE_OFFSET_BITS - 0 != 64 +#error "invalid _FILE_OFFSET_BITS value specified" +#endif +#endif /* _LP64 */ + +/* + * Use of _XOPEN_SOURCE + * + * The following X/Open specifications are supported: + * + * X/Open Portability Guide, Issue 3 (XPG3) + * X/Open CAE Specification, Issue 4 (XPG4) + * X/Open CAE Specification, Issue 4, Version 2 (XPG4v2) + * X/Open CAE Specification, Issue 5 (XPG5) + * Open Group Technical Standard, Issue 6 (XPG6), also referred to as + * IEEE Std. 1003.1-2001 and ISO/IEC 9945:2002. + * + * XPG4v2 is also referred to as UNIX 95 (SUS or SUSv1). + * XPG5 is also referred to as UNIX 98 or the Single Unix Specification, + * Version 2 (SUSv2) + * XPG6 is the result of a merge of the X/Open and POSIX specifications + * and as such is also referred to as IEEE Std. 1003.1-2001 in + * addition to UNIX 03 and SUSv3. 
+ * + * When writing a conforming X/Open application, as per the specification + * requirements, the appropriate feature test macros must be defined at + * compile time. These are as follows. For more info, see standards(5). + * + * Feature Test Macro Specification + * ------------------------------------------------ ------------- + * _XOPEN_SOURCE XPG3 + * _XOPEN_SOURCE && _XOPEN_VERSION = 4 XPG4 + * _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED = 1 XPG4v2 + * _XOPEN_SOURCE = 500 XPG5 + * _XOPEN_SOURCE = 600 (or _POSIX_C_SOURCE=200112L) XPG6 + * + * In order to simplify the guards within the headers, the following + * implementation private test macros have been created. Applications + * must NOT use these private test macros as unexpected results will + * occur. + * + * Note that in general, the use of these private macros is cumulative. + * For example, the use of _XPG3 with no other restrictions on the X/Open + * namespace will make the symbols visible for XPG3 through XPG6 + * compilation environments. The use of _XPG4_2 with no other X/Open + * namespace restrictions indicates that the symbols were introduced in + * XPG4v2 and are therefore visible for XPG4v2 through XPG6 compilation + * environments, but not for XPG3 or XPG4 compilation environments. 
+ * + * _XPG3 X/Open Portability Guide, Issue 3 (XPG3) + * _XPG4 X/Open CAE Specification, Issue 4 (XPG4) + * _XPG4_2 X/Open CAE Specification, Issue 4, Version 2 (XPG4v2/UNIX 95/SUS) + * _XPG5 X/Open CAE Specification, Issue 5 (XPG5/UNIX 98/SUSv2) + * _XPG6 Open Group Technical Standard, Issue 6 (XPG6/UNIX 03/SUSv3) + */ + +/* X/Open Portability Guide, Issue 3 */ +#if defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE - 0 < 500) && \ + (_XOPEN_VERSION - 0 < 4) && !defined(_XOPEN_SOURCE_EXTENDED) +#define _XPG3 +/* X/Open CAE Specification, Issue 4 */ +#elif (defined(_XOPEN_SOURCE) && _XOPEN_VERSION - 0 == 4) +#define _XPG4 +#define _XPG3 +/* X/Open CAE Specification, Issue 4, Version 2 */ +#elif (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE_EXTENDED - 0 == 1) +#define _XPG4_2 +#define _XPG4 +#define _XPG3 +/* X/Open CAE Specification, Issue 5 */ +#elif (_XOPEN_SOURCE - 0 == 500) +#define _XPG5 +#define _XPG4_2 +#define _XPG4 +#define _XPG3 +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199506L +/* Open Group Technical Standard , Issue 6 */ +#elif (_XOPEN_SOURCE - 0 == 600) || (_POSIX_C_SOURCE - 0 == 200112L) +#define _XPG6 +#define _XPG5 +#define _XPG4_2 +#define _XPG4 +#define _XPG3 +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200112L +#undef _XOPEN_SOURCE +#define _XOPEN_SOURCE 600 +#endif + +/* + * _XOPEN_VERSION is defined by the X/Open specifications and is not + * normally defined by the application, except in the case of an XPG4 + * application. On the implementation side, _XOPEN_VERSION defined with + * the value of 3 indicates an XPG3 application. _XOPEN_VERSION defined + * with the value of 4 indicates an XPG4 or XPG4v2 (UNIX 95) application. + * _XOPEN_VERSION defined with a value of 500 indicates an XPG5 (UNIX 98) + * application and with a value of 600 indicates an XPG6 (UNIX 03) + * application. The appropriate version is determined by the use of the + * feature test macros described earlier. 
The value of _XOPEN_VERSION + * defaults to 3 otherwise indicating support for XPG3 applications. + */ +#ifndef _XOPEN_VERSION +#ifdef _XPG6 +#define _XOPEN_VERSION 600 +#elif defined(_XPG5) +#define _XOPEN_VERSION 500 +#elif defined(_XPG4_2) +#define _XOPEN_VERSION 4 +#else +#define _XOPEN_VERSION 3 +#endif +#endif + +/* + * ANSI C and ISO 9899:1990 say the type long long doesn't exist in strictly + * conforming environments. ISO 9899:1999 says it does. + * + * The presence of _LONGLONG_TYPE says "long long exists" which is therefore + * defined in all but strictly conforming environments that disallow it. + */ +#if !defined(_STDC_C99) && defined(_STRICT_STDC) && !defined(__GNUC__) +/* + * Resist attempts to force the definition of long long in this case. + */ +#if defined(_LONGLONG_TYPE) +#error "No long long in strictly conforming ANSI C & 1990 ISO C environments" +#endif +#else +#if !defined(_LONGLONG_TYPE) +#define _LONGLONG_TYPE +#endif +#endif + +/* + * It is invalid to compile an XPG3, XPG4, XPG4v2, or XPG5 application + * using c99. The same is true for POSIX.1-1990, POSIX.2-1992, POSIX.1b, + * and POSIX.1c applications. Likewise, it is invalid to compile an XPG6 + * or a POSIX.1-2001 application with anything other than a c99 or later + * compiler. Therefore, we force an error in both cases. + */ +#if defined(_STDC_C99) && (defined(__XOPEN_OR_POSIX) && !defined(_XPG6)) +#error "Compiler or options invalid for pre-UNIX 03 X/Open applications \ + and pre-2001 POSIX applications" +#elif !defined(_STDC_C99) && \ + (defined(__XOPEN_OR_POSIX) && defined(_XPG6)) +#error "Compiler or options invalid; UNIX 03 and POSIX.1-2001 applications \ + require the use of c99" +#endif + +/* + * The following macro defines a value for the ISO C99 restrict + * keyword so that _RESTRICT_KYWD resolves to "restrict" if + * an ISO C99 compiler is used and "" (null string) if any other + * compiler is used. 
This allows for the use of single prototype + * declarations regardless of compiler version. + */ +#if (defined(__STDC__) && defined(_STDC_C99)) +#define _RESTRICT_KYWD restrict +#else +#define _RESTRICT_KYWD +#endif + +/* + * The following macro indicates header support for the ANSI C++ + * standard. The ISO/IEC designation for this is ISO/IEC FDIS 14882. + */ +#define _ISO_CPP_14882_1998 + +/* + * The following macro indicates header support for the C99 standard, + * ISO/IEC 9899:1999, Programming Languages - C. + */ +#define _ISO_C_9899_1999 + +/* + * The following macro indicates header support for DTrace. The value is an + * integer that corresponds to the major version number for DTrace. + */ +#define _DTRACE_VERSION 1 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FEATURE_TESTS_H */ diff --git a/uts/common/sys/fm/fs/zfs.h b/uts/common/sys/fm/fs/zfs.h new file mode 100644 index 000000000000..c752edc99bbd --- /dev/null +++ b/uts/common/sys/fm/fs/zfs.h @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FM_FS_ZFS_H +#define _SYS_FM_FS_ZFS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZFS_ERROR_CLASS "fs.zfs" + +#define FM_EREPORT_ZFS_CHECKSUM "checksum" +#define FM_EREPORT_ZFS_IO "io" +#define FM_EREPORT_ZFS_DATA "data" +#define FM_EREPORT_ZFS_POOL "zpool" +#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" +#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" +#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data" +#define FM_EREPORT_ZFS_DEVICE_NO_REPLICAS "vdev.no_replicas" +#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum" +#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small" +#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label" +#define FM_EREPORT_ZFS_IO_FAILURE "io_failure" +#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" +#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" + +#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" +#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" +#define FM_EREPORT_PAYLOAD_ZFS_POOL_GUID "pool_guid" +#define FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT "pool_context" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID "vdev_guid" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru" +#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" +#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" +#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path" +#define FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID "parent_devid" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET "zio_objset" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT "zio_object" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL "zio_level" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID "zio_blkid" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR "zio_err" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size" +#define 
FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state" +#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected" +#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual" +#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm" +#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS "bad_range_sets" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram" +#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram" + +#define FM_EREPORT_FAILMODE_WAIT "wait" +#define FM_EREPORT_FAILMODE_CONTINUE "continue" +#define FM_EREPORT_FAILMODE_PANIC "panic" + +#define FM_RESOURCE_REMOVED "removed" +#define FM_RESOURCE_AUTOREPLACE "autoreplace" +#define FM_RESOURCE_STATECHANGE "statechange" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FM_FS_ZFS_H */ diff --git a/uts/common/sys/fm/protocol.h b/uts/common/sys/fm/protocol.h new file mode 100644 index 000000000000..5eca760dadc5 --- /dev/null +++ b/uts/common/sys/fm/protocol.h @@ -0,0 +1,371 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FM_PROTOCOL_H +#define _SYS_FM_PROTOCOL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL +#include <sys/varargs.h> +#include <sys/nvpair.h> +#else +#include <libnvpair.h> +#include <stdarg.h> +#endif +#include <sys/processor.h> + +/* FM common member names */ +#define FM_CLASS "class" +#define FM_VERSION "version" + +/* FM protocol category 1 class names */ +#define FM_EREPORT_CLASS "ereport" +#define FM_FAULT_CLASS "fault" +#define FM_DEFECT_CLASS "defect" +#define FM_RSRC_CLASS "resource" +#define FM_LIST_EVENT "list" +#define FM_IREPORT_CLASS "ireport" + +/* FM list.* event class values */ +#define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect" +#define FM_LIST_ISOLATED_CLASS FM_LIST_EVENT ".isolated" +#define FM_LIST_REPAIRED_CLASS FM_LIST_EVENT ".repaired" +#define FM_LIST_UPDATED_CLASS FM_LIST_EVENT ".updated" +#define FM_LIST_RESOLVED_CLASS FM_LIST_EVENT ".resolved" + +/* ereport class subcategory values */ +#define FM_ERROR_CPU "cpu" +#define FM_ERROR_IO "io" + +/* ereport version and payload member names */ +#define FM_EREPORT_VERS0 0 +#define FM_EREPORT_VERSION FM_EREPORT_VERS0 + +/* ereport payload member names */ +#define FM_EREPORT_DETECTOR "detector" +#define FM_EREPORT_ENA "ena" + +/* list.* event payload member names */ +#define FM_LIST_EVENT_SIZE "list-sz" + +/* ireport.* event payload member names */ +#define FM_IREPORT_DETECTOR "detector" +#define FM_IREPORT_UUID "uuid" +#define FM_IREPORT_PRIORITY "pri" +#define FM_IREPORT_ATTRIBUTES "attr" + +/* + * list.suspect, isolated, updated, repaired and resolved + * versions/payload member names. 
+ */ +#define FM_SUSPECT_UUID "uuid" +#define FM_SUSPECT_DIAG_CODE "code" +#define FM_SUSPECT_DIAG_TIME "diag-time" +#define FM_SUSPECT_DE "de" +#define FM_SUSPECT_FAULT_LIST "fault-list" +#define FM_SUSPECT_FAULT_SZ "fault-list-sz" +#define FM_SUSPECT_FAULT_STATUS "fault-status" +#define FM_SUSPECT_INJECTED "__injected" +#define FM_SUSPECT_MESSAGE "message" +#define FM_SUSPECT_RETIRE "retire" +#define FM_SUSPECT_RESPONSE "response" +#define FM_SUSPECT_SEVERITY "severity" + +#define FM_SUSPECT_VERS0 0 +#define FM_SUSPECT_VERSION FM_SUSPECT_VERS0 + +#define FM_SUSPECT_FAULTY 0x1 +#define FM_SUSPECT_UNUSABLE 0x2 +#define FM_SUSPECT_NOT_PRESENT 0x4 +#define FM_SUSPECT_DEGRADED 0x8 +#define FM_SUSPECT_REPAIRED 0x10 +#define FM_SUSPECT_REPLACED 0x20 +#define FM_SUSPECT_ACQUITTED 0x40 + +/* fault event versions and payload member names */ +#define FM_FAULT_VERS0 0 +#define FM_FAULT_VERSION FM_FAULT_VERS0 + +#define FM_FAULT_ASRU "asru" +#define FM_FAULT_FRU "fru" +#define FM_FAULT_FRU_LABEL "fru-label" +#define FM_FAULT_CERTAINTY "certainty" +#define FM_FAULT_RESOURCE "resource" +#define FM_FAULT_LOCATION "location" + +/* resource event versions and payload member names */ +#define FM_RSRC_VERS0 0 +#define FM_RSRC_VERSION FM_RSRC_VERS0 +#define FM_RSRC_RESOURCE "resource" + +/* resource.fm.asru.* payload member names */ +#define FM_RSRC_ASRU_UUID "uuid" +#define FM_RSRC_ASRU_CODE "code" +#define FM_RSRC_ASRU_FAULTY "faulty" +#define FM_RSRC_ASRU_REPAIRED "repaired" +#define FM_RSRC_ASRU_REPLACED "replaced" +#define FM_RSRC_ASRU_ACQUITTED "acquitted" +#define FM_RSRC_ASRU_RESOLVED "resolved" +#define FM_RSRC_ASRU_UNUSABLE "unusable" +#define FM_RSRC_ASRU_EVENT "event" + +/* resource.fm.xprt.* versions and payload member names */ +#define FM_RSRC_XPRT_VERS0 0 +#define FM_RSRC_XPRT_VERSION FM_RSRC_XPRT_VERS0 +#define FM_RSRC_XPRT_UUID "uuid" +#define FM_RSRC_XPRT_SUBCLASS "subclass" +#define FM_RSRC_XPRT_FAULT_STATUS "fault-status" +#define FM_RSRC_XPRT_FAULT_HAS_ASRU 
"fault-has-asru" + +/* + * FM ENA Format Macros + */ +#define ENA_FORMAT_MASK 0x3 +#define ENA_FORMAT(ena) ((ena) & ENA_FORMAT_MASK) + +/* ENA format types */ +#define FM_ENA_FMT0 0 +#define FM_ENA_FMT1 1 +#define FM_ENA_FMT2 2 + +/* Format 1 */ +#define ENA_FMT1_GEN_MASK 0x00000000000003FCull +#define ENA_FMT1_ID_MASK 0xFFFFFFFFFFFFFC00ull +#define ENA_FMT1_CPUID_MASK 0x00000000000FFC00ull +#define ENA_FMT1_TIME_MASK 0xFFFFFFFFFFF00000ull +#define ENA_FMT1_GEN_SHFT 2 +#define ENA_FMT1_ID_SHFT 10 +#define ENA_FMT1_CPUID_SHFT ENA_FMT1_ID_SHFT +#define ENA_FMT1_TIME_SHFT 20 + +/* Format 2 */ +#define ENA_FMT2_GEN_MASK 0x00000000000003FCull +#define ENA_FMT2_ID_MASK 0xFFFFFFFFFFFFFC00ull +#define ENA_FMT2_TIME_MASK ENA_FMT2_ID_MASK +#define ENA_FMT2_GEN_SHFT 2 +#define ENA_FMT2_ID_SHFT 10 +#define ENA_FMT2_TIME_SHFT ENA_FMT2_ID_SHFT + +/* Common FMRI type names */ +#define FM_FMRI_AUTHORITY "authority" +#define FM_FMRI_SCHEME "scheme" +#define FM_FMRI_SVC_AUTHORITY "svc-authority" +#define FM_FMRI_FACILITY "facility" + +/* FMRI authority-type member names */ +#define FM_FMRI_AUTH_CHASSIS "chassis-id" +#define FM_FMRI_AUTH_PRODUCT_SN "product-sn" +#define FM_FMRI_AUTH_PRODUCT "product-id" +#define FM_FMRI_AUTH_DOMAIN "domain-id" +#define FM_FMRI_AUTH_SERVER "server-id" +#define FM_FMRI_AUTH_HOST "host-id" + +#define FM_AUTH_VERS0 0 +#define FM_FMRI_AUTH_VERSION FM_AUTH_VERS0 + +/* scheme name values */ +#define FM_FMRI_SCHEME_FMD "fmd" +#define FM_FMRI_SCHEME_DEV "dev" +#define FM_FMRI_SCHEME_HC "hc" +#define FM_FMRI_SCHEME_SVC "svc" +#define FM_FMRI_SCHEME_CPU "cpu" +#define FM_FMRI_SCHEME_MEM "mem" +#define FM_FMRI_SCHEME_MOD "mod" +#define FM_FMRI_SCHEME_PKG "pkg" +#define FM_FMRI_SCHEME_LEGACY "legacy-hc" +#define FM_FMRI_SCHEME_ZFS "zfs" +#define FM_FMRI_SCHEME_SW "sw" + +/* Scheme versions */ +#define FMD_SCHEME_VERSION0 0 +#define FM_FMD_SCHEME_VERSION FMD_SCHEME_VERSION0 +#define DEV_SCHEME_VERSION0 0 +#define FM_DEV_SCHEME_VERSION DEV_SCHEME_VERSION0 +#define 
FM_HC_VERS0 0 +#define FM_HC_SCHEME_VERSION FM_HC_VERS0 +#define CPU_SCHEME_VERSION0 0 +#define CPU_SCHEME_VERSION1 1 +#define FM_CPU_SCHEME_VERSION CPU_SCHEME_VERSION1 +#define MEM_SCHEME_VERSION0 0 +#define FM_MEM_SCHEME_VERSION MEM_SCHEME_VERSION0 +#define MOD_SCHEME_VERSION0 0 +#define FM_MOD_SCHEME_VERSION MOD_SCHEME_VERSION0 +#define PKG_SCHEME_VERSION0 0 +#define FM_PKG_SCHEME_VERSION PKG_SCHEME_VERSION0 +#define LEGACY_SCHEME_VERSION0 0 +#define FM_LEGACY_SCHEME_VERSION LEGACY_SCHEME_VERSION0 +#define SVC_SCHEME_VERSION0 0 +#define FM_SVC_SCHEME_VERSION SVC_SCHEME_VERSION0 +#define ZFS_SCHEME_VERSION0 0 +#define FM_ZFS_SCHEME_VERSION ZFS_SCHEME_VERSION0 +#define SW_SCHEME_VERSION0 0 +#define FM_SW_SCHEME_VERSION SW_SCHEME_VERSION0 + +/* hc scheme member names */ +#define FM_FMRI_HC_SERIAL_ID "serial" +#define FM_FMRI_HC_PART "part" +#define FM_FMRI_HC_REVISION "revision" +#define FM_FMRI_HC_ROOT "hc-root" +#define FM_FMRI_HC_LIST_SZ "hc-list-sz" +#define FM_FMRI_HC_LIST "hc-list" +#define FM_FMRI_HC_SPECIFIC "hc-specific" + +/* facility member names */ +#define FM_FMRI_FACILITY_NAME "facility-name" +#define FM_FMRI_FACILITY_TYPE "facility-type" + +/* hc-list version and member names */ +#define FM_FMRI_HC_NAME "hc-name" +#define FM_FMRI_HC_ID "hc-id" + +#define HC_LIST_VERSION0 0 +#define FM_HC_LIST_VERSION HC_LIST_VERSION0 + +/* hc-specific member names */ +#define FM_FMRI_HC_SPECIFIC_OFFSET "offset" +#define FM_FMRI_HC_SPECIFIC_PHYSADDR "physaddr" + +/* fmd module scheme member names */ +#define FM_FMRI_FMD_NAME "mod-name" +#define FM_FMRI_FMD_VERSION "mod-version" + +/* dev scheme member names */ +#define FM_FMRI_DEV_ID "devid" +#define FM_FMRI_DEV_TGTPTLUN0 "target-port-l0id" +#define FM_FMRI_DEV_PATH "device-path" + +/* pkg scheme member names */ +#define FM_FMRI_PKG_BASEDIR "pkg-basedir" +#define FM_FMRI_PKG_INST "pkg-inst" +#define FM_FMRI_PKG_VERSION "pkg-version" + +/* svc scheme member names */ +#define FM_FMRI_SVC_NAME "svc-name" +#define 
FM_FMRI_SVC_INSTANCE "svc-instance" +#define FM_FMRI_SVC_CONTRACT_ID "svc-contract-id" + +/* svc-authority member names */ +#define FM_FMRI_SVC_AUTH_SCOPE "scope" +#define FM_FMRI_SVC_AUTH_SYSTEM_FQN "system-fqn" + +/* cpu scheme member names */ +#define FM_FMRI_CPU_ID "cpuid" +#define FM_FMRI_CPU_SERIAL_ID "serial" +#define FM_FMRI_CPU_MASK "cpumask" +#define FM_FMRI_CPU_VID "cpuvid" +#define FM_FMRI_CPU_CPUFRU "cpufru" +#define FM_FMRI_CPU_CACHE_INDEX "cacheindex" +#define FM_FMRI_CPU_CACHE_WAY "cacheway" +#define FM_FMRI_CPU_CACHE_BIT "cachebit" +#define FM_FMRI_CPU_CACHE_TYPE "cachetype" + +#define FM_FMRI_CPU_CACHE_TYPE_L2 0 +#define FM_FMRI_CPU_CACHE_TYPE_L3 1 + +/* legacy-hc scheme member names */ +#define FM_FMRI_LEGACY_HC "component" +#define FM_FMRI_LEGACY_HC_PREFIX FM_FMRI_SCHEME_HC":///" \ + FM_FMRI_LEGACY_HC"=" + +/* mem scheme member names */ +#define FM_FMRI_MEM_UNUM "unum" +#define FM_FMRI_MEM_SERIAL_ID "serial" +#define FM_FMRI_MEM_PHYSADDR "physaddr" +#define FM_FMRI_MEM_MEMCONFIG "memconfig" +#define FM_FMRI_MEM_OFFSET "offset" + +/* mod scheme member names */ +#define FM_FMRI_MOD_PKG "mod-pkg" +#define FM_FMRI_MOD_NAME "mod-name" +#define FM_FMRI_MOD_ID "mod-id" +#define FM_FMRI_MOD_DESC "mod-desc" + +/* zfs scheme member names */ +#define FM_FMRI_ZFS_POOL "pool" +#define FM_FMRI_ZFS_VDEV "vdev" + +/* sw scheme member names - extra indentation for members of an nvlist */ +#define FM_FMRI_SW_OBJ "object" +#define FM_FMRI_SW_OBJ_PATH "path" +#define FM_FMRI_SW_OBJ_ROOT "root" +#define FM_FMRI_SW_OBJ_PKG "pkg" +#define FM_FMRI_SW_SITE "site" +#define FM_FMRI_SW_SITE_TOKEN "token" +#define FM_FMRI_SW_SITE_MODULE "module" +#define FM_FMRI_SW_SITE_FILE "file" +#define FM_FMRI_SW_SITE_LINE "line" +#define FM_FMRI_SW_SITE_FUNC "func" +#define FM_FMRI_SW_CTXT "context" +#define FM_FMRI_SW_CTXT_ORIGIN "origin" +#define FM_FMRI_SW_CTXT_EXECNAME "execname" +#define FM_FMRI_SW_CTXT_PID "pid" +#define FM_FMRI_SW_CTXT_ZONE "zone" +#define FM_FMRI_SW_CTXT_CTID 
"ctid" +#define FM_FMRI_SW_CTXT_STACK "stack" + +extern nv_alloc_t *fm_nva_xcreate(char *, size_t); +extern void fm_nva_xdestroy(nv_alloc_t *); + +extern nvlist_t *fm_nvlist_create(nv_alloc_t *); +extern void fm_nvlist_destroy(nvlist_t *, int); + +#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */ +#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */ + +extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t, + const nvlist_t *, ...); +extern void fm_payload_set(nvlist_t *, ...); +extern int i_fm_payload_set(nvlist_t *, const char *, va_list); +extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *, + int, ...); +extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *, + const char *, const char *); +extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *); +extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t, + uint8_t *, const char *); +extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *, + const char *, uint64_t); +extern void fm_authority_set(nvlist_t *, int, const char *, const char *, + const char *, const char *); +extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t); +extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *, + nvlist_t *, int, ...); + +extern uint64_t fm_ena_increment(uint64_t); +extern uint64_t fm_ena_generate(uint64_t, uchar_t); +extern uint64_t fm_ena_generate_cpu(uint64_t, processorid_t, uchar_t); +extern uint64_t fm_ena_generation_get(uint64_t); +extern uchar_t fm_ena_format_get(uint64_t); +extern uint64_t fm_ena_id_get(uint64_t); +extern uint64_t fm_ena_time_get(uint64_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FM_PROTOCOL_H */ diff --git a/uts/common/sys/fm/util.h b/uts/common/sys/fm/util.h new file mode 100644 index 000000000000..37334101b3cf --- /dev/null +++ b/uts/common/sys/fm/util.h @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this 
file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FM_UTIL_H +#define _SYS_FM_UTIL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/nvpair.h> +#include <sys/errorq.h> + +/* + * Shared user/kernel definitions for class length, error channel name, + * and kernel event publisher string. + */ +#define FM_MAX_CLASS 100 +#define FM_ERROR_CHAN "com.sun:fm:error" +#define FM_PUB "fm" + +/* + * ereport dump device transport support + * + * Ereports are written out to the dump device at a prescribed offset from the + * end, similar to in-transit log messages. The ereports are represented as an + * erpt_dump_t header followed by ed_size bytes of packed native nvlist data. + * + * NOTE: All of these constants and the header must be defined so they have the + * same representation for *both* 32-bit and 64-bit producers and consumers. 
+ */ +#define ERPT_MAGIC 0xf00d4eddU +#define ERPT_MAX_ERRS 16 +#define ERPT_DATA_SZ (6 * 1024) +#define ERPT_EVCH_MAX 256 +#define ERPT_HIWAT 64 + +typedef struct erpt_dump { + uint32_t ed_magic; /* ERPT_MAGIC or zero to indicate end */ + uint32_t ed_chksum; /* checksum32() of packed nvlist data */ + uint32_t ed_size; /* ereport (nvl) fixed buf size */ + uint32_t ed_pad; /* reserved for future use */ + hrtime_t ed_hrt_nsec; /* hrtime of this ereport */ + hrtime_t ed_hrt_base; /* hrtime sample corresponding to ed_tod_base */ + struct { + uint64_t sec; /* seconds since gettimeofday() Epoch */ + uint64_t nsec; /* nanoseconds past ed_tod_base.sec */ + } ed_tod_base; +} erpt_dump_t; + +#ifdef _KERNEL +#include <sys/systm.h> + +#define FM_STK_DEPTH 20 /* maximum stack depth */ +#define FM_SYM_SZ 64 /* maximum symbol size */ +#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */ + +#define FM_EREPORT_PAYLOAD_NAME_STACK "stack" + +extern errorq_t *ereport_errorq; +extern void *ereport_dumpbuf; +extern size_t ereport_dumplen; + +extern void fm_init(void); +extern void fm_nvprint(nvlist_t *); +extern void fm_panic(const char *, ...); +extern void fm_banner(void); + +extern void fm_ereport_dump(void); +extern void fm_ereport_post(nvlist_t *, int); + +extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int); + +extern int is_fm_panic(); +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FM_UTIL_H */ diff --git a/uts/common/sys/fs/zfs.h b/uts/common/sys/fs/zfs.h new file mode 100644 index 000000000000..da0b12bab4a9 --- /dev/null +++ b/uts/common/sys/fs/zfs.h @@ -0,0 +1,912 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#ifndef _SYS_FS_ZFS_H +#define _SYS_FS_ZFS_H + +#include <sys/time.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Types and constants shared between userland and the kernel. + */ + +/* + * Each dataset can be one of the following types. These constants can be + * combined into masks that can be passed to various functions. + */ +typedef enum { + ZFS_TYPE_FILESYSTEM = 0x1, + ZFS_TYPE_SNAPSHOT = 0x2, + ZFS_TYPE_VOLUME = 0x4, + ZFS_TYPE_POOL = 0x8 +} zfs_type_t; + +#define ZFS_TYPE_DATASET \ + (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) + +#define ZAP_MAXNAMELEN 256 +#define ZAP_MAXVALUELEN (1024 * 8) +#define ZAP_OLDMAXVALUELEN 1024 + +/* + * Dataset properties are identified by these constants and must be added to + * the end of this list to ensure that external consumers are not affected + * by the change. If you make any changes to this list, be sure to update + * the property table in usr/src/common/zfs/zfs_prop.c. 
+ */ +typedef enum { + ZFS_PROP_TYPE, + ZFS_PROP_CREATION, + ZFS_PROP_USED, + ZFS_PROP_AVAILABLE, + ZFS_PROP_REFERENCED, + ZFS_PROP_COMPRESSRATIO, + ZFS_PROP_MOUNTED, + ZFS_PROP_ORIGIN, + ZFS_PROP_QUOTA, + ZFS_PROP_RESERVATION, + ZFS_PROP_VOLSIZE, + ZFS_PROP_VOLBLOCKSIZE, + ZFS_PROP_RECORDSIZE, + ZFS_PROP_MOUNTPOINT, + ZFS_PROP_SHARENFS, + ZFS_PROP_CHECKSUM, + ZFS_PROP_COMPRESSION, + ZFS_PROP_ATIME, + ZFS_PROP_DEVICES, + ZFS_PROP_EXEC, + ZFS_PROP_SETUID, + ZFS_PROP_READONLY, + ZFS_PROP_ZONED, + ZFS_PROP_SNAPDIR, + ZFS_PROP_PRIVATE, /* not exposed to user, temporary */ + ZFS_PROP_ACLINHERIT, + ZFS_PROP_CREATETXG, /* not exposed to the user */ + ZFS_PROP_NAME, /* not exposed to the user */ + ZFS_PROP_CANMOUNT, + ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */ + ZFS_PROP_XATTR, + ZFS_PROP_NUMCLONES, /* not exposed to the user */ + ZFS_PROP_COPIES, + ZFS_PROP_VERSION, + ZFS_PROP_UTF8ONLY, + ZFS_PROP_NORMALIZE, + ZFS_PROP_CASE, + ZFS_PROP_VSCAN, + ZFS_PROP_NBMAND, + ZFS_PROP_SHARESMB, + ZFS_PROP_REFQUOTA, + ZFS_PROP_REFRESERVATION, + ZFS_PROP_GUID, + ZFS_PROP_PRIMARYCACHE, + ZFS_PROP_SECONDARYCACHE, + ZFS_PROP_USEDSNAP, + ZFS_PROP_USEDDS, + ZFS_PROP_USEDCHILD, + ZFS_PROP_USEDREFRESERV, + ZFS_PROP_USERACCOUNTING, /* not exposed to the user */ + ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ + ZFS_PROP_DEFER_DESTROY, + ZFS_PROP_USERREFS, + ZFS_PROP_LOGBIAS, + ZFS_PROP_UNIQUE, /* not exposed to the user */ + ZFS_PROP_OBJSETID, /* not exposed to the user */ + ZFS_PROP_DEDUP, + ZFS_PROP_MLSLABEL, + ZFS_PROP_SYNC, + ZFS_NUM_PROPS +} zfs_prop_t; + +typedef enum { + ZFS_PROP_USERUSED, + ZFS_PROP_USERQUOTA, + ZFS_PROP_GROUPUSED, + ZFS_PROP_GROUPQUOTA, + ZFS_NUM_USERQUOTA_PROPS +} zfs_userquota_prop_t; + +extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS]; + +/* + * Pool properties are identified by these constants and must be added to the + * end of this list to ensure that external consumers are not affected + * by the change. 
If you make any changes to this list, be sure to update + * the property table in usr/src/common/zfs/zpool_prop.c. + */ +typedef enum { + ZPOOL_PROP_NAME, + ZPOOL_PROP_SIZE, + ZPOOL_PROP_CAPACITY, + ZPOOL_PROP_ALTROOT, + ZPOOL_PROP_HEALTH, + ZPOOL_PROP_GUID, + ZPOOL_PROP_VERSION, + ZPOOL_PROP_BOOTFS, + ZPOOL_PROP_DELEGATION, + ZPOOL_PROP_AUTOREPLACE, + ZPOOL_PROP_CACHEFILE, + ZPOOL_PROP_FAILUREMODE, + ZPOOL_PROP_LISTSNAPS, + ZPOOL_PROP_AUTOEXPAND, + ZPOOL_PROP_DEDUPDITTO, + ZPOOL_PROP_DEDUPRATIO, + ZPOOL_PROP_FREE, + ZPOOL_PROP_ALLOCATED, + ZPOOL_PROP_READONLY, + ZPOOL_NUM_PROPS +} zpool_prop_t; + +#define ZPROP_CONT -2 +#define ZPROP_INVAL -1 + +#define ZPROP_VALUE "value" +#define ZPROP_SOURCE "source" + +typedef enum { + ZPROP_SRC_NONE = 0x1, + ZPROP_SRC_DEFAULT = 0x2, + ZPROP_SRC_TEMPORARY = 0x4, + ZPROP_SRC_LOCAL = 0x8, + ZPROP_SRC_INHERITED = 0x10, + ZPROP_SRC_RECEIVED = 0x20 +} zprop_source_t; + +#define ZPROP_SRC_ALL 0x3f + +#define ZPROP_SOURCE_VAL_RECVD "$recvd" +#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS" +/* + * Dataset flag implemented as a special entry in the props zap object + * indicating that the dataset has received properties on or after + * SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties + * just as it did in earlier versions, and thereafter, local properties are + * preserved. + */ +#define ZPROP_HAS_RECVD "$hasrecvd" + +typedef enum { + ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */ + ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */ +} zprop_errflags_t; + +typedef int (*zprop_func)(int, void *); + +/* + * Properties to be set on the root file system of a new pool + * are stuffed into their own nvlist, which is then included in + * the properties nvlist with the pool properties. + */ +#define ZPOOL_ROOTFS_PROPS "root-props-nvl" + +/* + * Dataset property functions shared between libzfs and kernel. 
+ */ +const char *zfs_prop_default_string(zfs_prop_t); +uint64_t zfs_prop_default_numeric(zfs_prop_t); +boolean_t zfs_prop_readonly(zfs_prop_t); +boolean_t zfs_prop_inheritable(zfs_prop_t); +boolean_t zfs_prop_setonce(zfs_prop_t); +const char *zfs_prop_to_name(zfs_prop_t); +zfs_prop_t zfs_name_to_prop(const char *); +boolean_t zfs_prop_user(const char *); +boolean_t zfs_prop_userquota(const char *); +int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **); +int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *); +uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed); +boolean_t zfs_prop_valid_for_type(int, zfs_type_t); + +/* + * Pool property functions shared between libzfs and kernel. + */ +zpool_prop_t zpool_name_to_prop(const char *); +const char *zpool_prop_to_name(zpool_prop_t); +const char *zpool_prop_default_string(zpool_prop_t); +uint64_t zpool_prop_default_numeric(zpool_prop_t); +boolean_t zpool_prop_readonly(zpool_prop_t); +int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); +int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); +uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); + +/* + * Definitions for the Delegation. 
+ */ +typedef enum { + ZFS_DELEG_WHO_UNKNOWN = 0, + ZFS_DELEG_USER = 'u', + ZFS_DELEG_USER_SETS = 'U', + ZFS_DELEG_GROUP = 'g', + ZFS_DELEG_GROUP_SETS = 'G', + ZFS_DELEG_EVERYONE = 'e', + ZFS_DELEG_EVERYONE_SETS = 'E', + ZFS_DELEG_CREATE = 'c', + ZFS_DELEG_CREATE_SETS = 'C', + ZFS_DELEG_NAMED_SET = 's', + ZFS_DELEG_NAMED_SET_SETS = 'S' +} zfs_deleg_who_type_t; + +typedef enum { + ZFS_DELEG_NONE = 0, + ZFS_DELEG_PERM_LOCAL = 1, + ZFS_DELEG_PERM_DESCENDENT = 2, + ZFS_DELEG_PERM_LOCALDESCENDENT = 3, + ZFS_DELEG_PERM_CREATE = 4 +} zfs_deleg_inherit_t; + +#define ZFS_DELEG_PERM_UID "uid" +#define ZFS_DELEG_PERM_GID "gid" +#define ZFS_DELEG_PERM_GROUPS "groups" + +#define ZFS_MLSLABEL_DEFAULT "none" + +#define ZFS_SMB_ACL_SRC "src" +#define ZFS_SMB_ACL_TARGET "target" + +typedef enum { + ZFS_CANMOUNT_OFF = 0, + ZFS_CANMOUNT_ON = 1, + ZFS_CANMOUNT_NOAUTO = 2 +} zfs_canmount_type_t; + +typedef enum { + ZFS_LOGBIAS_LATENCY = 0, + ZFS_LOGBIAS_THROUGHPUT = 1 +} zfs_logbias_op_t; + +typedef enum zfs_share_op { + ZFS_SHARE_NFS = 0, + ZFS_UNSHARE_NFS = 1, + ZFS_SHARE_SMB = 2, + ZFS_UNSHARE_SMB = 3 +} zfs_share_op_t; + +typedef enum zfs_smb_acl_op { + ZFS_SMB_ACL_ADD, + ZFS_SMB_ACL_REMOVE, + ZFS_SMB_ACL_RENAME, + ZFS_SMB_ACL_PURGE +} zfs_smb_acl_op_t; + +typedef enum zfs_cache_type { + ZFS_CACHE_NONE = 0, + ZFS_CACHE_METADATA = 1, + ZFS_CACHE_ALL = 2 +} zfs_cache_type_t; + +typedef enum { + ZFS_SYNC_STANDARD = 0, + ZFS_SYNC_ALWAYS = 1, + ZFS_SYNC_DISABLED = 2 +} zfs_sync_type_t; + + +/* + * On-disk version number. 
+ */ +#define SPA_VERSION_1 1ULL +#define SPA_VERSION_2 2ULL +#define SPA_VERSION_3 3ULL +#define SPA_VERSION_4 4ULL +#define SPA_VERSION_5 5ULL +#define SPA_VERSION_6 6ULL +#define SPA_VERSION_7 7ULL +#define SPA_VERSION_8 8ULL +#define SPA_VERSION_9 9ULL +#define SPA_VERSION_10 10ULL +#define SPA_VERSION_11 11ULL +#define SPA_VERSION_12 12ULL +#define SPA_VERSION_13 13ULL +#define SPA_VERSION_14 14ULL +#define SPA_VERSION_15 15ULL +#define SPA_VERSION_16 16ULL +#define SPA_VERSION_17 17ULL +#define SPA_VERSION_18 18ULL +#define SPA_VERSION_19 19ULL +#define SPA_VERSION_20 20ULL +#define SPA_VERSION_21 21ULL +#define SPA_VERSION_22 22ULL +#define SPA_VERSION_23 23ULL +#define SPA_VERSION_24 24ULL +#define SPA_VERSION_25 25ULL +#define SPA_VERSION_26 26ULL +#define SPA_VERSION_27 27ULL +#define SPA_VERSION_28 28ULL + +/* + * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk + * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, + * and do the appropriate changes. Also bump the version number in + * usr/src/grub/capability. + */ +#define SPA_VERSION SPA_VERSION_28 +#define SPA_VERSION_STRING "28" + +/* + * Symbolic names for the changes that caused a SPA_VERSION switch. + * Used in the code when checking for presence or absence of a feature. + * Feel free to define multiple symbolic names for each version if there + * were multiple changes to on-disk structures during that version. + * + * NOTE: When checking the current SPA_VERSION in your code, be sure + * to use spa_version() since it reports the version of the + * last synced uberblock. Checking the in-flight version can + * be dangerous in some cases. 
+ */ +#define SPA_VERSION_INITIAL SPA_VERSION_1 +#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 +#define SPA_VERSION_SPARES SPA_VERSION_3 +#define SPA_VERSION_RAIDZ2 SPA_VERSION_3 +#define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3 +#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 +#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 +#define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 +#define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 +#define SPA_VERSION_BOOTFS SPA_VERSION_6 +#define SPA_VERSION_SLOGS SPA_VERSION_7 +#define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 +#define SPA_VERSION_FUID SPA_VERSION_9 +#define SPA_VERSION_REFRESERVATION SPA_VERSION_9 +#define SPA_VERSION_REFQUOTA SPA_VERSION_9 +#define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 +#define SPA_VERSION_L2CACHE SPA_VERSION_10 +#define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 +#define SPA_VERSION_ORIGIN SPA_VERSION_11 +#define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 +#define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 +#define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 +#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 +#define SPA_VERSION_USERSPACE SPA_VERSION_15 +#define SPA_VERSION_STMF_PROP SPA_VERSION_16 +#define SPA_VERSION_RAIDZ3 SPA_VERSION_17 +#define SPA_VERSION_USERREFS SPA_VERSION_18 +#define SPA_VERSION_HOLES SPA_VERSION_19 +#define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 +#define SPA_VERSION_DEDUP SPA_VERSION_21 +#define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 +#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 +#define SPA_VERSION_SA SPA_VERSION_24 +#define SPA_VERSION_SCAN SPA_VERSION_25 +#define SPA_VERSION_DIR_CLONES SPA_VERSION_26 +#define SPA_VERSION_DEADLISTS SPA_VERSION_26 +#define SPA_VERSION_FAST_SNAP SPA_VERSION_27 +#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 + +/* + * ZPL version - rev'd whenever an incompatible on-disk format change + * occurs. This is independent of SPA/DMU/ZAP versioning. You must + * also update the version_table[] and help message in zfs_prop.c. 
+ * + * When changing, be sure to teach GRUB how to read the new format! + * See usr/src/grub/grub-0.97/stage2/{zfs-include/,fsys_zfs*} + */ +#define ZPL_VERSION_1 1ULL +#define ZPL_VERSION_2 2ULL +#define ZPL_VERSION_3 3ULL +#define ZPL_VERSION_4 4ULL +#define ZPL_VERSION_5 5ULL +#define ZPL_VERSION ZPL_VERSION_5 +#define ZPL_VERSION_STRING "5" + +#define ZPL_VERSION_INITIAL ZPL_VERSION_1 +#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2 +#define ZPL_VERSION_FUID ZPL_VERSION_3 +#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3 +#define ZPL_VERSION_SYSATTR ZPL_VERSION_3 +#define ZPL_VERSION_USERSPACE ZPL_VERSION_4 +#define ZPL_VERSION_SA ZPL_VERSION_5 + +/* Rewind request information */ +#define ZPOOL_NO_REWIND 1 /* No policy - default behavior */ +#define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */ +#define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */ +#define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */ +#define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */ +#define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */ +#define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */ + +typedef struct zpool_rewind_policy { + uint32_t zrp_request; /* rewind behavior requested */ + uint64_t zrp_maxmeta; /* max acceptable meta-data errors */ + uint64_t zrp_maxdata; /* max acceptable data errors */ + uint64_t zrp_txg; /* specific txg to load */ +} zpool_rewind_policy_t; + +/* + * The following are configuration names used in the nvlist describing a pool's + * configuration. 
+ */ +#define ZPOOL_CONFIG_VERSION "version" +#define ZPOOL_CONFIG_POOL_NAME "name" +#define ZPOOL_CONFIG_POOL_STATE "state" +#define ZPOOL_CONFIG_POOL_TXG "txg" +#define ZPOOL_CONFIG_POOL_GUID "pool_guid" +#define ZPOOL_CONFIG_CREATE_TXG "create_txg" +#define ZPOOL_CONFIG_TOP_GUID "top_guid" +#define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" +#define ZPOOL_CONFIG_TYPE "type" +#define ZPOOL_CONFIG_CHILDREN "children" +#define ZPOOL_CONFIG_ID "id" +#define ZPOOL_CONFIG_GUID "guid" +#define ZPOOL_CONFIG_PATH "path" +#define ZPOOL_CONFIG_DEVID "devid" +#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" +#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" +#define ZPOOL_CONFIG_ASHIFT "ashift" +#define ZPOOL_CONFIG_ASIZE "asize" +#define ZPOOL_CONFIG_DTL "DTL" +#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" +#define ZPOOL_CONFIG_ERRCOUNT "error_count" +#define ZPOOL_CONFIG_NOT_PRESENT "not_present" +#define ZPOOL_CONFIG_SPARES "spares" +#define ZPOOL_CONFIG_IS_SPARE "is_spare" +#define ZPOOL_CONFIG_NPARITY "nparity" +#define ZPOOL_CONFIG_HOSTID "hostid" +#define ZPOOL_CONFIG_HOSTNAME "hostname" +#define ZPOOL_CONFIG_LOADED_TIME "initial_load_time" +#define ZPOOL_CONFIG_UNSPARE "unspare" +#define ZPOOL_CONFIG_PHYS_PATH "phys_path" +#define ZPOOL_CONFIG_IS_LOG "is_log" +#define ZPOOL_CONFIG_L2CACHE "l2cache" +#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" +#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" +#define ZPOOL_CONFIG_IS_HOLE "is_hole" +#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" +#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" +#define ZPOOL_CONFIG_DDT_STATS "ddt_stats" +#define ZPOOL_CONFIG_SPLIT "splitcfg" +#define ZPOOL_CONFIG_ORIG_GUID "orig_guid" +#define ZPOOL_CONFIG_SPLIT_GUID "split_guid" +#define ZPOOL_CONFIG_SPLIT_LIST "guid_list" +#define ZPOOL_CONFIG_REMOVING "removing" +#define ZPOOL_CONFIG_RESILVERING 
"resilvering" +#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ +#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ +#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ +#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ +#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */ +/* + * The persistent vdev state is stored as separate values rather than a single + * 'vdev_state' entry. This is because a device can be in multiple states, such + * as offline and degraded. + */ +#define ZPOOL_CONFIG_OFFLINE "offline" +#define ZPOOL_CONFIG_FAULTED "faulted" +#define ZPOOL_CONFIG_DEGRADED "degraded" +#define ZPOOL_CONFIG_REMOVED "removed" +#define ZPOOL_CONFIG_FRU "fru" +#define ZPOOL_CONFIG_AUX_STATE "aux_state" + +/* Rewind policy parameters */ +#define ZPOOL_REWIND_POLICY "rewind-policy" +#define ZPOOL_REWIND_REQUEST "rewind-request" +#define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg" +#define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh" +#define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh" + +/* Rewind data discovered */ +#define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts" +#define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors" +#define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind" + +#define VDEV_TYPE_ROOT "root" +#define VDEV_TYPE_MIRROR "mirror" +#define VDEV_TYPE_REPLACING "replacing" +#define VDEV_TYPE_RAIDZ "raidz" +#define VDEV_TYPE_DISK "disk" +#define VDEV_TYPE_FILE "file" +#define VDEV_TYPE_MISSING "missing" +#define VDEV_TYPE_HOLE "hole" +#define VDEV_TYPE_SPARE "spare" +#define VDEV_TYPE_LOG "log" +#define VDEV_TYPE_L2CACHE "l2cache" + +/* + * This is needed in userland to report the minimum necessary device size. + */ +#define SPA_MINDEVSIZE (64ULL << 20) + +/* + * The location of the pool configuration repository, shared between kernel and + * userland. + */ +#define ZPOOL_CACHE "/etc/zfs/zpool.cache" + +/* + * vdev states are ordered from least to most healthy. 
+ * A vdev that's CANT_OPEN or below is considered unusable. + */ +typedef enum vdev_state { + VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ + VDEV_STATE_CLOSED, /* Not currently open */ + VDEV_STATE_OFFLINE, /* Not allowed to open */ + VDEV_STATE_REMOVED, /* Explicitly removed from system */ + VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ + VDEV_STATE_FAULTED, /* External request to fault device */ + VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ + VDEV_STATE_HEALTHY /* Presumed good */ +} vdev_state_t; + +#define VDEV_STATE_ONLINE VDEV_STATE_HEALTHY + +/* + * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field + * of the vdev stats structure uses these constants to distinguish why. + */ +typedef enum vdev_aux { + VDEV_AUX_NONE, /* no error */ + VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ + VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ + VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ + VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ + VDEV_AUX_TOO_SMALL, /* vdev size is too small */ + VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ + VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ + VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ + VDEV_AUX_SPARED, /* hot spare used in another pool */ + VDEV_AUX_ERR_EXCEEDED, /* too many errors */ + VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ + VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ + VDEV_AUX_EXTERNAL, /* external diagnosis */ + VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */ +} vdev_aux_t; + +/* + * pool state. The following states are written to disk as part of the normal + * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. The remaining + * states are software abstractions used at various levels to communicate + * pool state. 
+ */ +typedef enum pool_state { + POOL_STATE_ACTIVE = 0, /* In active use */ + POOL_STATE_EXPORTED, /* Explicitly exported */ + POOL_STATE_DESTROYED, /* Explicitly destroyed */ + POOL_STATE_SPARE, /* Reserved for hot spare use */ + POOL_STATE_L2CACHE, /* Level 2 ARC device */ + POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ + POOL_STATE_UNAVAIL, /* Internal libzfs state */ + POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ +} pool_state_t; + +/* + * Scan Functions. + */ +typedef enum pool_scan_func { + POOL_SCAN_NONE, + POOL_SCAN_SCRUB, + POOL_SCAN_RESILVER, + POOL_SCAN_FUNCS +} pool_scan_func_t; + +/* + * ZIO types. Needed to interpret vdev statistics below. + */ +typedef enum zio_type { + ZIO_TYPE_NULL = 0, + ZIO_TYPE_READ, + ZIO_TYPE_WRITE, + ZIO_TYPE_FREE, + ZIO_TYPE_CLAIM, + ZIO_TYPE_IOCTL, + ZIO_TYPES +} zio_type_t; + +/* + * Pool statistics. Note: all fields should be 64-bit because this + * is passed between kernel and userland as an nvlist uint64 array. + */ +typedef struct pool_scan_stat { + /* values stored on disk */ + uint64_t pss_func; /* pool_scan_func_t */ + uint64_t pss_state; /* dsl_scan_state_t */ + uint64_t pss_start_time; /* scan start time */ + uint64_t pss_end_time; /* scan end time */ + uint64_t pss_to_examine; /* total bytes to scan */ + uint64_t pss_examined; /* total examined bytes */ + uint64_t pss_to_process; /* total bytes to process */ + uint64_t pss_processed; /* total processed bytes */ + uint64_t pss_errors; /* scan errors */ + + /* values not stored on disk */ + uint64_t pss_pass_exam; /* examined bytes per scan pass */ + uint64_t pss_pass_start; /* start time of a scan pass */ +} pool_scan_stat_t; + +typedef enum dsl_scan_state { + DSS_NONE, + DSS_SCANNING, + DSS_FINISHED, + DSS_CANCELED, + DSS_NUM_STATES +} dsl_scan_state_t; + + +/* + * Vdev statistics. Note: all fields should be 64-bit because this + * is passed between kernel and userland as an nvlist uint64 array. 
+ */ +typedef struct vdev_stat { + hrtime_t vs_timestamp; /* time since vdev load */ + uint64_t vs_state; /* vdev state */ + uint64_t vs_aux; /* see vdev_aux_t */ + uint64_t vs_alloc; /* space allocated */ + uint64_t vs_space; /* total capacity */ + uint64_t vs_dspace; /* deflated capacity */ + uint64_t vs_rsize; /* replaceable dev size */ + uint64_t vs_ops[ZIO_TYPES]; /* operation count */ + uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ + uint64_t vs_read_errors; /* read errors */ + uint64_t vs_write_errors; /* write errors */ + uint64_t vs_checksum_errors; /* checksum errors */ + uint64_t vs_self_healed; /* self-healed bytes */ + uint64_t vs_scan_removing; /* removing? */ + uint64_t vs_scan_processed; /* scan processed bytes */ +} vdev_stat_t; + +/* + * DDT statistics. Note: all fields should be 64-bit because this + * is passed between kernel and userland as an nvlist uint64 array. + */ +typedef struct ddt_object { + uint64_t ddo_count; /* number of elements in ddt */ + uint64_t ddo_dspace; /* size of ddt on disk */ + uint64_t ddo_mspace; /* size of ddt in-core */ +} ddt_object_t; + +typedef struct ddt_stat { + uint64_t dds_blocks; /* blocks */ + uint64_t dds_lsize; /* logical size */ + uint64_t dds_psize; /* physical size */ + uint64_t dds_dsize; /* deflated allocated size */ + uint64_t dds_ref_blocks; /* referenced blocks */ + uint64_t dds_ref_lsize; /* referenced lsize * refcnt */ + uint64_t dds_ref_psize; /* referenced psize * refcnt */ + uint64_t dds_ref_dsize; /* referenced dsize * refcnt */ +} ddt_stat_t; + +typedef struct ddt_histogram { + ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */ +} ddt_histogram_t; + +#define ZVOL_DRIVER "zvol" +#define ZFS_DRIVER "zfs" +#define ZFS_DEV "/dev/zfs" + +/* general zvol path */ +#define ZVOL_DIR "/dev/zvol" +/* expansion */ +#define ZVOL_PSEUDO_DEV "/devices/pseudo/zfs@0:" +/* for dump and swap */ +#define ZVOL_FULL_DEV_DIR ZVOL_DIR "/dsk/" +#define ZVOL_FULL_RDEV_DIR ZVOL_DIR "/rdsk/" + 
+#define ZVOL_PROP_NAME "name" +#define ZVOL_DEFAULT_BLOCKSIZE 8192 + +/* + * /dev/zfs ioctl numbers. + */ +#define ZFS_IOC ('Z' << 8) + +typedef enum zfs_ioc { + ZFS_IOC_POOL_CREATE = ZFS_IOC, + ZFS_IOC_POOL_DESTROY, + ZFS_IOC_POOL_IMPORT, + ZFS_IOC_POOL_EXPORT, + ZFS_IOC_POOL_CONFIGS, + ZFS_IOC_POOL_STATS, + ZFS_IOC_POOL_TRYIMPORT, + ZFS_IOC_POOL_SCAN, + ZFS_IOC_POOL_FREEZE, + ZFS_IOC_POOL_UPGRADE, + ZFS_IOC_POOL_GET_HISTORY, + ZFS_IOC_VDEV_ADD, + ZFS_IOC_VDEV_REMOVE, + ZFS_IOC_VDEV_SET_STATE, + ZFS_IOC_VDEV_ATTACH, + ZFS_IOC_VDEV_DETACH, + ZFS_IOC_VDEV_SETPATH, + ZFS_IOC_VDEV_SETFRU, + ZFS_IOC_OBJSET_STATS, + ZFS_IOC_OBJSET_ZPLPROPS, + ZFS_IOC_DATASET_LIST_NEXT, + ZFS_IOC_SNAPSHOT_LIST_NEXT, + ZFS_IOC_SET_PROP, + ZFS_IOC_CREATE, + ZFS_IOC_DESTROY, + ZFS_IOC_ROLLBACK, + ZFS_IOC_RENAME, + ZFS_IOC_RECV, + ZFS_IOC_SEND, + ZFS_IOC_INJECT_FAULT, + ZFS_IOC_CLEAR_FAULT, + ZFS_IOC_INJECT_LIST_NEXT, + ZFS_IOC_ERROR_LOG, + ZFS_IOC_CLEAR, + ZFS_IOC_PROMOTE, + ZFS_IOC_DESTROY_SNAPS, + ZFS_IOC_SNAPSHOT, + ZFS_IOC_DSOBJ_TO_DSNAME, + ZFS_IOC_OBJ_TO_PATH, + ZFS_IOC_POOL_SET_PROPS, + ZFS_IOC_POOL_GET_PROPS, + ZFS_IOC_SET_FSACL, + ZFS_IOC_GET_FSACL, + ZFS_IOC_SHARE, + ZFS_IOC_INHERIT_PROP, + ZFS_IOC_SMB_ACL, + ZFS_IOC_USERSPACE_ONE, + ZFS_IOC_USERSPACE_MANY, + ZFS_IOC_USERSPACE_UPGRADE, + ZFS_IOC_HOLD, + ZFS_IOC_RELEASE, + ZFS_IOC_GET_HOLDS, + ZFS_IOC_OBJSET_RECVD_PROPS, + ZFS_IOC_VDEV_SPLIT, + ZFS_IOC_NEXT_OBJ, + ZFS_IOC_DIFF, + ZFS_IOC_TMP_SNAPSHOT, + ZFS_IOC_OBJ_TO_STATS +} zfs_ioc_t; + +/* + * Internal SPA load state. Used by FMA diagnosis engine. + */ +typedef enum { + SPA_LOAD_NONE, /* no load in progress */ + SPA_LOAD_OPEN, /* normal open */ + SPA_LOAD_IMPORT, /* import in progress */ + SPA_LOAD_TRYIMPORT, /* tryimport in progress */ + SPA_LOAD_RECOVER, /* recovery requested */ + SPA_LOAD_ERROR /* load failed */ +} spa_load_state_t; + +/* + * Bookmark name values. 
+ */ +#define ZPOOL_ERR_LIST "error list" +#define ZPOOL_ERR_DATASET "dataset" +#define ZPOOL_ERR_OBJECT "object" + +#define HIS_MAX_RECORD_LEN (MAXPATHLEN + MAXPATHLEN + 1) + +/* + * The following are names used in the nvlist describing + * the pool's history log. + */ +#define ZPOOL_HIST_RECORD "history record" +#define ZPOOL_HIST_TIME "history time" +#define ZPOOL_HIST_CMD "history command" +#define ZPOOL_HIST_WHO "history who" +#define ZPOOL_HIST_ZONE "history zone" +#define ZPOOL_HIST_HOST "history hostname" +#define ZPOOL_HIST_TXG "history txg" +#define ZPOOL_HIST_INT_EVENT "history internal event" +#define ZPOOL_HIST_INT_STR "history internal str" + +/* + * Flags for ZFS_IOC_VDEV_SET_STATE + */ +#define ZFS_ONLINE_CHECKREMOVE 0x1 +#define ZFS_ONLINE_UNSPARE 0x2 +#define ZFS_ONLINE_FORCEFAULT 0x4 +#define ZFS_ONLINE_EXPAND 0x8 +#define ZFS_OFFLINE_TEMPORARY 0x1 + +/* + * Flags for ZFS_IOC_POOL_IMPORT + */ +#define ZFS_IMPORT_NORMAL 0x0 +#define ZFS_IMPORT_VERBATIM 0x1 +#define ZFS_IMPORT_ANY_HOST 0x2 +#define ZFS_IMPORT_MISSING_LOG 0x4 +#define ZFS_IMPORT_ONLY 0x8 + +/* + * Sysevent payload members. ZFS will generate the following sysevents with the + * given payloads: + * + * ESC_ZFS_RESILVER_START + * ESC_ZFS_RESILVER_END + * ESC_ZFS_POOL_DESTROY + * + * ZFS_EV_POOL_NAME DATA_TYPE_STRING + * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 + * + * ESC_ZFS_VDEV_REMOVE + * ESC_ZFS_VDEV_CLEAR + * ESC_ZFS_VDEV_CHECK + * + * ZFS_EV_POOL_NAME DATA_TYPE_STRING + * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 + * ZFS_EV_VDEV_PATH DATA_TYPE_STRING (optional) + * ZFS_EV_VDEV_GUID DATA_TYPE_UINT64 + */ +#define ZFS_EV_POOL_NAME "pool_name" +#define ZFS_EV_POOL_GUID "pool_guid" +#define ZFS_EV_VDEV_PATH "vdev_path" +#define ZFS_EV_VDEV_GUID "vdev_guid" + +/* + * Note: This is encoded on-disk, so new events must be added to the + * end, and unused events can not be removed. Be sure to edit + * libzfs_pool.c: hist_event_table[]. 
+ */ +typedef enum history_internal_events { + LOG_NO_EVENT = 0, + LOG_POOL_CREATE, + LOG_POOL_VDEV_ADD, + LOG_POOL_REMOVE, + LOG_POOL_DESTROY, + LOG_POOL_EXPORT, + LOG_POOL_IMPORT, + LOG_POOL_VDEV_ATTACH, + LOG_POOL_VDEV_REPLACE, + LOG_POOL_VDEV_DETACH, + LOG_POOL_VDEV_ONLINE, + LOG_POOL_VDEV_OFFLINE, + LOG_POOL_UPGRADE, + LOG_POOL_CLEAR, + LOG_POOL_SCAN, + LOG_POOL_PROPSET, + LOG_DS_CREATE, + LOG_DS_CLONE, + LOG_DS_DESTROY, + LOG_DS_DESTROY_BEGIN, + LOG_DS_INHERIT, + LOG_DS_PROPSET, + LOG_DS_QUOTA, + LOG_DS_PERM_UPDATE, + LOG_DS_PERM_REMOVE, + LOG_DS_PERM_WHO_REMOVE, + LOG_DS_PROMOTE, + LOG_DS_RECEIVE, + LOG_DS_RENAME, + LOG_DS_RESERVATION, + LOG_DS_REPLAY_INC_SYNC, + LOG_DS_REPLAY_FULL_SYNC, + LOG_DS_ROLLBACK, + LOG_DS_SNAPSHOT, + LOG_DS_UPGRADE, + LOG_DS_REFQUOTA, + LOG_DS_REFRESERV, + LOG_POOL_SCAN_DONE, + LOG_DS_USER_HOLD, + LOG_DS_USER_RELEASE, + LOG_POOL_SPLIT, + LOG_END +} history_internal_events_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_H */ diff --git a/uts/common/sys/fs/zut.h b/uts/common/sys/fs/zut.h new file mode 100644 index 000000000000..36c9eaa7f18e --- /dev/null +++ b/uts/common/sys/fs/zut.h @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZUT_H +#define _ZUT_H + +/* + * IOCTLs for the zfs unit test driver + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> + +#define ZUT_DRIVER "zut" +#define ZUT_DEV "/dev/zut" + +#define ZUT_VERSION_STRING "1" + +/* + * /dev/zut ioctl numbers. + */ +#define ZUT_IOC ('U' << 8) + +/* Request flags */ +#define ZUT_IGNORECASE 0x01 +#define ZUT_ACCFILTER 0x02 +#define ZUT_XATTR 0x04 +#define ZUT_EXTRDDIR 0x08 +#define ZUT_GETSTAT 0x10 + +typedef struct zut_lookup { + int zl_reqflags; + int zl_deflags; /* output */ + int zl_retcode; /* output */ + char zl_dir[MAXPATHLEN]; + char zl_file[MAXNAMELEN]; + char zl_xfile[MAXNAMELEN]; + char zl_real[MAXPATHLEN]; /* output */ + uint64_t zl_xvattrs; /* output */ + struct stat64 zl_statbuf; /* output */ +} zut_lookup_t; + +typedef struct zut_readdir { + uint64_t zr_buf; /* pointer to output buffer */ + uint64_t zr_loffset; /* output */ + char zr_dir[MAXPATHLEN]; + char zr_file[MAXNAMELEN]; + int zr_reqflags; + int zr_retcode; /* output */ + int zr_eof; /* output */ + uint_t zr_bytes; /* output */ + uint_t zr_buflen; +} zut_readdir_t; + +typedef enum zut_ioc { + ZUT_IOC_MIN_CMD = ZUT_IOC - 1, + ZUT_IOC_LOOKUP = ZUT_IOC, + ZUT_IOC_READDIR, + ZUT_IOC_MAX_CMD +} zut_ioc_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _ZUT_H */ diff --git a/uts/common/sys/gfs.h b/uts/common/sys/gfs.h new file mode 100644 index 000000000000..71c7c2ef78c9 --- /dev/null +++ b/uts/common/sys/gfs.h @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * These are Consolidation Private interfaces and are subject to change. + */ + +#ifndef _SYS_GFS_H +#define _SYS_GFS_H + +#include <sys/types.h> +#include <sys/vnode.h> +#include <sys/vfs_opreg.h> +#include <sys/mutex.h> +#include <sys/dirent.h> +#include <sys/extdirent.h> +#include <sys/uio.h> +#include <sys/list.h> +#include <sys/pathname.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct gfs_opsvec { + const char *gfsv_name; /* vnode description */ + const fs_operation_def_t *gfsv_template; /* ops template */ + vnodeops_t **gfsv_ops; /* ptr to result */ +} gfs_opsvec_t; + +int gfs_make_opsvec(gfs_opsvec_t *); + +#define GFS_CACHE_VNODE 0x1 + +typedef struct gfs_dirent { + char *gfse_name; /* entry name */ + vnode_t *(*gfse_ctor)(vnode_t *); /* constructor */ + int gfse_flags; /* flags */ + list_node_t gfse_link; /* dynamic list */ + vnode_t *gfse_vnode; /* cached vnode */ +} gfs_dirent_t; + +typedef enum gfs_type { + GFS_DIR, + GFS_FILE +} gfs_type_t; + +typedef struct gfs_file { + vnode_t *gfs_vnode; /* current vnode */ + vnode_t *gfs_parent; /* parent vnode */ + size_t gfs_size; /* size of private data structure */ + gfs_type_t gfs_type; /* 
type of vnode */ + int gfs_index; /* index in parent dir */ + ino64_t gfs_ino; /* inode for this vnode */ +} gfs_file_t; + +typedef int (*gfs_readdir_cb)(vnode_t *, void *, int *, offset_t *, + offset_t *, void *, int); +typedef int (*gfs_lookup_cb)(vnode_t *, const char *, vnode_t **, ino64_t *, + cred_t *, int, int *, pathname_t *); +typedef ino64_t (*gfs_inode_cb)(vnode_t *, int); + +typedef struct gfs_dir { + gfs_file_t gfsd_file; /* generic file attributes */ + gfs_dirent_t *gfsd_static; /* statically defined entries */ + int gfsd_nstatic; /* # static entries */ + kmutex_t gfsd_lock; /* protects entries */ + int gfsd_maxlen; /* maximum name length */ + gfs_readdir_cb gfsd_readdir; /* readdir() callback */ + gfs_lookup_cb gfsd_lookup; /* lookup() callback */ + gfs_inode_cb gfsd_inode; /* get an inode number */ +} gfs_dir_t; + +struct vfs; + +extern vnode_t *gfs_file_create(size_t, vnode_t *, vnodeops_t *); +extern vnode_t *gfs_dir_create(size_t, vnode_t *, vnodeops_t *, + gfs_dirent_t *, gfs_inode_cb, int, gfs_readdir_cb, gfs_lookup_cb); +extern vnode_t *gfs_root_create(size_t, struct vfs *, vnodeops_t *, ino64_t, + gfs_dirent_t *, gfs_inode_cb, int, gfs_readdir_cb, gfs_lookup_cb); +extern vnode_t *gfs_root_create_file(size_t, struct vfs *, vnodeops_t *, + ino64_t); + +extern void *gfs_file_inactive(vnode_t *); +extern void *gfs_dir_inactive(vnode_t *); + +extern int gfs_dir_case_lookup(vnode_t *, const char *, vnode_t **, cred_t *, + int, int *, pathname_t *); +extern int gfs_dir_lookup(vnode_t *, const char *, vnode_t **, cred_t *, + int, int *, pathname_t *); +extern int gfs_dir_readdir(vnode_t *, uio_t *, int *, void *, cred_t *, + caller_context_t *, int flags); + +#define gfs_dir_lock(gd) mutex_enter(&(gd)->gfsd_lock) +#define gfs_dir_unlock(gd) mutex_exit(&(gd)->gfsd_lock) +#define GFS_DIR_LOCKED(gd) MUTEX_HELD(&(gd)->gfsd_lock) + +#define gfs_file_parent(vp) (((gfs_file_t *)(vp)->v_data)->gfs_parent) + +#define gfs_file_index(vp) (((gfs_file_t 
*)(vp)->v_data)->gfs_index) +#define gfs_file_set_index(vp, idx) \ + (((gfs_file_t *)(vp)->v_data)->gfs_index = (idx)) + +#define gfs_file_inode(vp) (((gfs_file_t *)(vp)->v_data)->gfs_ino) +#define gfs_file_set_inode(vp, ino) \ + (((gfs_file_t *)(vp)->v_data)->gfs_ino = (ino)) + +typedef struct gfs_readdir_state { + void *grd_dirent; /* directory entry buffer */ + size_t grd_namlen; /* max file name length */ + size_t grd_ureclen; /* exported record size */ + ssize_t grd_oresid; /* original uio_resid */ + ino64_t grd_parent; /* inode of parent */ + ino64_t grd_self; /* inode of self */ + int grd_flags; /* flags from VOP_READDIR */ +} gfs_readdir_state_t; + +extern int gfs_readdir_init(gfs_readdir_state_t *, int, int, uio_t *, ino64_t, + ino64_t, int); +extern int gfs_readdir_emit(gfs_readdir_state_t *, uio_t *, offset_t, ino64_t, + const char *, int); +extern int gfs_readdir_emitn(gfs_readdir_state_t *, uio_t *, offset_t, ino64_t, + unsigned long); +extern int gfs_readdir_pred(gfs_readdir_state_t *, uio_t *, offset_t *); +extern int gfs_readdir_fini(gfs_readdir_state_t *, int, int *, int); +extern int gfs_get_parent_ino(vnode_t *, cred_t *, caller_context_t *, + ino64_t *, ino64_t *); + +/* + * Objects with real extended attributes will get their . and .. + * readdir entries from the real xattr directory. GFS_STATIC_ENTRY_OFFSET + * lets us skip right to the static entries in the GFS directory. 
+ */ +#define GFS_STATIC_ENTRY_OFFSET ((offset_t)2) + +extern int gfs_lookup_dot(vnode_t **, vnode_t *, vnode_t *, const char *); + +extern int gfs_vop_lookup(vnode_t *, char *, vnode_t **, pathname_t *, + int, vnode_t *, cred_t *, caller_context_t *, int *, pathname_t *); +extern int gfs_vop_readdir(vnode_t *, uio_t *, cred_t *, int *, + caller_context_t *, int); +extern int gfs_vop_map(vnode_t *, offset_t, struct as *, caddr_t *, + size_t, uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *); +extern void gfs_vop_inactive(vnode_t *, cred_t *, caller_context_t *); + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_GFS_H */ diff --git a/uts/common/sys/idmap.h b/uts/common/sys/idmap.h new file mode 100644 index 000000000000..39eeb905c72b --- /dev/null +++ b/uts/common/sys/idmap.h @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_IDMAP_H +#define _SYS_IDMAP_H + + +/* Idmap status codes */ +#define IDMAP_SUCCESS 0 +#define IDMAP_NEXT 1 +#define IDMAP_ERR_OTHER -10000 +#define IDMAP_ERR_INTERNAL -9999 +#define IDMAP_ERR_MEMORY -9998 +#define IDMAP_ERR_NORESULT -9997 +#define IDMAP_ERR_NOTUSER -9996 +#define IDMAP_ERR_NOTGROUP -9995 +#define IDMAP_ERR_NOTSUPPORTED -9994 +#define IDMAP_ERR_W2U_NAMERULE -9993 +#define IDMAP_ERR_U2W_NAMERULE -9992 +#define IDMAP_ERR_CACHE -9991 +#define IDMAP_ERR_DB -9990 +#define IDMAP_ERR_ARG -9989 +#define IDMAP_ERR_SID -9988 +#define IDMAP_ERR_IDTYPE -9987 +#define IDMAP_ERR_RPC_HANDLE -9986 +#define IDMAP_ERR_RPC -9985 +#define IDMAP_ERR_CLIENT_HANDLE -9984 +#define IDMAP_ERR_BUSY -9983 +#define IDMAP_ERR_PERMISSION_DENIED -9982 +#define IDMAP_ERR_NOMAPPING -9981 +#define IDMAP_ERR_NEW_ID_ALLOC_REQD -9980 +#define IDMAP_ERR_DOMAIN -9979 +#define IDMAP_ERR_SECURITY -9978 +#define IDMAP_ERR_NOTFOUND -9977 +#define IDMAP_ERR_DOMAIN_NOTFOUND -9976 +#define IDMAP_ERR_UPDATE_NOTALLOWED -9975 +#define IDMAP_ERR_CFG -9974 +#define IDMAP_ERR_CFG_CHANGE -9973 +#define IDMAP_ERR_NOTMAPPED_WELLKNOWN -9972 +#define IDMAP_ERR_RETRIABLE_NET_ERR -9971 +#define IDMAP_ERR_W2U_NAMERULE_CONFLICT -9970 +#define IDMAP_ERR_U2W_NAMERULE_CONFLICT -9969 +#define IDMAP_ERR_BAD_UTF8 -9968 +#define IDMAP_ERR_NONE_GENERATED -9967 +#define IDMAP_ERR_PROP_UNKNOWN -9966 +#define IDMAP_ERR_NS_LDAP_OP_FAILED -9965 +#define IDMAP_ERR_NS_LDAP_PARTIAL -9964 +#define IDMAP_ERR_NS_LDAP_CFG -9963 +#define IDMAP_ERR_NS_LDAP_BAD_WINNAME -9962 +#define IDMAP_ERR_NO_ACTIVEDIRECTORY -9961 + +/* Reserved GIDs for some well-known SIDs */ +#define IDMAP_WK_LOCAL_SYSTEM_GID 2147483648U /* 0x80000000 */ +#define IDMAP_WK_CREATOR_GROUP_GID 2147483649U +#define IDMAP_WK__MAX_GID 2147483649U + +/* Reserved UIDs for some well-known SIDs */ +#define IDMAP_WK_CREATOR_OWNER_UID 2147483648U +#define IDMAP_WK__MAX_UID 2147483648U + +/* Reserved SIDs */ +#define IDMAP_WK_CREATOR_SID_AUTHORITY 
"S-1-3" + +/* + * Max door RPC size for ID mapping (can't be too large relative to the + * default user-land thread stack size, since clnt_door_call() + * alloca()s). See libidmap:idmap_init(). + */ +#define IDMAP_MAX_DOOR_RPC (256 * 1024) + +#define IDMAP_SENTINEL_PID UINT32_MAX +#define IDMAP_ID_IS_EPHEMERAL(pid) \ + (((pid) > INT32_MAX) && ((pid) != IDMAP_SENTINEL_PID)) + +#endif /* _SYS_IDMAP_H */ diff --git a/uts/common/sys/isa_defs.h b/uts/common/sys/isa_defs.h new file mode 100644 index 000000000000..c5849546d874 --- /dev/null +++ b/uts/common/sys/isa_defs.h @@ -0,0 +1,487 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ISA_DEFS_H +#define _SYS_ISA_DEFS_H + +/* + * This header file serves to group a set of well known defines and to + * set these for each instruction set architecture. These defines may + * be divided into two groups; characteristics of the processor and + * implementation choices for Solaris on a processor. + * + * Processor Characteristics: + * + * _LITTLE_ENDIAN / _BIG_ENDIAN: + * The natural byte order of the processor. 
A pointer to an int points + * to the least/most significant byte of that int. + * + * _STACK_GROWS_UPWARD / _STACK_GROWS_DOWNWARD: + * The processor specific direction of stack growth. A push onto the + * stack increases/decreases the stack pointer, so it stores data at + * successively higher/lower addresses. (Stackless machines ignored + * without regrets). + * + * _LONG_LONG_HTOL / _LONG_LONG_LTOH: + * A pointer to a long long points to the most/least significant long + * within that long long. + * + * _BIT_FIELDS_HTOL / _BIT_FIELDS_LTOH: + * The C compiler assigns bit fields from the high/low to the low/high end + * of an int (most to least significant vs. least to most significant). + * + * _IEEE_754: + * The processor (or supported implementations of the processor) + * supports the ieee-754 floating point standard. No other floating + * point standards are supported (or significant). Any other supported + * floating point formats are expected to be cased on the ISA processor + * symbol. + * + * _CHAR_IS_UNSIGNED / _CHAR_IS_SIGNED: + * The C Compiler implements objects of type `char' as `unsigned' or + * `signed' respectively. This is really an implementation choice of + * the compiler writer, but it is specified in the ABI and tends to + * be uniform across compilers for an instruction set architecture. + * Hence, it has the properties of a processor characteristic. + * + * _CHAR_ALIGNMENT / _SHORT_ALIGNMENT / _INT_ALIGNMENT / _LONG_ALIGNMENT / + * _LONG_LONG_ALIGNMENT / _DOUBLE_ALIGNMENT / _LONG_DOUBLE_ALIGNMENT / + * _POINTER_ALIGNMENT / _FLOAT_ALIGNMENT: + * The ABI defines alignment requirements of each of the primitive + * object types. Some, if not all, may be hardware requirements as + * well. The values are expressed in "byte-alignment" units. + * + * _MAX_ALIGNMENT: + * The most stringent alignment requirement as specified by the ABI. + * Equal to the maximum of all the above _XXX_ALIGNMENT values. 
+ * + * _ALIGNMENT_REQUIRED: + * True or false (1 or 0) whether or not the hardware requires the ABI + * alignment. + * + * _LONG_LONG_ALIGNMENT_32 + * The 32-bit ABI supported by a 64-bit kernel may have different + * alignment requirements for primitive object types. The value of this + * identifier is expressed in "byte-alignment" units. + * + * _HAVE_CPUID_INSN + * This indicates that the architecture supports the 'cpuid' + * instruction as defined by Intel. (Intel allows other vendors + * to extend the instruction for their own purposes.) + * + * + * Implementation Choices: + * + * _ILP32 / _LP64: + * This specifies the compiler data type implementation as specified in + * the relevant ABI. The choice between these is strongly influenced + * by the underlying hardware, but is not absolutely tied to it. + * Currently only two data type models are supported: + * + * _ILP32: + * Int/Long/Pointer are 32 bits. This is the historical UNIX + * and Solaris implementation. Due to its historical standing, + * this is the default case. + * + * _LP64: + * Long/Pointer are 64 bits, Int is 32 bits. This is the chosen + * implementation for 64-bit ABIs such as SPARC V9. + * + * _I32LPx: + * A compilation environment where 'int' is 32-bit, and + * longs and pointers are simply the same size. + * + * In all cases, Char is 8 bits and Short is 16 bits. + * + * _SUNOS_VTOC_8 / _SUNOS_VTOC_16 / _SVR4_VTOC_16: + * This specifies the form of the disk VTOC (or label): + * + * _SUNOS_VTOC_8: + * This is a VTOC form which is upwardly compatible with the + * SunOS 4.x disk label and allows 8 partitions per disk. + * + * _SUNOS_VTOC_16: + * In this format the incore vtoc image matches the ondisk + * version. It allows 16 slices per disk, and is not + * compatible with the SunOS 4.x disk label. + * + * Note that these are not the only two VTOC forms possible and + * additional forms may be added. One possible form would be the + * SVr4 VTOC form. 
The symbol for that is reserved now, although + * it is not implemented. + * + * _SVR4_VTOC_16: + * This VTOC form is compatible with the System V Release 4 + * VTOC (as implemented on the SVr4 Intel and 3b ports) with + * 16 partitions per disk. + * + * + * _DMA_USES_PHYSADDR / _DMA_USES_VIRTADDR + * This describes the type of addresses used by system DMA: + * + * _DMA_USES_PHYSADDR: + * This type of DMA, used in the x86 implementation, + * requires physical addresses for DMA buffers. The 24-bit + * addresses used by some legacy boards are the source of the + * "low-memory" (<16MB) requirement for some devices using DMA. + * + * _DMA_USES_VIRTADDR: + * This method of DMA allows the use of virtual addresses for + * DMA transfers. + * + * _FIRMWARE_NEEDS_FDISK / _NO_FDISK_PRESENT + * This indicates the presence/absence of an fdisk table. + * + * _FIRMWARE_NEEDS_FDISK + * The fdisk table is required by system firmware. If present, + * it allows a disk to be subdivided into multiple fdisk + * partitions, each of which is equivalent to a separate, + * virtual disk. This enables the co-existence of multiple + * operating systems on a shared hard disk. + * + * _NO_FDISK_PRESENT + * If the fdisk table is absent, it is assumed that the entire + * media is allocated for a single operating system. + * + * _HAVE_TEM_FIRMWARE + * Defined if this architecture has the (fallback) option of + * using prom_* calls for doing I/O if a suitable kernel driver + * is not available to do it. + * + * _DONT_USE_1275_GENERIC_NAMES + * Controls whether or not device tree node names should + * comply with the IEEE 1275 "Generic Names" Recommended + * Practice. With _DONT_USE_1275_GENERIC_NAMES, device-specific + * names identifying the particular device will be used. + * + * __i386_COMPAT + * This indicates whether the i386 ABI is supported as a *non-native* + * mode for the platform.
When this symbol is defined: + * - 32-bit xstat-style system calls are enabled + * - 32-bit xmknod-style system calls are enabled + * - 32-bit system calls use i386 sizes -and- alignments + * + * Note that this is NOT defined for the i386 native environment! + * + * __x86 + * This is ONLY a synonym for defined(__i386) || defined(__amd64) + * which is useful only insofar as these two architectures share + * common attributes. Analogous to __sparc. + * + * _PSM_MODULES + * This indicates whether or not the implementation uses PSM + * modules for processor support, reading /etc/mach from inside + * the kernel to extract a list. + * + * _RTC_CONFIG + * This indicates whether or not the implementation uses /etc/rtc_config + * to configure the real-time clock in the kernel. + * + * _UNIX_KRTLD + * This indicates that the implementation uses a dynamically + * linked unix + krtld to form the core kernel image at boot + * time, or (in the absence of this symbol) a prelinked kernel image. + * + * _OBP + * This indicates the firmware interface is OBP. + * + * _SOFT_HOSTID + * This indicates that the implementation obtains the hostid + * from the file /etc/hostid, rather than from hardware. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The following set of definitions characterize Solaris on AMD's + * 64-bit systems. 
+ */ +#if defined(__x86_64) || defined(__amd64) + +#if !defined(__amd64) +#define __amd64 /* preferred guard */ +#endif + +#if !defined(__x86) +#define __x86 +#endif + +/* + * Define the appropriate "processor characteristics" + */ +#define _LITTLE_ENDIAN +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 8 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 16 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 16 +#define _ALIGNMENT_REQUIRED 1 + +/* + * Different alignment constraints for the i386 ABI in compatibility mode + */ +#define _LONG_LONG_ALIGNMENT_32 4 + +/* + * Define the appropriate "implementation choices". + */ +#if !defined(_LP64) +#define _LP64 +#endif +#if !defined(_I32LPx) && defined(_KERNEL) +#define _I32LPx +#endif +#define _MULTI_DATAMODEL +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define __i386_COMPAT +#define _PSM_MODULES +#define _RTC_CONFIG +#define _SOFT_HOSTID +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +/* + * The feature test macro __i386 is generic for all processors implementing + * the Intel 386 instruction set or a superset of it. Specifically, this + * includes all members of the 386, 486, and Pentium family of processors. 
+ */ +#elif defined(__i386) || defined(__i386__) + +#if !defined(__i386) +#define __i386 +#endif + +#if !defined(__x86) +#define __x86 +#endif + +/* + * Define the appropriate "processor characteristics" + */ +#define _LITTLE_ENDIAN +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 4 +#define _LONG_LONG_ALIGNMENT 4 +#define _DOUBLE_ALIGNMENT 4 +#define _DOUBLE_COMPLEX_ALIGNMENT 4 +#define _LONG_DOUBLE_ALIGNMENT 4 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 +#define _POINTER_ALIGNMENT 4 +#define _MAX_ALIGNMENT 4 +#define _ALIGNMENT_REQUIRED 0 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices". + */ +#define _ILP32 +#if !defined(_I32LPx) && defined(_KERNEL) +#define _I32LPx +#endif +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define _PSM_MODULES +#define _RTC_CONFIG +#define _SOFT_HOSTID +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +/* + * The following set of definitions characterize the Solaris on SPARC systems. + * + * The symbol __sparc indicates any of the SPARC family of processor + * architectures. This includes SPARC V7, SPARC V8 and SPARC V9. + * + * The symbol __sparcv8 indicates the 32-bit SPARC V8 architecture as defined + * by Version 8 of the SPARC Architecture Manual. (SPARC V7 is close enough + * to SPARC V8 for the former to be subsumed into the latter definition.) + * + * The symbol __sparcv9 indicates the 64-bit SPARC V9 architecture as defined + * by Version 9 of the SPARC Architecture Manual. + * + * The symbols __sparcv8 and __sparcv9 are mutually exclusive, and are only + * relevant when the symbol __sparc is defined. 
+ */ +/* + * XXX Due to the existence of 5110166, "defined(__sparcv9)" needs to be added + * to support backwards builds. This workaround should be removed in s10_71. + */ +#elif defined(__sparc) || defined(__sparcv9) || defined(__sparc__) +#if !defined(__sparc) +#define __sparc +#endif + +/* + * You can be 32-bit or 64-bit, but not both at the same time. + */ +#if defined(__sparcv8) && defined(__sparcv9) +#error "SPARC Versions 8 and 9 are mutually exclusive choices" +#endif + +/* + * Existing compilers do not set __sparcv8. Years will transpire before + * the compilers can be depended on to set the feature test macro. In + * the interim, we'll set it here on the basis of historical behaviour; + * if you haven't asked for SPARC V9, then you must've meant SPARC V8. + */ +#if !defined(__sparcv9) && !defined(__sparcv8) +#define __sparcv8 +#endif + +/* + * Define the appropriate "processor characteristics" shared between + * all Solaris on SPARC systems. + */ +#define _BIG_ENDIAN +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_HTOL +#define _BIT_FIELDS_HTOL +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _ALIGNMENT_REQUIRED 1 + +/* + * Define the appropriate "implementation choices" shared between versions. + */ +#define _SUNOS_VTOC_8 +#define _DMA_USES_VIRTADDR +#define _NO_FDISK_PRESENT +#define _HAVE_TEM_FIRMWARE +#define _OBP + +/* + * The following set of definitions characterize the implementation of + * 32-bit Solaris on SPARC V8 systems. 
+ */ +#if defined(__sparcv8) + +/* + * Define the appropriate "processor characteristics" + */ +#define _LONG_ALIGNMENT 4 +#define _LONG_DOUBLE_ALIGNMENT 8 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 8 +#define _POINTER_ALIGNMENT 4 +#define _MAX_ALIGNMENT 8 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices" + */ +#define _ILP32 +#if !defined(_I32LPx) && defined(_KERNEL) +#define _I32LPx +#endif + +/* + * The following set of definitions characterize the implementation of + * 64-bit Solaris on SPARC V9 systems. + */ +#elif defined(__sparcv9) + +/* + * Define the appropriate "processor characteristics" + */ +#define _LONG_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 16 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 16 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices" + */ +#if !defined(_LP64) +#define _LP64 +#endif +#if !defined(_I32LPx) +#define _I32LPx +#endif +#define _MULTI_DATAMODEL + +#else +#error "unknown SPARC version" +#endif + +/* + * #error is strictly ansi-C, but works as well as anything for K&R systems. + */ +#else +#error "ISA not supported" +#endif + +#if defined(_ILP32) && defined(_LP64) +#error "Both _ILP32 and _LP64 are defined" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ISA_DEFS_H */ diff --git a/uts/common/sys/list.h b/uts/common/sys/list.h new file mode 100644 index 000000000000..8339b6226d11 --- /dev/null +++ b/uts/common/sys/list.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LIST_H +#define _SYS_LIST_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/list_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct list_node list_node_t; +typedef struct list list_t; + +void list_create(list_t *, size_t, size_t); +void list_destroy(list_t *); + +void list_insert_after(list_t *, void *, void *); +void list_insert_before(list_t *, void *, void *); +void list_insert_head(list_t *, void *); +void list_insert_tail(list_t *, void *); +void list_remove(list_t *, void *); +void *list_remove_head(list_t *); +void *list_remove_tail(list_t *); +void list_move_tail(list_t *, list_t *); + +void *list_head(list_t *); +void *list_tail(list_t *); +void *list_next(list_t *, void *); +void *list_prev(list_t *, void *); +int list_is_empty(list_t *); + +void list_link_init(list_node_t *); +void list_link_replace(list_node_t *, list_node_t *); + +int list_link_active(list_node_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LIST_H */ diff --git a/uts/common/sys/list_impl.h b/uts/common/sys/list_impl.h new file mode 100644 index 000000000000..9c42f8832023 --- /dev/null +++ b/uts/common/sys/list_impl.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LIST_IMPL_H +#define _SYS_LIST_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct list_node { + struct list_node *list_next; + struct list_node *list_prev; +}; + +struct list { + size_t list_size; + size_t list_offset; + struct list_node list_head; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LIST_IMPL_H */ diff --git a/uts/common/sys/note.h b/uts/common/sys/note.h new file mode 100644 index 000000000000..2cb7fd89b7dd --- /dev/null +++ b/uts/common/sys/note.h @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994 by Sun Microsystems, Inc. + */ + +/* + * sys/note.h: interface for annotating source with info for tools + * + * This is the underlying interface; NOTE (/usr/include/note.h) is the + * preferred interface, but all exported header files should include this + * file directly and use _NOTE so as not to take "NOTE" from the user's + * namespace. For consistency, *all* kernel source should use _NOTE. + * + * By default, annotations expand to nothing. This file implements + * that. Tools using annotations will interpose a different version + * of this file that will expand annotations as needed. + */ + +#ifndef _SYS_NOTE_H +#define _SYS_NOTE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _NOTE +#define _NOTE(s) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_NOTE_H */ diff --git a/uts/common/sys/nvpair.h b/uts/common/sys/nvpair.h new file mode 100644 index 000000000000..30ff4e0667b3 --- /dev/null +++ b/uts/common/sys/nvpair.h @@ -0,0 +1,281 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_NVPAIR_H +#define _SYS_NVPAIR_H + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/va_list.h> + +#if defined(_KERNEL) && !defined(_BOOT) +#include <sys/kmem.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + DATA_TYPE_UNKNOWN = 0, + DATA_TYPE_BOOLEAN, + DATA_TYPE_BYTE, + DATA_TYPE_INT16, + DATA_TYPE_UINT16, + DATA_TYPE_INT32, + DATA_TYPE_UINT32, + DATA_TYPE_INT64, + DATA_TYPE_UINT64, + DATA_TYPE_STRING, + DATA_TYPE_BYTE_ARRAY, + DATA_TYPE_INT16_ARRAY, + DATA_TYPE_UINT16_ARRAY, + DATA_TYPE_INT32_ARRAY, + DATA_TYPE_UINT32_ARRAY, + DATA_TYPE_INT64_ARRAY, + DATA_TYPE_UINT64_ARRAY, + DATA_TYPE_STRING_ARRAY, + DATA_TYPE_HRTIME, + DATA_TYPE_NVLIST, + DATA_TYPE_NVLIST_ARRAY, + DATA_TYPE_BOOLEAN_VALUE, + DATA_TYPE_INT8, + DATA_TYPE_UINT8, + DATA_TYPE_BOOLEAN_ARRAY, + DATA_TYPE_INT8_ARRAY, +#if !defined(_KERNEL) + DATA_TYPE_UINT8_ARRAY, + DATA_TYPE_DOUBLE +#else + DATA_TYPE_UINT8_ARRAY +#endif +} data_type_t; + +typedef struct nvpair { + int32_t nvp_size; /* size of this nvpair */ + int16_t nvp_name_sz; /* length of name string */ + int16_t nvp_reserve; /* not used */ + int32_t nvp_value_elem; /* number of elements for array types */ + data_type_t nvp_type; /* type of value */ + /* name string */ + /* aligned ptr array for string arrays */ + /* aligned array of data for value */ +} nvpair_t; + +/* nvlist header */ +typedef struct nvlist { + int32_t nvl_version; + uint32_t nvl_nvflag; /* persistent flags */ + uint64_t nvl_priv; /* ptr to private data if not packed */ + uint32_t nvl_flag; + int32_t nvl_pad; /* currently not used, for alignment */ +} nvlist_t; + +/* nvp implementation version */ 
+#define NV_VERSION 0 + +/* nvlist pack encoding */ +#define NV_ENCODE_NATIVE 0 +#define NV_ENCODE_XDR 1 + +/* nvlist persistent unique name flags, stored in nvl_nvflag */ +#define NV_UNIQUE_NAME 0x1 +#define NV_UNIQUE_NAME_TYPE 0x2 + +/* nvlist lookup pairs related flags */ +#define NV_FLAG_NOENTOK 0x1 + +/* convenience macros */ +#define NV_ALIGN(x) (((ulong_t)(x) + 7ul) & ~7ul) +#define NV_ALIGN4(x) (((x) + 3) & ~3) + +#define NVP_SIZE(nvp) ((nvp)->nvp_size) +#define NVP_NAME(nvp) ((char *)(nvp) + sizeof (nvpair_t)) +#define NVP_TYPE(nvp) ((nvp)->nvp_type) +#define NVP_NELEM(nvp) ((nvp)->nvp_value_elem) +#define NVP_VALUE(nvp) ((char *)(nvp) + NV_ALIGN(sizeof (nvpair_t) \ + + (nvp)->nvp_name_sz)) + +#define NVL_VERSION(nvl) ((nvl)->nvl_version) +#define NVL_SIZE(nvl) ((nvl)->nvl_size) +#define NVL_FLAG(nvl) ((nvl)->nvl_flag) + +/* NV allocator framework */ +typedef struct nv_alloc_ops nv_alloc_ops_t; + +typedef struct nv_alloc { + const nv_alloc_ops_t *nva_ops; + void *nva_arg; +} nv_alloc_t; + +struct nv_alloc_ops { + int (*nv_ao_init)(nv_alloc_t *, __va_list); + void (*nv_ao_fini)(nv_alloc_t *); + void *(*nv_ao_alloc)(nv_alloc_t *, size_t); + void (*nv_ao_free)(nv_alloc_t *, void *, size_t); + void (*nv_ao_reset)(nv_alloc_t *); +}; + +extern const nv_alloc_ops_t *nv_fixed_ops; +extern nv_alloc_t *nv_alloc_nosleep; + +#if defined(_KERNEL) && !defined(_BOOT) +extern nv_alloc_t *nv_alloc_sleep; +#endif + +int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *, /* args */ ...); +void nv_alloc_reset(nv_alloc_t *); +void nv_alloc_fini(nv_alloc_t *); + +/* list management */ +int nvlist_alloc(nvlist_t **, uint_t, int); +void nvlist_free(nvlist_t *); +int nvlist_size(nvlist_t *, size_t *, int); +int nvlist_pack(nvlist_t *, char **, size_t *, int, int); +int nvlist_unpack(char *, size_t, nvlist_t **, int); +int nvlist_dup(nvlist_t *, nvlist_t **, int); +int nvlist_merge(nvlist_t *, nvlist_t *, int); + +uint_t nvlist_nvflag(nvlist_t *); + +int nvlist_xalloc(nvlist_t
**, uint_t, nv_alloc_t *); +int nvlist_xpack(nvlist_t *, char **, size_t *, int, nv_alloc_t *); +int nvlist_xunpack(char *, size_t, nvlist_t **, nv_alloc_t *); +int nvlist_xdup(nvlist_t *, nvlist_t **, nv_alloc_t *); +nv_alloc_t *nvlist_lookup_nv_alloc(nvlist_t *); + +int nvlist_add_nvpair(nvlist_t *, nvpair_t *); +int nvlist_add_boolean(nvlist_t *, const char *); +int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); +int nvlist_add_byte(nvlist_t *, const char *, uchar_t); +int nvlist_add_int8(nvlist_t *, const char *, int8_t); +int nvlist_add_uint8(nvlist_t *, const char *, uint8_t); +int nvlist_add_int16(nvlist_t *, const char *, int16_t); +int nvlist_add_uint16(nvlist_t *, const char *, uint16_t); +int nvlist_add_int32(nvlist_t *, const char *, int32_t); +int nvlist_add_uint32(nvlist_t *, const char *, uint32_t); +int nvlist_add_int64(nvlist_t *, const char *, int64_t); +int nvlist_add_uint64(nvlist_t *, const char *, uint64_t); +int nvlist_add_string(nvlist_t *, const char *, const char *); +int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); +int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t); +int nvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t); +int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t); +int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t); +int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t); +int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t); +int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t); +int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t); +int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t); +int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t); +int nvlist_add_string_array(nvlist_t *, const char *, char *const *, uint_t); +int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t); +int 
nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t); +#if !defined(_KERNEL) +int nvlist_add_double(nvlist_t *, const char *, double); +#endif + +int nvlist_remove(nvlist_t *, const char *, data_type_t); +int nvlist_remove_all(nvlist_t *, const char *); +int nvlist_remove_nvpair(nvlist_t *, nvpair_t *); + +int nvlist_lookup_boolean(nvlist_t *, const char *); +int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *); +int nvlist_lookup_byte(nvlist_t *, const char *, uchar_t *); +int nvlist_lookup_int8(nvlist_t *, const char *, int8_t *); +int nvlist_lookup_uint8(nvlist_t *, const char *, uint8_t *); +int nvlist_lookup_int16(nvlist_t *, const char *, int16_t *); +int nvlist_lookup_uint16(nvlist_t *, const char *, uint16_t *); +int nvlist_lookup_int32(nvlist_t *, const char *, int32_t *); +int nvlist_lookup_uint32(nvlist_t *, const char *, uint32_t *); +int nvlist_lookup_int64(nvlist_t *, const char *, int64_t *); +int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *); +int nvlist_lookup_string(nvlist_t *, const char *, char **); +int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **); +int nvlist_lookup_boolean_array(nvlist_t *, const char *, + boolean_t **, uint_t *); +int nvlist_lookup_byte_array(nvlist_t *, const char *, uchar_t **, uint_t *); +int nvlist_lookup_int8_array(nvlist_t *, const char *, int8_t **, uint_t *); +int nvlist_lookup_uint8_array(nvlist_t *, const char *, uint8_t **, uint_t *); +int nvlist_lookup_int16_array(nvlist_t *, const char *, int16_t **, uint_t *); +int nvlist_lookup_uint16_array(nvlist_t *, const char *, uint16_t **, uint_t *); +int nvlist_lookup_int32_array(nvlist_t *, const char *, int32_t **, uint_t *); +int nvlist_lookup_uint32_array(nvlist_t *, const char *, uint32_t **, uint_t *); +int nvlist_lookup_int64_array(nvlist_t *, const char *, int64_t **, uint_t *); +int nvlist_lookup_uint64_array(nvlist_t *, const char *, uint64_t **, uint_t *); +int nvlist_lookup_string_array(nvlist_t *, const char *, 
char ***, uint_t *); +int nvlist_lookup_nvlist_array(nvlist_t *, const char *, + nvlist_t ***, uint_t *); +int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *); +int nvlist_lookup_pairs(nvlist_t *, int, ...); +#if !defined(_KERNEL) +int nvlist_lookup_double(nvlist_t *, const char *, double *); +#endif + +int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **); +int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *, nvpair_t **, + int *, char **); +boolean_t nvlist_exists(nvlist_t *, const char *); +boolean_t nvlist_empty(nvlist_t *); + +/* processing nvpair */ +nvpair_t *nvlist_next_nvpair(nvlist_t *, nvpair_t *); +nvpair_t *nvlist_prev_nvpair(nvlist_t *, nvpair_t *); +char *nvpair_name(nvpair_t *); +data_type_t nvpair_type(nvpair_t *); +int nvpair_type_is_array(nvpair_t *); +int nvpair_value_boolean_value(nvpair_t *, boolean_t *); +int nvpair_value_byte(nvpair_t *, uchar_t *); +int nvpair_value_int8(nvpair_t *, int8_t *); +int nvpair_value_uint8(nvpair_t *, uint8_t *); +int nvpair_value_int16(nvpair_t *, int16_t *); +int nvpair_value_uint16(nvpair_t *, uint16_t *); +int nvpair_value_int32(nvpair_t *, int32_t *); +int nvpair_value_uint32(nvpair_t *, uint32_t *); +int nvpair_value_int64(nvpair_t *, int64_t *); +int nvpair_value_uint64(nvpair_t *, uint64_t *); +int nvpair_value_string(nvpair_t *, char **); +int nvpair_value_nvlist(nvpair_t *, nvlist_t **); +int nvpair_value_boolean_array(nvpair_t *, boolean_t **, uint_t *); +int nvpair_value_byte_array(nvpair_t *, uchar_t **, uint_t *); +int nvpair_value_int8_array(nvpair_t *, int8_t **, uint_t *); +int nvpair_value_uint8_array(nvpair_t *, uint8_t **, uint_t *); +int nvpair_value_int16_array(nvpair_t *, int16_t **, uint_t *); +int nvpair_value_uint16_array(nvpair_t *, uint16_t **, uint_t *); +int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *); +int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *); +int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *); 
+int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *); +int nvpair_value_string_array(nvpair_t *, char ***, uint_t *); +int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *); +int nvpair_value_hrtime(nvpair_t *, hrtime_t *); +#if !defined(_KERNEL) +int nvpair_value_double(nvpair_t *, double *); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_NVPAIR_H */ diff --git a/uts/common/sys/nvpair_impl.h b/uts/common/sys/nvpair_impl.h new file mode 100644 index 000000000000..f12dbbfe6ef5 --- /dev/null +++ b/uts/common/sys/nvpair_impl.h @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _NVPAIR_IMPL_H +#define _NVPAIR_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/nvpair.h> + +/* + * The structures here are provided for information and debugging purposes + * only and may be changed in the future.
+ */ + +/* + * implementation linked list for pre-packed data + */ +typedef struct i_nvp i_nvp_t; + +struct i_nvp { + union { + uint64_t _nvi_align; /* ensure alignment */ + struct { + i_nvp_t *_nvi_next; /* pointer to next nvpair */ + i_nvp_t *_nvi_prev; /* pointer to prev nvpair */ + } _nvi; + } _nvi_un; + nvpair_t nvi_nvp; /* nvpair */ +}; +#define nvi_next _nvi_un._nvi._nvi_next +#define nvi_prev _nvi_un._nvi._nvi_prev + +typedef struct { + i_nvp_t *nvp_list; /* linked list of nvpairs */ + i_nvp_t *nvp_last; /* last nvpair */ + i_nvp_t *nvp_curr; /* current walker nvpair */ + nv_alloc_t *nvp_nva; /* pluggable allocator */ + uint32_t nvp_stat; /* internal state */ +} nvpriv_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _NVPAIR_IMPL_H */ diff --git a/uts/common/sys/processor.h b/uts/common/sys/processor.h new file mode 100644 index 000000000000..c0fe6e21b85f --- /dev/null +++ b/uts/common/sys/processor.h @@ -0,0 +1,149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T + * All Rights Reserved + * + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_PROCESSOR_H +#define _SYS_PROCESSOR_H + +#include <sys/types.h> +#include <sys/procset.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Definitions for p_online, processor_info & lgrp system calls. + */ + +/* + * Type for an lgrpid + */ +typedef uint16_t lgrpid_t; + +/* + * Type for processor name (CPU number). + */ +typedef int processorid_t; +typedef int chipid_t; + +/* + * Flags and return values for p_online(2), and pi_state for processor_info(2). + * These flags are *not* for in-kernel examination of CPU states. + * See <sys/cpuvar.h> for appropriate informational functions. + */ +#define P_OFFLINE 0x0001 /* processor is offline, as quiet as possible */ +#define P_ONLINE 0x0002 /* processor is online */ +#define P_STATUS 0x0003 /* value passed to p_online to request status */ +#define P_FAULTED 0x0004 /* processor is offline, in faulted state */ +#define P_POWEROFF 0x0005 /* processor is powered off */ +#define P_NOINTR 0x0006 /* processor is online, but no I/O interrupts */ +#define P_SPARE 0x0007 /* processor is offline, can be reactivated */ +#define P_BAD P_FAULTED /* unused but defined by USL */ +#define P_FORCED 0x10000000 /* force processor offline */ + +/* + * String names for processor states defined above. + */ +#define PS_OFFLINE "off-line" +#define PS_ONLINE "on-line" +#define PS_FAULTED "faulted" +#define PS_POWEROFF "powered-off" +#define PS_NOINTR "no-intr" +#define PS_SPARE "spare" + +/* + * Structure filled in by processor_info(2). This structure + * SHOULD NOT BE MODIFIED. Changes to the structure would + * negate ABI compatibility. + * + * The string fields are guaranteed to contain a NULL. + * + * The pi_fputypes field contains a (possibly empty) comma-separated + * list of floating point identifier strings. 
+ */ +#define PI_TYPELEN 16 /* max size of CPU type string */ +#define PI_FPUTYPE 32 /* max size of FPU types string */ + +typedef struct { + int pi_state; /* processor state, see above */ + char pi_processor_type[PI_TYPELEN]; /* ASCII CPU type */ + char pi_fputypes[PI_FPUTYPE]; /* ASCII FPU types */ + int pi_clock; /* CPU clock freq in MHz */ +} processor_info_t; + +/* + * Binding values for processor_bind(2) + */ +#define PBIND_NONE -1 /* LWP/thread is not bound */ +#define PBIND_QUERY -2 /* don't set, just return the binding */ +#define PBIND_HARD -3 /* prevents offlining CPU (default) */ +#define PBIND_SOFT -4 /* allows offlining CPU */ +#define PBIND_QUERY_TYPE -5 /* Return binding type */ + +/* + * User-level system call interface prototypes + */ +#ifndef _KERNEL +#ifdef __STDC__ + +extern int p_online(processorid_t processorid, int flag); +extern int processor_info(processorid_t processorid, + processor_info_t *infop); +extern int processor_bind(idtype_t idtype, id_t id, + processorid_t processorid, processorid_t *obind); +extern processorid_t getcpuid(void); +extern lgrpid_t gethomelgroup(void); + +#else + +extern int p_online(); +extern int processor_info(); +extern int processor_bind(); +extern processorid_t getcpuid(); +extern lgrpid_t gethomelgroup(); + +#endif /* __STDC__ */ + +#else /* _KERNEL */ + +/* + * Internal interface prototypes + */ +extern int p_online_internal(processorid_t, int, int *); +extern int p_online_internal_locked(processorid_t, int, int *); + +#endif /* !_KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PROCESSOR_H */ diff --git a/uts/common/sys/procset.h b/uts/common/sys/procset.h new file mode 100644 index 000000000000..c3b58675746e --- /dev/null +++ b/uts/common/sys/procset.h @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +#ifndef _SYS_PROCSET_H +#define _SYS_PROCSET_H + +#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.6 */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/feature_tests.h> +#include <sys/types.h> +#include <sys/signal.h> + +/* + * This file defines the data needed to specify a set of + * processes. These types are used by the sigsend, sigsendset, + * priocntl, priocntlset, waitid, evexit, and evexitset system + * calls. + */ +#define P_INITPID 1 +#define P_INITUID 0 +#define P_INITPGID 0 + + +/* + * The following defines the values for an identifier type. It + * specifies the interpretation of an id value. An idtype and + * id together define a simple set of processes. + */ +typedef enum +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) + idtype /* pollutes XPG4.2 namespace */ +#endif + { + P_PID, /* A process identifier. */ + P_PPID, /* A parent process identifier. */ + P_PGID, /* A process group (job control group) */ + /* identifier. */ + P_SID, /* A session identifier. */ + P_CID, /* A scheduling class identifier. */ + P_UID, /* A user identifier. */ + P_GID, /* A group identifier. 
*/ + P_ALL, /* All processes. */ + P_LWPID, /* An LWP identifier. */ + P_TASKID, /* A task identifier. */ + P_PROJID, /* A project identifier. */ + P_POOLID, /* A pool identifier. */ + P_ZONEID, /* A zone identifier. */ + P_CTID, /* A (process) contract identifier. */ + P_CPUID, /* CPU identifier. */ + P_PSETID /* Processor set identifier */ +} idtype_t; + + +/* + * The following defines the operations which can be performed to + * combine two simple sets of processes to form another set of + * processes. + */ +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) +typedef enum idop { + POP_DIFF, /* Set difference. The processes which */ + /* are in the left operand set and not */ + /* in the right operand set. */ + POP_AND, /* Set intersection. The processes */ + /* which are in both the left and right */ + /* operand sets. */ + POP_OR, /* Set union. The processes */ + /* which are in either the left or the */ + /* right operand sets (or both). */ + POP_XOR /* Set exclusive or. The processes */ + /* which are in either the left or */ + /* right operand sets but not in both. */ +} idop_t; + + +/* + * The following structure is used to define a set of processes. + * The set is defined in terms of two simple sets of processes + * and an operator which operates on these two operand sets. + */ +typedef struct procset { + idop_t p_op; /* The operator connecting the */ + /* following two operands each */ + /* of which is a simple set of */ + /* processes. */ + + idtype_t p_lidtype; + /* The type of the left operand */ + /* simple set. */ + id_t p_lid; /* The id of the left operand. */ + + idtype_t p_ridtype; + /* The type of the right */ + /* operand simple set. */ + id_t p_rid; /* The id of the right operand. */ +} procset_t; + +/* + * The following macro can be used to initialize a procset_t + * structure.
+ */ +#define setprocset(psp, op, ltype, lid, rtype, rid) \ + (psp)->p_op = (op); \ + (psp)->p_lidtype = (ltype); \ + (psp)->p_lid = (lid); \ + (psp)->p_ridtype = (rtype); \ + (psp)->p_rid = (rid); + +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ + +#ifdef _KERNEL + +struct proc; + +extern int dotoprocs(procset_t *, int (*)(), char *); +extern int dotolwp(procset_t *, int (*)(), char *); +extern int procinset(struct proc *, procset_t *); +extern int sigsendproc(struct proc *, sigsend_t *); +extern int sigsendset(procset_t *, sigsend_t *); +extern boolean_t cur_inset_only(procset_t *); +extern id_t getmyid(idtype_t); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PROCSET_H */ diff --git a/uts/common/sys/synch.h b/uts/common/sys/synch.h new file mode 100644 index 000000000000..6431bf22bca0 --- /dev/null +++ b/uts/common/sys/synch.h @@ -0,0 +1,162 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_SYNCH_H +#define _SYS_SYNCH_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef _ASM +#include <sys/types.h> +#include <sys/int_types.h> +#endif /* _ASM */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ASM +/* + * Thread and LWP mutexes have the same type + * definitions. + * + * NOTE: + * + * POSIX requires that <pthread.h> define the structures pthread_mutex_t + * and pthread_cond_t. Although these structures are identical to mutex_t + * (lwp_mutex_t) and cond_t (lwp_cond_t), defined here, a typedef of these + * types would require including <synch.h> in <pthread.h>, pulling in + * non-posix symbols/constants, violating POSIX namespace restrictions. Hence, + * pthread_mutex_t/pthread_cond_t have been redefined (in <sys/types.h>). + * Any modifications done to mutex_t/lwp_mutex_t or cond_t/lwp_cond_t must + * also be done to pthread_mutex_t/pthread_cond_t. + */ +typedef struct _lwp_mutex { + struct { + uint16_t flag1; + uint8_t flag2; + uint8_t ceiling; + union { + uint16_t bcptype; + struct { + uint8_t count_type1; + uint8_t count_type2; + } mtype_rcount; + } mbcp_type_un; + uint16_t magic; + } flags; + union { + struct { + uint8_t pad[8]; + } lock64; + struct { + uint32_t ownerpid; + uint32_t lockword; + } lock32; + upad64_t owner64; + } lock; + upad64_t data; +} lwp_mutex_t; + +/* + * Thread and LWP condition variables have the same + * type definition. + * NOTE: + * The layout of the following structure should be kept in sync with the + * layout of pthread_cond_t in sys/types.h. See NOTE above for lwp_mutex_t. 
+ */ +typedef struct _lwp_cond { + struct { + uint8_t flag[4]; + uint16_t type; + uint16_t magic; + } flags; + upad64_t data; +} lwp_cond_t; + +/* + * LWP semaphores + */ +typedef struct _lwp_sema { + uint32_t count; /* semaphore count */ + uint16_t type; + uint16_t magic; + uint8_t flags[8]; /* last byte reserved for waiters */ + upad64_t data; /* optional data */ +} lwp_sema_t; + +/* + * Thread and LWP rwlocks have the same type definition. + * NOTE: The layout of this structure should be kept in sync with the layout + * of the corresponding structure of pthread_rwlock_t in sys/types.h. + * Also, because we have to deal with C++, there is an identical structure + * for rwlock_t in head/sync.h that we cannot change. + */ +typedef struct _lwp_rwlock { + int32_t readers; /* rwstate word */ + uint16_t type; + uint16_t magic; + lwp_mutex_t mutex; /* used with process-shared rwlocks */ + lwp_cond_t readercv; /* used only to indicate ownership */ + lwp_cond_t writercv; /* used only to indicate ownership */ +} lwp_rwlock_t; + +#endif /* _ASM */ +/* + * Definitions of synchronization types. + */ +#define USYNC_THREAD 0x00 /* private to a process */ +#define USYNC_PROCESS 0x01 /* shared by processes */ + +/* Keep the following values in sync with pthread.h */ +#define LOCK_NORMAL 0x00 /* same as USYNC_THREAD */ +#define LOCK_SHARED 0x01 /* same as USYNC_PROCESS */ +#define LOCK_ERRORCHECK 0x02 /* error check lock */ +#define LOCK_RECURSIVE 0x04 /* recursive lock */ +#define LOCK_PRIO_INHERIT 0x10 /* priority inheritance lock */ +#define LOCK_PRIO_PROTECT 0x20 /* priority ceiling lock */ +#define LOCK_ROBUST 0x40 /* robust lock */ + +/* + * USYNC_PROCESS_ROBUST is a deprecated historical type. It is mapped + * into (USYNC_PROCESS | LOCK_ROBUST) by mutex_init(). Application code + * should be revised to use (USYNC_PROCESS | LOCK_ROBUST) rather than this.
+ */ +#define USYNC_PROCESS_ROBUST 0x08 + +/* + * lwp_mutex_t flags + */ +#define LOCK_OWNERDEAD 0x1 +#define LOCK_NOTRECOVERABLE 0x2 +#define LOCK_INITED 0x4 +#define LOCK_UNMAPPED 0x8 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYNCH_H */ diff --git a/uts/common/sys/sysevent.h b/uts/common/sys/sysevent.h new file mode 100644 index 000000000000..2b2644197384 --- /dev/null +++ b/uts/common/sys/sysevent.h @@ -0,0 +1,283 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_SYSEVENT_H +#define _SYS_SYSEVENT_H + +#include <sys/nvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef NULL +#if defined(_LP64) && !defined(__cplusplus) +#define NULL 0L +#else +#define NULL 0 +#endif +#endif + +/* Internal registration class and subclass */ +#define EC_ALL "register_all_classes" +#define EC_SUB_ALL "register_all_subclasses" + +/* + * Event allocation/enqueuing sleep/nosleep flags + */ +#define SE_SLEEP 0 +#define SE_NOSLEEP 1 + +/* Framework error codes */ +#define SE_EINVAL 1 /* Invalid argument */ +#define SE_ENOMEM 2 /* Unable to allocate memory */ +#define SE_EQSIZE 3 /* Maximum event q size exceeded */ +#define SE_EFAULT 4 /* Copy fault */ +#define SE_NOTFOUND 5 /* Attribute not found */ +#define SE_NO_TRANSPORT 6 /* sysevent transport down */ + +/* Internal data types */ + +#define SE_DATA_TYPE_BYTE DATA_TYPE_BYTE +#define SE_DATA_TYPE_INT16 DATA_TYPE_INT16 +#define SE_DATA_TYPE_UINT16 DATA_TYPE_UINT16 +#define SE_DATA_TYPE_INT32 DATA_TYPE_INT32 +#define SE_DATA_TYPE_UINT32 DATA_TYPE_UINT32 +#define SE_DATA_TYPE_INT64 DATA_TYPE_INT64 +#define SE_DATA_TYPE_UINT64 DATA_TYPE_UINT64 +#define SE_DATA_TYPE_STRING DATA_TYPE_STRING +#define SE_DATA_TYPE_BYTES DATA_TYPE_BYTE_ARRAY +#define SE_DATA_TYPE_TIME DATA_TYPE_HRTIME + +#define SE_KERN_PID 0 + +#define SUNW_VENDOR "SUNW" +#define SE_USR_PUB "usr:" +#define SE_KERN_PUB "kern:" +#define SUNW_KERN_PUB SUNW_VENDOR":"SE_KERN_PUB +#define SUNW_USR_PUB SUNW_VENDOR":"SE_USR_PUB + +/* + * Event header and attribute value limits + */ +#define MAX_ATTR_NAME 1024 +#define MAX_STRING_SZ 1024 +#define MAX_BYTE_ARRAY 1024 + +#define MAX_CLASS_LEN 64 +#define MAX_SUBCLASS_LEN 64 +#define MAX_PUB_LEN 128 +#define MAX_CHNAME_LEN 128 +#define MAX_SUBID_LEN 16 + +/* + * Limit for the event payload size + */ +#define MAX_EV_SIZE_LEN (SHRT_MAX/4) + +/* Opaque sysevent_t data type */ +typedef void *sysevent_t; + +/* Opaque channel bind data type */ +typedef void evchan_t; + +/* 
sysevent attribute list */ +typedef nvlist_t sysevent_attr_list_t; + +/* sysevent attribute name-value pair */ +typedef nvpair_t sysevent_attr_t; + +/* Unique event identifier */ +typedef struct sysevent_id { + uint64_t eid_seq; + hrtime_t eid_ts; +} sysevent_id_t; + +/* Event attribute value structures */ +typedef struct sysevent_bytes { + int32_t size; + uchar_t *data; +} sysevent_bytes_t; + +typedef struct sysevent_value { + int32_t value_type; /* data type */ + union { + uchar_t sv_byte; + int16_t sv_int16; + uint16_t sv_uint16; + int32_t sv_int32; + uint32_t sv_uint32; + int64_t sv_int64; + uint64_t sv_uint64; + hrtime_t sv_time; + char *sv_string; + sysevent_bytes_t sv_bytes; + } value; +} sysevent_value_t; + +/* + * The following flags determine the memory allocation semantics to use for + * kernel event buffer allocation by userland and kernel versions of + * sysevent_evc_publish(). + * + * EVCH_SLEEP and EVCH_NOSLEEP respectively map to KM_SLEEP and KM_NOSLEEP. + * EVCH_TRYHARD is a kernel-only publish flag that allows event allocation + * routines to use alternate kmem caches in situations where free memory + * may be low. Kernel callers of sysevent_evc_publish() must set flags to + * one of EVCH_SLEEP, EVCH_NOSLEEP or EVCH_TRYHARD. Userland callers of + * sysevent_evc_publish() must set flags to one of EVCH_SLEEP or EVCH_NOSLEEP. + * + * EVCH_QWAIT determines whether or not we should wait for slots in the event + * queue at publication time. EVCH_QWAIT may be used by kernel and userland + * publishers and must be used in conjunction with one of EVCH_SLEEP, + * EVCH_NOSLEEP or EVCH_TRYHARD (kernel-only). + */ + +#define EVCH_NOSLEEP 0x0001 /* No sleep on kmem_alloc() */ +#define EVCH_SLEEP 0x0002 /* Sleep on kmem_alloc() */ +#define EVCH_TRYHARD 0x0004 /* May use alternate kmem cache for alloc */ +#define EVCH_QWAIT 0x0008 /* Wait for slot in event queue */ + +/* + * Meaning of flags for subscribe. 
Bits 8 to 15 are dedicated to + * the consolidation private interface, so flags defined here are restricted + * to the LSB. + * + * EVCH_SUB_KEEP indicates that this subscription should persist even if + * this subscriber id should die unexpectedly; matching events will be + * queued (up to a limit) and will be delivered if/when we restart again + * with the same subscriber id. + */ +#define EVCH_SUB_KEEP 0x01 + +/* + * Subscriptions may be wildcarded, but we limit the number of + * wildcards permitted. + */ +#define EVCH_WILDCARD_MAX 10 + +/* + * Used in unsubscribe to indicate all subscriber ids for a channel. + */ +#define EVCH_ALLSUB "all_subs" + +/* + * Meaning of flags parameter of channel bind function + * + * EVCH_CREAT indicates to create a channel if not already present. + * + * EVCH_HOLD_PEND indicates that events should be published to this + * channel even if there are no matching subscribers present; when + * a subscriber belatedly binds to the channel and registers their + * subscriptions they will receive events that predate their bind. + * If the channel is closed, however, with no remaining bindings then + * the channel is destroyed. + * + * EVCH_HOLD_PEND_INDEF is a stronger version of EVCH_HOLD_PEND - + * even if the channel has no remaining bindings it will not be + * destroyed so long as events remain unconsumed. This is suitable for + * use with short-lived event producers that may bind to (create) the + * channel and exit before the intended consumer has started. 
+ */ +#define EVCH_CREAT 0x0001 +#define EVCH_HOLD_PEND 0x0002 +#define EVCH_HOLD_PEND_INDEF 0x0004 +#define EVCH_B_FLAGS 0x0007 /* All valid bits */ + +/* + * Meaning of commands of evc_control function + */ +#define EVCH_GET_CHAN_LEN_MAX 1 /* Get event queue length limit */ +#define EVCH_GET_CHAN_LEN 2 /* Get event queue length */ +#define EVCH_SET_CHAN_LEN 3 /* Set event queue length */ +#define EVCH_CMD_LAST EVCH_SET_CHAN_LEN /* Last command */ + +/* + * Shared user/kernel event channel interface definitions + */ +extern int sysevent_evc_bind(const char *, evchan_t **, uint32_t); +extern int sysevent_evc_unbind(evchan_t *); +extern int sysevent_evc_subscribe(evchan_t *, const char *, const char *, + int (*)(sysevent_t *, void *), void *, uint32_t); +extern int sysevent_evc_unsubscribe(evchan_t *, const char *); +extern int sysevent_evc_publish(evchan_t *, const char *, const char *, + const char *, const char *, nvlist_t *, uint32_t); +extern int sysevent_evc_control(evchan_t *, int, ...); +extern int sysevent_evc_setpropnvl(evchan_t *, nvlist_t *); +extern int sysevent_evc_getpropnvl(evchan_t *, nvlist_t **); + +#ifndef _KERNEL + +/* + * Userland-only event channel interfaces + */ + +#include <door.h> + +typedef struct sysevent_subattr sysevent_subattr_t; + +extern sysevent_subattr_t *sysevent_subattr_alloc(void); +extern void sysevent_subattr_free(sysevent_subattr_t *); + +extern void sysevent_subattr_thrattr(sysevent_subattr_t *, pthread_attr_t *); +extern void sysevent_subattr_sigmask(sysevent_subattr_t *, sigset_t *); + +extern void sysevent_subattr_thrcreate(sysevent_subattr_t *, + door_xcreate_server_func_t *, void *); +extern void sysevent_subattr_thrsetup(sysevent_subattr_t *, + door_xcreate_thrsetup_func_t *, void *); + +extern int sysevent_evc_xsubscribe(evchan_t *, const char *, const char *, + int (*)(sysevent_t *, void *), void *, uint32_t, sysevent_subattr_t *); + +#else + +/* + * Kernel log_event interfaces. 
+ */ +extern int log_sysevent(sysevent_t *, int, sysevent_id_t *); + +extern sysevent_t *sysevent_alloc(char *, char *, char *, int); +extern void sysevent_free(sysevent_t *); +extern int sysevent_add_attr(sysevent_attr_list_t **, char *, + sysevent_value_t *, int); +extern void sysevent_free_attr(sysevent_attr_list_t *); +extern int sysevent_attach_attributes(sysevent_t *, sysevent_attr_list_t *); +extern void sysevent_detach_attributes(sysevent_t *); +extern char *sysevent_get_class_name(sysevent_t *); +extern char *sysevent_get_subclass_name(sysevent_t *); +extern uint64_t sysevent_get_seq(sysevent_t *); +extern void sysevent_get_time(sysevent_t *, hrtime_t *); +extern size_t sysevent_get_size(sysevent_t *); +extern char *sysevent_get_pub(sysevent_t *); +extern int sysevent_get_attr_list(sysevent_t *, nvlist_t **); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSEVENT_H */ diff --git a/uts/common/sys/sysevent/dev.h b/uts/common/sys/sysevent/dev.h new file mode 100644 index 000000000000..9d3107d09011 --- /dev/null +++ b/uts/common/sys/sysevent/dev.h @@ -0,0 +1,256 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_SYSEVENT_DEV_H +#define _SYS_SYSEVENT_DEV_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/sysevent/eventdefs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Event schema for EC_DEV_ADD/ESC_DISK + * + * Event Class - EC_DEV_ADD + * Event Sub-Class - ESC_DISK + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - /dev name to the raw device. + * The name does not include the slice number component. + * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path of the device without the "/devices" + * prefix. + * + * Attribute Name - DEV_DRIVER_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - driver name + * + * Attribute Name - DEV_INSTANCE + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - driver instance number + * + * Attribute Name - DEV_PROP_PREFIX<devinfo_node_property> + * Attribute Type - data type of the devinfo_node_property + * Attribute Value - value of the devinfo_node_property + * + * + * Event schema for EC_DEV_ADD/ESC_NETWORK + * + * Event Class - EC_DEV_ADD + * Event Sub-Class - ESC_NETWORK + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - /dev name associated with the device if exists. + * /dev name associated with the driver for DLPI + * Style-2 only drivers. 
+ * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path of the device without the "/devices" + * prefix. + * + * Attribute Name - DEV_DRIVER_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - driver name + * + * Attribute Name - DEV_INSTANCE + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - driver instance number + * + * Attribute Name - DEV_PROP_PREFIX<devinfo_node_property> + * Attribute Type - data type of the devinfo_node_property + * Attribute Value - value of the devinfo_node_property + * + * + * Event schema for EC_DEV_ADD/ESC_PRINTER + * + * Event Class - EC_DEV_ADD + * Event Sub-Class - ESC_PRINTER + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - /dev/printers name associated with the device + * if exists. + * /dev name associated with the device if it exists + * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path of the device without the "/devices" + * prefix. + * + * Attribute Name - DEV_DRIVER_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - driver name + * + * Attribute Name - DEV_INSTANCE + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - driver instance number + * + * Attribute Name - DEV_PROP_PREFIX<devinfo_node_property> + * Attribute Type - data type of the devinfo_node_property + * Attribute Value - value of the devinfo_node_property + * + * + * Event schema for EC_DEV_REMOVE/ESC_DISK + * + * Event Class - EC_DEV_REMOVE + * Event Sub-Class - ESC_DISK + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - /dev name to the raw device. 
+ * The name does not include the slice number component. + * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path of the device without the "/devices" + * prefix. + * + * Attribute Name - DEV_DRIVER_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - driver name + * + * Attribute Name - DEV_INSTANCE + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - driver instance number + * + * + * Event schema for EC_DEV_REMOVE/ESC_NETWORK + * + * Event Class - EC_DEV_REMOVE + * Event Sub-Class - ESC_NETWORK + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - /dev name associated with the device if exists. + * /dev name associated with the driver for DLPI + * Style-2 only drivers. + * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path of the device without the "/devices" + * prefix. + * + * Attribute Name - DEV_DRIVER_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - driver name + * + * Attribute Name - DEV_INSTANCE + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - driver instance number + * + * + * Event schema for EC_DEV_REMOVE/ESC_PRINTER + * + * Event Class - EC_DEV_REMOVE + * Event Sub-Class - ESC_PRINTER + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - /dev/printers name associated with the device + * if exists. + * /dev name associated with the device if it exists + * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path of the device without the "/devices" + * prefix. 
+ * + * Attribute Name - DEV_DRIVER_NAME + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - driver name + * + * Attribute Name - DEV_INSTANCE + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - driver instance number + * + * + * Event schema for EC_DEV_BRANCH/ESC_DEV_BRANCH_ADD or ESC_DEV_BRANCH_REMOVE + * + * Event Class - EC_DEV_BRANCH + * Event Sub-Class - ESC_DEV_BRANCH_ADD or ESC_DEV_BRANCH_REMOVE + * + * Attribute Name - EV_VERSION + * Attribute Type - DATA_TYPE_INT32 + * Attribute Value - event version number + * + * Attribute Name - DEV_PHYS_PATH + * Attribute Type - DATA_TYPE_STRING + * Attribute Value - physical path to the root node of the device subtree + * without the "/devices" prefix. + */ + +#define EV_VERSION "version" +#define DEV_PHYS_PATH "phys_path" +#define DEV_NAME "dev_name" +#define DEV_DRIVER_NAME "driver_name" +#define DEV_INSTANCE "instance" +#define DEV_PROP_PREFIX "prop-" + +#define EV_V1 1 + +/* maximum number of devinfo node properties added to the event */ +#define MAX_PROP_COUNT 100 + +/* only properties with size less than PROP_LEN_LIMIT are added to the event */ +#define PROP_LEN_LIMIT 1024 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSEVENT_DEV_H */ diff --git a/uts/common/sys/sysevent/eventdefs.h b/uts/common/sys/sysevent/eventdefs.h new file mode 100644 index 000000000000..3ed9bb298018 --- /dev/null +++ b/uts/common/sys/sysevent/eventdefs.h @@ -0,0 +1,275 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_SYSEVENT_EVENTDEFS_H +#define _SYS_SYSEVENT_EVENTDEFS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * eventdefs.h contains public definitions for sysevent types (classes + * and subclasses). All additions/removal/changes are subject + * to PSARC approval. + */ + +/* Sysevent Class definitions */ +#define EC_NONE "EC_none" +#define EC_PRIV "EC_priv" +#define EC_PLATFORM "EC_platform" /* events private to platform */ +#define EC_DR "EC_dr" /* Dynamic reconfiguration event class */ +#define EC_ENV "EC_env" /* Environmental monitor event class */ +#define EC_DOMAIN "EC_domain" /* Domain event class */ +#define EC_AP_DRIVER "EC_ap_driver" /* Alternate Pathing event class */ +#define EC_IPMP "EC_ipmp" /* IP Multipathing event class */ +#define EC_DEV_ADD "EC_dev_add" /* device add event class */ +#define EC_DEV_REMOVE "EC_dev_remove" /* device remove event class */ +#define EC_DEV_BRANCH "EC_dev_branch" /* device tree branch event class */ +#define EC_DEV_STATUS "EC_dev_status" /* device status event class */ +#define EC_FM "EC_fm" /* FMA error report event */ +#define EC_ZFS "EC_zfs" /* ZFS event */ +#define EC_DATALINK "EC_datalink" /* datalink event */ +#define EC_VRRP "EC_vrrp" /* VRRP event */ + +/* + * The following event class is reserved for exclusive use + * by Sun Cluster software. 
+ */ +#define EC_CLUSTER "EC_Cluster" + +/* + * The following classes are exclusively reserved for use by the + * Solaris Volume Manager (SVM) + */ +#define EC_SVM_CONFIG "EC_SVM_Config" +#define EC_SVM_STATE "EC_SVM_State" + +/* + * EC_SVM_CONFIG subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/svm.h + */ +#define ESC_SVM_CREATE "ESC_SVM_Create" +#define ESC_SVM_DELETE "ESC_SVM_Delete" +#define ESC_SVM_ADD "ESC_SVM_Add" +#define ESC_SVM_REMOVE "ESC_SVM_Remove" +#define ESC_SVM_REPLACE "ESC_SVM_Replace" +#define ESC_SVM_GROW "ESC_SVM_Grow" +#define ESC_SVM_RENAME_SRC "ESC_SVM_Rename_Src" +#define ESC_SVM_RENAME_DST "ESC_SVM_Rename_Dst" +#define ESC_SVM_MEDIATOR_ADD "ESC_SVM_Mediator_Add" +#define ESC_SVM_MEDIATOR_DELETE "ESC_SVM_Mediator_Delete" +#define ESC_SVM_HOST_ADD "ESC_SVM_Host_Add" +#define ESC_SVM_HOST_DELETE "ESC_SVM_Host_Delete" +#define ESC_SVM_DRIVE_ADD "ESC_SVM_Drive_Add" +#define ESC_SVM_DRIVE_DELETE "ESC_SVM_Drive_Delete" +#define ESC_SVM_DETACH "ESC_SVM_Detach" +#define ESC_SVM_DETACHING "ESC_SVM_Detaching" +#define ESC_SVM_ATTACH "ESC_SVM_Attach" +#define ESC_SVM_ATTACHING "ESC_SVM_Attaching" + +/* + * EC_SVM_STATE subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/svm.h + */ +#define ESC_SVM_INIT_START "ESC_SVM_Init_Start" +#define ESC_SVM_INIT_FAILED "ESC_SVM_Init_Failed" +#define ESC_SVM_INIT_FATAL "ESC_SVM_Init_Fatal" +#define ESC_SVM_INIT_SUCCESS "ESC_SVM_Init_Success" +#define ESC_SVM_IOERR "ESC_SVM_Ioerr" +#define ESC_SVM_ERRED "ESC_SVM_Erred" +#define ESC_SVM_LASTERRED "ESC_SVM_Lasterred" +#define ESC_SVM_OK "ESC_SVM_Ok" +#define ESC_SVM_ENABLE "ESC_SVM_Enable" +#define ESC_SVM_RESYNC_START "ESC_SVM_Resync_Start" +#define ESC_SVM_RESYNC_FAILED "ESC_SVM_Resync_Failed" +#define ESC_SVM_RESYNC_SUCCESS "ESC_SVM_Resync_Success" +#define ESC_SVM_RESYNC_DONE "ESC_SVM_Resync_Done" +#define ESC_SVM_HOTSPARED "ESC_SVM_Hotspared" +#define ESC_SVM_HS_FREED 
"ESC_SVM_HS_Freed" +#define ESC_SVM_HS_CHANGED "ESC_SVM_HS_Changed" +#define ESC_SVM_TAKEOVER "ESC_SVM_Takeover" +#define ESC_SVM_RELEASE "ESC_SVM_Release" +#define ESC_SVM_OPEN_FAIL "ESC_SVM_Open_Fail" +#define ESC_SVM_OFFLINE "ESC_SVM_Offline" +#define ESC_SVM_ONLINE "ESC_SVM_Online" +#define ESC_SVM_CHANGE "ESC_SVM_Change" +#define ESC_SVM_EXCHANGE "ESC_SVM_Exchange" +#define ESC_SVM_REGEN_START "ESC_SVM_Regen_Start" +#define ESC_SVM_REGEN_DONE "ESC_SVM_Regen_Done" +#define ESC_SVM_REGEN_FAILED "ESC_SVM_Regen_Failed" + +/* + * EC_DR subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/dr.h + */ + +/* Attachment point state change */ +#define ESC_DR_AP_STATE_CHANGE "ESC_dr_ap_state_change" +#define ESC_DR_REQ "ESC_dr_req" /* Request DR */ +#define ESC_DR_TARGET_STATE_CHANGE "ESC_dr_target_state_change" + +/* + * EC_ENV subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/env.h + */ +#define ESC_ENV_TEMP "ESC_env_temp" /* Temperature change event subclass */ +#define ESC_ENV_FAN "ESC_env_fan" /* Fan status change event subclass */ +#define ESC_ENV_POWER "ESC_env_power" /* Power supply change event subclass */ +#define ESC_ENV_LED "ESC_env_led" /* LED change event subclass */ + +/* + * EC_DOMAIN subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/domain.h + */ + +/* Domain state change */ +#define ESC_DOMAIN_STATE_CHANGE "ESC_domain_state_change" +/* Domain loghost name change */ +#define ESC_DOMAIN_LOGHOST_CHANGE "ESC_domain_loghost_change" + +/* + * EC_AP_DRIVER subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/ap_driver.h + */ + +/* Alternate Pathing path switch */ +#define ESC_AP_DRIVER_PATHSWITCH "ESC_ap_driver_pathswitch" +/* Alternate Pathing database commit */ +#define ESC_AP_DRIVER_COMMIT "ESC_ap_driver_commit" +/* Alternate Pathing physical path status change */ +#define 
ESC_AP_DRIVER_PHYS_PATH_STATUS_CHANGE \ + "ESC_ap_driver_phys_path_status_change" + +/* + * EC_IPMP subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/ipmp.h + */ + +/* IPMP group has changed state */ +#define ESC_IPMP_GROUP_STATE "ESC_ipmp_group_state" + +/* IPMP group has been created or removed */ +#define ESC_IPMP_GROUP_CHANGE "ESC_ipmp_group_change" + +/* IPMP group has had an interface added or removed */ +#define ESC_IPMP_GROUP_MEMBER_CHANGE "ESC_ipmp_group_member_change" + +/* Interface within an IPMP group has changed state or type */ +#define ESC_IPMP_IF_CHANGE "ESC_ipmp_if_change" + +/* IPMP probe has changed state */ +#define ESC_IPMP_PROBE_STATE "ESC_ipmp_probe_state" + +/* + * EC_DEV_ADD and EC_DEV_REMOVE subclass definitions - supporting attributes + * (name/value pairs) are found in sys/sysevent/dev.h + */ +#define ESC_DISK "disk" /* disk device */ +#define ESC_NETWORK "network" /* network interface */ +#define ESC_PRINTER "printer" /* printer device */ +#define ESC_LOFI "lofi" /* lofi device */ + +/* + * EC_DEV_BRANCH subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/dev.h + */ + +/* device tree branch added */ +#define ESC_DEV_BRANCH_ADD "ESC_dev_branch_add" + +/* device tree branch removed */ +#define ESC_DEV_BRANCH_REMOVE "ESC_dev_branch_remove" + +/* + * EC_DEV_STATUS subclass definitions + * + * device capacity dynamically changed + */ +#define ESC_DEV_DLE "ESC_dev_dle" + +/* LUN has received an eject request from the user */ +#define ESC_DEV_EJECT_REQUEST "ESC_dev_eject_request" + +/* FMA Fault and Error event protocol subclass */ +#define ESC_FM_ERROR "ESC_FM_error" +#define ESC_FM_ERROR_REPLAY "ESC_FM_error_replay" + +/* Service processor subclass definitions */ +#define ESC_PLATFORM_SP_RESET "ESC_platform_sp_reset" + +/* + * EC_PWRCTL subclass definitions + */ +#define EC_PWRCTL "EC_pwrctl" +#define ESC_PWRCTL_ADD "ESC_pwrctl_add" +#define 
ESC_PWRCTL_REMOVE "ESC_pwrctl_remove" +#define ESC_PWRCTL_WARN "ESC_pwrctl_warn" +#define ESC_PWRCTL_LOW "ESC_pwrctl_low" +#define ESC_PWRCTL_STATE_CHANGE "ESC_pwrctl_state_change" +#define ESC_PWRCTL_POWER_BUTTON "ESC_pwrctl_power_button" +#define ESC_PWRCTL_BRIGHTNESS_UP "ESC_pwrctl_brightness_up" +#define ESC_PWRCTL_BRIGHTNESS_DOWN "ESC_pwrctl_brightness_down" + +/* EC_ACPIEV subclass definitions */ +#define EC_ACPIEV "EC_acpiev" +#define ESC_ACPIEV_DISPLAY_SWITCH "ESC_acpiev_display_switch" +#define ESC_ACPIEV_SCREEN_LOCK "ESC_acpiev_screen_lock" +#define ESC_ACPIEV_SLEEP "ESC_acpiev_sleep" +#define ESC_ACPIEV_AUDIO_MUTE "ESC_acpiev_audio_mute" +#define ESC_ACPIEV_WIFI "ESC_acpiev_wifi" +#define ESC_ACPIEV_TOUCHPAD "ESC_acpiev_touchpad" + +/* + * ZFS subclass definitions. Supporting attributes (name/value pairs) are found + * in sys/fs/zfs.h + */ +#define ESC_ZFS_RESILVER_START "ESC_ZFS_resilver_start" +#define ESC_ZFS_RESILVER_FINISH "ESC_ZFS_resilver_finish" +#define ESC_ZFS_VDEV_REMOVE "ESC_ZFS_vdev_remove" +#define ESC_ZFS_POOL_DESTROY "ESC_ZFS_pool_destroy" +#define ESC_ZFS_VDEV_CLEAR "ESC_ZFS_vdev_clear" +#define ESC_ZFS_VDEV_CHECK "ESC_ZFS_vdev_check" +#define ESC_ZFS_CONFIG_SYNC "ESC_ZFS_config_sync" +#define ESC_ZFS_SCRUB_START "ESC_ZFS_scrub_start" +#define ESC_ZFS_SCRUB_FINISH "ESC_ZFS_scrub_finish" +#define ESC_ZFS_VDEV_SPARE "ESC_ZFS_vdev_spare" +#define ESC_ZFS_BOOTFS_VDEV_ATTACH "ESC_ZFS_bootfs_vdev_attach" + +/* + * datalink subclass definitions. + */ +#define ESC_DATALINK_PHYS_ADD "ESC_datalink_phys_add" /* new physical link */ + +/* + * VRRP subclass definitions. 
Supporting attributes (name/value pairs) are + * found in sys/sysevent/vrrp.h + */ +#define ESC_VRRP_STATE_CHANGE "ESC_vrrp_state_change" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSEVENT_EVENTDEFS_H */ diff --git a/uts/common/sys/sysmacros.h b/uts/common/sys/sysmacros.h new file mode 100644 index 000000000000..89a672db2f8c --- /dev/null +++ b/uts/common/sys/sysmacros.h @@ -0,0 +1,378 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_SYSMACROS_H +#define _SYS_SYSMACROS_H + +#include <sys/param.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Some macros for units conversion + */ +/* + * Disk blocks (sectors) and bytes. + */ +#define dtob(DD) ((DD) << DEV_BSHIFT) +#define btod(BB) (((BB) + DEV_BSIZE - 1) >> DEV_BSHIFT) +#define btodt(BB) ((BB) >> DEV_BSHIFT) +#define lbtod(BB) (((offset_t)(BB) + DEV_BSIZE - 1) >> DEV_BSHIFT) + +/* common macros */ +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) ((a) < (b) ? 
(b) : (a)) +#endif +#ifndef ABS +#define ABS(a) ((a) < 0 ? -(a) : (a)) +#endif +#ifndef SIGNOF +#define SIGNOF(a) ((a) < 0 ? -1 : (a) > 0) +#endif + +#ifdef _KERNEL + +/* + * Convert a single byte to/from binary-coded decimal (BCD). + */ +extern unsigned char byte_to_bcd[256]; +extern unsigned char bcd_to_byte[256]; + +#define BYTE_TO_BCD(x) byte_to_bcd[(x) & 0xff] +#define BCD_TO_BYTE(x) bcd_to_byte[(x) & 0xff] + +#endif /* _KERNEL */ + +/* + * WARNING: The device number macros defined here should not be used by device + * drivers or user software. Device drivers should use the device functions + * defined in the DDI/DKI interface (see also ddi.h). Application software + * should make use of the library routines available in makedev(3). A set of + * new device macros are provided to operate on the expanded device number + * format supported in SVR4. Macro versions of the DDI device functions are + * provided for use by kernel proper routines only. Macro routines bmajor(), + * major(), minor(), emajor(), eminor(), and makedev() will be removed or + * their definitions changed at the next major release following SVR4. + */ + +#define O_BITSMAJOR 7 /* # of SVR3 major device bits */ +#define O_BITSMINOR 8 /* # of SVR3 minor device bits */ +#define O_MAXMAJ 0x7f /* SVR3 max major value */ +#define O_MAXMIN 0xff /* SVR3 max minor value */ + + +#define L_BITSMAJOR32 14 /* # of SVR4 major device bits */ +#define L_BITSMINOR32 18 /* # of SVR4 minor device bits */ +#define L_MAXMAJ32 0x3fff /* SVR4 max major value */ +#define L_MAXMIN32 0x3ffff /* MAX minor for 3b2 software drivers. 
*/ + /* For 3b2 hardware devices the minor is */ + /* restricted to 256 (0-255) */ + +#ifdef _LP64 +#define L_BITSMAJOR 32 /* # of major device bits in 64-bit Solaris */ +#define L_BITSMINOR 32 /* # of minor device bits in 64-bit Solaris */ +#define L_MAXMAJ 0xfffffffful /* max major value */ +#define L_MAXMIN 0xfffffffful /* max minor value */ +#else +#define L_BITSMAJOR L_BITSMAJOR32 +#define L_BITSMINOR L_BITSMINOR32 +#define L_MAXMAJ L_MAXMAJ32 +#define L_MAXMIN L_MAXMIN32 +#endif + +#ifdef _KERNEL + +/* major part of a device internal to the kernel */ + +#define major(x) (major_t)((((unsigned)(x)) >> O_BITSMINOR) & O_MAXMAJ) +#define bmajor(x) (major_t)((((unsigned)(x)) >> O_BITSMINOR) & O_MAXMAJ) + +/* get internal major part of expanded device number */ + +#define getmajor(x) (major_t)((((dev_t)(x)) >> L_BITSMINOR) & L_MAXMAJ) + +/* minor part of a device internal to the kernel */ + +#define minor(x) (minor_t)((x) & O_MAXMIN) + +/* get internal minor part of expanded device number */ + +#define getminor(x) (minor_t)((x) & L_MAXMIN) + +#else + +/* major part of a device external from the kernel (same as emajor below) */ + +#define major(x) (major_t)((((unsigned)(x)) >> O_BITSMINOR) & O_MAXMAJ) + +/* minor part of a device external from the kernel (same as eminor below) */ + +#define minor(x) (minor_t)((x) & O_MAXMIN) + +#endif /* _KERNEL */ + +/* create old device number */ + +#define makedev(x, y) (unsigned short)(((x) << O_BITSMINOR) | ((y) & O_MAXMIN)) + +/* make a new device number */ + +#define makedevice(x, y) (dev_t)(((dev_t)(x) << L_BITSMINOR) | ((y) & L_MAXMIN)) + + +/* + * emajor() allows kernel/driver code to print external major numbers + * eminor() allows kernel/driver code to print external minor numbers + * + * emajor() evaluates to NODEV (cast to major_t) when the major part of x + * exceeds O_MAXMAJ. The whole expansion is cast and parenthesized, as in + * getemajor(), so the macro stays a single operand in larger expressions. + */ + +#define emajor(x) \ + (major_t)((((unsigned int)(x) >> O_BITSMINOR) > O_MAXMAJ) ? 
\ + NODEV : (((unsigned int)(x) >> O_BITSMINOR) & O_MAXMAJ)) + +#define eminor(x) \ + (minor_t)((x) & O_MAXMIN) + +/* + * get external major and minor device + * components from expanded device number + */ +#define getemajor(x) (major_t)((((dev_t)(x) >> L_BITSMINOR) > L_MAXMAJ) ? \ + NODEV : (((dev_t)(x) >> L_BITSMINOR) & L_MAXMAJ)) +#define geteminor(x) (minor_t)((x) & L_MAXMIN) + +/* + * These are versions of the kernel routines for compressing and + * expanding long device numbers that don't return errors. + */ +#if (L_BITSMAJOR32 == L_BITSMAJOR) && (L_BITSMINOR32 == L_BITSMINOR) + +#define DEVCMPL(x) (x) +#define DEVEXPL(x) (x) + +#else + +#define DEVCMPL(x) \ + (dev32_t)((((x) >> L_BITSMINOR) > L_MAXMAJ32 || \ + ((x) & L_MAXMIN) > L_MAXMIN32) ? NODEV32 : \ + ((((x) >> L_BITSMINOR) << L_BITSMINOR32) | ((x) & L_MAXMIN32))) + +#define DEVEXPL(x) \ + (((x) == NODEV32) ? NODEV : \ + makedevice(((x) >> L_BITSMINOR32) & L_MAXMAJ32, (x) & L_MAXMIN32)) + +#endif /* L_BITSMAJOR32 ... */ + +/* convert to old (SVR3.2) dev format */ + +#define cmpdev(x) \ + (o_dev_t)((((x) >> L_BITSMINOR) > O_MAXMAJ || \ + ((x) & L_MAXMIN) > O_MAXMIN) ? NODEV : \ + ((((x) >> L_BITSMINOR) << O_BITSMINOR) | ((x) & O_MAXMIN))) + +/* convert to new (SVR4) dev format */ + +#define expdev(x) \ + (dev_t)(((dev_t)(((x) >> O_BITSMINOR) & O_MAXMAJ) << L_BITSMINOR) | \ + ((x) & O_MAXMIN)) + +/* + * Macro for checking power of 2 address alignment. + */ +#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) + +/* + * Macros for counting and rounding. + */ +#define howmany(x, y) (((x)+((y)-1))/(y)) +#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) + +/* + * Macro to determine if value is a power of 2 + */ +#define ISP2(x) (((x) & ((x) - 1)) == 0) + +/* + * Macros for various sorts of alignment and rounding. The "align" must + * be a power of 2. Often times it is a block, sector, or page. 
+ */ + +/* + * return x rounded down to an align boundary + * eg, P2ALIGN(1200, 1024) == 1024 (1*align) + * eg, P2ALIGN(1024, 1024) == 1024 (1*align) + * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align) + * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align) + */ +#define P2ALIGN(x, align) ((x) & -(align)) + +/* + * return x % (mod) align + * eg, P2PHASE(0x1234, 0x100) == 0x34 (x-0x12*align) + * eg, P2PHASE(0x5600, 0x100) == 0x00 (x-0x56*align) + */ +#define P2PHASE(x, align) ((x) & ((align) - 1)) + +/* + * return how much space is left in this block (but if it's perfectly + * aligned, return 0). + * eg, P2NPHASE(0x1234, 0x100) == 0xcc (0x13*align-x) + * eg, P2NPHASE(0x5600, 0x100) == 0x00 (0x56*align-x) + */ +#define P2NPHASE(x, align) (-(x) & ((align) - 1)) + +/* + * return x rounded up to an align boundary + * eg, P2ROUNDUP(0x1234, 0x100) == 0x1300 (0x13*align) + * eg, P2ROUNDUP(0x5600, 0x100) == 0x5600 (0x56*align) + */ +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) + +/* + * return the ending address of the block that x is in + * eg, P2END(0x1234, 0x100) == 0x12ff (0x13*align - 1) + * eg, P2END(0x5600, 0x100) == 0x56ff (0x57*align - 1) + */ +#define P2END(x, align) (-(~(x) & -(align))) + +/* + * return x rounded up to the next phase (offset) within align. + * phase should be < align. + * eg, P2PHASEUP(0x1234, 0x100, 0x10) == 0x1310 (0x13*align + phase) + * eg, P2PHASEUP(0x5600, 0x100, 0x10) == 0x5610 (0x56*align + phase) + */ +#define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) + +/* + * return TRUE if adding len to off would cause it to cross an align + * boundary. + * eg, P2BOUNDARY(0x1234, 0xe0, 0x100) == TRUE (0x1234 + 0xe0 == 0x1314) + * eg, P2BOUNDARY(0x1234, 0x50, 0x100) == FALSE (0x1234 + 0x50 == 0x1284) + */ +#define P2BOUNDARY(off, len, align) \ + (((off) ^ ((off) + (len) - 1)) > (align) - 1) + +/* + * Return TRUE if they have the same highest bit set. 
+ * eg, P2SAMEHIGHBIT(0x1234, 0x1001) == TRUE (the high bit is 0x1000) + * eg, P2SAMEHIGHBIT(0x1234, 0x3010) == FALSE (high bit of 0x3010 is 0x2000) + */ +#define P2SAMEHIGHBIT(x, y) (((x) ^ (y)) < ((x) & (y))) + +/* + * Typed version of the P2* macros. These macros should be used to ensure + * that the result is correctly calculated based on the data type of (x), + * which is passed in as the last argument, regardless of the data + * type of the alignment. For example, if (x) is of type uint64_t, + * and we want to round it up to a page boundary using "PAGESIZE" as + * the alignment, we can do either + * P2ROUNDUP(x, (uint64_t)PAGESIZE) + * or + * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t) + */ +#define P2ALIGN_TYPED(x, align, type) \ + ((type)(x) & -(type)(align)) +#define P2PHASE_TYPED(x, align, type) \ + ((type)(x) & ((type)(align) - 1)) +#define P2NPHASE_TYPED(x, align, type) \ + (-(type)(x) & ((type)(align) - 1)) +#define P2ROUNDUP_TYPED(x, align, type) \ + (-(-(type)(x) & -(type)(align))) +#define P2END_TYPED(x, align, type) \ + (-(~(type)(x) & -(type)(align))) +#define P2PHASEUP_TYPED(x, align, phase, type) \ + ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align))) +#define P2CROSS_TYPED(x, y, align, type) \ + (((type)(x) ^ (type)(y)) > (type)(align) - 1) +#define P2SAMEHIGHBIT_TYPED(x, y, type) \ + (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y))) + +/* + * Macros to atomically increment/decrement a variable. mutex and var + * must be pointers. Each macro expands to a bare (unparenthesized) + * comma expression that calls mutex_enter(mutex), updates *var and then + * calls mutex_exit(mutex), so use it only as a complete statement. + */ +#define INCR_COUNT(var, mutex) mutex_enter(mutex), (*(var))++, mutex_exit(mutex) +#define DECR_COUNT(var, mutex) mutex_enter(mutex), (*(var))--, mutex_exit(mutex) + +/* + * Macros to declare bitfields - the order in the parameter list is + * Low to High - that is, declare bit 0 first. We only support 8-bit bitfields + * because if a field crosses a byte boundary it's not likely to be meaningful + * without reassembly in its nonnative endianness.
+ */ +#if defined(_BIT_FIELDS_LTOH) +#define DECL_BITFIELD2(_a, _b) \ + uint8_t _a, _b +#define DECL_BITFIELD3(_a, _b, _c) \ + uint8_t _a, _b, _c +#define DECL_BITFIELD4(_a, _b, _c, _d) \ + uint8_t _a, _b, _c, _d +#define DECL_BITFIELD5(_a, _b, _c, _d, _e) \ + uint8_t _a, _b, _c, _d, _e +#define DECL_BITFIELD6(_a, _b, _c, _d, _e, _f) \ + uint8_t _a, _b, _c, _d, _e, _f +#define DECL_BITFIELD7(_a, _b, _c, _d, _e, _f, _g) \ + uint8_t _a, _b, _c, _d, _e, _f, _g +#define DECL_BITFIELD8(_a, _b, _c, _d, _e, _f, _g, _h) \ + uint8_t _a, _b, _c, _d, _e, _f, _g, _h +#elif defined(_BIT_FIELDS_HTOL) +#define DECL_BITFIELD2(_a, _b) \ + uint8_t _b, _a +#define DECL_BITFIELD3(_a, _b, _c) \ + uint8_t _c, _b, _a +#define DECL_BITFIELD4(_a, _b, _c, _d) \ + uint8_t _d, _c, _b, _a +#define DECL_BITFIELD5(_a, _b, _c, _d, _e) \ + uint8_t _e, _d, _c, _b, _a +#define DECL_BITFIELD6(_a, _b, _c, _d, _e, _f) \ + uint8_t _f, _e, _d, _c, _b, _a +#define DECL_BITFIELD7(_a, _b, _c, _d, _e, _f, _g) \ + uint8_t _g, _f, _e, _d, _c, _b, _a +#define DECL_BITFIELD8(_a, _b, _c, _d, _e, _f, _g, _h) \ + uint8_t _h, _g, _f, _e, _d, _c, _b, _a +#else +#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined +#endif /* _BIT_FIELDS_LTOH */ + +#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) + +/* avoid any possibility of clashing with <stddef.h> version */ + +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSMACROS_H */ diff --git a/uts/common/sys/taskq.h b/uts/common/sys/taskq.h new file mode 100644 index 000000000000..8b601c86a598 --- /dev/null +++ b/uts/common/sys/taskq.h @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_TASKQ_H +#define _SYS_TASKQ_H + +#include <sys/types.h> +#include <sys/thread.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define TASKQ_NAMELEN 31 + +typedef struct taskq taskq_t; +typedef uintptr_t taskqid_t; +typedef void (task_func_t)(void *); + +struct proc; + +/* + * Public flags for taskq_create(): bit range 0-15 + */ +#define TASKQ_PREPOPULATE 0x0001 /* Prepopulate with threads and data */ +#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ +#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ +#define TASKQ_THREADS_CPU_PCT 0x0008 /* number of threads as % of ncpu */ +#define TASKQ_DC_BATCH 0x0010 /* Taskq uses SDC in batch mode */ + +/* + * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as + * KM_SLEEP/KM_NOSLEEP. 
+ */ +#define TQ_SLEEP 0x00 /* Can block for memory */ +#define TQ_NOSLEEP 0x01 /* cannot block for memory; may fail */ +#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ +#define TQ_NOALLOC 0x04 /* cannot allocate memory; may fail */ +#define TQ_FRONT 0x08 /* Put task at the front of the queue */ + +#ifdef _KERNEL + +extern taskq_t *system_taskq; + +extern void taskq_init(void); +extern void taskq_mp_init(void); + +extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); +extern taskq_t *taskq_create_instance(const char *, int, int, pri_t, int, + int, uint_t); +extern taskq_t *taskq_create_proc(const char *, int, pri_t, int, int, + struct proc *, uint_t); +extern taskq_t *taskq_create_sysdc(const char *, int, int, int, + struct proc *, uint_t, uint_t); +extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); +extern void nulltask(void *); +extern void taskq_destroy(taskq_t *); +extern void taskq_wait(taskq_t *); +extern void taskq_suspend(taskq_t *); +extern int taskq_suspended(taskq_t *); +extern void taskq_resume(taskq_t *); +extern int taskq_member(taskq_t *, kthread_t *); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TASKQ_H */ diff --git a/uts/common/sys/u8_textprep.h b/uts/common/sys/u8_textprep.h new file mode 100644 index 000000000000..e30f064b2d99 --- /dev/null +++ b/uts/common/sys/u8_textprep.h @@ -0,0 +1,113 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_U8_TEXTPREP_H +#define _SYS_U8_TEXTPREP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/isa_defs.h> +#include <sys/types.h> +#include <sys/errno.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Unicode encoding conversion functions and their macros. + */ +#define UCONV_IN_BIG_ENDIAN 0x0001 +#define UCONV_OUT_BIG_ENDIAN 0x0002 +#define UCONV_IN_SYSTEM_ENDIAN 0x0004 +#define UCONV_OUT_SYSTEM_ENDIAN 0x0008 +#define UCONV_IN_LITTLE_ENDIAN 0x0010 +#define UCONV_OUT_LITTLE_ENDIAN 0x0020 +#define UCONV_IGNORE_NULL 0x0040 +#define UCONV_IN_ACCEPT_BOM 0x0080 +#define UCONV_OUT_EMIT_BOM 0x0100 + +extern int uconv_u16tou32(const uint16_t *, size_t *, uint32_t *, size_t *, + int); +extern int uconv_u16tou8(const uint16_t *, size_t *, uchar_t *, size_t *, int); +extern int uconv_u32tou16(const uint32_t *, size_t *, uint16_t *, size_t *, + int); +extern int uconv_u32tou8(const uint32_t *, size_t *, uchar_t *, size_t *, int); +extern int uconv_u8tou16(const uchar_t *, size_t *, uint16_t *, size_t *, int); +extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); + +/* + * UTF-8 text preparation functions and their macros. + * + * Among the macros defined, U8_CANON_DECOMP, U8_COMPAT_DECOMP, and + * U8_CANON_COMP are not public interfaces and must not be used directly + * at the flag input argument. 
+ */ +#define U8_STRCMP_CS (0x00000001) +#define U8_STRCMP_CI_UPPER (0x00000002) +#define U8_STRCMP_CI_LOWER (0x00000004) + +#define U8_CANON_DECOMP (0x00000010) +#define U8_COMPAT_DECOMP (0x00000020) +#define U8_CANON_COMP (0x00000040) + +#define U8_STRCMP_NFD (U8_CANON_DECOMP) +#define U8_STRCMP_NFC (U8_CANON_DECOMP | U8_CANON_COMP) +#define U8_STRCMP_NFKD (U8_COMPAT_DECOMP) +#define U8_STRCMP_NFKC (U8_COMPAT_DECOMP | U8_CANON_COMP) + +#define U8_TEXTPREP_TOUPPER (U8_STRCMP_CI_UPPER) +#define U8_TEXTPREP_TOLOWER (U8_STRCMP_CI_LOWER) + +#define U8_TEXTPREP_NFD (U8_STRCMP_NFD) +#define U8_TEXTPREP_NFC (U8_STRCMP_NFC) +#define U8_TEXTPREP_NFKD (U8_STRCMP_NFKD) +#define U8_TEXTPREP_NFKC (U8_STRCMP_NFKC) + +#define U8_TEXTPREP_IGNORE_NULL (0x00010000) +#define U8_TEXTPREP_IGNORE_INVALID (0x00020000) +#define U8_TEXTPREP_NOWAIT (0x00040000) + +#define U8_UNICODE_320 (0) +#define U8_UNICODE_500 (1) +#define U8_UNICODE_LATEST (U8_UNICODE_500) + +#define U8_VALIDATE_ENTIRE (0x00100000) +#define U8_VALIDATE_CHECK_ADDITIONAL (0x00200000) +#define U8_VALIDATE_UCS2_RANGE (0x00400000) + +#define U8_ILLEGAL_CHAR (-1) +#define U8_OUT_OF_RANGE_CHAR (-2) + +extern int u8_validate(char *, size_t, char **, int, int *); +extern int u8_strcmp(const char *, const char *, size_t, int, size_t, int *); +extern size_t u8_textprep_str(char *, size_t *, char *, size_t *, int, size_t, + int *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_U8_TEXTPREP_H */ diff --git a/uts/common/sys/u8_textprep_data.h b/uts/common/sys/u8_textprep_data.h new file mode 100644 index 000000000000..de6866096160 --- /dev/null +++ b/uts/common/sys/u8_textprep_data.h @@ -0,0 +1,35376 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright (c) 1991-2006 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of the Unicode data files and any associated documentation (the + * "Data Files") or Unicode software and any associated documentation (the + * "Software") to deal in the Data Files or Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, and/or sell copies of the Data Files or Software, and + * to permit persons to whom the Data Files or Software are furnished to do so, + * provided that (a) the above copyright notice(s) and this permission notice + * appear with all copies of the Data Files or Software, (b) both the above + * copyright notice(s) and this permission notice appear in associated + * documentation, and (c) there is clear notice in each modified Data File or + * in the Software as well as in the documentation associated with the Data + * File(s) or Software that the data or software has been modified. 
+ * + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF + * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR + * CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written authorization + * of the copyright holder. + * + * Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be + * registered in some jurisdictions. All other trademarks and registered + * trademarks mentioned herein are the property of their respective owners. + */ +/* + * This file has been modified by Sun Microsystems, Inc. + */ + +#ifndef _SYS_U8_TEXTPREP_DATA_H +#define _SYS_U8_TEXTPREP_DATA_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * To get to the combining class data, composition mappings, decomposition + * mappings, and case conversion mappings of Unicode, the data structures + * formulated and their meanings are like the following: + * + * Each UTF-8 character is seen as a 4-byte entity so that U+0061 (or 0x61 in + * UTF-8) would be seen as 0x00 0x00 0x00 0x61. Similarly, U+1D15E would be + * 0xF0 0x9D 0x85 0x9E in UTF-8. + * + * The first byte (MSB) value is an index to the b1_tbl, such as + * u8_common_b1_tbl and u8_composition_b1_tbl tables. 
A b1_tbl has + * indices to b2_tbl tables that have indices to b3_tbl. Each b3_tbl has + * either indices to b4_tbl or indices to b4_tbl and base values for + * later displacement calculations, using the u8_displacement_t type + * defined below. Each b4_tbl table then has indices to the final tables. + * + * As an example, if we have a character with code value of U+1D15E which is + * 0xF0 0x9D 0x85 0x9E in UTF-8, the target decomposition character bytes + * that will be mapped by the mapping procedure would be the ones between + * the start_index and the end_index computed as follows: + * + * b2_tbl_id = u8_common_b1_tbl[0][0xF0]; + * b3_tbl_id = u8_decomp_b2_tbl[0][b2_tbl_id][0x9D]; + * b4_tbl_id = u8_decomp_b3_tbl[0][b3_tbl_id][0x85].tbl_id; + * b4_base = u8_decomp_b3_tbl[0][b3_tbl_id][0x85].base; + * if (b4_tbl_id >= 0x8000) { + * b4_tbl_id -= 0x8000; + * start_index = u8_decomp_b4_16bit_tbl[0][b4_tbl_id][0x9E]; + * end_index = u8_decomp_b4_16bit_tbl[0][b4_tbl_id][0x9E + 1]; + * } else { + * start_index = u8_decomp_b4_tbl[0][b4_tbl_id][0x9E]; + * end_index = u8_decomp_b4_tbl[0][b4_tbl_id][0x9E + 1]; + * } + * + * The start_index and the end_index can be used to retrieve the bytes + * possibly of multiple UTF-8 characters from the final tables. + * + * The "[0]" above indicates this is for Unicode Version 3.2.0 data + * as of today. Likewise, the "[1]" indicates the data of another Unicode + * version, which is Unicode 5.0.0 as of today. + * + * The mapping procedures and the data structures are more or less similar + * among the different mappings. You might want to read u8_textprep.c + * for specific details. + * + * The tool programs created and used to generate the tables in this file are + * saved at PSARC/2007/149/materials/ as the tools.tar.gz file. + */ + +/* The following is a component type for the b4_tbl vectors.
*/ +typedef struct { + uint16_t tbl_id; + uint16_t base; +} u8_displacement_t; + +/* + * The U8_TBL_ELEMENT_NOT_DEF macro indicates a byte that is not defined or + * used. The U8_TBL_ELEMENT_FILLER indicates the end of a UTF-8 character at + * the final tables. + */ +#define U8_TBL_ELEMENT_NOT_DEF (0xff) +#define N_ U8_TBL_ELEMENT_NOT_DEF + +#define U8_TBL_ELEMENT_FILLER (0xf7) +#define FIL_ U8_TBL_ELEMENT_FILLER + +/* + * The common b1_tbl for combining class, decompositions, tolower, and + * toupper case conversion mappings. + */ +static const uchar_t u8_common_b1_tbl[2][256] = { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, 
N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, +}; + +static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, 2, 3, 4, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 5, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 
N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 6, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, 
N_, N_, + 1, 2, 3, 4, N_, N_, N_, N_, + N_, N_, 5, N_, N_, N_, N_, 6, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 7, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 8, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + +}; + +static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { + { + { /* Third byte table 0. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 0, 1, N_, N_, + N_, N_, 2, N_, N_, N_, 3, 4, + N_, 5, N_, 6, 7, 8, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 1. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, N_, 19, + N_, 20, N_, 21, N_, 22, N_, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 2. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 32, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 33, N_, N_, 34, + N_, N_, 35, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 3. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, 36, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 4. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 37, N_, 38, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 5. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 39, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 40, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 6. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 41, 42, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 7. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 8. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + }, + { + { /* Third byte table 0. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 0, 1, N_, N_, + N_, N_, 2, N_, N_, N_, 3, 4, + 5, 6, N_, 7, 8, 9, N_, 10, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 1. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, N_, 21, + N_, 22, 23, 24, N_, 25, N_, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 2. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 35, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 36, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 37, N_, N_, 38, + N_, N_, 39, N_, 40, N_, N_, N_, + 41, N_, N_, N_, 42, 43, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 44, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 3. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, 45, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 4. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 46, N_, 47, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 5. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 48, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 6. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 49, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 50, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 7. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 51, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { /* Third byte table 8. 
*/ + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 52, 53, N_, + N_, 54, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + }, +}; + +/* + * Unlike other b4_tbl, the b4_tbl for combining class data has + * the combining class values not indices to the final tables. + */ +static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { + { + { /* Fourth byte table 0. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 232, 220, 220, + 220, 220, 232, 216, 220, 220, 220, 220, + 220, 202, 202, 220, 220, 220, 220, 202, + 202, 220, 220, 220, 220, 220, 220, 220, + 220, 220, 220, 220, 1, 1, 1, 1, + 1, 220, 220, 220, 220, 230, 230, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 1. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 230, 240, 230, 220, + 220, 220, 230, 230, 230, 220, 220, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 234, 234, 233, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 230, 230, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 230, 230, 230, 230, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 3. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 220, 230, 230, 230, 230, 220, 230, + 230, 230, 222, 220, 230, 230, 230, 230, + 230, 230, 0, 220, 220, 220, 220, 220, + 230, 230, 220, 230, 230, 222, 228, 230, + 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 0, 20, 21, 22, 0, 23, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 24, 25, 0, 230, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 5. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 27, 28, 29, 30, 31, + 32, 33, 34, 230, 230, 220, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 35, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 6. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 230, 230, + 230, 230, 230, 230, 230, 0, 0, 230, + 230, 230, 230, 220, 230, 0, 0, 230, + 230, 0, 220, 230, 230, 220, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 7. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 220, 230, 230, 220, 230, 230, 220, + 220, 220, 230, 220, 220, 230, 220, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 8. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 220, 230, 220, 230, 220, 230, + 220, 230, 230, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 9. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 10. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 230, 220, 230, 230, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 11. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 12. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 13. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 14. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 15. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 16. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 17. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 18. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 19. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 20. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 84, 91, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 21. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 22. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 23. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 24. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 103, 103, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 25. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 107, 107, 107, 107, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 26. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 118, 118, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 27. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 122, 122, 122, 122, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 28. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 220, 220, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 220, 0, 220, + 0, 216, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 29. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 129, 130, 0, 132, 0, 0, 0, + 0, 0, 130, 130, 130, 130, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 30. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 130, 0, 230, 230, 9, 0, 230, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 31. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 220, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 32. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 0, 9, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 33. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 34. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 35. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 228, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 36. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 1, 1, 230, 230, 230, 230, + 1, 1, 1, 230, 230, 0, 0, 0, + 0, 230, 0, 0, 0, 1, 1, 230, + 220, 230, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 37. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 218, 228, 232, 222, 224, 224, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 38. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 8, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 39. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 26, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 40. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 41. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 216, 216, 1, + 1, 1, 0, 0, 0, 226, 216, 216, + 216, 216, 216, 0, 0, 0, 0, 0, + 0, 0, 0, 220, 220, 220, 220, 220, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 42. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 220, 220, 220, 0, 0, 230, 230, 230, + 230, 230, 220, 220, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 230, 230, 230, 230, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 43. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 44. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 45. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 46. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 47. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 48. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 49. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 50. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 51. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 52. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 53. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 54. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + }, + { + { /* Fourth byte table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 232, 220, 220, + 220, 220, 232, 216, 220, 220, 220, 220, + 220, 202, 202, 220, 220, 220, 220, 202, + 202, 220, 220, 220, 220, 220, 220, 220, + 220, 220, 220, 220, 1, 1, 1, 1, + 1, 220, 220, 220, 220, 230, 230, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 230, 240, 230, 220, + 220, 220, 230, 230, 230, 220, 220, 0, + 230, 230, 230, 220, 220, 220, 220, 230, + 232, 220, 220, 230, 233, 234, 234, 233, + 234, 234, 233, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 230, 230, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 230, 230, 230, 230, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 220, 230, 230, 230, 230, 220, 230, + 230, 230, 222, 220, 230, 230, 230, 230, + 230, 230, 220, 220, 220, 220, 220, 220, + 230, 230, 220, 230, 230, 222, 228, 230, + 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 19, 20, 21, 22, 0, 23, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 4. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 24, 25, 0, 230, 220, 0, 18, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 5. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 230, 230, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 6. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 27, 28, 29, 30, 31, + 32, 33, 34, 230, 230, 220, 220, 230, + 230, 230, 230, 230, 220, 230, 230, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 35, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 230, 230, + 230, 230, 230, 230, 230, 0, 0, 230, + 230, 230, 230, 220, 230, 0, 0, 230, + 230, 0, 220, 230, 230, 220, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 8. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 220, 230, 230, 220, 230, 230, 220, + 220, 220, 230, 220, 220, 230, 220, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 220, 230, 220, 230, 220, 230, + 220, 230, 230, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 230, 230, 230, 230, 230, + 230, 230, 220, 230, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 230, 220, 230, 230, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 84, 91, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 103, 103, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 107, 107, 107, 107, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 118, 118, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 122, 122, 122, 122, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 220, 220, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 220, 0, 220, + 0, 216, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 129, 130, 0, 132, 0, 0, 0, + 0, 0, 130, 130, 130, 130, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 33. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 130, 0, 230, 230, 9, 0, 230, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 34. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 220, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 35. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 0, 9, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 36. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 37. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 38. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 230, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 39. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 228, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 40. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 222, 230, 220, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 41. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 230, + 220, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 42. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 43. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 230, 220, 230, 230, 230, + 230, 230, 230, 230, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 44. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 220, 230, 230, 230, 230, 230, + 230, 230, 220, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 230, 220, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 45. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 1, 1, 230, 230, 230, 230, + 1, 1, 1, 230, 230, 0, 0, 0, + 0, 230, 0, 0, 0, 1, 1, 230, + 220, 230, 1, 1, 220, 220, 220, 220, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 46. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 218, 228, 232, 222, 224, 224, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 47. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 8, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 48. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 49. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 26, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 50. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 230, 230, 230, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 51. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 220, 0, 230, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 230, 1, 220, 0, 0, 0, 0, 9, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 52. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 216, 216, 1, + 1, 1, 0, 0, 0, 226, 216, 216, + 216, 216, 216, 0, 0, 0, 0, 0, + 0, 0, 0, 220, 220, 220, 220, 220, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 53. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 220, 220, 220, 0, 0, 230, 230, 230, + 230, 230, 220, 220, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 230, 230, 230, 230, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { /* Fourth byte table 54. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 230, 230, 230, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + }, +}; + +static const uchar_t u8_composition_b1_tbl[2][256] = { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, 
N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, 
+ }, +}; + +static const uchar_t u8_composition_b2_tbl[2][1][256] = { + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, 2, 3, 4, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 
N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, 2, 3, 4, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + +}; + +static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { + { + { /* Third byte table 0. */ + { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 
}, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0, 2470 }, + { 0x8001, 2491 }, { 1, 2871 }, { 2, 2959 }, + { 3, 3061 }, { 4, 3212 }, { 5, 3226 }, + { N_, 0 }, { 6, 3270 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0x8002, 3277 }, + { 7, 3774 }, { 8, 3949 }, { 9, 4198 }, + { N_, 0 }, { 10, 4265 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 11, 4293 }, { 12, 4312 }, { N_, 0 }, + { 13, 4326 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 14, 4347 }, + { N_, 0 }, { N_, 0 }, { 15, 4374 }, + { N_, 0 }, { N_, 0 }, { N_, 
0 }, + { N_, 0 }, { N_, 0 }, { 16, 4391 }, + { 17, 4416 }, { 18, 4425 }, { N_, 0 }, + { 19, 4451 }, { 20, 4460 }, { 21, 4469 }, + { N_, 0 }, { 22, 4503 }, { N_, 0 }, + { 23, 4529 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 24, 4563 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 
}, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 25, 4572 }, { 26, 4588 }, + { 27, 4620 }, { 28, 4666 }, { 0x8003, 4682 }, + { 0x8004, 5254 }, { 29, 5616 }, { 30, 5646 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 31, 5684 }, + { 32, 5708 }, { 33, 5732 }, { 34, 5780 }, + { 35, 5900 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 
}, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 4. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 36, 6012 }, { 37, 6241 }, { 38, 6358 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + }, + { + { /* Third byte table 0. 
*/ + { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 
}, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0, 2470 }, + { 0x8001, 2491 }, { 1, 2871 }, { 2, 2959 }, + { 3, 3061 }, { 4, 3212 }, { 5, 3226 }, + { N_, 0 }, { 6, 3270 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0x8002, 3277 }, + { 7, 3774 }, { 8, 3949 }, { 9, 4198 }, + { N_, 0 }, { 10, 4265 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 11, 4293 }, { 12, 4312 }, { N_, 0 }, + { 13, 4326 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 14, 4347 }, + { N_, 0 }, { N_, 0 }, { 15, 4374 }, + { N_, 0 }, { N_, 0 }, { N_, 
0 }, + { N_, 0 }, { N_, 0 }, { 16, 4391 }, + { 17, 4416 }, { 18, 4425 }, { N_, 0 }, + { 19, 4451 }, { 20, 4460 }, { 21, 4469 }, + { N_, 0 }, { 22, 4503 }, { N_, 0 }, + { 23, 4529 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 24, 4563 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 
}, + { N_, 0 }, { 25, 4572 }, { 26, 4662 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 27, 4671 }, { 28, 4687 }, + { 29, 4719 }, { 30, 4765 }, { 0x8003, 4781 }, + { 0x8004, 5353 }, { 31, 5715 }, { 32, 5745 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 33, 5783 }, + { 34, 5807 }, { 35, 5831 }, { 36, 5879 }, + { 37, 5999 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 
}, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 4. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 38, 6111 }, { 39, 6340 }, { 40, 6457 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + }, +}; + +static const uchar_t u8_composition_b4_tbl[2][41][257] = { + { + { /* Fourth byte table 0. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 1. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 29, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 73, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, + }, + { /* Fourth byte table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 15, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 38, 46, 46, 46, 46, + 46, 54, 62, 62, 62, 62, 62, 62, + 62, 70, 78, 86, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 94, 94, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 36, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 108, 144, 144, 144, 144, 144, 144, 144, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, + }, + { /* Fourth byte table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 5. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 14, 22, 30, 30, 30, 30, 30, 37, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, + }, + { /* Fourth byte table 6. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 7. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 15, 15, 70, 70, + 70, 70, 112, 133, 154, 154, 154, 162, + 162, 162, 162, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, + }, + { /* Fourth byte table 8. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 20, 20, 20, 27, 27, 46, 59, + 66, 91, 91, 98, 98, 98, 98, 105, + 105, 105, 105, 105, 130, 130, 130, 130, + 137, 137, 137, 137, 144, 144, 151, 151, + 151, 164, 164, 164, 171, 171, 190, 203, + 210, 235, 235, 242, 242, 242, 242, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 25, 25, 25, 25, + 32, 32, 32, 32, 39, 39, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 53, + 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 60, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 21, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 14, 14, 14, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 9, 9, 9, 9, 9, 9, 9, + 9, 18, 18, 18, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, + }, + { /* Fourth byte table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 17, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 25, + 25, 25, 25, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 17, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 25, 25, 25, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 16, 16, 16, 16, + 16, 16, 16, 24, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 38, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 16, + 16, 16, 16, 16, 16, 16, 16, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 8, 16, 16, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 8, 16, 16, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, + }, + { /* Fourth byte table 33. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 8, 8, 8, + 8, 16, 16, 16, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 32, 32, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 34. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 8, 16, 16, + 16, 24, 24, 24, 24, 24, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 40, 40, 40, 48, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 64, 72, 72, 72, 80, + 88, 88, 88, 96, 104, 112, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 
120, 120, 120, 120, + 120, + }, + { /* Fourth byte table 35. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 16, 16, 16, 24, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 40, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 56, 56, 56, 56, 56, + 56, 64, 72, 72, 80, 80, 80, 80, + 80, 80, 80, 88, 96, 104, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, + }, + { /* Fourth byte table 36. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 9, 9, 9, 9, 18, 18, 27, 27, + 36, 36, 45, 45, 54, 54, 63, 63, + 72, 72, 81, 81, 90, 90, 99, 99, + 108, 108, 117, 117, 117, 126, 126, 135, + 135, 144, 144, 144, 144, 144, 144, 144, + 161, 161, 161, 178, 178, 178, 195, 195, + 195, 212, 212, 212, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, + }, + { /* Fourth byte table 37. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 18, + 18, 18, 18, 18, 27, 27, 36, 36, + 45, 45, 54, 54, 63, 63, 72, 72, + 81, 81, 90, 90, 99, 99, 108, 108, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, + }, + { /* Fourth byte table 38. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 9, 9, 18, 18, 27, + 27, 36, 36, 36, 36, 36, 36, 36, + 53, 53, 53, 70, 70, 70, 87, 87, + 87, 104, 104, 104, 121, 121, 121, 121, + 121, 121, 121, 121, 121, 121, 121, 121, + 121, 121, 121, 121, 121, 121, 121, 121, + 130, 139, 148, 157, 157, 157, 157, 157, + 157, 157, 157, 157, 157, 157, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, + }, + { /* Fourth byte table 39. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 40. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + }, + { + { /* Fourth byte table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 29, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 73, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 15, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 38, 46, 46, 46, 46, + 46, 54, 62, 62, 62, 62, 62, 62, + 62, 70, 78, 86, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 94, 94, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 
102, 102, 102, + 102, + }, + { /* Fourth byte table 3. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 36, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 108, 144, 144, 144, 144, 144, 144, 144, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, + 151, + }, + { /* Fourth byte table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 5. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 14, 22, 30, 30, 30, 30, 30, 37, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, + }, + { /* Fourth byte table 6. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 7. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 15, 15, 70, 70, + 70, 70, 112, 133, 154, 154, 154, 162, + 162, 162, 162, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, + }, + { /* Fourth byte table 8. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 20, 20, 20, 27, 27, 46, 59, + 66, 91, 91, 98, 98, 98, 98, 105, + 105, 105, 105, 105, 130, 130, 130, 130, + 137, 137, 137, 137, 144, 144, 151, 151, + 151, 164, 164, 164, 171, 171, 190, 203, + 210, 235, 235, 242, 242, 242, 242, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, + 249, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 25, 25, 25, 25, + 32, 32, 32, 32, 39, 39, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 53, + 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 60, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 21, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 14, 14, 14, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 9, 9, 9, 9, 9, 9, 9, + 9, 18, 18, 18, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, + }, + { /* Fourth byte table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 17, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 25, + 25, 25, 25, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 17, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 25, 25, 25, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, + 18, 18, 27, 27, 36, 36, 45, 45, + 45, 45, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 63, 63, 72, 72, 81, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 16, 16, 16, 16, + 16, 16, 16, 24, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 38, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 16, + 16, 16, 16, 16, 16, 16, 16, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 33. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 8, 16, 16, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, + }, + { /* Fourth byte table 34. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 8, 16, 16, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, + }, + { /* Fourth byte table 35. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 8, 8, 8, + 8, 16, 16, 16, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 32, 32, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 36. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 8, 16, 16, + 16, 24, 24, 24, 24, 24, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 40, 40, 40, 48, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 64, 72, 72, 72, 80, + 88, 88, 88, 96, 104, 112, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 
120, 120, 120, 120, + 120, + }, + { /* Fourth byte table 37. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 16, 16, 16, 24, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 40, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 56, 56, 56, 56, 56, + 56, 64, 72, 72, 80, 80, 80, 80, + 80, 80, 80, 88, 96, 104, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, + }, + { /* Fourth byte table 38. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 9, 9, 9, 9, 18, 18, 27, 27, + 36, 36, 45, 45, 54, 54, 63, 63, + 72, 72, 81, 81, 90, 90, 99, 99, + 108, 108, 117, 117, 117, 126, 126, 135, + 135, 144, 144, 144, 144, 144, 144, 144, + 161, 161, 161, 178, 178, 178, 195, 195, + 195, 212, 212, 212, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, 229, 229, 229, 229, 229, 229, 229, + 229, + }, + { /* Fourth byte table 39. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 18, + 18, 18, 18, 18, 27, 27, 36, 36, + 45, 45, 54, 54, 63, 63, 72, 72, + 81, 81, 90, 90, 99, 99, 108, 108, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, + }, + { /* Fourth byte table 40. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 9, 9, 18, 18, 27, + 27, 36, 36, 36, 36, 36, 36, 36, + 53, 53, 53, 70, 70, 70, 87, 87, + 87, 104, 104, 104, 121, 121, 121, 121, + 121, 121, 121, 121, 121, 121, 121, 121, + 121, 121, 121, 121, 121, 121, 121, 121, + 130, 139, 148, 157, 157, 157, 157, 157, + 157, 157, 157, 157, 157, 157, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 166, 166, + 166, + }, + }, +}; + +static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { + { + { /* Fourth byte 16-bit table 0. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 16, 24, + 24, 24, 124, 146, 177, 219, 327, 335, + 379, 427, 521, 528, 562, 602, 624, 683, + 782, 797, 797, 849, 894, 941, 1061, 1076, + 1118, 1133, 1193, 1233, 1233, 1233, 1233, 1233, + 1233, 1233, 1333, 1355, 1386, 1428, 1536, 1544, + 1588, 1643, 1731, 1744, 1778, 1818, 1840, 1899, + 1998, 2013, 2013, 2065, 2110, 2164, 2284, 2299, + 2348, 2363, 2430, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, + }, + { /* Fourth byte 16-bit table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 29, 29, 36, 43, 56, + 64, 64, 64, 93, 93, 93, 93, 93, + 101, 101, 101, 101, 101, 130, 151, 158, + 158, 165, 165, 165, 165, 190, 190, 190, + 190, 190, 190, 219, 219, 226, 233, 246, + 254, 254, 254, 283, 283, 283, 283, 283, + 291, 291, 291, 291, 291, 320, 341, 348, + 348, 355, 355, 355, 355, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, + }, + { /* Fourth byte 16-bit table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 49, 49, 49, 49, 77, 77, + 112, 112, 160, 160, 160, 160, 160, 160, + 188, 188, 196, 196, 196, 196, 237, 237, + 237, 237, 272, 272, 272, 280, 280, 288, + 288, 288, 344, 344, 344, 344, 372, 372, + 414, 414, 469, 469, 469, 469, 469, 469, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, + }, + { /* Fourth byte 16-bit table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 29, 58, 66, 74, 82, 90, 98, + 106, 135, 164, 172, 180, 188, 196, 204, + 212, 227, 242, 242, 242, 242, 242, 242, + 242, 257, 272, 272, 272, 272, 272, 272, + 272, 301, 330, 338, 346, 354, 362, 370, + 378, 407, 436, 444, 452, 460, 468, 476, + 484, 506, 528, 528, 528, 528, 528, 528, + 528, 550, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, + }, + { /* Fourth byte 16-bit table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 30, 30, 30, 30, 30, 30, + 30, 45, 60, 60, 60, 60, 60, 60, + 60, 82, 104, 104, 104, 104, 104, 104, + 104, 104, 126, 126, 126, 126, 126, 126, + 126, 155, 184, 192, 200, 208, 216, 224, + 232, 261, 290, 298, 306, 314, 322, 330, + 338, 346, 346, 346, 346, 354, 354, 354, + 354, 354, 354, 354, 354, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, + }, + }, + { + { /* Fourth byte 16-bit table 0. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 16, 24, + 24, 24, 124, 146, 177, 219, 327, 335, + 379, 427, 521, 528, 562, 602, 624, 683, + 782, 797, 797, 849, 894, 941, 1061, 1076, + 1118, 1133, 1193, 1233, 1233, 1233, 1233, 1233, + 1233, 1233, 1333, 1355, 1386, 1428, 1536, 1544, + 1588, 1643, 1731, 1744, 1778, 1818, 1840, 1899, + 1998, 2013, 2013, 2065, 2110, 2164, 2284, 2299, + 2348, 2363, 2430, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, + 2470, + }, + { /* Fourth byte 16-bit table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 29, 29, 36, 43, 56, + 64, 64, 64, 93, 93, 93, 93, 93, + 101, 101, 101, 101, 101, 130, 151, 158, + 158, 165, 165, 165, 165, 190, 190, 190, + 190, 190, 190, 219, 219, 226, 233, 246, + 254, 254, 254, 283, 283, 283, 283, 283, + 291, 291, 291, 291, 291, 320, 341, 348, + 348, 355, 355, 355, 355, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 380, 380, 380, 380, + 380, + }, + { /* Fourth byte 16-bit table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 49, 49, 49, 49, 77, 77, + 112, 112, 160, 160, 160, 160, 160, 160, + 188, 188, 196, 196, 196, 196, 237, 237, + 237, 237, 272, 272, 272, 280, 280, 288, + 288, 288, 344, 344, 344, 344, 372, 372, + 414, 414, 469, 469, 469, 469, 469, 469, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, 497, 497, 497, 497, 497, 497, 497, + 497, + }, + { /* Fourth byte 16-bit table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 29, 58, 66, 74, 82, 90, 98, + 106, 135, 164, 172, 180, 188, 196, 204, + 212, 227, 242, 242, 242, 242, 242, 242, + 242, 257, 272, 272, 272, 272, 272, 272, + 272, 301, 330, 338, 346, 354, 362, 370, + 378, 407, 436, 444, 452, 460, 468, 476, + 484, 506, 528, 528, 528, 528, 528, 528, + 528, 550, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, 572, 572, 572, 572, 572, 572, 572, + 572, + }, + { /* Fourth byte 16-bit table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 30, 30, 30, 30, 30, 30, + 30, 45, 60, 60, 60, 60, 60, 60, + 60, 82, 104, 104, 104, 104, 104, 104, + 104, 104, 126, 126, 126, 126, 126, 126, + 126, 155, 184, 192, 200, 208, 216, 224, + 232, 261, 290, 298, 306, 314, 322, 330, + 338, 346, 346, 346, 346, 354, 354, 354, + 354, 354, 354, 354, 354, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 362, 362, 362, 362, + 362, + }, + }, +}; + +static const uchar_t u8_composition_final_tbl[2][6623] = { + { + 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, + 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, + 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAF, FIL_, + 0x10, 0xCC, 0x86, FIL_, 0xC4, 0x82, FIL_, 0xCC, + 0x87, FIL_, 0xC8, 0xA6, FIL_, 0xCC, 0x8F, FIL_, + 0xC8, 0x80, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x82, + FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x81, FIL_, 0xCC, + 0x80, FIL_, 0xC3, 0x80, FIL_, 0xCC, 0x83, FIL_, + 0xC3, 0x83, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, + 0xA0, FIL_, 0xCC, 0xA5, FIL_, 0xE1, 0xB8, 0x80, + FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x82, FIL_, 0xCC, + 0x84, FIL_, 0xC4, 0x80, FIL_, 0xCC, 0x88, FIL_, + 0xC3, 0x84, FIL_, 0xCC, 0x8A, FIL_, 0xC3, 0x85, + FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x84, FIL_, 0xCC, + 0x89, FIL_, 0xE1, 0xBA, 0xA2, FIL_, 0xCC, 0x8C, + FIL_, 0xC7, 0x8D, FIL_, 0x03, 0xCC, 0x87, FIL_, + 0xE1, 0xB8, 0x82, FIL_, 0xCC, 0xB1, 
FIL_, 0xE1, + 0xB8, 0x86, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0x84, FIL_, 0x05, 0xCC, 0xA7, FIL_, 0xC3, 0x87, + FIL_, 0xCC, 0x81, FIL_, 0xC4, 0x86, FIL_, 0xCC, + 0x8C, FIL_, 0xC4, 0x8C, FIL_, 0xCC, 0x87, FIL_, + 0xC4, 0x8A, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0x88, + FIL_, 0x06, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0x8E, + FIL_, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0x90, FIL_, + 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x92, FIL_, 0xCC, + 0x87, FIL_, 0xE1, 0xB8, 0x8A, FIL_, 0xCC, 0x8C, + FIL_, 0xC4, 0x8E, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB8, 0x8C, FIL_, 0x11, 0xCC, 0x80, FIL_, 0xC3, + 0x88, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x89, FIL_, + 0xCC, 0x82, FIL_, 0xC3, 0x8A, FIL_, 0xCC, 0x88, + FIL_, 0xC3, 0x8B, FIL_, 0xCC, 0xA7, FIL_, 0xC8, + 0xA8, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x86, FIL_, + 0xCC, 0x8F, FIL_, 0xC8, 0x84, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBA, 0xBA, FIL_, 0xCC, 0xB0, FIL_, + 0xE1, 0xB8, 0x9A, FIL_, 0xCC, 0xAD, FIL_, 0xE1, + 0xB8, 0x98, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBA, + 0xBC, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0xB8, + FIL_, 0xCC, 0x84, FIL_, 0xC4, 0x92, FIL_, 0xCC, + 0x86, FIL_, 0xC4, 0x94, FIL_, 0xCC, 0x87, FIL_, + 0xC4, 0x96, FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x98, + FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x9A, FIL_, 0x01, + 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9E, FIL_, 0x07, + 0xCC, 0x8C, FIL_, 0xC7, 0xA6, FIL_, 0xCC, 0x87, + FIL_, 0xC4, 0xA0, FIL_, 0xCC, 0x84, FIL_, 0xE1, + 0xB8, 0xA0, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0x9C, + FIL_, 0xCC, 0x81, FIL_, 0xC7, 0xB4, FIL_, 0xCC, + 0xA7, FIL_, 0xC4, 0xA2, FIL_, 0xCC, 0x86, FIL_, + 0xC4, 0x9E, FIL_, 0x07, 0xCC, 0xAE, FIL_, 0xE1, + 0xB8, 0xAA, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB8, + 0xA2, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA6, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0xA4, FIL_, + 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0xA8, FIL_, 0xCC, + 0x8C, FIL_, 0xC8, 0x9E, FIL_, 0xCC, 0x82, FIL_, + 0xC4, 0xA4, FIL_, 0x0F, 0xCC, 0x84, FIL_, 0xC4, + 0xAA, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x8C, FIL_, + 0xCC, 0xA8, FIL_, 0xC4, 0xAE, FIL_, 0xCC, 0x83, + FIL_, 0xC4, 0xA8, FIL_, 0xCC, 0x88, 
FIL_, 0xC3, + 0x8F, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x8D, FIL_, + 0xCC, 0x8F, FIL_, 0xC8, 0x88, FIL_, 0xCC, 0x86, + FIL_, 0xC4, 0xAC, FIL_, 0xCC, 0x91, FIL_, 0xC8, + 0x8A, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x8F, FIL_, + 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x88, FIL_, 0xCC, + 0x87, FIL_, 0xC4, 0xB0, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xBB, 0x8A, FIL_, 0xCC, 0xB0, FIL_, 0xE1, + 0xB8, 0xAC, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x8E, + FIL_, 0x01, 0xCC, 0x82, FIL_, 0xC4, 0xB4, FIL_, + 0x05, 0xCC, 0x8C, FIL_, 0xC7, 0xA8, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xB8, 0xB4, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xB8, 0xB0, FIL_, 0xCC, 0xA7, FIL_, + 0xC4, 0xB6, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0xB2, FIL_, 0x06, 0xCC, 0xA7, FIL_, 0xC4, 0xBB, + FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0xBD, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xB8, 0xBA, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xB8, 0xB6, FIL_, 0xCC, 0xAD, FIL_, + 0xE1, 0xB8, 0xBC, FIL_, 0xCC, 0x81, FIL_, 0xC4, + 0xB9, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xB8, + 0xBE, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x82, + FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x80, FIL_, + 0x09, 0xCC, 0x80, FIL_, 0xC7, 0xB8, FIL_, 0xCC, + 0xAD, FIL_, 0xE1, 0xB9, 0x8A, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0x84, FIL_, 0xCC, 0xB1, FIL_, + 0xE1, 0xB9, 0x88, FIL_, 0xCC, 0x83, FIL_, 0xC3, + 0x91, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x86, + FIL_, 0xCC, 0x81, FIL_, 0xC5, 0x83, FIL_, 0xCC, + 0xA7, FIL_, 0xC5, 0x85, FIL_, 0xCC, 0x8C, FIL_, + 0xC5, 0x87, FIL_, 0x10, 0xCC, 0xA8, FIL_, 0xC7, + 0xAA, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x8E, FIL_, + 0xCC, 0x80, FIL_, 0xC3, 0x92, FIL_, 0xCC, 0x9B, + FIL_, 0xC6, 0xA0, FIL_, 0xCC, 0x8F, FIL_, 0xC8, + 0x8C, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x93, FIL_, + 0xCC, 0x87, FIL_, 0xC8, 0xAE, FIL_, 0xCC, 0x8C, + FIL_, 0xC7, 0x91, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xBB, 0x8C, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x94, + FIL_, 0xCC, 0x84, FIL_, 0xC5, 0x8C, FIL_, 0xCC, + 0x83, FIL_, 0xC3, 0x95, FIL_, 0xCC, 0x86, FIL_, + 0xC5, 0x8E, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x96, + FIL_, 0xCC, 0x8B, FIL_, 0xC5, 0x90, 
FIL_, 0xCC, + 0x89, FIL_, 0xE1, 0xBB, 0x8E, FIL_, 0x02, 0xCC, + 0x87, FIL_, 0xE1, 0xB9, 0x96, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0x94, FIL_, 0x08, 0xCC, 0x91, + FIL_, 0xC8, 0x92, FIL_, 0xCC, 0xA7, FIL_, 0xC5, + 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0x98, FIL_, + 0xCC, 0xB1, FIL_, 0xE1, 0xB9, 0x9E, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xB9, 0x9A, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0x98, FIL_, 0xCC, 0x81, FIL_, + 0xC5, 0x94, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x90, + FIL_, 0x07, 0xCC, 0x81, FIL_, 0xC5, 0x9A, FIL_, + 0xCC, 0x82, FIL_, 0xC5, 0x9C, FIL_, 0xCC, 0xA7, + FIL_, 0xC5, 0x9E, FIL_, 0xCC, 0x8C, FIL_, 0xC5, + 0xA0, FIL_, 0xCC, 0xA6, FIL_, 0xC8, 0x98, FIL_, + 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA0, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xB9, 0xA2, FIL_, 0x07, 0xCC, + 0x8C, FIL_, 0xC5, 0xA4, FIL_, 0xCC, 0xB1, FIL_, + 0xE1, 0xB9, 0xAE, FIL_, 0xCC, 0xA6, FIL_, 0xC8, + 0x9A, FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0xA2, FIL_, + 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xAA, FIL_, 0xCC, + 0xAD, FIL_, 0xE1, 0xB9, 0xB0, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xB9, 0xAC, FIL_, 0x13, 0xCC, 0xA8, + FIL_, 0xC5, 0xB2, FIL_, 0xCC, 0x83, FIL_, 0xC5, + 0xA8, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0xAA, FIL_, + 0xCC, 0x81, FIL_, 0xC3, 0x9A, FIL_, 0xCC, 0x86, + FIL_, 0xC5, 0xAC, FIL_, 0xCC, 0x8A, FIL_, 0xC5, + 0xAE, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x99, FIL_, + 0xCC, 0x91, FIL_, 0xC8, 0x96, FIL_, 0xCC, 0x8B, + FIL_, 0xC5, 0xB0, FIL_, 0xCC, 0xA4, FIL_, 0xE1, + 0xB9, 0xB2, FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB9, + 0xB4, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x94, FIL_, + 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB6, FIL_, 0xCC, + 0x9B, FIL_, 0xC6, 0xAF, FIL_, 0xCC, 0x82, FIL_, + 0xC3, 0x9B, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x9C, + FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x93, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xBB, 0xA4, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0xA6, FIL_, 0x02, 0xCC, 0x83, + FIL_, 0xE1, 0xB9, 0xBC, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xB9, 0xBE, FIL_, 0x06, 0xCC, 0x82, FIL_, + 0xC5, 0xB4, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xBA, + 0x84, FIL_, 0xCC, 0x87, FIL_, 0xE1, 
0xBA, 0x86, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x88, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0x82, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBA, 0x80, FIL_, 0x02, 0xCC, + 0x87, FIL_, 0xE1, 0xBA, 0x8A, FIL_, 0xCC, 0x88, + FIL_, 0xE1, 0xBA, 0x8C, FIL_, 0x09, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0xB6, FIL_, 0xCC, 0x87, FIL_, + 0xE1, 0xBA, 0x8E, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xBB, 0xB4, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x9D, + FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xB2, FIL_, 0xCC, + 0x82, FIL_, 0xC5, 0xB6, FIL_, 0xCC, 0x88, FIL_, + 0xC5, 0xB8, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, + 0xB2, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xB8, + FIL_, 0x06, 0xCC, 0x87, FIL_, 0xC5, 0xBB, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x92, FIL_, 0xCC, + 0x8C, FIL_, 0xC5, 0xBD, FIL_, 0xCC, 0xB1, FIL_, + 0xE1, 0xBA, 0x94, FIL_, 0xCC, 0x82, FIL_, 0xE1, + 0xBA, 0x90, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0xB9, + FIL_, 0x10, 0xCC, 0x8C, FIL_, 0xC7, 0x8E, FIL_, + 0xCC, 0x8F, FIL_, 0xC8, 0x81, FIL_, 0xCC, 0xA8, + FIL_, 0xC4, 0x85, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xBA, 0xA1, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x83, + FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA3, FIL_, + 0xCC, 0x84, FIL_, 0xC4, 0x81, FIL_, 0xCC, 0x91, + FIL_, 0xC8, 0x83, FIL_, 0xCC, 0x8A, FIL_, 0xC3, + 0xA5, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0xA4, FIL_, + 0xCC, 0x83, FIL_, 0xC3, 0xA3, FIL_, 0xCC, 0x82, + FIL_, 0xC3, 0xA2, FIL_, 0xCC, 0x81, FIL_, 0xC3, + 0xA1, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xA0, FIL_, + 0xCC, 0x87, FIL_, 0xC8, 0xA7, FIL_, 0xCC, 0xA5, + FIL_, 0xE1, 0xB8, 0x81, FIL_, 0x03, 0xCC, 0xB1, + FIL_, 0xE1, 0xB8, 0x87, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xB8, 0x85, FIL_, 0xCC, 0x87, FIL_, 0xE1, + 0xB8, 0x83, FIL_, 0x05, 0xCC, 0x87, FIL_, 0xC4, + 0x8B, FIL_, 0xCC, 0xA7, FIL_, 0xC3, 0xA7, FIL_, + 0xCC, 0x82, FIL_, 0xC4, 0x89, FIL_, 0xCC, 0x8C, + FIL_, 0xC4, 0x8D, FIL_, 0xCC, 0x81, FIL_, 0xC4, + 0x87, FIL_, 0x06, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, + 0x93, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x8B, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0x8D, FIL_, + 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0x8F, 
FIL_, 0xCC, + 0xA7, FIL_, 0xE1, 0xB8, 0x91, FIL_, 0xCC, 0x8C, + FIL_, 0xC4, 0x8F, FIL_, 0x11, 0xCC, 0xA8, FIL_, + 0xC4, 0x99, FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x9B, + FIL_, 0xCC, 0x87, FIL_, 0xC4, 0x97, FIL_, 0xCC, + 0x88, FIL_, 0xC3, 0xAB, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xBA, 0xB9, FIL_, 0xCC, 0xB0, FIL_, 0xE1, + 0xB8, 0x9B, FIL_, 0xCC, 0x84, FIL_, 0xC4, 0x93, + FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x99, FIL_, + 0xCC, 0x83, FIL_, 0xE1, 0xBA, 0xBD, FIL_, 0xCC, + 0x86, FIL_, 0xC4, 0x95, FIL_, 0xCC, 0xA7, FIL_, + 0xC8, 0xA9, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, + 0xBB, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x85, FIL_, + 0xCC, 0x81, FIL_, 0xC3, 0xA9, FIL_, 0xCC, 0x91, + FIL_, 0xC8, 0x87, FIL_, 0xCC, 0x80, FIL_, 0xC3, + 0xA8, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xAA, FIL_, + 0x01, 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9F, FIL_, + 0x07, 0xCC, 0x86, FIL_, 0xC4, 0x9F, FIL_, 0xCC, + 0xA7, FIL_, 0xC4, 0xA3, FIL_, 0xCC, 0x81, FIL_, + 0xC7, 0xB5, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0x9D, + FIL_, 0xCC, 0x87, FIL_, 0xC4, 0xA1, FIL_, 0xCC, + 0x8C, FIL_, 0xC7, 0xA7, FIL_, 0xCC, 0x84, FIL_, + 0xE1, 0xB8, 0xA1, FIL_, 0x08, 0xCC, 0x8C, FIL_, + 0xC8, 0x9F, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0xA5, + FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA7, FIL_, + 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0xA3, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xBA, 0x96, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xB8, 0xA5, FIL_, 0xCC, 0xA7, FIL_, + 0xE1, 0xB8, 0xA9, FIL_, 0xCC, 0xAE, FIL_, 0xE1, + 0xB8, 0xAB, FIL_, 0x0E, 0xCC, 0x81, FIL_, 0xC3, + 0xAD, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xAC, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0x8B, FIL_, 0xCC, + 0x8C, FIL_, 0xC7, 0x90, FIL_, 0xCC, 0x89, FIL_, + 0xE1, 0xBB, 0x89, FIL_, 0xCC, 0x91, FIL_, 0xC8, + 0x8B, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x89, FIL_, + 0xCC, 0x82, FIL_, 0xC3, 0xAE, FIL_, 0xCC, 0xB0, + FIL_, 0xE1, 0xB8, 0xAD, FIL_, 0xCC, 0xA8, FIL_, + 0xC4, 0xAF, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0xAD, + FIL_, 0xCC, 0x84, FIL_, 0xC4, 0xAB, FIL_, 0xCC, + 0x83, FIL_, 0xC4, 0xA9, FIL_, 0xCC, 0x88, FIL_, + 0xC3, 0xAF, FIL_, 0x02, 0xCC, 0x82, 
FIL_, 0xC4, + 0xB5, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0xB0, FIL_, + 0x05, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0xB3, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xB1, FIL_, 0xCC, + 0xA7, FIL_, 0xC4, 0xB7, FIL_, 0xCC, 0x8C, FIL_, + 0xC7, 0xA9, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, + 0xB5, FIL_, 0x06, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0xB7, FIL_, 0xCC, 0x81, FIL_, 0xC4, 0xBA, FIL_, + 0xCC, 0xA7, FIL_, 0xC4, 0xBC, FIL_, 0xCC, 0x8C, + FIL_, 0xC4, 0xBE, FIL_, 0xCC, 0xB1, FIL_, 0xE1, + 0xB8, 0xBB, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, + 0xBD, FIL_, 0x03, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, + 0x83, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xBF, + FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x81, FIL_, + 0x09, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x87, FIL_, + 0xCC, 0x83, FIL_, 0xC3, 0xB1, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0x85, FIL_, 0xCC, 0xB1, FIL_, + 0xE1, 0xB9, 0x89, FIL_, 0xCC, 0x81, FIL_, 0xC5, + 0x84, FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x86, FIL_, + 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0x8B, FIL_, 0xCC, + 0x8C, FIL_, 0xC5, 0x88, FIL_, 0xCC, 0x80, FIL_, + 0xC7, 0xB9, FIL_, 0x10, 0xCC, 0x89, FIL_, 0xE1, + 0xBB, 0x8F, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0xB3, + FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xB2, FIL_, 0xCC, + 0x87, FIL_, 0xC8, 0xAF, FIL_, 0xCC, 0x8F, FIL_, + 0xC8, 0x8D, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, + 0x8D, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0x8D, FIL_, + 0xCC, 0x8C, FIL_, 0xC7, 0x92, FIL_, 0xCC, 0x86, + FIL_, 0xC5, 0x8F, FIL_, 0xCC, 0x8B, FIL_, 0xC5, + 0x91, FIL_, 0xCC, 0x9B, FIL_, 0xC6, 0xA1, FIL_, + 0xCC, 0x91, FIL_, 0xC8, 0x8F, FIL_, 0xCC, 0xA8, + FIL_, 0xC7, 0xAB, FIL_, 0xCC, 0x88, FIL_, 0xC3, + 0xB6, FIL_, 0xCC, 0x83, FIL_, 0xC3, 0xB5, FIL_, + 0xCC, 0x82, FIL_, 0xC3, 0xB4, FIL_, 0x02, 0xCC, + 0x87, FIL_, 0xE1, 0xB9, 0x97, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0x95, FIL_, 0x08, 0xCC, 0xB1, + FIL_, 0xE1, 0xB9, 0x9F, FIL_, 0xCC, 0x87, FIL_, + 0xE1, 0xB9, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC5, + 0x95, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x91, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x9B, FIL_, 0xCC, + 0x8C, FIL_, 0xC5, 0x99, FIL_, 0xCC, 
0x91, FIL_, + 0xC8, 0x93, FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x97, + FIL_, 0x07, 0xCC, 0xA6, FIL_, 0xC8, 0x99, FIL_, + 0xCC, 0x8C, FIL_, 0xC5, 0xA1, FIL_, 0xCC, 0x81, + FIL_, 0xC5, 0x9B, FIL_, 0xCC, 0x87, FIL_, 0xE1, + 0xB9, 0xA1, FIL_, 0xCC, 0x82, FIL_, 0xC5, 0x9D, + FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x9F, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xB9, 0xA3, FIL_, 0x08, 0xCC, + 0x88, FIL_, 0xE1, 0xBA, 0x97, FIL_, 0xCC, 0xAD, + FIL_, 0xE1, 0xB9, 0xB1, FIL_, 0xCC, 0xB1, FIL_, + 0xE1, 0xB9, 0xAF, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB9, 0xAD, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0xA5, + FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0xA3, FIL_, 0xCC, + 0x87, FIL_, 0xE1, 0xB9, 0xAB, FIL_, 0xCC, 0xA6, + FIL_, 0xC8, 0x9B, FIL_, 0x13, 0xCC, 0x81, FIL_, + 0xC3, 0xBA, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x97, + FIL_, 0xCC, 0x83, FIL_, 0xC5, 0xA9, FIL_, 0xCC, + 0x8F, FIL_, 0xC8, 0x95, FIL_, 0xCC, 0xA8, FIL_, + 0xC5, 0xB3, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xBB, + FIL_, 0xCC, 0x88, FIL_, 0xC3, 0xBC, FIL_, 0xCC, + 0x80, FIL_, 0xC3, 0xB9, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xBB, 0xA5, FIL_, 0xCC, 0xA4, FIL_, 0xE1, + 0xB9, 0xB3, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, + 0xA7, FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB9, 0xB5, + FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB7, FIL_, + 0xCC, 0x9B, FIL_, 0xC6, 0xB0, FIL_, 0xCC, 0x84, + FIL_, 0xC5, 0xAB, FIL_, 0xCC, 0x8B, FIL_, 0xC5, + 0xB1, FIL_, 0xCC, 0x86, FIL_, 0xC5, 0xAD, FIL_, + 0xCC, 0x8C, FIL_, 0xC7, 0x94, FIL_, 0xCC, 0x8A, + FIL_, 0xC5, 0xAF, FIL_, 0x02, 0xCC, 0x83, FIL_, + 0xE1, 0xB9, 0xBD, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB9, 0xBF, FIL_, 0x07, 0xCC, 0x82, FIL_, 0xC5, + 0xB5, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0x81, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0x83, FIL_, + 0xCC, 0x88, FIL_, 0xE1, 0xBA, 0x85, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xBA, 0x89, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xBA, 0x87, FIL_, 0xCC, 0x8A, FIL_, + 0xE1, 0xBA, 0x98, FIL_, 0x02, 0xCC, 0x87, FIL_, + 0xE1, 0xBA, 0x8B, FIL_, 0xCC, 0x88, FIL_, 0xE1, + 0xBA, 0x8D, FIL_, 0x0A, 0xCC, 0x87, FIL_, 0xE1, + 0xBA, 0x8F, FIL_, 0xCC, 0x83, FIL_, 
0xE1, 0xBB, + 0xB9, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xB3, + FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0xB7, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xB5, FIL_, 0xCC, + 0x82, FIL_, 0xC5, 0xB7, FIL_, 0xCC, 0x84, FIL_, + 0xC8, 0xB3, FIL_, 0xCC, 0x8A, FIL_, 0xE1, 0xBA, + 0x99, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0xBF, FIL_, + 0xCC, 0x81, FIL_, 0xC3, 0xBD, FIL_, 0x06, 0xCC, + 0x8C, FIL_, 0xC5, 0xBE, FIL_, 0xCC, 0x87, FIL_, + 0xC5, 0xBC, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xBA, + 0x95, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x93, + FIL_, 0xCC, 0x81, FIL_, 0xC5, 0xBA, FIL_, 0xCC, + 0x82, FIL_, 0xE1, 0xBA, 0x91, FIL_, 0x03, 0xCC, + 0x80, FIL_, 0xE1, 0xBF, 0xAD, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBF, 0x81, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x85, FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, + 0xBA, 0xA8, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBA, + 0xAA, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xA4, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0xA6, FIL_, + 0x01, 0xCC, 0x84, FIL_, 0xC7, 0x9E, FIL_, 0x01, + 0xCC, 0x81, FIL_, 0xC7, 0xBA, FIL_, 0x02, 0xCC, + 0x84, FIL_, 0xC7, 0xA2, FIL_, 0xCC, 0x81, FIL_, + 0xC7, 0xBC, FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, + 0xB8, 0x88, FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, + 0xBA, 0xBE, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, + 0x80, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0x84, + FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x82, FIL_, + 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xAE, FIL_, + 0x04, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0x96, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x90, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBB, 0x92, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0x94, FIL_, 0x03, 0xCC, 0x84, + FIL_, 0xC8, 0xAC, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xB9, 0x8C, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB9, + 0x8E, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAA, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xC7, 0xBE, FIL_, + 0x04, 0xCC, 0x80, FIL_, 0xC7, 0x9B, FIL_, 0xCC, + 0x84, FIL_, 0xC7, 0x95, FIL_, 0xCC, 0x8C, FIL_, + 0xC7, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC7, 0x97, + FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA9, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 
0xA7, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xA5, FIL_, 0xCC, + 0x83, FIL_, 0xE1, 0xBA, 0xAB, FIL_, 0x01, 0xCC, + 0x84, FIL_, 0xC7, 0x9F, FIL_, 0x01, 0xCC, 0x81, + FIL_, 0xC7, 0xBB, FIL_, 0x02, 0xCC, 0x84, FIL_, + 0xC7, 0xA3, FIL_, 0xCC, 0x81, FIL_, 0xC7, 0xBD, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x89, + FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x83, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xBF, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0x81, FIL_, 0xCC, + 0x83, FIL_, 0xE1, 0xBB, 0x85, FIL_, 0x01, 0xCC, + 0x81, FIL_, 0xE1, 0xB8, 0xAF, FIL_, 0x04, 0xCC, + 0x83, FIL_, 0xE1, 0xBB, 0x97, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0x95, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBB, 0x93, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xBB, 0x91, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, + 0xB9, 0x8D, FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xAD, + FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB9, 0x8F, FIL_, + 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAB, FIL_, 0x01, + 0xCC, 0x81, FIL_, 0xC7, 0xBF, FIL_, 0x04, 0xCC, + 0x81, FIL_, 0xC7, 0x98, FIL_, 0xCC, 0x84, FIL_, + 0xC7, 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x9A, + FIL_, 0xCC, 0x80, FIL_, 0xC7, 0x9C, FIL_, 0x04, + 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0xB0, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBA, 0xAE, FIL_, 0xCC, 0x83, + FIL_, 0xE1, 0xBA, 0xB4, FIL_, 0xCC, 0x89, FIL_, + 0xE1, 0xBA, 0xB2, FIL_, 0x04, 0xCC, 0x80, FIL_, + 0xE1, 0xBA, 0xB1, FIL_, 0xCC, 0x83, FIL_, 0xE1, + 0xBA, 0xB5, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, + 0xAF, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xB3, + FIL_, 0x02, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x96, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x94, FIL_, + 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x95, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x97, FIL_, 0x02, + 0xCC, 0x80, FIL_, 0xE1, 0xB9, 0x90, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xB9, 0x92, FIL_, 0x02, 0xCC, + 0x80, FIL_, 0xE1, 0xB9, 0x91, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0x93, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA4, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA5, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA6, FIL_, 0x01, 
0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA7, FIL_, 0x01, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0xB8, FIL_, 0x01, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0xB9, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xE1, 0xB9, 0xBA, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xE1, 0xB9, 0xBB, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xBA, 0x9B, FIL_, 0x05, 0xCC, 0x80, + FIL_, 0xE1, 0xBB, 0x9C, FIL_, 0xCC, 0x81, FIL_, + 0xE1, 0xBB, 0x9A, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xBB, 0xA2, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, + 0xA0, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x9E, + FIL_, 0x05, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xA1, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x9B, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xA3, FIL_, 0xCC, + 0x89, FIL_, 0xE1, 0xBB, 0x9F, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBB, 0x9D, FIL_, 0x05, 0xCC, 0x83, + FIL_, 0xE1, 0xBB, 0xAE, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xBB, 0xB0, FIL_, 0xCC, 0x89, FIL_, 0xE1, + 0xBB, 0xAC, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, + 0xA8, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xAA, + FIL_, 0x05, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xB1, + FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xAF, FIL_, + 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0xAD, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBB, 0xA9, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBB, 0xAB, FIL_, 0x01, 0xCC, 0x8C, + FIL_, 0xC7, 0xAE, FIL_, 0x01, 0xCC, 0x84, FIL_, + 0xC7, 0xAC, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, + 0xAD, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA0, + FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA1, FIL_, + 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9C, FIL_, + 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9D, FIL_, + 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xB0, FIL_, 0x01, + 0xCC, 0x84, FIL_, 0xC8, 0xB1, FIL_, 0x01, 0xCC, + 0x8C, FIL_, 0xC7, 0xAF, FIL_, 0x07, 0xCC, 0x93, + FIL_, 0xE1, 0xBC, 0x88, FIL_, 0xCC, 0x94, FIL_, + 0xE1, 0xBC, 0x89, FIL_, 0xCC, 0x81, FIL_, 0xCE, + 0x86, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xBC, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBE, 0xBA, FIL_, + 0xCC, 0x84, FIL_, 0xE1, 0xBE, 0xB9, FIL_, 0xCC, + 0x86, FIL_, 0xE1, 0xBE, 0xB8, FIL_, 0x04, 0xCC, + 0x81, FIL_, 0xCE, 0x88, FIL_, 0xCC, 
0x94, FIL_, + 0xE1, 0xBC, 0x99, FIL_, 0xCC, 0x93, FIL_, 0xE1, + 0xBC, 0x98, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, + 0x88, FIL_, 0x05, 0xCC, 0x94, FIL_, 0xE1, 0xBC, + 0xA9, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x8A, + FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x89, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBF, 0x8C, FIL_, 0xCC, 0x93, + FIL_, 0xE1, 0xBC, 0xA8, FIL_, 0x07, 0xCC, 0x81, + FIL_, 0xCE, 0x8A, FIL_, 0xCC, 0x88, FIL_, 0xCE, + 0xAA, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0x98, + FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xBF, 0x99, FIL_, + 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0xB8, FIL_, 0xCC, + 0x94, FIL_, 0xE1, 0xBC, 0xB9, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBF, 0x9A, FIL_, 0x04, 0xCC, 0x94, + FIL_, 0xE1, 0xBD, 0x89, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBF, 0xB8, FIL_, 0xCC, 0x81, FIL_, 0xCE, + 0x8C, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, 0x88, + FIL_, 0x01, 0xCC, 0x94, FIL_, 0xE1, 0xBF, 0xAC, + FIL_, 0x06, 0xCC, 0x81, FIL_, 0xCE, 0x8E, FIL_, + 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0xA8, FIL_, 0xCC, + 0x94, FIL_, 0xE1, 0xBD, 0x99, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBF, 0xAA, FIL_, 0xCC, 0x84, FIL_, + 0xE1, 0xBF, 0xA9, FIL_, 0xCC, 0x88, FIL_, 0xCE, + 0xAB, FIL_, 0x05, 0xCC, 0x80, FIL_, 0xE1, 0xBF, + 0xBA, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x8F, FIL_, + 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xBC, FIL_, 0xCC, + 0x94, FIL_, 0xE1, 0xBD, 0xA9, FIL_, 0xCC, 0x93, + FIL_, 0xE1, 0xBD, 0xA8, FIL_, 0x01, 0xCD, 0x85, + FIL_, 0xE1, 0xBE, 0xB4, FIL_, 0x01, 0xCD, 0x85, + FIL_, 0xE1, 0xBF, 0x84, FIL_, 0x08, 0xCC, 0x81, + FIL_, 0xCE, 0xAC, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBD, 0xB0, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, + 0x80, FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0x81, + FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBE, 0xB6, FIL_, + 0xCC, 0x86, FIL_, 0xE1, 0xBE, 0xB0, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xB3, FIL_, 0xCC, 0x84, + FIL_, 0xE1, 0xBE, 0xB1, FIL_, 0x04, 0xCC, 0x81, + FIL_, 0xCE, 0xAD, FIL_, 0xCC, 0x94, FIL_, 0xE1, + 0xBC, 0x91, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, + 0xB2, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0x90, + FIL_, 0x06, 0xCC, 0x81, FIL_, 0xCE, 
0xAE, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xB4, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBF, 0x83, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBF, 0x86, FIL_, 0xCC, 0x94, FIL_, + 0xE1, 0xBC, 0xA1, FIL_, 0xCC, 0x93, FIL_, 0xE1, + 0xBC, 0xA0, FIL_, 0x08, 0xCD, 0x82, FIL_, 0xE1, + 0xBF, 0x96, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, + 0x90, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0xB0, + FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAF, FIL_, 0xCC, + 0x94, FIL_, 0xE1, 0xBC, 0xB1, FIL_, 0xCC, 0x84, + FIL_, 0xE1, 0xBF, 0x91, FIL_, 0xCC, 0x88, FIL_, + 0xCF, 0x8A, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, + 0xB6, FIL_, 0x04, 0xCC, 0x81, FIL_, 0xCF, 0x8C, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xB8, FIL_, + 0xCC, 0x93, FIL_, 0xE1, 0xBD, 0x80, FIL_, 0xCC, + 0x94, FIL_, 0xE1, 0xBD, 0x81, FIL_, 0x02, 0xCC, + 0x93, FIL_, 0xE1, 0xBF, 0xA4, FIL_, 0xCC, 0x94, + FIL_, 0xE1, 0xBF, 0xA5, FIL_, 0x08, 0xCC, 0x93, + FIL_, 0xE1, 0xBD, 0x90, FIL_, 0xCC, 0x94, FIL_, + 0xE1, 0xBD, 0x91, FIL_, 0xCC, 0x86, FIL_, 0xE1, + 0xBF, 0xA0, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, + 0xA6, FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xBF, 0xA1, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xBA, FIL_, + 0xCC, 0x81, FIL_, 0xCF, 0x8D, FIL_, 0xCC, 0x88, + FIL_, 0xCF, 0x8B, FIL_, 0x06, 0xCC, 0x94, FIL_, + 0xE1, 0xBD, 0xA1, FIL_, 0xCD, 0x85, FIL_, 0xE1, + 0xBF, 0xB3, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, + 0xBC, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0xB6, + FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, 0xA0, FIL_, + 0xCC, 0x81, FIL_, 0xCF, 0x8E, FIL_, 0x03, 0xCD, + 0x82, FIL_, 0xE1, 0xBF, 0x97, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBF, 0x92, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x90, FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, + 0xBF, 0xA2, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xB0, + FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0xA7, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB4, FIL_, + 0x02, 0xCC, 0x88, FIL_, 0xCF, 0x94, FIL_, 0xCC, + 0x81, FIL_, 0xCF, 0x93, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xD0, 0x87, FIL_, 0x02, 0xCC, 0x86, FIL_, + 0xD3, 0x90, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x92, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD0, 
0x83, FIL_, + 0x03, 0xCC, 0x86, FIL_, 0xD3, 0x96, FIL_, 0xCC, + 0x80, FIL_, 0xD0, 0x80, FIL_, 0xCC, 0x88, FIL_, + 0xD0, 0x81, FIL_, 0x02, 0xCC, 0x88, FIL_, 0xD3, + 0x9C, FIL_, 0xCC, 0x86, FIL_, 0xD3, 0x81, FIL_, + 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9E, FIL_, 0x04, + 0xCC, 0x80, FIL_, 0xD0, 0x8D, FIL_, 0xCC, 0x88, + FIL_, 0xD3, 0xA4, FIL_, 0xCC, 0x86, FIL_, 0xD0, + 0x99, FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xA2, FIL_, + 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x8C, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xA6, FIL_, 0x04, 0xCC, + 0x86, FIL_, 0xD0, 0x8E, FIL_, 0xCC, 0x8B, FIL_, + 0xD3, 0xB2, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0xB0, + FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xAE, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xB4, FIL_, 0x01, 0xCC, + 0x88, FIL_, 0xD3, 0xB8, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xD3, 0xAC, FIL_, 0x02, 0xCC, 0x86, FIL_, + 0xD3, 0x91, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x93, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x93, FIL_, + 0x03, 0xCC, 0x80, FIL_, 0xD1, 0x90, FIL_, 0xCC, + 0x88, FIL_, 0xD1, 0x91, FIL_, 0xCC, 0x86, FIL_, + 0xD3, 0x97, FIL_, 0x02, 0xCC, 0x88, FIL_, 0xD3, + 0x9D, FIL_, 0xCC, 0x86, FIL_, 0xD3, 0x82, FIL_, + 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9F, FIL_, 0x04, + 0xCC, 0x88, FIL_, 0xD3, 0xA5, FIL_, 0xCC, 0x86, + FIL_, 0xD0, 0xB9, FIL_, 0xCC, 0x80, FIL_, 0xD1, + 0x9D, FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xA3, FIL_, + 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x9C, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xA7, FIL_, 0x04, 0xCC, + 0x84, FIL_, 0xD3, 0xAF, FIL_, 0xCC, 0x86, FIL_, + 0xD1, 0x9E, FIL_, 0xCC, 0x8B, FIL_, 0xD3, 0xB3, + FIL_, 0xCC, 0x88, FIL_, 0xD3, 0xB1, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xB5, FIL_, 0x01, 0xCC, + 0x88, FIL_, 0xD3, 0xB9, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xD3, 0xAD, FIL_, 0x01, 0xCC, 0x88, FIL_, + 0xD1, 0x97, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, + 0xB6, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, 0xB7, + FIL_, 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9A, FIL_, + 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9B, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xAA, FIL_, 0x01, 0xCC, + 0x88, FIL_, 0xD3, 0xAB, FIL_, 0x03, 
0xD9, 0x94, + FIL_, 0xD8, 0xA3, FIL_, 0xD9, 0x93, FIL_, 0xD8, + 0xA2, FIL_, 0xD9, 0x95, FIL_, 0xD8, 0xA5, FIL_, + 0x01, 0xD9, 0x94, FIL_, 0xD8, 0xA4, FIL_, 0x01, + 0xD9, 0x94, FIL_, 0xD8, 0xA6, FIL_, 0x01, 0xD9, + 0x94, FIL_, 0xDB, 0x82, FIL_, 0x01, 0xD9, 0x94, + FIL_, 0xDB, 0x93, FIL_, 0x01, 0xD9, 0x94, FIL_, + 0xDB, 0x80, FIL_, 0x01, 0xE0, 0xA4, 0xBC, FIL_, + 0xE0, 0xA4, 0xA9, FIL_, 0x01, 0xE0, 0xA4, 0xBC, + FIL_, 0xE0, 0xA4, 0xB1, FIL_, 0x01, 0xE0, 0xA4, + 0xBC, FIL_, 0xE0, 0xA4, 0xB4, FIL_, 0x02, 0xE0, + 0xA6, 0xBE, FIL_, 0xE0, 0xA7, 0x8B, FIL_, 0xE0, + 0xA7, 0x97, FIL_, 0xE0, 0xA7, 0x8C, FIL_, 0x03, + 0xE0, 0xAD, 0x97, FIL_, 0xE0, 0xAD, 0x8C, FIL_, + 0xE0, 0xAC, 0xBE, FIL_, 0xE0, 0xAD, 0x8B, FIL_, + 0xE0, 0xAD, 0x96, FIL_, 0xE0, 0xAD, 0x88, FIL_, + 0x01, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAE, 0x94, + FIL_, 0x02, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, 0xAF, + 0x8A, FIL_, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAF, + 0x8C, FIL_, 0x01, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, + 0xAF, 0x8B, FIL_, 0x01, 0xE0, 0xB1, 0x96, FIL_, + 0xE0, 0xB1, 0x88, FIL_, 0x01, 0xE0, 0xB3, 0x95, + FIL_, 0xE0, 0xB3, 0x80, FIL_, 0x03, 0xE0, 0xB3, + 0x95, FIL_, 0xE0, 0xB3, 0x87, FIL_, 0xE0, 0xB3, + 0x82, FIL_, 0xE0, 0xB3, 0x8A, FIL_, 0xE0, 0xB3, + 0x96, FIL_, 0xE0, 0xB3, 0x88, FIL_, 0x01, 0xE0, + 0xB3, 0x95, FIL_, 0xE0, 0xB3, 0x8B, FIL_, 0x02, + 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8A, FIL_, + 0xE0, 0xB5, 0x97, FIL_, 0xE0, 0xB5, 0x8C, FIL_, + 0x01, 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8B, + FIL_, 0x03, 0xE0, 0xB7, 0x8F, FIL_, 0xE0, 0xB7, + 0x9C, FIL_, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, 0xB7, + 0x9A, FIL_, 0xE0, 0xB7, 0x9F, FIL_, 0xE0, 0xB7, + 0x9E, FIL_, 0x01, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, + 0xB7, 0x9D, FIL_, 0x01, 0xE1, 0x80, 0xAE, FIL_, + 0xE1, 0x80, 0xA6, FIL_, 0x01, 0xCC, 0x84, FIL_, + 0xE1, 0xB8, 0xB8, FIL_, 0x01, 0xCC, 0x84, FIL_, + 0xE1, 0xB8, 0xB9, FIL_, 0x01, 0xCC, 0x84, FIL_, + 0xE1, 0xB9, 0x9C, FIL_, 0x01, 0xCC, 0x84, FIL_, + 0xE1, 0xB9, 0x9D, FIL_, 0x01, 0xCC, 0x87, FIL_, + 0xE1, 0xB9, 0xA8, FIL_, 0x01, 0xCC, 
0x87, FIL_, + 0xE1, 0xB9, 0xA9, FIL_, 0x02, 0xCC, 0x86, FIL_, + 0xE1, 0xBA, 0xB6, FIL_, 0xCC, 0x82, FIL_, 0xE1, + 0xBA, 0xAC, FIL_, 0x02, 0xCC, 0x86, FIL_, 0xE1, + 0xBA, 0xB7, FIL_, 0xCC, 0x82, FIL_, 0xE1, 0xBA, + 0xAD, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, + 0x86, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, + 0x87, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, + 0x98, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, + 0x99, FIL_, 0x04, 0xCC, 0x80, FIL_, 0xE1, 0xBC, + 0x82, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x84, + FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x80, FIL_, + 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0x86, FIL_, 0x04, + 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0x87, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBC, 0x83, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBC, 0x85, FIL_, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x81, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x82, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x83, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x84, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x85, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x86, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x87, FIL_, 0x04, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x88, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBC, 0x8A, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, + 0x8E, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x8C, + FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x8D, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x8B, FIL_, + 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0x8F, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x89, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x8A, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x8B, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x8C, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x8D, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x8E, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x8F, FIL_, 0x02, 0xCC, + 0x80, FIL_, 0xE1, 0xBC, 0x92, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBC, 0x94, FIL_, 0x02, 0xCC, 0x80, + FIL_, 0xE1, 0xBC, 0x93, FIL_, 0xCC, 0x81, FIL_, + 0xE1, 0xBC, 0x95, FIL_, 0x02, 0xCC, 0x80, FIL_, + 0xE1, 0xBC, 0x9A, FIL_, 0xCC, 0x81, 
FIL_, 0xE1, + 0xBC, 0x9C, FIL_, 0x02, 0xCC, 0x80, FIL_, 0xE1, + 0xBC, 0x9B, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, + 0x9D, FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBC, + 0xA6, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x90, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xA4, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xA2, FIL_, 0x04, + 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xA3, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBC, 0xA5, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBC, 0xA7, FIL_, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x91, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x92, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x93, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x94, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x95, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x96, FIL_, 0x01, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0x97, FIL_, 0x04, 0xCD, 0x82, FIL_, + 0xE1, 0xBC, 0xAE, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xBC, 0xAC, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, + 0x98, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xAA, + FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xAF, + FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x99, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xAD, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBC, 0xAB, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x9A, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x9B, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x9C, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x9D, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x9E, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0x9F, FIL_, 0x03, 0xCC, + 0x81, FIL_, 0xE1, 0xBC, 0xB4, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBC, 0xB6, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBC, 0xB2, FIL_, 0x03, 0xCC, 0x81, FIL_, + 0xE1, 0xBC, 0xB5, FIL_, 0xCD, 0x82, FIL_, 0xE1, + 0xBC, 0xB7, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, + 0xB3, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xBC, + 0xBC, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xBA, + FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xBE, FIL_, + 0x03, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xBB, FIL_, + 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xBF, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBC, 0xBD, FIL_, 
0x02, 0xCC, + 0x80, FIL_, 0xE1, 0xBD, 0x82, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBD, 0x84, FIL_, 0x02, 0xCC, 0x80, + FIL_, 0xE1, 0xBD, 0x83, FIL_, 0xCC, 0x81, FIL_, + 0xE1, 0xBD, 0x85, FIL_, 0x02, 0xCC, 0x81, FIL_, + 0xE1, 0xBD, 0x8C, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBD, 0x8A, FIL_, 0x02, 0xCC, 0x81, FIL_, 0xE1, + 0xBD, 0x8D, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, + 0x8B, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xBD, + 0x94, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0x96, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x92, FIL_, + 0x03, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0x97, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x95, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBD, 0x93, FIL_, 0x03, 0xCC, + 0x81, FIL_, 0xE1, 0xBD, 0x9D, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBD, 0x9F, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBD, 0x9B, FIL_, 0x04, 0xCC, 0x81, FIL_, + 0xE1, 0xBD, 0xA4, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBD, 0xA2, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, + 0xA6, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA0, + FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xA7, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0xA5, FIL_, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA1, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBD, 0xA3, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA2, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA3, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA4, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA5, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA6, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA7, FIL_, 0x04, 0xCC, + 0x81, FIL_, 0xE1, 0xBD, 0xAC, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBD, 0xAA, FIL_, 0xCD, 0x82, FIL_, + 0xE1, 0xBD, 0xAE, FIL_, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xA8, FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, + 0xBD, 0xAD, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, + 0xA9, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xAF, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xAB, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAA, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAB, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAC, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 
0xAD, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAE, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAF, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xB2, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0x82, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB2, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xB7, FIL_, + 0x03, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0x8F, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x8D, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBF, 0x8E, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBF, 0x87, FIL_, 0x01, 0xCD, + 0x85, FIL_, 0xE1, 0xBF, 0xB7, FIL_, 0x03, 0xCC, + 0x80, FIL_, 0xE1, 0xBF, 0x9D, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBF, 0x9F, FIL_, 0xCC, 0x81, FIL_, + 0xE1, 0xBF, 0x9E, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x86, 0x9A, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x86, 0x9B, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x86, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x87, 0x8D, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x87, 0x8F, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x87, 0x8E, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x88, 0x84, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x88, 0x89, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x88, 0x8C, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x88, 0xA4, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x88, 0xA6, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0x81, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0x84, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0x87, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0x89, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xAD, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xA2, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xB0, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xB1, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xB4, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xB5, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xB8, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x89, 0xB9, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0x80, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0x81, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xA0, FIL_, 0x01, 0xCC, 
0xB8, FIL_, + 0xE2, 0x8B, 0xA1, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0x84, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0x85, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0x88, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0x89, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xA2, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xA3, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0xAC, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0xAD, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8A, 0xAF, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xAA, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xAB, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xAC, FIL_, 0x01, 0xCC, 0xB8, FIL_, + 0xE2, 0x8B, 0xAD, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x82, 0x94, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x81, 0x8C, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x81, 0x8E, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x90, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x92, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, + 0x94, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x81, 0x96, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x81, 0x98, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x81, 0x9A, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x81, 0x9C, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x81, 0x9E, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xA0, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xA2, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, + 0xA5, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x81, 0xA7, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x81, 0xA9, FIL_, 0x02, 0xE3, 0x82, 0x9A, + FIL_, 0xE3, 0x81, 0xB1, FIL_, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x81, 0xB0, FIL_, 0x02, 0xE3, 0x82, + 0x9A, FIL_, 0xE3, 0x81, 0xB4, FIL_, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x81, 0xB3, FIL_, 0x02, 0xE3, + 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xB7, FIL_, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB6, FIL_, 0x02, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 
0xB9, FIL_, + 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xBA, FIL_, + 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xBC, + FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xBD, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, + 0x9E, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x83, 0xB4, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x82, 0xAC, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x82, 0xAE, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x82, 0xB0, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB2, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB4, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB6, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, + 0xB8, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x82, 0xBA, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x82, 0xBC, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x82, 0xBE, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x83, 0x80, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x83, 0x82, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x85, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x87, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, + 0x89, FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x83, 0x90, FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, + 0x83, 0x91, FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x83, 0x93, FIL_, 0xE3, 0x82, 0x9A, FIL_, + 0xE3, 0x83, 0x94, FIL_, 0x02, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x83, 0x96, FIL_, 0xE3, 0x82, 0x9A, + FIL_, 0xE3, 0x83, 0x97, FIL_, 0x02, 0xE3, 0x82, + 0x9A, FIL_, 0xE3, 0x83, 0x9A, FIL_, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x83, 0x99, FIL_, 0x02, 0xE3, + 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x9D, FIL_, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x83, 0x9C, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0xB7, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0xB8, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, + 0xB9, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x83, 0xBA, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x83, 0xBE, FIL_, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + }, + { + 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, + 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, + 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAF, FIL_, + 0x10, 0xCC, 0xA5, FIL_, 0xE1, 0xB8, 0x80, FIL_, + 0xCC, 0x87, FIL_, 0xC8, 0xA6, FIL_, 0xCC, 0x83, + FIL_, 0xC3, 0x83, FIL_, 0xCC, 0x91, FIL_, 0xC8, + 0x82, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x80, FIL_, + 0xCC, 0x8A, FIL_, 0xC3, 0x85, FIL_, 0xCC, 0x88, + FIL_, 0xC3, 0x84, FIL_, 0xCC, 0x89, FIL_, 0xE1, + 0xBA, 0xA2, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, + 0xA0, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x8D, FIL_, + 0xCC, 0x80, FIL_, 0xC3, 0x80, FIL_, 0xCC, 0x81, + FIL_, 0xC3, 0x81, FIL_, 0xCC, 0x82, FIL_, 0xC3, + 0x82, FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x84, FIL_, + 0xCC, 0x86, FIL_, 0xC4, 0x82, FIL_, 0xCC, 0x84, + FIL_, 0xC4, 0x80, FIL_, 0x03, 0xCC, 0xB1, FIL_, + 0xE1, 0xB8, 0x86, FIL_, 0xCC, 0x87, FIL_, 0xE1, + 0xB8, 0x82, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0x84, FIL_, 0x05, 0xCC, 0xA7, FIL_, 0xC3, 0x87, + FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x8C, FIL_, 0xCC, + 0x81, FIL_, 0xC4, 0x86, FIL_, 0xCC, 0x82, FIL_, + 0xC4, 0x88, FIL_, 0xCC, 0x87, FIL_, 0xC4, 0x8A, + FIL_, 0x06, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0x90, + FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x8E, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xB8, 0x8E, FIL_, 0xCC, 0xAD, + FIL_, 0xE1, 0xB8, 0x92, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xB8, 0x8C, FIL_, 0xCC, 0x87, FIL_, 0xE1, + 0xB8, 0x8A, FIL_, 0x11, 0xCC, 0x84, FIL_, 0xC4, + 0x92, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x94, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0xB8, FIL_, 0xCC, + 0x91, FIL_, 0xC8, 0x86, FIL_, 0xCC, 0x82, FIL_, + 0xC3, 0x8A, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x84, + FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x98, FIL_, + 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xBA, FIL_, 0xCC, + 0xA7, FIL_, 0xC8, 0xA8, 
FIL_, 0xCC, 0x8C, FIL_, + 0xC4, 0x9A, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x88, + FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x98, FIL_, 0xCC, + 0x83, FIL_, 0xE1, 0xBA, 0xBC, FIL_, 0xCC, 0x87, + FIL_, 0xC4, 0x96, FIL_, 0xCC, 0x81, FIL_, 0xC3, + 0x89, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x8B, FIL_, + 0xCC, 0xB0, FIL_, 0xE1, 0xB8, 0x9A, FIL_, 0x01, + 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9E, FIL_, 0x07, + 0xCC, 0x8C, FIL_, 0xC7, 0xA6, FIL_, 0xCC, 0x86, + FIL_, 0xC4, 0x9E, FIL_, 0xCC, 0x82, FIL_, 0xC4, + 0x9C, FIL_, 0xCC, 0xA7, FIL_, 0xC4, 0xA2, FIL_, + 0xCC, 0x84, FIL_, 0xE1, 0xB8, 0xA0, FIL_, 0xCC, + 0x81, FIL_, 0xC7, 0xB4, FIL_, 0xCC, 0x87, FIL_, + 0xC4, 0xA0, FIL_, 0x07, 0xCC, 0x87, FIL_, 0xE1, + 0xB8, 0xA2, FIL_, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, + 0xA8, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0xA4, FIL_, + 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA6, FIL_, 0xCC, + 0x8C, FIL_, 0xC8, 0x9E, FIL_, 0xCC, 0xAE, FIL_, + 0xE1, 0xB8, 0xAA, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB8, 0xA4, FIL_, 0x0F, 0xCC, 0xB0, FIL_, 0xE1, + 0xB8, 0xAC, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x8F, + FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x8C, FIL_, 0xCC, + 0x89, FIL_, 0xE1, 0xBB, 0x88, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xBB, 0x8A, FIL_, 0xCC, 0x91, FIL_, + 0xC8, 0x8A, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x8F, + FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x8E, FIL_, 0xCC, + 0x81, FIL_, 0xC3, 0x8D, FIL_, 0xCC, 0x83, FIL_, + 0xC4, 0xA8, FIL_, 0xCC, 0x87, FIL_, 0xC4, 0xB0, + FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x88, FIL_, 0xCC, + 0xA8, FIL_, 0xC4, 0xAE, FIL_, 0xCC, 0x86, FIL_, + 0xC4, 0xAC, FIL_, 0xCC, 0x84, FIL_, 0xC4, 0xAA, + FIL_, 0x01, 0xCC, 0x82, FIL_, 0xC4, 0xB4, FIL_, + 0x05, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xB0, FIL_, + 0xCC, 0x8C, FIL_, 0xC7, 0xA8, FIL_, 0xCC, 0xB1, + FIL_, 0xE1, 0xB8, 0xB4, FIL_, 0xCC, 0xA7, FIL_, + 0xC4, 0xB6, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0xB2, FIL_, 0x06, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0xB6, FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0xBD, FIL_, + 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0xBC, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xB8, 0xBA, FIL_, 0xCC, 0xA7, + FIL_, 0xC4, 0xBB, FIL_, 
0xCC, 0x81, FIL_, 0xC4, + 0xB9, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xB8, + 0xBE, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x80, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x82, FIL_, + 0x09, 0xCC, 0x83, FIL_, 0xC3, 0x91, FIL_, 0xCC, + 0x81, FIL_, 0xC5, 0x83, FIL_, 0xCC, 0xA7, FIL_, + 0xC5, 0x85, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0x87, + FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x84, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x86, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xB9, 0x88, FIL_, 0xCC, 0xAD, + FIL_, 0xE1, 0xB9, 0x8A, FIL_, 0xCC, 0x80, FIL_, + 0xC7, 0xB8, FIL_, 0x10, 0xCC, 0x89, FIL_, 0xE1, + 0xBB, 0x8E, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0x8C, + FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x94, FIL_, 0xCC, + 0x86, FIL_, 0xC5, 0x8E, FIL_, 0xCC, 0x83, FIL_, + 0xC3, 0x95, FIL_, 0xCC, 0x8B, FIL_, 0xC5, 0x90, + FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x96, FIL_, 0xCC, + 0x9B, FIL_, 0xC6, 0xA0, FIL_, 0xCC, 0x91, FIL_, + 0xC8, 0x8E, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x91, + FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x8C, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xBB, 0x8C, FIL_, 0xCC, 0x80, + FIL_, 0xC3, 0x92, FIL_, 0xCC, 0xA8, FIL_, 0xC7, + 0xAA, FIL_, 0xCC, 0x87, FIL_, 0xC8, 0xAE, FIL_, + 0xCC, 0x81, FIL_, 0xC3, 0x93, FIL_, 0x02, 0xCC, + 0x87, FIL_, 0xE1, 0xB9, 0x96, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0x94, FIL_, 0x08, 0xCC, 0xA7, + FIL_, 0xC5, 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC5, + 0x98, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x92, FIL_, + 0xCC, 0x8F, FIL_, 0xC8, 0x90, FIL_, 0xCC, 0x81, + FIL_, 0xC5, 0x94, FIL_, 0xCC, 0x87, FIL_, 0xE1, + 0xB9, 0x98, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB9, + 0x9E, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x9A, + FIL_, 0x07, 0xCC, 0xA6, FIL_, 0xC8, 0x98, FIL_, + 0xCC, 0x81, FIL_, 0xC5, 0x9A, FIL_, 0xCC, 0x82, + FIL_, 0xC5, 0x9C, FIL_, 0xCC, 0xA7, FIL_, 0xC5, + 0x9E, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0xA0, FIL_, + 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA0, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xB9, 0xA2, FIL_, 0x07, 0xCC, + 0xA6, FIL_, 0xC8, 0x9A, FIL_, 0xCC, 0x87, FIL_, + 0xE1, 0xB9, 0xAA, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB9, 0xAC, FIL_, 0xCC, 
0xB1, FIL_, 0xE1, 0xB9, + 0xAE, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB0, + FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0xA2, FIL_, 0xCC, + 0x8C, FIL_, 0xC5, 0xA4, FIL_, 0x13, 0xCC, 0x8A, + FIL_, 0xC5, 0xAE, FIL_, 0xCC, 0x88, FIL_, 0xC3, + 0x9C, FIL_, 0xCC, 0x8B, FIL_, 0xC5, 0xB0, FIL_, + 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB6, FIL_, 0xCC, + 0xA8, FIL_, 0xC5, 0xB2, FIL_, 0xCC, 0x8C, FIL_, + 0xC7, 0x93, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x99, + FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x94, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xBB, 0xA4, FIL_, 0xCC, 0xA4, + FIL_, 0xE1, 0xB9, 0xB2, FIL_, 0xCC, 0x81, FIL_, + 0xC3, 0x9A, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x9B, + FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB9, 0xB4, FIL_, + 0xCC, 0x83, FIL_, 0xC5, 0xA8, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0xA6, FIL_, 0xCC, 0x84, FIL_, + 0xC5, 0xAA, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x96, + FIL_, 0xCC, 0x86, FIL_, 0xC5, 0xAC, FIL_, 0xCC, + 0x9B, FIL_, 0xC6, 0xAF, FIL_, 0x02, 0xCC, 0xA3, + FIL_, 0xE1, 0xB9, 0xBE, FIL_, 0xCC, 0x83, FIL_, + 0xE1, 0xB9, 0xBC, FIL_, 0x06, 0xCC, 0x88, FIL_, + 0xE1, 0xBA, 0x84, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xBA, 0x82, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, + 0x80, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x88, + FIL_, 0xCC, 0x82, FIL_, 0xC5, 0xB4, FIL_, 0xCC, + 0x87, FIL_, 0xE1, 0xBA, 0x86, FIL_, 0x02, 0xCC, + 0x88, FIL_, 0xE1, 0xBA, 0x8C, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xBA, 0x8A, FIL_, 0x09, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0xB6, FIL_, 0xCC, 0xA3, FIL_, + 0xE1, 0xBB, 0xB4, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBB, 0xB2, FIL_, 0xCC, 0x88, FIL_, 0xC5, 0xB8, + FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x9D, FIL_, 0xCC, + 0x83, FIL_, 0xE1, 0xBB, 0xB8, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xBA, 0x8E, FIL_, 0xCC, 0x84, FIL_, + 0xC8, 0xB2, FIL_, 0xCC, 0x82, FIL_, 0xC5, 0xB6, + FIL_, 0x06, 0xCC, 0x82, FIL_, 0xE1, 0xBA, 0x90, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x92, FIL_, + 0xCC, 0xB1, FIL_, 0xE1, 0xBA, 0x94, FIL_, 0xCC, + 0x8C, FIL_, 0xC5, 0xBD, FIL_, 0xCC, 0x87, FIL_, + 0xC5, 0xBB, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0xB9, + FIL_, 0x10, 0xCC, 0xA3, 
FIL_, 0xE1, 0xBA, 0xA1, + FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x85, FIL_, 0xCC, + 0x81, FIL_, 0xC3, 0xA1, FIL_, 0xCC, 0x82, FIL_, + 0xC3, 0xA2, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, + 0xA3, FIL_, 0xCC, 0x83, FIL_, 0xC3, 0xA3, FIL_, + 0xCC, 0x8C, FIL_, 0xC7, 0x8E, FIL_, 0xCC, 0x8A, + FIL_, 0xC3, 0xA5, FIL_, 0xCC, 0x88, FIL_, 0xC3, + 0xA4, FIL_, 0xCC, 0x87, FIL_, 0xC8, 0xA7, FIL_, + 0xCC, 0x91, FIL_, 0xC8, 0x83, FIL_, 0xCC, 0xA5, + FIL_, 0xE1, 0xB8, 0x81, FIL_, 0xCC, 0x84, FIL_, + 0xC4, 0x81, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x81, + FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x83, FIL_, 0xCC, + 0x80, FIL_, 0xC3, 0xA0, FIL_, 0x03, 0xCC, 0xA3, + FIL_, 0xE1, 0xB8, 0x85, FIL_, 0xCC, 0x87, FIL_, + 0xE1, 0xB8, 0x83, FIL_, 0xCC, 0xB1, FIL_, 0xE1, + 0xB8, 0x87, FIL_, 0x05, 0xCC, 0x87, FIL_, 0xC4, + 0x8B, FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x8D, FIL_, + 0xCC, 0x82, FIL_, 0xC4, 0x89, FIL_, 0xCC, 0x81, + FIL_, 0xC4, 0x87, FIL_, 0xCC, 0xA7, FIL_, 0xC3, + 0xA7, FIL_, 0x06, 0xCC, 0x87, FIL_, 0xE1, 0xB8, + 0x8B, FIL_, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0x91, + FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0x8F, FIL_, + 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0x8D, FIL_, 0xCC, + 0x8C, FIL_, 0xC4, 0x8F, FIL_, 0xCC, 0xAD, FIL_, + 0xE1, 0xB8, 0x93, FIL_, 0x11, 0xCC, 0x80, FIL_, + 0xC3, 0xA8, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0xA9, + FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xAA, FIL_, 0xCC, + 0x88, FIL_, 0xC3, 0xAB, FIL_, 0xCC, 0x84, FIL_, + 0xC4, 0x93, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x95, + FIL_, 0xCC, 0x87, FIL_, 0xC4, 0x97, FIL_, 0xCC, + 0xA8, FIL_, 0xC4, 0x99, FIL_, 0xCC, 0x8C, FIL_, + 0xC4, 0x9B, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x85, + FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x87, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xBA, 0xB9, FIL_, 0xCC, 0xA7, + FIL_, 0xC8, 0xA9, FIL_, 0xCC, 0x83, FIL_, 0xE1, + 0xBA, 0xBD, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, + 0xBB, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x99, + FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB8, 0x9B, FIL_, + 0x01, 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9F, FIL_, + 0x07, 0xCC, 0x86, FIL_, 0xC4, 0x9F, FIL_, 0xCC, + 0x87, FIL_, 0xC4, 0xA1, 
FIL_, 0xCC, 0x82, FIL_, + 0xC4, 0x9D, FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xB8, + 0xA1, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0xA7, FIL_, + 0xCC, 0xA7, FIL_, 0xC4, 0xA3, FIL_, 0xCC, 0x81, + FIL_, 0xC7, 0xB5, FIL_, 0x08, 0xCC, 0xA7, FIL_, + 0xE1, 0xB8, 0xA9, FIL_, 0xCC, 0xB1, FIL_, 0xE1, + 0xBA, 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC8, 0x9F, + FIL_, 0xCC, 0xAE, FIL_, 0xE1, 0xB8, 0xAB, FIL_, + 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA7, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xB8, 0xA5, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xB8, 0xA3, FIL_, 0xCC, 0x82, FIL_, + 0xC4, 0xA5, FIL_, 0x0E, 0xCC, 0x88, FIL_, 0xC3, + 0xAF, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x89, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0x8B, FIL_, + 0xCC, 0x82, FIL_, 0xC3, 0xAE, FIL_, 0xCC, 0x81, + FIL_, 0xC3, 0xAD, FIL_, 0xCC, 0x80, FIL_, 0xC3, + 0xAC, FIL_, 0xCC, 0x83, FIL_, 0xC4, 0xA9, FIL_, + 0xCC, 0x84, FIL_, 0xC4, 0xAB, FIL_, 0xCC, 0x86, + FIL_, 0xC4, 0xAD, FIL_, 0xCC, 0xA8, FIL_, 0xC4, + 0xAF, FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB8, 0xAD, + FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x90, FIL_, 0xCC, + 0x91, FIL_, 0xC8, 0x8B, FIL_, 0xCC, 0x8F, FIL_, + 0xC8, 0x89, FIL_, 0x02, 0xCC, 0x8C, FIL_, 0xC7, + 0xB0, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0xB5, FIL_, + 0x05, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0xB5, FIL_, + 0xCC, 0xA7, FIL_, 0xC4, 0xB7, FIL_, 0xCC, 0x8C, + FIL_, 0xC7, 0xA9, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xB8, 0xB1, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0xB3, FIL_, 0x06, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, + 0xB7, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0xBD, + FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0xBB, FIL_, + 0xCC, 0xA7, FIL_, 0xC4, 0xBC, FIL_, 0xCC, 0x81, + FIL_, 0xC4, 0xBA, FIL_, 0xCC, 0x8C, FIL_, 0xC4, + 0xBE, FIL_, 0x03, 0xCC, 0x87, FIL_, 0xE1, 0xB9, + 0x81, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x83, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xBF, FIL_, + 0x09, 0xCC, 0x80, FIL_, 0xC7, 0xB9, FIL_, 0xCC, + 0xAD, FIL_, 0xE1, 0xB9, 0x8B, FIL_, 0xCC, 0x83, + FIL_, 0xC3, 0xB1, FIL_, 0xCC, 0x81, FIL_, 0xC5, + 0x84, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x87, + FIL_, 0xCC, 0xB1, FIL_, 
0xE1, 0xB9, 0x89, FIL_, + 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x85, FIL_, 0xCC, + 0xA7, FIL_, 0xC5, 0x86, FIL_, 0xCC, 0x8C, FIL_, + 0xC5, 0x88, FIL_, 0x10, 0xCC, 0xA3, FIL_, 0xE1, + 0xBB, 0x8D, FIL_, 0xCC, 0x87, FIL_, 0xC8, 0xAF, + FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xB2, FIL_, 0xCC, + 0x91, FIL_, 0xC8, 0x8F, FIL_, 0xCC, 0x89, FIL_, + 0xE1, 0xBB, 0x8F, FIL_, 0xCC, 0x88, FIL_, 0xC3, + 0xB6, FIL_, 0xCC, 0x83, FIL_, 0xC3, 0xB5, FIL_, + 0xCC, 0x81, FIL_, 0xC3, 0xB3, FIL_, 0xCC, 0x8C, + FIL_, 0xC7, 0x92, FIL_, 0xCC, 0xA8, FIL_, 0xC7, + 0xAB, FIL_, 0xCC, 0x9B, FIL_, 0xC6, 0xA1, FIL_, + 0xCC, 0x84, FIL_, 0xC5, 0x8D, FIL_, 0xCC, 0x86, + FIL_, 0xC5, 0x8F, FIL_, 0xCC, 0x8B, FIL_, 0xC5, + 0x91, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xB4, FIL_, + 0xCC, 0x8F, FIL_, 0xC8, 0x8D, FIL_, 0x02, 0xCC, + 0x87, FIL_, 0xE1, 0xB9, 0x97, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0x95, FIL_, 0x08, 0xCC, 0x8C, + FIL_, 0xC5, 0x99, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB9, 0x9B, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0x95, + FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x97, FIL_, 0xCC, + 0xB1, FIL_, 0xE1, 0xB9, 0x9F, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0x99, FIL_, 0xCC, 0x91, FIL_, + 0xC8, 0x93, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x91, + FIL_, 0x07, 0xCC, 0xA7, FIL_, 0xC5, 0x9F, FIL_, + 0xCC, 0x82, FIL_, 0xC5, 0x9D, FIL_, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA1, FIL_, 0xCC, 0xA6, FIL_, + 0xC8, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0x9B, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0xA3, FIL_, + 0xCC, 0x8C, FIL_, 0xC5, 0xA1, FIL_, 0x08, 0xCC, + 0xA6, FIL_, 0xC8, 0x9B, FIL_, 0xCC, 0xAD, FIL_, + 0xE1, 0xB9, 0xB1, FIL_, 0xCC, 0xB1, FIL_, 0xE1, + 0xB9, 0xAF, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, + 0xAD, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xAB, + FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0xA5, FIL_, 0xCC, + 0xA7, FIL_, 0xC5, 0xA3, FIL_, 0xCC, 0x88, FIL_, + 0xE1, 0xBA, 0x97, FIL_, 0x13, 0xCC, 0x8A, FIL_, + 0xC5, 0xAF, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x95, + FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x94, FIL_, 0xCC, + 0x80, FIL_, 0xC3, 0xB9, FIL_, 0xCC, 0x9B, FIL_, + 0xC6, 0xB0, FIL_, 0xCC, 
0x82, FIL_, 0xC3, 0xBB, + FIL_, 0xCC, 0x81, FIL_, 0xC3, 0xBA, FIL_, 0xCC, + 0x88, FIL_, 0xC3, 0xBC, FIL_, 0xCC, 0x83, FIL_, + 0xC5, 0xA9, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, + 0xA7, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0xAB, FIL_, + 0xCC, 0x86, FIL_, 0xC5, 0xAD, FIL_, 0xCC, 0xAD, + FIL_, 0xE1, 0xB9, 0xB7, FIL_, 0xCC, 0x8B, FIL_, + 0xC5, 0xB1, FIL_, 0xCC, 0xA8, FIL_, 0xC5, 0xB3, + FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x97, FIL_, 0xCC, + 0xA4, FIL_, 0xE1, 0xB9, 0xB3, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xBB, 0xA5, FIL_, 0xCC, 0xB0, FIL_, + 0xE1, 0xB9, 0xB5, FIL_, 0x02, 0xCC, 0x83, FIL_, + 0xE1, 0xB9, 0xBD, FIL_, 0xCC, 0xA3, FIL_, 0xE1, + 0xB9, 0xBF, FIL_, 0x07, 0xCC, 0x8A, FIL_, 0xE1, + 0xBA, 0x98, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xBA, + 0x87, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0x83, + FIL_, 0xCC, 0x82, FIL_, 0xC5, 0xB5, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBA, 0x81, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xBA, 0x89, FIL_, 0xCC, 0x88, FIL_, + 0xE1, 0xBA, 0x85, FIL_, 0x02, 0xCC, 0x87, FIL_, + 0xE1, 0xBA, 0x8B, FIL_, 0xCC, 0x88, FIL_, 0xE1, + 0xBA, 0x8D, FIL_, 0x0A, 0xCC, 0x87, FIL_, 0xE1, + 0xBA, 0x8F, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, + 0xB5, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0xB7, + FIL_, 0xCC, 0x8A, FIL_, 0xE1, 0xBA, 0x99, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xB3, FIL_, 0xCC, + 0x83, FIL_, 0xE1, 0xBB, 0xB9, FIL_, 0xCC, 0x88, + FIL_, 0xC3, 0xBF, FIL_, 0xCC, 0x81, FIL_, 0xC3, + 0xBD, FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xB3, FIL_, + 0xCC, 0x82, FIL_, 0xC5, 0xB7, FIL_, 0x06, 0xCC, + 0xB1, FIL_, 0xE1, 0xBA, 0x95, FIL_, 0xCC, 0xA3, + FIL_, 0xE1, 0xBA, 0x93, FIL_, 0xCC, 0x82, FIL_, + 0xE1, 0xBA, 0x91, FIL_, 0xCC, 0x81, FIL_, 0xC5, + 0xBA, FIL_, 0xCC, 0x87, FIL_, 0xC5, 0xBC, FIL_, + 0xCC, 0x8C, FIL_, 0xC5, 0xBE, FIL_, 0x03, 0xCC, + 0x80, FIL_, 0xE1, 0xBF, 0xAD, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBF, 0x81, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x85, FIL_, 0x04, 0xCC, 0x83, FIL_, 0xE1, + 0xBA, 0xAA, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, + 0xA4, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA8, + FIL_, 0xCC, 0x80, FIL_, 
0xE1, 0xBA, 0xA6, FIL_, + 0x01, 0xCC, 0x84, FIL_, 0xC7, 0x9E, FIL_, 0x01, + 0xCC, 0x81, FIL_, 0xC7, 0xBA, FIL_, 0x02, 0xCC, + 0x84, FIL_, 0xC7, 0xA2, FIL_, 0xCC, 0x81, FIL_, + 0xC7, 0xBC, FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, + 0xB8, 0x88, FIL_, 0x04, 0xCC, 0x83, FIL_, 0xE1, + 0xBB, 0x84, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, + 0x80, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x82, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xBE, FIL_, + 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xAE, FIL_, + 0x04, 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x90, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0x92, FIL_, 0xCC, + 0x89, FIL_, 0xE1, 0xBB, 0x94, FIL_, 0xCC, 0x83, + FIL_, 0xE1, 0xBB, 0x96, FIL_, 0x03, 0xCC, 0x84, + FIL_, 0xC8, 0xAC, FIL_, 0xCC, 0x88, FIL_, 0xE1, + 0xB9, 0x8E, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xB9, + 0x8C, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAA, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xC7, 0xBE, FIL_, + 0x04, 0xCC, 0x80, FIL_, 0xC7, 0x9B, FIL_, 0xCC, + 0x84, FIL_, 0xC7, 0x95, FIL_, 0xCC, 0x8C, FIL_, + 0xC7, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC7, 0x97, + FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xA5, + FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBA, 0xAB, FIL_, + 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA9, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBA, 0xA7, FIL_, 0x01, 0xCC, + 0x84, FIL_, 0xC7, 0x9F, FIL_, 0x01, 0xCC, 0x81, + FIL_, 0xC7, 0xBB, FIL_, 0x02, 0xCC, 0x81, FIL_, + 0xC7, 0xBD, FIL_, 0xCC, 0x84, FIL_, 0xC7, 0xA3, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x89, + FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x83, + FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0x85, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0x81, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBA, 0xBF, FIL_, 0x01, 0xCC, + 0x81, FIL_, 0xE1, 0xB8, 0xAF, FIL_, 0x04, 0xCC, + 0x80, FIL_, 0xE1, 0xBB, 0x93, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBB, 0x91, FIL_, 0xCC, 0x83, FIL_, + 0xE1, 0xBB, 0x97, FIL_, 0xCC, 0x89, FIL_, 0xE1, + 0xBB, 0x95, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, + 0xB9, 0x8D, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB9, + 0x8F, FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xAD, FIL_, + 0x01, 0xCC, 0x84, FIL_, 
0xC8, 0xAB, FIL_, 0x01, + 0xCC, 0x81, FIL_, 0xC7, 0xBF, FIL_, 0x04, 0xCC, + 0x8C, FIL_, 0xC7, 0x9A, FIL_, 0xCC, 0x84, FIL_, + 0xC7, 0x96, FIL_, 0xCC, 0x80, FIL_, 0xC7, 0x9C, + FIL_, 0xCC, 0x81, FIL_, 0xC7, 0x98, FIL_, 0x04, + 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xAE, FIL_, 0xCC, + 0x83, FIL_, 0xE1, 0xBA, 0xB4, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBA, 0xB2, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBA, 0xB0, FIL_, 0x04, 0xCC, 0x83, FIL_, + 0xE1, 0xBA, 0xB5, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBA, 0xB1, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, + 0xAF, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xB3, + FIL_, 0x02, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x96, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x94, FIL_, + 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x95, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x97, FIL_, 0x02, + 0xCC, 0x80, FIL_, 0xE1, 0xB9, 0x90, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xB9, 0x92, FIL_, 0x02, 0xCC, + 0x81, FIL_, 0xE1, 0xB9, 0x93, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xB9, 0x91, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA4, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA5, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA6, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xB9, 0xA7, FIL_, 0x01, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0xB8, FIL_, 0x01, 0xCC, 0x81, + FIL_, 0xE1, 0xB9, 0xB9, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xE1, 0xB9, 0xBA, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xE1, 0xB9, 0xBB, FIL_, 0x01, 0xCC, 0x87, + FIL_, 0xE1, 0xBA, 0x9B, FIL_, 0x05, 0xCC, 0x80, + FIL_, 0xE1, 0xBB, 0x9C, FIL_, 0xCC, 0x89, FIL_, + 0xE1, 0xBB, 0x9E, FIL_, 0xCC, 0x83, FIL_, 0xE1, + 0xBB, 0xA0, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, + 0x9A, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xA2, + FIL_, 0x05, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xA1, + FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xA3, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x9B, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBB, 0x9D, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0x9F, FIL_, 0x05, 0xCC, 0x81, + FIL_, 0xE1, 0xBB, 0xA8, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBB, 0xAA, FIL_, 0xCC, 0x89, FIL_, 0xE1, + 0xBB, 0xAC, FIL_, 0xCC, 
0x83, FIL_, 0xE1, 0xBB, + 0xAE, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xB0, + FIL_, 0x05, 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xAB, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0xA9, FIL_, + 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xAF, FIL_, 0xCC, + 0xA3, FIL_, 0xE1, 0xBB, 0xB1, FIL_, 0xCC, 0x89, + FIL_, 0xE1, 0xBB, 0xAD, FIL_, 0x01, 0xCC, 0x8C, + FIL_, 0xC7, 0xAE, FIL_, 0x01, 0xCC, 0x84, FIL_, + 0xC7, 0xAC, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, + 0xAD, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA0, + FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA1, FIL_, + 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9C, FIL_, + 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9D, FIL_, + 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xB0, FIL_, 0x01, + 0xCC, 0x84, FIL_, 0xC8, 0xB1, FIL_, 0x01, 0xCC, + 0x8C, FIL_, 0xC7, 0xAF, FIL_, 0x07, 0xCC, 0x93, + FIL_, 0xE1, 0xBC, 0x88, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x86, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBE, + 0xB8, FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xBE, 0xB9, + FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0x89, FIL_, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xBC, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBE, 0xBA, FIL_, 0x04, 0xCC, + 0x94, FIL_, 0xE1, 0xBC, 0x99, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBF, 0x88, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x88, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, + 0x98, FIL_, 0x05, 0xCD, 0x85, FIL_, 0xE1, 0xBF, + 0x8C, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x89, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x8A, FIL_, 0xCC, + 0x93, FIL_, 0xE1, 0xBC, 0xA8, FIL_, 0xCC, 0x94, + FIL_, 0xE1, 0xBC, 0xA9, FIL_, 0x07, 0xCC, 0x80, + FIL_, 0xE1, 0xBF, 0x9A, FIL_, 0xCC, 0x84, FIL_, + 0xE1, 0xBF, 0x99, FIL_, 0xCC, 0x93, FIL_, 0xE1, + 0xBC, 0xB8, FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBC, + 0xB9, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0x98, + FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x8A, FIL_, 0xCC, + 0x88, FIL_, 0xCE, 0xAA, FIL_, 0x04, 0xCC, 0x81, + FIL_, 0xCE, 0x8C, FIL_, 0xCC, 0x94, FIL_, 0xE1, + 0xBD, 0x89, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, + 0x88, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0xB8, + FIL_, 0x01, 0xCC, 0x94, FIL_, 0xE1, 0xBF, 0xAC, + FIL_, 0x06, 0xCC, 0x94, 
FIL_, 0xE1, 0xBD, 0x99, + FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0xA8, FIL_, + 0xCC, 0x88, FIL_, 0xCE, 0xAB, FIL_, 0xCC, 0x84, + FIL_, 0xE1, 0xBF, 0xA9, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x8E, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, + 0xAA, FIL_, 0x05, 0xCC, 0x93, FIL_, 0xE1, 0xBD, + 0xA8, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xBC, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0xBA, FIL_, + 0xCC, 0x94, FIL_, 0xE1, 0xBD, 0xA9, FIL_, 0xCC, + 0x81, FIL_, 0xCE, 0x8F, FIL_, 0x01, 0xCD, 0x85, + FIL_, 0xE1, 0xBE, 0xB4, FIL_, 0x01, 0xCD, 0x85, + FIL_, 0xE1, 0xBF, 0x84, FIL_, 0x08, 0xCD, 0x85, + FIL_, 0xE1, 0xBE, 0xB3, FIL_, 0xCC, 0x84, FIL_, + 0xE1, 0xBE, 0xB1, FIL_, 0xCC, 0x86, FIL_, 0xE1, + 0xBE, 0xB0, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, + 0xB0, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAC, FIL_, + 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0x81, FIL_, 0xCC, + 0x93, FIL_, 0xE1, 0xBC, 0x80, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBE, 0xB6, FIL_, 0x04, 0xCC, 0x93, + FIL_, 0xE1, 0xBC, 0x90, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBD, 0xB2, FIL_, 0xCC, 0x94, FIL_, 0xE1, + 0xBC, 0x91, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAD, + FIL_, 0x06, 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0xA1, + FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAE, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBF, 0x83, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBF, 0x86, FIL_, 0xCC, 0x93, FIL_, + 0xE1, 0xBC, 0xA0, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBD, 0xB4, FIL_, 0x08, 0xCC, 0x88, FIL_, 0xCF, + 0x8A, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAF, FIL_, + 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0xB0, FIL_, 0xCC, + 0x94, FIL_, 0xE1, 0xBC, 0xB1, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBD, 0xB6, FIL_, 0xCC, 0x86, FIL_, + 0xE1, 0xBF, 0x90, FIL_, 0xCC, 0x84, FIL_, 0xE1, + 0xBF, 0x91, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, + 0x96, FIL_, 0x04, 0xCC, 0x93, FIL_, 0xE1, 0xBD, + 0x80, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xB8, + FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBD, 0x81, FIL_, + 0xCC, 0x81, FIL_, 0xCF, 0x8C, FIL_, 0x02, 0xCC, + 0x93, FIL_, 0xE1, 0xBF, 0xA4, FIL_, 0xCC, 0x94, + FIL_, 0xE1, 0xBF, 0xA5, FIL_, 0x08, 0xCC, 0x81, + FIL_, 0xCF, 0x8D, FIL_, 
0xCC, 0x94, FIL_, 0xE1, + 0xBD, 0x91, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, + 0xA6, FIL_, 0xCC, 0x88, FIL_, 0xCF, 0x8B, FIL_, + 0xCC, 0x84, FIL_, 0xE1, 0xBF, 0xA1, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBD, 0xBA, FIL_, 0xCC, 0x93, + FIL_, 0xE1, 0xBD, 0x90, FIL_, 0xCC, 0x86, FIL_, + 0xE1, 0xBF, 0xA0, FIL_, 0x06, 0xCC, 0x80, FIL_, + 0xE1, 0xBD, 0xBC, FIL_, 0xCC, 0x94, FIL_, 0xE1, + 0xBD, 0xA1, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, + 0xA0, FIL_, 0xCC, 0x81, FIL_, 0xCF, 0x8E, FIL_, + 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB3, FIL_, 0xCD, + 0x82, FIL_, 0xE1, 0xBF, 0xB6, FIL_, 0x03, 0xCC, + 0x80, FIL_, 0xE1, 0xBF, 0x92, FIL_, 0xCD, 0x82, + FIL_, 0xE1, 0xBF, 0x97, FIL_, 0xCC, 0x81, FIL_, + 0xCE, 0x90, FIL_, 0x03, 0xCD, 0x82, FIL_, 0xE1, + 0xBF, 0xA7, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, + 0xA2, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xB0, FIL_, + 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB4, FIL_, + 0x02, 0xCC, 0x88, FIL_, 0xCF, 0x94, FIL_, 0xCC, + 0x81, FIL_, 0xCF, 0x93, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xD0, 0x87, FIL_, 0x02, 0xCC, 0x88, FIL_, + 0xD3, 0x92, FIL_, 0xCC, 0x86, FIL_, 0xD3, 0x90, + FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x83, FIL_, + 0x03, 0xCC, 0x88, FIL_, 0xD0, 0x81, FIL_, 0xCC, + 0x80, FIL_, 0xD0, 0x80, FIL_, 0xCC, 0x86, FIL_, + 0xD3, 0x96, FIL_, 0x02, 0xCC, 0x86, FIL_, 0xD3, + 0x81, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x9C, FIL_, + 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9E, FIL_, 0x04, + 0xCC, 0x84, FIL_, 0xD3, 0xA2, FIL_, 0xCC, 0x88, + FIL_, 0xD3, 0xA4, FIL_, 0xCC, 0x86, FIL_, 0xD0, + 0x99, FIL_, 0xCC, 0x80, FIL_, 0xD0, 0x8D, FIL_, + 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x8C, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xA6, FIL_, 0x04, 0xCC, + 0x8B, FIL_, 0xD3, 0xB2, FIL_, 0xCC, 0x88, FIL_, + 0xD3, 0xB0, FIL_, 0xCC, 0x86, FIL_, 0xD0, 0x8E, + FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xAE, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xB4, FIL_, 0x01, 0xCC, + 0x88, FIL_, 0xD3, 0xB8, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xD3, 0xAC, FIL_, 0x02, 0xCC, 0x86, FIL_, + 0xD3, 0x91, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x93, + FIL_, 0x01, 0xCC, 0x81, 
FIL_, 0xD1, 0x93, FIL_, + 0x03, 0xCC, 0x80, FIL_, 0xD1, 0x90, FIL_, 0xCC, + 0x86, FIL_, 0xD3, 0x97, FIL_, 0xCC, 0x88, FIL_, + 0xD1, 0x91, FIL_, 0x02, 0xCC, 0x86, FIL_, 0xD3, + 0x82, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x9D, FIL_, + 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9F, FIL_, 0x04, + 0xCC, 0x86, FIL_, 0xD0, 0xB9, FIL_, 0xCC, 0x88, + FIL_, 0xD3, 0xA5, FIL_, 0xCC, 0x84, FIL_, 0xD3, + 0xA3, FIL_, 0xCC, 0x80, FIL_, 0xD1, 0x9D, FIL_, + 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x9C, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xA7, FIL_, 0x04, 0xCC, + 0x8B, FIL_, 0xD3, 0xB3, FIL_, 0xCC, 0x84, FIL_, + 0xD3, 0xAF, FIL_, 0xCC, 0x86, FIL_, 0xD1, 0x9E, + FIL_, 0xCC, 0x88, FIL_, 0xD3, 0xB1, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xB5, FIL_, 0x01, 0xCC, + 0x88, FIL_, 0xD3, 0xB9, FIL_, 0x01, 0xCC, 0x88, + FIL_, 0xD3, 0xAD, FIL_, 0x01, 0xCC, 0x88, FIL_, + 0xD1, 0x97, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, + 0xB6, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, 0xB7, + FIL_, 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9A, FIL_, + 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9B, FIL_, 0x01, + 0xCC, 0x88, FIL_, 0xD3, 0xAA, FIL_, 0x01, 0xCC, + 0x88, FIL_, 0xD3, 0xAB, FIL_, 0x03, 0xD9, 0x94, + FIL_, 0xD8, 0xA3, FIL_, 0xD9, 0x95, FIL_, 0xD8, + 0xA5, FIL_, 0xD9, 0x93, FIL_, 0xD8, 0xA2, FIL_, + 0x01, 0xD9, 0x94, FIL_, 0xD8, 0xA4, FIL_, 0x01, + 0xD9, 0x94, FIL_, 0xD8, 0xA6, FIL_, 0x01, 0xD9, + 0x94, FIL_, 0xDB, 0x82, FIL_, 0x01, 0xD9, 0x94, + FIL_, 0xDB, 0x93, FIL_, 0x01, 0xD9, 0x94, FIL_, + 0xDB, 0x80, FIL_, 0x01, 0xE0, 0xA4, 0xBC, FIL_, + 0xE0, 0xA4, 0xA9, FIL_, 0x01, 0xE0, 0xA4, 0xBC, + FIL_, 0xE0, 0xA4, 0xB1, FIL_, 0x01, 0xE0, 0xA4, + 0xBC, FIL_, 0xE0, 0xA4, 0xB4, FIL_, 0x02, 0xE0, + 0xA6, 0xBE, FIL_, 0xE0, 0xA7, 0x8B, FIL_, 0xE0, + 0xA7, 0x97, FIL_, 0xE0, 0xA7, 0x8C, FIL_, 0x03, + 0xE0, 0xAD, 0x96, FIL_, 0xE0, 0xAD, 0x88, FIL_, + 0xE0, 0xAC, 0xBE, FIL_, 0xE0, 0xAD, 0x8B, FIL_, + 0xE0, 0xAD, 0x97, FIL_, 0xE0, 0xAD, 0x8C, FIL_, + 0x01, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAE, 0x94, + FIL_, 0x02, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAF, + 0x8C, FIL_, 0xE0, 0xAE, 
0xBE, FIL_, 0xE0, 0xAF, + 0x8A, FIL_, 0x01, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, + 0xAF, 0x8B, FIL_, 0x01, 0xE0, 0xB1, 0x96, FIL_, + 0xE0, 0xB1, 0x88, FIL_, 0x01, 0xE0, 0xB3, 0x95, + FIL_, 0xE0, 0xB3, 0x80, FIL_, 0x03, 0xE0, 0xB3, + 0x82, FIL_, 0xE0, 0xB3, 0x8A, FIL_, 0xE0, 0xB3, + 0x96, FIL_, 0xE0, 0xB3, 0x88, FIL_, 0xE0, 0xB3, + 0x95, FIL_, 0xE0, 0xB3, 0x87, FIL_, 0x01, 0xE0, + 0xB3, 0x95, FIL_, 0xE0, 0xB3, 0x8B, FIL_, 0x02, + 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8A, FIL_, + 0xE0, 0xB5, 0x97, FIL_, 0xE0, 0xB5, 0x8C, FIL_, + 0x01, 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8B, + FIL_, 0x03, 0xE0, 0xB7, 0x9F, FIL_, 0xE0, 0xB7, + 0x9E, FIL_, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, 0xB7, + 0x9A, FIL_, 0xE0, 0xB7, 0x8F, FIL_, 0xE0, 0xB7, + 0x9C, FIL_, 0x01, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, + 0xB7, 0x9D, FIL_, 0x01, 0xE1, 0x80, 0xAE, FIL_, + 0xE1, 0x80, 0xA6, FIL_, 0x01, 0xE1, 0xAC, 0xB5, + FIL_, 0xE1, 0xAC, 0x86, FIL_, 0x01, 0xE1, 0xAC, + 0xB5, FIL_, 0xE1, 0xAC, 0x88, FIL_, 0x01, 0xE1, + 0xAC, 0xB5, FIL_, 0xE1, 0xAC, 0x8A, FIL_, 0x01, + 0xE1, 0xAC, 0xB5, FIL_, 0xE1, 0xAC, 0x8C, FIL_, + 0x01, 0xE1, 0xAC, 0xB5, FIL_, 0xE1, 0xAC, 0x8E, + FIL_, 0x01, 0xE1, 0xAC, 0xB5, FIL_, 0xE1, 0xAC, + 0x92, FIL_, 0x01, 0xE1, 0xAC, 0xB5, FIL_, 0xE1, + 0xAC, 0xBB, FIL_, 0x01, 0xE1, 0xAC, 0xB5, FIL_, + 0xE1, 0xAC, 0xBD, FIL_, 0x01, 0xE1, 0xAC, 0xB5, + FIL_, 0xE1, 0xAD, 0x80, FIL_, 0x01, 0xE1, 0xAC, + 0xB5, FIL_, 0xE1, 0xAD, 0x81, FIL_, 0x01, 0xE1, + 0xAC, 0xB5, FIL_, 0xE1, 0xAD, 0x83, FIL_, 0x01, + 0xCC, 0x84, FIL_, 0xE1, 0xB8, 0xB8, FIL_, 0x01, + 0xCC, 0x84, FIL_, 0xE1, 0xB8, 0xB9, FIL_, 0x01, + 0xCC, 0x84, FIL_, 0xE1, 0xB9, 0x9C, FIL_, 0x01, + 0xCC, 0x84, FIL_, 0xE1, 0xB9, 0x9D, FIL_, 0x01, + 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA8, FIL_, 0x01, + 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA9, FIL_, 0x02, + 0xCC, 0x86, FIL_, 0xE1, 0xBA, 0xB6, FIL_, 0xCC, + 0x82, FIL_, 0xE1, 0xBA, 0xAC, FIL_, 0x02, 0xCC, + 0x82, FIL_, 0xE1, 0xBA, 0xAD, FIL_, 0xCC, 0x86, + FIL_, 0xE1, 0xBA, 0xB7, FIL_, 0x01, 0xCC, 0x82, + FIL_, 0xE1, 0xBB, 0x86, 
FIL_, 0x01, 0xCC, 0x82, + FIL_, 0xE1, 0xBB, 0x87, FIL_, 0x01, 0xCC, 0x82, + FIL_, 0xE1, 0xBB, 0x98, FIL_, 0x01, 0xCC, 0x82, + FIL_, 0xE1, 0xBB, 0x99, FIL_, 0x04, 0xCD, 0x85, + FIL_, 0xE1, 0xBE, 0x80, FIL_, 0xCD, 0x82, FIL_, + 0xE1, 0xBC, 0x86, FIL_, 0xCC, 0x80, FIL_, 0xE1, + 0xBC, 0x82, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, + 0x84, FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBC, + 0x87, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x85, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x83, FIL_, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x81, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x82, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x83, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x84, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x85, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x86, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x87, FIL_, 0x04, + 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x8C, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBC, 0x8A, FIL_, 0xCD, 0x85, + FIL_, 0xE1, 0xBE, 0x88, FIL_, 0xCD, 0x82, FIL_, + 0xE1, 0xBC, 0x8E, FIL_, 0x04, 0xCC, 0x80, FIL_, + 0xE1, 0xBC, 0x8B, FIL_, 0xCD, 0x82, FIL_, 0xE1, + 0xBC, 0x8F, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, + 0x8D, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x89, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8A, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8B, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8C, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8D, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8E, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8F, + FIL_, 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x92, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x94, FIL_, + 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x93, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x95, FIL_, 0x02, + 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x9A, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBC, 0x9C, FIL_, 0x02, 0xCC, + 0x80, FIL_, 0xE1, 0xBC, 0x9B, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBC, 0x9D, FIL_, 0x04, 0xCC, 0x80, + FIL_, 0xE1, 0xBC, 0xA2, FIL_, 0xCC, 0x81, FIL_, + 0xE1, 0xBC, 0xA4, FIL_, 0xCD, 0x82, FIL_, 0xE1, + 0xBC, 0xA6, FIL_, 0xCD, 
0x85, FIL_, 0xE1, 0xBE, + 0x90, FIL_, 0x04, 0xCD, 0x85, FIL_, 0xE1, 0xBE, + 0x91, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xA5, + FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xA7, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xA3, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x92, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x93, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x94, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x95, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x96, FIL_, 0x01, + 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x97, FIL_, 0x04, + 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xAC, FIL_, 0xCC, + 0x80, FIL_, 0xE1, 0xBC, 0xAA, FIL_, 0xCD, 0x85, + FIL_, 0xE1, 0xBE, 0x98, FIL_, 0xCD, 0x82, FIL_, + 0xE1, 0xBC, 0xAE, FIL_, 0x04, 0xCD, 0x82, FIL_, + 0xE1, 0xBC, 0xAF, FIL_, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, + 0xAD, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xAB, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9A, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9B, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9C, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9D, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9E, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9F, + FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xB4, + FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xB2, FIL_, + 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xB6, FIL_, 0x03, + 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xB3, FIL_, 0xCD, + 0x82, FIL_, 0xE1, 0xBC, 0xB7, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBC, 0xB5, FIL_, 0x03, 0xCC, 0x81, + FIL_, 0xE1, 0xBC, 0xBC, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBC, 0xBA, FIL_, 0xCD, 0x82, FIL_, 0xE1, + 0xBC, 0xBE, FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, + 0xBC, 0xBB, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, + 0xBF, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xBD, + FIL_, 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x82, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x84, FIL_, + 0x02, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x85, FIL_, + 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x83, FIL_, 0x02, + 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x8A, FIL_, 0xCC, + 0x81, FIL_, 0xE1, 0xBD, 
0x8C, FIL_, 0x02, 0xCC, + 0x80, FIL_, 0xE1, 0xBD, 0x8B, FIL_, 0xCC, 0x81, + FIL_, 0xE1, 0xBD, 0x8D, FIL_, 0x03, 0xCD, 0x82, + FIL_, 0xE1, 0xBD, 0x96, FIL_, 0xCC, 0x80, FIL_, + 0xE1, 0xBD, 0x92, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xBD, 0x94, FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, + 0xBD, 0x93, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, + 0x97, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x95, + FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x9B, + FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0x9F, FIL_, + 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x9D, FIL_, 0x04, + 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xA6, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA0, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBD, 0xA2, FIL_, 0xCC, 0x81, FIL_, + 0xE1, 0xBD, 0xA4, FIL_, 0x04, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0xA1, FIL_, 0xCD, 0x82, FIL_, 0xE1, + 0xBD, 0xA7, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, + 0xA5, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xA3, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA2, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA3, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA4, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA5, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA6, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA7, + FIL_, 0x04, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xAA, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0xAC, FIL_, + 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xAE, FIL_, 0xCD, + 0x85, FIL_, 0xE1, 0xBE, 0xA8, FIL_, 0x04, 0xCD, + 0x82, FIL_, 0xE1, 0xBD, 0xAF, FIL_, 0xCC, 0x80, + FIL_, 0xE1, 0xBD, 0xAB, FIL_, 0xCD, 0x85, FIL_, + 0xE1, 0xBE, 0xA9, FIL_, 0xCC, 0x81, FIL_, 0xE1, + 0xBD, 0xAD, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xAA, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xAB, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xAC, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xAD, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xAE, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xAF, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xB2, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBF, 0x82, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, + 0xBF, 0xB2, FIL_, 0x01, 
0xCD, 0x85, FIL_, 0xE1, + 0xBE, 0xB7, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, + 0xBF, 0x8E, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, + 0x8D, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0x8F, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0x87, + FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB7, + FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x9D, + FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBF, 0x9E, FIL_, + 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0x9F, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x86, 0x9A, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x86, 0x9B, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x86, 0xAE, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x87, 0x8D, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x87, 0x8F, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x87, 0x8E, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0x84, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0x89, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0x8C, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0xA4, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0xA6, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x81, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x84, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x87, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x89, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAD, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA2, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB0, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB1, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB4, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB5, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB8, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB9, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x80, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x81, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA0, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA1, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x84, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x85, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x88, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x89, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 
0x8B, 0xA2, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA3, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAC, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAD, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAE, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAF, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAA, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAB, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAC, FIL_, 0x01, + 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAD, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0x94, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x8C, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, + 0x8E, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x81, 0x90, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x81, 0x92, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x81, 0x94, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x81, 0x96, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x81, 0x98, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x9A, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x9C, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, + 0x9E, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x81, 0xA0, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x81, 0xA2, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x81, 0xA5, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x81, 0xA7, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x81, 0xA9, FIL_, 0x02, + 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xB1, FIL_, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB0, FIL_, + 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB3, + FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xB4, + FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, + 0xB6, FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, + 0xB7, FIL_, 0x02, 0xE3, 0x82, 0x9A, FIL_, 0xE3, + 0x81, 0xBA, FIL_, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x81, 0xB9, FIL_, 0x02, 0xE3, 0x82, 0x9A, FIL_, + 0xE3, 0x81, 0xBD, FIL_, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x81, 0xBC, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x82, 0x9E, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x83, 
0xB4, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x82, 0xAC, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xAE, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB0, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, + 0xB2, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x82, 0xB4, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x82, 0xB6, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x82, 0xB8, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x82, 0xBA, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x82, 0xBC, FIL_, 0x01, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xBE, FIL_, + 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x80, + FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, + 0x82, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x83, 0x85, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x83, 0x87, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x83, 0x89, FIL_, 0x02, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x83, 0x90, FIL_, 0xE3, 0x82, + 0x9A, FIL_, 0xE3, 0x83, 0x91, FIL_, 0x02, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x83, 0x93, FIL_, 0xE3, + 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x94, FIL_, 0x02, + 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x97, FIL_, + 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x96, FIL_, + 0x02, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x9A, + FIL_, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x99, + FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, + 0x9C, FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x83, + 0x9D, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, + 0x83, 0xB7, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, + 0xE3, 0x83, 0xB8, FIL_, 0x01, 0xE3, 0x82, 0x99, + FIL_, 0xE3, 0x83, 0xB9, FIL_, 0x01, 0xE3, 0x82, + 0x99, FIL_, 0xE3, 0x83, 0xBA, FIL_, 0x01, 0xE3, + 0x82, 0x99, FIL_, 0xE3, 0x83, 0xBE, FIL_, + }, +}; + +static const uchar_t u8_decomp_b2_tbl[2][2][256] = { + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 
N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, 2, 3, 4, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 5, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 6, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 7, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, 
N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 1, 2, 3, 4, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 5, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, 6, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 7, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, 
N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + +}; + +static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { + { + { /* Third byte table 0. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, 
{ N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0, 0 }, + { 1, 35 }, { 2, 247 }, { 3, 474 }, + { 4, 693 }, { 5, 709 }, { 6, 951 }, + { N_, 0 }, { 7, 1139 }, { 8, 1152 }, + { N_, 0 }, { 9, 1177 }, { 10, 1199 }, + { 11, 1295 }, { 12, 1360 }, { 13, 1405 }, + { N_, 0 }, { 14, 1450 }, { N_, 0 }, + { N_, 0 }, { 15, 1620 }, { N_, 0 }, + { 16, 1624 }, { 17, 1649 }, { N_, 0 }, + { 18, 1665 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 19, 1680 }, + { 20, 1701 }, { N_, 0 }, { 21, 1757 }, + { 22, 1792 }, { 23, 1806 
}, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 24, 1834 }, + { 25, 1869 }, { 26, 1876 }, { N_, 0 }, + { 27, 1897 }, { N_, 0 }, { 28, 1904 }, + { N_, 0 }, { 29, 1942 }, { N_, 0 }, + { 30, 1963 }, { 31, 1994 }, { N_, 0 }, + { 32, 2000 }, { 33, 2006 }, { 34, 2018 }, + { 35, 2021 }, { 36, 2109 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 37, 2158 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 
}, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 0x8000, 2165 }, { 0x8001, 2445 }, + { 0x8002, 2741 }, { 0x8003, 3029 }, { 0x8004, 3337 }, + { 0x8005, 3725 }, { 0x8006, 4053 }, { 0x8007, 4536 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 38, 4895 }, + { 39, 4964 }, { 40, 4999 }, { N_, 0 }, + { 41, 5018 }, { 42, 5098 }, { 43, 5230 }, + { 44, 5248 }, { 45, 5266 }, { 46, 5326 }, + { 47, 5410 }, { 48, 5470 }, { 49, 5518 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 50, 5526 }, { 51, 5596 }, + { 52, 5767 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { 53, 5810 }, { 54, 5822 }, { N_, 0 }, + { 55, 5830 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 56, 5836 }, { 57, 5839 }, { 58, 5842 }, + { 59, 6034 }, { 60, 6226 }, { 61, 6418 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 4. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 62, 6484 }, + { 63, 6497 }, { 64, 6672 }, { 65, 6770 }, + { 66, 6923 }, { 67, 6968 }, { 68, 7160 }, + { N_, 0 }, { 0x8008, 7247 }, { 69, 7597 }, + { 70, 7773 }, { 71, 7950 }, { 0x8009, 8142 }, + { 0x800A, 8919 }, { 72, 9351 }, { 73, 9522 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 
N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 5. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0x800B, 9743 }, + { 0x800C, 9999 }, { 0x800D, 10255 }, { 0x800E, 10511 }, + { 
74, 10767 }, { 75, 10967 }, { N_, 0 }, + { N_, 0 }, { 76, 11139 }, { 77, 11303 }, + { 78, 11468 }, { 79, 11576 }, { 0x800F, 11740 }, + { 0x8010, 12006 }, { 0x8011, 12280 }, { 0x8012, 12546 }, + { 80, 12812 }, { 0x8013, 13060 }, { 0x8014, 13348 }, + { 81, 13720 }, { 82, 13898 }, { 83, 13933 }, + { 84, 14045 }, { 85, 14197 }, { 86, 14347 }, + { 87, 14410 }, { 88, 14540 }, { 89, 14729 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 6. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 90, 14829 }, { 91, 14912 }, + { 92, 14969 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 93, 14982 }, { 94, 15046 }, { 95, 15109 }, + { 96, 15163 }, { 97, 15225 }, { 98, 15282 }, + { 99, 15341 }, { 100, 15405 }, { 101, 15469 }, + { 102, 15533 }, { 103, 15597 }, { 104, 15681 }, + { 105, 15812 }, { 106, 15942 }, { 107, 16072 }, + { 108, 16202 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 
}, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 7. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 0x8015, 16273 }, { 0x8016, 16536 }, + { 0x8017, 16799 }, { 0x8018, 17064 }, { 0x8019, 17329 }, + { 0x801A, 17601 }, { 0x801B, 
17878 }, { 0x801C, 18147 }, + { 109, 18419 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + }, + { + { /* Third byte table 0. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + 
{ N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0, 0 }, + { 1, 35 }, { 2, 247 }, { 3, 474 }, + { 4, 693 }, { 5, 709 }, { 6, 951 }, + { N_, 0 }, { 7, 1139 }, { 8, 1152 }, + { N_, 0 }, { 9, 1177 }, { 10, 1199 }, + { 11, 1295 }, { 12, 1362 }, { 13, 1407 }, + { N_, 0 }, { 14, 1452 }, { N_, 0 }, + { N_, 0 }, { 15, 1622 }, { N_, 0 }, + { 16, 1626 }, { 17, 1651 }, { N_, 0 }, + { 18, 1667 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 19, 1682 }, + { 20, 1703 }, { N_, 0 }, { 21, 1759 }, + { 22, 1794 }, { 23, 1808 
}, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 24, 1836 }, + { 25, 1871 }, { 26, 1878 }, { N_, 0 }, + { 27, 1899 }, { N_, 0 }, { 28, 1906 }, + { N_, 0 }, { 29, 1944 }, { N_, 0 }, + { 30, 1965 }, { 31, 1996 }, { N_, 0 }, + { 32, 2002 }, { 33, 2008 }, { 34, 2020 }, + { 35, 2023 }, { 36, 2111 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 37, 2160 }, + { N_, 0 }, { N_, 0 }, { 38, 2167 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 
0 }, + { N_, 0 }, { 39, 2170 }, { 40, 2226 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 41, 2247 }, { 42, 2268 }, { 43, 2340 }, + { N_, 0 }, { 0x8000, 2414 }, { 0x8001, 2694 }, + { 0x8002, 2990 }, { 0x8003, 3278 }, { 0x8004, 3586 }, + { 0x8005, 3974 }, { 0x8006, 4302 }, { 0x8007, 4785 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 44, 5144 }, + { 45, 5213 }, { 46, 5248 }, { N_, 0 }, + { 47, 5273 }, { 48, 5358 }, { 49, 5490 }, + { 50, 5508 }, { 51, 5526 }, { 52, 5586 }, + { 53, 5670 }, { 54, 5730 }, { 55, 5778 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 56, 5786 }, { 57, 5856 }, + { 58, 6027 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { 59, 6070 }, { 60, 6082 }, { N_, 0 }, + { 61, 6090 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 62, 6096 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 63, 6099 }, { 64, 6102 }, { 65, 6105 }, + { 66, 6297 }, { 67, 6489 }, { 68, 6681 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 4. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 69, 6747 }, + { 70, 6760 }, { 71, 6935 }, { 72, 7033 }, + { 73, 7186 }, { 74, 7231 }, { 75, 7423 }, + { N_, 0 }, { 0x8008, 7510 }, { 76, 7891 }, + { 77, 8103 }, { 78, 8280 }, { 0x8009, 8482 }, + { 0x800A, 9259 }, { 79, 9701 }, { 80, 9872 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 
N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 5. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0x800B, 10106 }, + { 0x800C, 10362 }, { 0x800D, 10618 }, { 0x800E, 10874 }, + { 
81, 11130 }, { 82, 11330 }, { 0x800F, 11566 }, + { 83, 11822 }, { 84, 11932 }, { 85, 12096 }, + { 86, 12261 }, { 87, 12369 }, { 0x8010, 12533 }, + { 0x8011, 12799 }, { 0x8012, 13073 }, { 0x8013, 13339 }, + { 88, 13605 }, { 0x8014, 13853 }, { 0x8015, 14141 }, + { 89, 14513 }, { 90, 14691 }, { 91, 14746 }, + { 92, 14860 }, { 93, 15012 }, { 94, 15162 }, + { 95, 15225 }, { 96, 15355 }, { 97, 15544 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 6. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 98, 15644 }, { 99, 15727 }, + { 100, 15784 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 101, 15797 }, { 102, 15861 }, { 103, 15924 }, + { 104, 15978 }, { 105, 16041 }, { 106, 16098 }, + { 107, 16157 }, { 108, 16221 }, { 109, 16285 }, + { 110, 16349 }, { 111, 16413 }, { 112, 16501 }, + { 113, 16632 }, { 114, 16762 }, { 115, 16892 }, + { 116, 17022 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, 
{ N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + { /* Third byte table 7. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 0x8016, 17097 }, { 0x8017, 17360 }, + { 0x8018, 17623 }, { 0x8019, 17888 }, { 0x801A, 18153 }, + { 0x801B, 18425 }, { 0x801C, 
18702 }, { 0x801D, 18971 }, + { 117, 19243 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, + }, + }, +}; + +static const uchar_t u8_decomp_b4_tbl[2][118][257] = { + { + { /* Fourth byte table 0. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 4, 5, 5, 5, 5, 5, + 8, 8, 8, 9, 10, 13, 15, 15, + 15, 18, 19, 20, 20, 25, 30, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 1. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 24, + 28, 32, 36, 40, 44, 48, 52, 56, + 60, 60, 64, 68, 72, 76, 80, 84, + 84, 84, 88, 92, 96, 100, 104, 104, + 104, 108, 112, 116, 120, 124, 128, 128, + 132, 136, 140, 144, 148, 152, 156, 160, + 164, 164, 168, 172, 176, 180, 184, 188, + 188, 188, 192, 196, 200, 204, 208, 208, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 
212, 212, + 212, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 64, 64, 68, 72, 76, 80, 84, + 88, 92, 96, 100, 104, 108, 112, 116, + 120, 124, 128, 132, 136, 140, 144, 144, + 144, 148, 152, 156, 160, 164, 168, 172, + 176, 180, 180, 182, 184, 188, 192, 196, + 200, 200, 204, 208, 212, 216, 220, 224, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 7, 11, 15, 19, + 23, 27, 30, 30, 30, 34, 38, 42, + 46, 50, 54, 54, 54, 58, 62, 66, + 70, 74, 78, 82, 86, 90, 94, 98, + 102, 106, 110, 114, 118, 122, 126, 126, + 126, 130, 134, 138, 142, 146, 150, 154, + 158, 162, 166, 170, 174, 178, 182, 186, + 190, 194, 198, 202, 206, 210, 214, 218, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, + }, + { /* Fourth byte table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 12, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 5. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 8, 12, + 14, 16, 18, 20, 22, 24, 28, 32, + 36, 40, 44, 48, 52, 56, 62, 68, + 74, 80, 86, 92, 98, 104, 104, 110, + 116, 122, 128, 133, 138, 138, 138, 142, + 146, 150, 154, 158, 162, 168, 174, 179, + 184, 188, 190, 192, 194, 198, 202, 202, + 202, 206, 210, 216, 222, 227, 232, 237, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 
242, 242, + 242, + }, + { /* Fourth byte table 6. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 112, 112, 116, + 120, 120, 120, 120, 120, 120, 120, 124, + 128, 132, 136, 142, 148, 154, 160, 164, + 168, 174, 180, 184, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, + }, + { /* Fourth byte table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 3, 4, 5, 7, 9, 11, + 12, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, + }, + { /* Fourth byte table 8. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 20, 21, 22, 23, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 6, 9, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 17, 17, 17, + 17, 17, 17, 20, 20, 20, 20, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 14, 19, + 22, 27, 32, 37, 37, 42, 42, 47, + 52, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 64, 69, 74, 79, 84, + 89, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 10, 15, 20, 25, + 25, 27, 29, 31, 41, 51, 53, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 57, 59, 61, 61, 63, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, + 65, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 10, 15, 15, 15, 15, + 20, 20, 20, 20, 20, 25, 30, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 10, 15, 15, 15, 15, + 20, 20, 20, 20, 20, 25, 30, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 40, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 15, 20, 25, 30, 30, 30, 35, + 40, 40, 40, 45, 50, 55, 60, 65, + 70, 70, 70, 75, 80, 85, 90, 95, + 100, 100, 100, 105, 110, 115, 120, 125, + 130, 135, 140, 145, 150, 155, 160, 160, + 160, 165, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 
170, 170, 170, 170, 170, 170, 170, + 170, + }, + { /* Fourth byte table 15. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 10, 15, 20, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 8, + 12, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 5, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 14, 14, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 21, 28, 35, 42, 49, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 21, 28, 28, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 7, 7, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 14, 21, 21, 21, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 7, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 28, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 7, 7, 7, 7, 7, + 14, 21, 21, 28, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 7, 14, 24, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, + }, + { /* Fourth byte table 33. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, + }, + { /* Fourth byte table 34. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 35. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 14, 14, + 14, 14, 14, 21, 21, 21, 21, 21, + 28, 28, 28, 28, 28, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 49, 49, 56, 63, + 72, 79, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, + }, + { /* Fourth byte table 36. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 21, 21, + 21, 21, 21, 28, 28, 28, 28, 28, + 35, 35, 35, 35, 35, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, + }, + { /* Fourth byte table 37. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 38. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 21, 21, 21, 21, + 21, 21, 24, 24, 24, 24, 24, 24, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, 30, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 34, 34, 34, 34, 40, 49, 49, 55, + 64, 64, 64, 64, 64, 66, 66, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, + }, + { /* Fourth byte table 39. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 4, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 20, 21, 21, 21, 22, 23, 24, + 25, 26, 27, 28, 31, 32, 33, 34, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 40. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 14, 15, 16, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, + }, + { /* Fourth byte table 41. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 7, 10, 10, 13, 16, + 18, 18, 21, 22, 23, 24, 25, 26, + 28, 29, 30, 31, 32, 32, 33, 35, + 35, 35, 36, 37, 38, 39, 40, 40, + 40, 42, 45, 47, 47, 48, 48, 51, + 51, 52, 52, 54, 58, 59, 60, 60, + 61, 62, 63, 63, 64, 65, 67, 69, + 71, 73, 74, 74, 74, 74, 76, 78, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, + }, + { /* Fourth byte table 42. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 4, 5, + 6, 7, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 13, 18, 23, 28, + 33, 38, 43, 48, 53, 58, 63, 68, + 72, 73, 75, 78, 80, 81, 83, 86, + 90, 92, 93, 95, 98, 99, 100, 101, + 102, 103, 105, 108, 110, 111, 113, 116, + 120, 122, 123, 125, 128, 129, 130, 131, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 
132, 132, 132, 132, 132, + 132, + }, + { /* Fourth byte table 43. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, + }, + { /* Fourth byte table 44. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 12, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, + }, + { /* Fourth byte table 45. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 6, 6, + 6, 6, 12, 12, 12, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 24, 24, 30, + 30, 30, 30, 30, 30, 36, 45, 45, + 51, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, + }, + { /* Fourth byte table 46. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 6, 6, 6, 12, 12, 12, + 18, 18, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 28, 28, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 40, 44, + 48, 54, 60, 60, 60, 66, 72, 72, + 72, 78, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, + }, + { /* Fourth byte table 47. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 12, 12, 18, 24, 24, + 24, 30, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 42, 48, 54, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, + }, + { /* Fourth byte table 48. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 24, 24, 24, + 24, 24, 24, 30, 36, 42, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 49. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, + }, + { /* Fourth byte table 50. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 11, 13, 15, 17, 19, 21, + 23, 25, 27, 29, 31, 34, 37, 40, + 43, 46, 49, 52, 55, 58, 62, 66, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, + }, + { /* Fourth byte table 51. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 53, 56, 59, 62, 65, 68, + 71, 74, 77, 80, 83, 86, 89, 92, + 95, 98, 101, 104, 107, 110, 113, 116, + 119, 122, 125, 128, 131, 134, 137, 140, + 143, 146, 149, 152, 155, 158, 161, 162, + 163, 164, 165, 166, 167, 168, 169, 170, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 
171, 171, 171, + 171, + }, + { /* Fourth byte table 52. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, + }, + { /* Fourth byte table 53. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, + }, + { /* Fourth byte table 54. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 5, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, + }, + { /* Fourth byte table 55. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, + }, + { /* Fourth byte table 56. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 57. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 58. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 59. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 60. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 61. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, + }, + { /* Fourth byte table 62. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 4, + 4, 7, 10, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, + }, + { /* Fourth byte table 63. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 14, + 14, 21, 21, 28, 28, 35, 35, 42, + 42, 49, 49, 56, 56, 63, 63, 70, + 70, 77, 77, 84, 84, 84, 91, 91, + 98, 98, 105, 105, 105, 105, 105, 105, + 105, 112, 119, 119, 126, 133, 133, 140, + 147, 147, 154, 161, 161, 168, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, + }, + { /* Fourth byte table 64. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 7, + 7, 7, 7, 7, 11, 15, 15, 22, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 35, 35, 42, + 42, 49, 49, 56, 56, 63, 63, 70, + 70, 77, 77, 84, 84, 91, 91, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, + }, + { /* Fourth byte table 65. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 14, 14, 14, 21, 21, + 28, 28, 35, 35, 35, 35, 35, 35, + 35, 42, 49, 49, 56, 63, 63, 70, + 77, 77, 84, 91, 91, 98, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 112, 112, 112, + 119, 126, 133, 140, 140, 140, 140, 147, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 
153, 153, 153, 153, 153, 153, 153, + 153, + }, + { /* Fourth byte table 66. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 6, 9, 12, 15, 18, + 21, 24, 27, 30, 33, 36, 39, 42, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, + }, + { /* Fourth byte table 67. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 68. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 45, 45, 45, 48, 51, 54, 57, 60, + 63, 66, 69, 72, 75, 78, 81, 84, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 69. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 15, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 22, 24, 26, 28, 30, 32, + 34, 36, 38, 40, 42, 44, 46, 48, + 50, 53, 56, 59, 62, 65, 68, 71, + 74, 77, 80, 83, 86, 89, 92, 98, + 104, 110, 116, 122, 128, 134, 140, 146, + 152, 158, 164, 170, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 
176, 176, 176, 176, 176, 176, 176, 176, + 176, + }, + { /* Fourth byte table 70. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 149, 151, 153, 155, 157, 159, + 161, 163, 165, 167, 169, 171, 173, 175, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, + }, + { /* Fourth byte table 71. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 41, 46, 51, 51, 51, 51, + 51, 54, 57, 60, 63, 66, 69, 72, + 75, 78, 81, 84, 87, 90, 93, 96, + 99, 102, 105, 108, 111, 114, 117, 120, + 123, 126, 129, 132, 135, 138, 141, 144, + 147, 150, 153, 156, 159, 162, 165, 168, + 171, 174, 177, 180, 183, 186, 189, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 72. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 7, 9, 11, 13, 15, + 17, 20, 24, 26, 28, 31, 34, 36, + 38, 40, 43, 46, 49, 52, 55, 57, + 59, 61, 63, 65, 68, 70, 72, 74, + 77, 80, 82, 85, 88, 91, 93, 96, + 101, 107, 109, 112, 115, 118, 121, 128, + 136, 138, 140, 143, 145, 147, 149, 152, + 154, 156, 158, 160, 162, 165, 167, 169, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, + }, + { /* Fourth byte table 73. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 10, 12, 14, 16, 22, + 25, 27, 29, 31, 33, 35, 37, 39, + 41, 43, 45, 48, 50, 52, 55, 58, + 60, 64, 67, 69, 71, 73, 75, 75, + 75, 79, 83, 87, 91, 95, 99, 103, + 107, 111, 116, 121, 126, 131, 136, 141, + 146, 151, 156, 161, 166, 171, 176, 181, + 186, 191, 196, 201, 206, 211, 216, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 221, 221, 221, 221, 221, 221, + 221, + }, + { /* Fourth byte table 74. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 56, + 56, 60, 60, 64, 64, 64, 68, 72, + 76, 80, 84, 88, 92, 96, 100, 104, + 104, 108, 108, 112, 112, 112, 116, 120, + 120, 120, 120, 124, 128, 132, 136, 136, + 136, 140, 144, 148, 152, 156, 160, 164, + 168, 172, 176, 180, 184, 188, 192, 196, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, + }, + { /* Fourth byte table 75. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, + }, + { /* Fourth byte table 76. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 9, 12, 14, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 20, 24, 28, 32, + 36, 36, 36, 36, 36, 36, 41, 41, + 46, 48, 50, 52, 54, 56, 58, 60, + 62, 64, 65, 70, 75, 82, 89, 94, + 99, 104, 109, 114, 119, 124, 129, 134, + 134, 139, 144, 149, 154, 159, 159, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, + }, + { /* Fourth byte table 77. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 10, 15, 20, 20, 25, + 30, 35, 40, 45, 50, 55, 60, 65, + 69, 71, 73, 75, 77, 79, 81, 83, + 85, 87, 89, 91, 93, 95, 97, 99, + 101, 103, 105, 107, 109, 111, 113, 115, + 117, 119, 121, 123, 125, 127, 129, 131, + 133, 135, 137, 139, 141, 143, 145, 147, + 149, 151, 153, 155, 157, 159, 161, 163, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, + }, + { /* Fourth byte table 78. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 76, 80, 82, + 84, 86, 88, 90, 92, 94, 96, 98, + 100, 104, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, + }, + { /* Fourth byte table 79. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 4, 6, 8, + 10, 12, 14, 16, 18, 20, 24, 26, + 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 54, 60, 66, 72, 78, + 84, 90, 96, 102, 108, 114, 120, 126, + 132, 138, 144, 150, 156, 158, 160, 162, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, + }, + { /* Fourth byte table 80. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, + }, + { /* Fourth byte table 81. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 54, 60, 68, 76, 84, 92, 100, + 108, 116, 122, 155, 170, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, + }, + { /* Fourth byte table 82. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 5, 8, 9, 10, 11, 12, + 13, 14, 17, 20, 23, 26, 29, 32, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 83. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 15, 15, + 15, 15, 18, 21, 24, 27, 28, 29, + 30, 31, 34, 35, 35, 36, 37, 38, + 39, 42, 43, 44, 45, 46, 49, 52, + 53, 54, 55, 56, 57, 58, 59, 60, + 60, 61, 62, 63, 64, 64, 64, 64, + 64, 67, 71, 74, 74, 77, 77, 80, + 84, 87, 91, 94, 98, 101, 105, 108, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, + }, + { /* 
Fourth byte table 84. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 6, 10, 14, 18, 22, 26, + 30, 34, 38, 42, 46, 50, 52, 54, + 56, 58, 60, 62, 64, 66, 68, 70, + 72, 74, 76, 78, 80, 82, 84, 86, + 88, 90, 92, 94, 96, 98, 100, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, + 136, 138, 140, 142, 144, 146, 148, 150, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, + }, + { /* Fourth byte table 85. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + 96, 98, 100, 102, 104, 106, 112, 118, + 124, 130, 136, 142, 146, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, + }, + { /* Fourth byte table 86. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 87. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 34, 37, 40, 43, 46, 49, 52, 55, + 58, 61, 64, 67, 70, 73, 76, 79, + 82, 85, 88, 91, 94, 97, 100, 103, + 106, 109, 112, 115, 118, 121, 124, 127, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 
130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 88. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, + }, + { /* Fourth byte table 89. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 6, 9, 12, 15, + 18, 18, 18, 21, 24, 27, 30, 33, + 36, 36, 36, 39, 42, 45, 48, 51, + 54, 54, 54, 57, 60, 63, 63, 63, + 63, 65, 67, 69, 72, 74, 76, 79, + 79, 82, 85, 88, 91, 94, 97, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, + }, + { /* Fourth byte table 90. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 18, 31, 44, 57, 70, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, + }, + { /* Fourth byte table 91. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 18, 31, 44, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, + }, + { /* Fourth byte table 92. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, + }, + { /* Fourth byte table 93. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 94. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 95. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 29, 30, + 31, 31, 31, 32, 32, 32, 33, 34, + 34, 34, 35, 36, 37, 38, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 50, 51, 51, 52, 53, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 96. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 2, 3, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 97. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 6, + 7, 8, 9, 10, 10, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 18, 19, + 20, 21, 22, 23, 24, 25, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 53, 54, 55, 56, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, + }, + { /* Fourth byte table 98. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 5, 6, + 6, 6, 6, 7, 8, 9, 10, 11, + 12, 13, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, + }, + { /* Fourth byte table 99. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 100. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 101. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 102. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 103. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 36, 36, 36, + 36, 38, 40, 42, 44, 46, 48, 50, + 52, 54, 56, 58, 60, 62, 64, 66, + 68, 70, 72, 74, 76, 78, 80, 82, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, + }, + { /* Fourth byte table 104. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 58, 60, 62, 64, + 66, 68, 70, 72, 74, 76, 78, 80, + 82, 84, 86, 88, 90, 92, 94, 96, + 98, 100, 102, 104, 106, 108, 110, 112, + 114, 116, 118, 120, 123, 125, 127, 129, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, + }, + { /* Fourth byte table 105. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 85, 87, 89, 91, 93, 95, + 97, 99, 101, 103, 105, 107, 110, 112, + 114, 116, 118, 120, 122, 124, 126, 128, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 106. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 85, 87, 89, 91, 93, 95, + 98, 100, 102, 104, 106, 108, 110, 112, + 114, 116, 118, 120, 122, 124, 126, 128, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 107. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 86, 88, 90, 92, 94, 96, + 98, 100, 102, 104, 106, 108, 110, 112, + 114, 116, 118, 120, 122, 124, 126, 128, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 108. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 9, 11, 13, 15, + 17, 19, 21, 21, 21, 21, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, + }, + { /* Fourth byte table 109. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 9, 13, 17, 21, 25, 29, + 33, 37, 42, 46, 50, 54, 58, 62, + 66, 71, 75, 80, 85, 90, 94, 98, + 102, 106, 110, 114, 118, 122, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 
127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, + }, + { /* Fourth byte table 110. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 111. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 112. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 113. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 114. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 115. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 116. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 117. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + }, + { + { /* Fourth byte table 0. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 4, 5, 5, 5, 5, 5, + 8, 8, 8, 9, 10, 13, 15, 15, + 15, 18, 19, 20, 20, 25, 30, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 1. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 24, + 28, 32, 36, 40, 44, 48, 52, 56, + 60, 60, 64, 68, 72, 76, 80, 84, + 84, 84, 88, 92, 96, 100, 104, 104, + 104, 108, 112, 116, 120, 124, 128, 128, + 132, 136, 140, 144, 148, 152, 156, 160, + 164, 164, 168, 172, 176, 180, 184, 188, + 188, 188, 192, 196, 200, 204, 208, 208, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 
212, 212, + 212, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 64, 64, 68, 72, 76, 80, 84, + 88, 92, 96, 100, 104, 108, 112, 116, + 120, 124, 128, 132, 136, 140, 144, 144, + 144, 148, 152, 156, 160, 164, 168, 172, + 176, 180, 180, 182, 184, 188, 192, 196, + 200, 200, 204, 208, 212, 216, 220, 224, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 7, 11, 15, 19, + 23, 27, 30, 30, 30, 34, 38, 42, + 46, 50, 54, 54, 54, 58, 62, 66, + 70, 74, 78, 82, 86, 90, 94, 98, + 102, 106, 110, 114, 118, 122, 126, 126, + 126, 130, 134, 138, 142, 146, 150, 154, + 158, 162, 166, 170, 174, 178, 182, 186, + 190, 194, 198, 202, 206, 210, 214, 218, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 219, 219, 219, 219, 219, 219, + 219, + }, + { /* Fourth byte table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 12, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 5. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 8, 12, + 14, 16, 18, 20, 22, 24, 28, 32, + 36, 40, 44, 48, 52, 56, 62, 68, + 74, 80, 86, 92, 98, 104, 104, 110, + 116, 122, 128, 133, 138, 138, 138, 142, + 146, 150, 154, 158, 162, 168, 174, 179, + 184, 188, 190, 192, 194, 198, 202, 202, + 202, 206, 210, 216, 222, 227, 232, 237, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 
242, 242, + 242, + }, + { /* Fourth byte table 6. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 112, 112, 116, + 120, 120, 120, 120, 120, 120, 120, 124, + 128, 132, 136, 142, 148, 154, 160, 164, + 168, 174, 180, 184, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 188, + 188, + }, + { /* Fourth byte table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 3, 4, 5, 7, 9, 11, + 12, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, + }, + { /* Fourth byte table 8. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 20, 21, 22, 23, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 6, 9, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 17, 17, 17, + 17, 17, 17, 20, 20, 20, 20, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 14, 19, + 22, 27, 32, 37, 37, 42, 42, 47, + 52, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 64, 69, 74, 79, 84, + 89, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 10, 15, 20, 25, + 25, 27, 29, 31, 41, 51, 53, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 57, 59, 61, 61, 63, 65, 65, + 65, 65, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 10, 15, 15, 15, 15, + 20, 20, 20, 20, 20, 25, 30, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 10, 15, 15, 15, 15, + 20, 20, 20, 20, 20, 25, 30, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 40, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 15, 20, 25, 30, 30, 30, 35, + 40, 40, 40, 45, 50, 55, 60, 65, + 70, 70, 70, 75, 80, 85, 90, 95, + 100, 100, 100, 105, 110, 115, 120, 125, + 130, 135, 140, 145, 150, 155, 160, 160, + 160, 165, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 170, 170, 170, 170, 170, 170, + 170, 
170, 170, 170, 170, 170, 170, 170, + 170, + }, + { /* Fourth byte table 15. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 10, 15, 20, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 8, + 12, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 5, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 14, 14, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 21, 28, 35, 42, 49, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 21, 28, 28, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 7, 7, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 14, 21, 21, 21, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 7, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 28, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 7, 7, 7, 7, 7, + 14, 21, 21, 28, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 14, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7, 7, 14, 24, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, + }, + { /* Fourth byte table 33. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, + }, + { /* Fourth byte table 34. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 35. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 14, 14, + 14, 14, 14, 21, 21, 21, 21, 21, + 28, 28, 28, 28, 28, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 49, 49, 56, 63, + 72, 79, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, + }, + { /* Fourth byte table 36. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 21, 21, + 21, 21, 21, 28, 28, 28, 28, 28, + 35, 35, 35, 35, 35, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, + 49, + }, + { /* Fourth byte table 37. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, + }, + { /* Fourth byte table 38. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 39. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, + 7, 14, 14, 21, 21, 28, 28, 35, + 35, 35, 35, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 49, 49, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 40. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 14, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 41. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 3, 4, + 4, 5, 6, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 16, 17, 19, 20, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 42. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 6, 8, 11, + 12, 13, 14, 16, 18, 20, 21, 21, + 22, 23, 25, 26, 28, 31, 34, 35, + 36, 37, 40, 42, 43, 46, 48, 50, + 52, 54, 56, 57, 58, 59, 60, 62, + 64, 66, 68, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, + }, + { /* Fourth byte table 43. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 3, 5, 7, + 9, 10, 12, 14, 16, 18, 20, 22, + 25, 27, 29, 32, 34, 36, 38, 40, + 42, 44, 46, 48, 50, 52, 54, 56, + 58, 61, 63, 65, 66, 68, 70, 72, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, + }, + { /* Fourth byte table 44. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 21, 21, 21, 21, + 21, 21, 24, 24, 24, 24, 24, 24, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, 30, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 34, 34, 34, 34, 40, 49, 49, 55, + 64, 64, 64, 64, 64, 66, 66, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, + }, + { /* Fourth byte table 45. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 4, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 20, 21, 21, 21, 22, 23, 24, + 25, 26, 27, 28, 31, 32, 33, 34, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, + 35, + }, + { /* Fourth byte table 46. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 14, 15, 16, 17, + 17, 18, 19, 20, 21, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, + 23, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, + }, + { /* Fourth byte table 47. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 7, 10, 10, 13, 16, + 18, 18, 21, 22, 23, 24, 25, 26, + 28, 29, 30, 31, 32, 32, 33, 35, + 35, 35, 36, 37, 38, 39, 40, 40, + 40, 42, 45, 47, 47, 48, 48, 51, + 51, 52, 52, 54, 58, 59, 60, 60, + 61, 62, 63, 63, 64, 65, 67, 69, + 71, 73, 74, 74, 77, 79, 81, 83, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, + 85, + }, + { /* Fourth byte table 48. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 4, 5, + 6, 7, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 13, 18, 23, 28, + 33, 38, 43, 48, 53, 58, 63, 68, + 72, 73, 75, 78, 80, 81, 83, 86, + 90, 92, 93, 95, 98, 99, 100, 101, + 102, 103, 105, 108, 110, 111, 113, 116, + 120, 122, 123, 125, 128, 129, 130, 131, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 
132, 132, 132, 132, 132, + 132, + }, + { /* Fourth byte table 49. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, + }, + { /* Fourth byte table 50. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 12, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, + }, + { /* Fourth byte table 51. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 6, 6, + 6, 6, 12, 12, 12, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 24, 24, 30, + 30, 30, 30, 30, 30, 36, 45, 45, + 51, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, + }, + { /* Fourth byte table 52. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 6, 6, 6, 12, 12, 12, + 18, 18, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 28, 28, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 40, 44, + 48, 54, 60, 60, 60, 66, 72, 72, + 72, 78, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, + }, + { /* Fourth byte table 53. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 12, 12, 18, 24, 24, + 24, 30, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 42, 48, 54, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, + }, + { /* Fourth byte table 54. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 24, 24, 24, + 24, 24, 24, 30, 36, 42, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 55. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, + }, + { /* Fourth byte table 56. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 11, 13, 15, 17, 19, 21, + 23, 25, 27, 29, 31, 34, 37, 40, + 43, 46, 49, 52, 55, 58, 62, 66, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, + }, + { /* Fourth byte table 57. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 53, 56, 59, 62, 65, 68, + 71, 74, 77, 80, 83, 86, 89, 92, + 95, 98, 101, 104, 107, 110, 113, 116, + 119, 122, 125, 128, 131, 134, 137, 140, + 143, 146, 149, 152, 155, 158, 161, 162, + 163, 164, 165, 166, 167, 168, 169, 170, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 
171, 171, 171, + 171, + }, + { /* Fourth byte table 58. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, + }, + { /* Fourth byte table 59. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, + }, + { /* Fourth byte table 60. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 5, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, + }, + { /* Fourth byte table 61. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, + }, + { /* Fourth byte table 62. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 63. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 64. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 65. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 66. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 67. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 68. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, + }, + { /* Fourth byte table 69. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 4, + 4, 7, 10, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, + }, + { /* Fourth byte table 70. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 14, + 14, 21, 21, 28, 28, 35, 35, 42, + 42, 49, 49, 56, 56, 63, 63, 70, + 70, 77, 77, 84, 84, 84, 91, 91, + 98, 98, 105, 105, 105, 105, 105, 105, + 105, 112, 119, 119, 126, 133, 133, 140, + 147, 147, 154, 161, 161, 168, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, 175, 175, 175, 175, 175, 175, 175, + 175, + }, + { /* Fourth byte table 71. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 7, + 7, 7, 7, 7, 11, 15, 15, 22, + 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 35, 35, 42, + 42, 49, 49, 56, 56, 63, 63, 70, + 70, 77, 77, 84, 84, 91, 91, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, + 98, + }, + { /* Fourth byte table 72. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 7, 14, 14, 14, 21, 21, + 28, 28, 35, 35, 35, 35, 35, 35, + 35, 42, 49, 49, 56, 63, 63, 70, + 77, 77, 84, 91, 91, 98, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 112, 112, 112, + 119, 126, 133, 140, 140, 140, 140, 147, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 153, 153, 153, 153, 153, 153, 153, + 153, 
153, 153, 153, 153, 153, 153, 153, + 153, + }, + { /* Fourth byte table 73. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 6, 9, 12, 15, 18, + 21, 24, 27, 30, 33, 36, 39, 42, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, + }, + { /* Fourth byte table 74. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, + }, + { /* Fourth byte table 75. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 45, 45, 45, 48, 51, 54, 57, 60, + 63, 66, 69, 72, 75, 78, 81, 84, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 76. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 15, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 23, 25, 27, 29, 31, 33, 35, + 37, 39, 41, 43, 45, 47, 49, 51, + 53, 56, 59, 62, 65, 68, 71, 74, + 77, 80, 83, 86, 89, 92, 95, 101, + 107, 113, 119, 125, 131, 137, 143, 149, + 155, 161, 167, 173, 179, 194, 206, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 
+ 212, 212, 212, 212, 212, 212, 212, 212, + 212, + }, + { /* Fourth byte table 77. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 149, 151, 153, 155, 157, 159, + 161, 163, 165, 167, 169, 171, 173, 175, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 177, 177, 177, 177, 177, + 177, + }, + { /* Fourth byte table 78. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 41, 46, 51, 53, 56, 58, + 61, 64, 67, 70, 73, 76, 79, 82, + 85, 88, 91, 94, 97, 100, 103, 106, + 109, 112, 115, 118, 121, 124, 127, 130, + 133, 136, 139, 142, 145, 148, 151, 154, + 157, 160, 163, 166, 169, 172, 175, 178, + 181, 184, 187, 190, 193, 196, 199, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, 202, 202, 202, 202, 202, 202, 202, + 202, + }, + { /* Fourth byte table 79. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 7, 9, 11, 13, 15, + 17, 20, 24, 26, 28, 31, 34, 36, + 38, 40, 43, 46, 49, 52, 55, 57, + 59, 61, 63, 65, 68, 70, 72, 74, + 77, 80, 82, 85, 88, 91, 93, 96, + 101, 107, 109, 112, 115, 118, 121, 128, + 136, 138, 140, 143, 145, 147, 149, 152, + 154, 156, 158, 160, 162, 165, 167, 169, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, 171, 171, 171, 171, 171, 171, 171, + 171, + }, + { /* Fourth byte table 80. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 10, 12, 14, 16, 22, + 25, 27, 29, 31, 33, 35, 37, 39, + 41, 43, 45, 48, 50, 52, 55, 58, + 60, 64, 67, 69, 71, 73, 75, 80, + 85, 89, 93, 97, 101, 105, 109, 113, + 117, 121, 126, 131, 136, 141, 146, 151, + 156, 161, 166, 171, 176, 181, 186, 191, + 196, 201, 206, 211, 216, 221, 226, 231, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 234, 234, + 234, + }, + { /* Fourth byte table 81. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 56, + 56, 60, 60, 64, 64, 64, 68, 72, + 76, 80, 84, 88, 92, 96, 100, 104, + 104, 108, 108, 112, 112, 112, 116, 120, + 120, 120, 120, 124, 128, 132, 136, 136, + 136, 140, 144, 148, 152, 156, 160, 164, + 168, 172, 176, 180, 184, 188, 192, 196, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 200, 200, 200, + 200, + }, + { /* Fourth byte table 82. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 172, 172, 172, 172, + 172, 176, 180, 184, 188, 192, 196, 200, + 204, 208, 212, 216, 220, 224, 228, 232, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, 236, 236, 236, 236, 236, 236, 236, + 236, + }, + { /* Fourth byte table 83. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 65, 70, 75, 79, 83, 87, 92, 97, + 102, 106, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, + }, + { /* Fourth byte table 84. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 9, 12, 14, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 20, 24, 28, 32, + 36, 36, 36, 36, 36, 36, 41, 41, + 46, 48, 50, 52, 54, 56, 58, 60, + 62, 64, 65, 70, 75, 82, 89, 94, + 99, 104, 109, 114, 119, 124, 129, 134, + 134, 139, 144, 149, 154, 159, 159, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, + }, + { /* Fourth byte table 85. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 10, 15, 20, 20, 25, + 30, 35, 40, 45, 50, 55, 60, 65, + 69, 71, 73, 75, 77, 79, 81, 83, + 85, 87, 89, 91, 93, 95, 97, 99, + 101, 103, 105, 107, 109, 111, 113, 115, + 117, 119, 121, 123, 125, 127, 129, 131, + 133, 135, 137, 139, 141, 143, 145, 147, + 149, 151, 153, 155, 157, 159, 161, 163, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, + 165, + }, + { /* Fourth byte table 86. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 76, 80, 82, + 84, 86, 88, 90, 92, 94, 96, 98, + 100, 104, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, + 108, + }, + { /* Fourth byte table 87. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 4, 6, 8, + 10, 12, 14, 16, 18, 20, 24, 26, + 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 54, 60, 66, 72, 78, + 84, 90, 96, 102, 108, 114, 120, 126, + 132, 138, 144, 150, 156, 158, 160, 162, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, + 164, + }, + { /* Fourth byte table 88. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, + 248, + }, + { /* Fourth byte table 89. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 54, 60, 68, 76, 84, 92, 100, + 108, 116, 122, 155, 170, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, + 178, + }, + { /* Fourth byte table 90. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 4, 7, 8, 9, 10, 11, + 14, 17, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 22, 25, 28, 29, 30, 31, 32, + 33, 34, 37, 40, 43, 46, 49, 52, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, + }, + { /* Fourth byte table 91. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 15, 15, + 16, 17, 20, 23, 26, 29, 30, 31, + 32, 33, 36, 37, 37, 38, 39, 40, + 41, 44, 45, 46, 47, 48, 51, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 62, 63, 64, 65, 66, 66, 66, 66, + 66, 69, 73, 76, 76, 79, 79, 82, + 86, 89, 93, 96, 100, 103, 107, 110, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 
114, 114, + 114, + }, + { /* Fourth byte table 92. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 6, 10, 14, 18, 22, 26, + 30, 34, 38, 42, 46, 50, 52, 54, + 56, 58, 60, 62, 64, 66, 68, 70, + 72, 74, 76, 78, 80, 82, 84, 86, + 88, 90, 92, 94, 96, 98, 100, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, + 136, 138, 140, 142, 144, 146, 148, 150, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, + }, + { /* Fourth byte table 93. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + 96, 98, 100, 102, 104, 106, 112, 118, + 124, 130, 136, 142, 146, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, + }, + { /* Fourth byte table 94. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 95. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 34, 37, 40, 43, 46, 49, 52, 55, + 58, 61, 64, 67, 70, 73, 76, 79, + 82, 85, 88, 91, 94, 97, 100, 103, + 106, 109, 112, 115, 118, 121, 124, 127, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 
130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 96. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 144, 147, 150, 153, 156, 159, 162, 165, + 168, 171, 174, 177, 180, 183, 186, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, 189, 189, 189, 189, 189, 189, 189, + 189, + }, + { /* Fourth byte table 97. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 6, 9, 12, 15, + 18, 18, 18, 21, 24, 27, 30, 33, + 36, 36, 36, 39, 42, 45, 48, 51, + 54, 54, 54, 57, 60, 63, 63, 63, + 63, 65, 67, 69, 72, 74, 76, 79, + 79, 82, 85, 88, 91, 94, 97, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, + }, + { /* Fourth byte table 98. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 18, 31, 44, 57, 70, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, + }, + { /* Fourth byte table 99. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 18, 31, 44, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, + }, + { /* Fourth byte table 100. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, + }, + { /* Fourth byte table 101. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 102. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 103. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 29, 30, + 31, 31, 31, 32, 32, 32, 33, 34, + 34, 34, 35, 36, 37, 38, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 50, 51, 51, 52, 53, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 104. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 105. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 6, + 7, 8, 9, 10, 10, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 18, 19, + 20, 21, 22, 23, 24, 25, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 53, 54, 55, 56, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, + }, + { /* Fourth byte table 106. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 5, 6, + 6, 6, 6, 7, 8, 9, 10, 11, + 12, 13, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, + }, + { /* Fourth byte table 107. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 108. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 109. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 110. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 111. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 38, 40, 40, + 40, 42, 44, 46, 48, 50, 52, 54, + 56, 58, 60, 62, 64, 66, 68, 70, + 72, 74, 76, 78, 80, 82, 84, 86, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, + 88, + }, + { /* Fourth byte table 112. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 58, 60, 62, 64, + 66, 68, 70, 72, 74, 76, 78, 80, + 82, 84, 86, 88, 90, 92, 94, 96, + 98, 100, 102, 104, 106, 108, 110, 112, + 114, 116, 118, 120, 123, 125, 127, 129, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, + 131, + }, + { /* Fourth byte table 113. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 85, 87, 89, 91, 93, 95, + 97, 99, 101, 103, 105, 107, 110, 112, + 114, 116, 118, 120, 122, 124, 126, 128, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 114. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 85, 87, 89, 91, 93, 95, + 98, 100, 102, 104, 106, 108, 110, 112, + 114, 116, 118, 120, 122, 124, 126, 128, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 115. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 86, 88, 90, 92, 94, 96, + 98, 100, 102, 104, 106, 108, 110, 112, + 114, 116, 118, 120, 122, 124, 126, 128, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, + }, + { /* Fourth byte table 116. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 25, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, + }, + { /* Fourth byte table 117. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 9, 13, 17, 21, 25, 29, + 33, 37, 42, 46, 50, 54, 58, 62, + 66, 71, 75, 80, 85, 90, 94, 98, + 102, 106, 110, 114, 118, 122, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 
127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, + 127, + }, + }, +}; + +static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { + { + { /* Fourth byte 16-bit table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 38, 44, 48, 52, 56, 60, 64, + 68, 72, 76, 80, 84, 90, 96, 102, + 108, 112, 116, 120, 124, 130, 136, 140, + 144, 148, 152, 156, 160, 164, 168, 172, + 176, 180, 184, 188, 192, 196, 200, 206, + 212, 216, 220, 224, 228, 232, 236, 240, + 244, 250, 256, 260, 264, 268, 272, 276, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, + }, + { /* Fourth byte 16-bit table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 54, 60, 66, + 72, 78, 84, 90, 96, 100, 104, 108, + 112, 116, 120, 124, 128, 134, 140, 144, + 148, 152, 156, 160, 164, 170, 176, 182, + 188, 194, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 262, 268, 274, 280, 284, 288, 292, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, + }, + { /* Fourth byte 16-bit table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 107, 116, 116, 116, 116, + 116, 120, 124, 128, 132, 138, 144, 150, + 156, 162, 168, 174, 180, 186, 192, 198, + 204, 210, 216, 222, 228, 234, 240, 246, + 252, 256, 260, 264, 268, 272, 276, 282, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, + }, + { /* Fourth byte 16-bit table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 52, 56, 60, 64, 68, 72, 76, + 80, 86, 92, 98, 104, 110, 116, 122, + 128, 134, 140, 146, 152, 158, 164, 170, + 176, 182, 188, 194, 200, 204, 208, 212, + 216, 222, 228, 234, 240, 246, 252, 258, + 264, 270, 276, 280, 284, 288, 292, 296, + 300, 304, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, + }, + { /* Fourth byte 16-bit table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 17, 24, 31, 38, 45, + 52, 57, 62, 69, 76, 83, 90, 97, + 104, 109, 114, 121, 128, 135, 142, 142, + 142, 147, 152, 159, 166, 173, 180, 180, + 180, 185, 190, 197, 204, 211, 218, 225, + 232, 237, 242, 249, 256, 263, 270, 277, + 284, 289, 294, 301, 308, 315, 322, 329, + 336, 341, 346, 353, 360, 367, 374, 381, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, + }, + { /* Fourth byte 16-bit table 5. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 17, 24, 31, 38, 38, + 38, 43, 48, 55, 62, 69, 76, 76, + 76, 81, 86, 93, 100, 107, 114, 121, + 128, 128, 133, 133, 140, 140, 147, 147, + 154, 159, 164, 171, 178, 185, 192, 199, + 206, 211, 216, 223, 230, 237, 244, 251, + 258, 263, 268, 273, 278, 283, 288, 293, + 298, 303, 308, 313, 318, 323, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, + }, + { /* Fourth byte 16-bit table 6. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 23, 32, 41, 50, 59, + 68, 75, 82, 91, 100, 109, 118, 127, + 136, 143, 150, 159, 168, 177, 186, 195, + 204, 211, 218, 227, 236, 245, 254, 263, + 272, 279, 286, 295, 304, 313, 322, 331, + 340, 347, 354, 363, 372, 381, 390, 399, + 408, 413, 418, 425, 430, 437, 437, 442, + 449, 454, 459, 464, 469, 474, 477, 480, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, + }, + { /* Fourth byte 16-bit table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 14, 21, 26, 33, 33, 38, + 45, 50, 55, 60, 65, 70, 82, 94, + 106, 111, 116, 123, 130, 130, 130, 135, + 142, 147, 152, 157, 162, 162, 174, 186, + 198, 203, 208, 215, 222, 227, 232, 237, + 244, 249, 254, 259, 264, 269, 280, 291, + 293, 293, 293, 300, 305, 312, 312, 317, + 324, 329, 334, 339, 344, 349, 356, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, + }, + { /* Fourth byte 16-bit table 8. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 15, 20, 25, 30, 35, + 40, 45, 50, 55, 60, 65, 70, 78, + 86, 94, 102, 110, 118, 126, 134, 142, + 150, 158, 166, 174, 182, 190, 190, 190, + 190, 195, 200, 205, 210, 215, 220, 225, + 230, 235, 240, 245, 250, 255, 260, 265, + 270, 275, 280, 285, 290, 295, 300, 305, + 310, 315, 320, 325, 330, 335, 340, 345, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 350, 350, 350, 350, 350, + 350, + }, + { /* Fourth byte 16-bit table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 27, 42, 51, 66, 75, 84, + 102, 114, 123, 132, 141, 153, 165, 177, + 189, 201, 213, 225, 243, 249, 267, 285, + 300, 312, 330, 348, 360, 369, 378, 390, + 402, 417, 432, 441, 450, 462, 471, 480, + 486, 492, 501, 510, 528, 540, 555, 573, + 585, 594, 603, 621, 633, 651, 660, 675, + 684, 696, 705, 717, 732, 744, 759, 771, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, + }, + { /* Fourth byte 16-bit table 10. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 24, 33, 45, 54, 63, 72, + 87, 99, 105, 123, 132, 147, 159, 171, + 180, 189, 201, 207, 219, 234, 240, 258, + 267, 271, 275, 279, 283, 287, 291, 295, + 299, 303, 307, 312, 317, 322, 327, 332, + 337, 342, 347, 352, 357, 362, 367, 372, + 377, 382, 385, 387, 389, 392, 394, 396, + 396, 396, 396, 396, 402, 408, 414, 420, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, 432, 432, 432, 432, 432, 432, 432, + 432, + }, + { /* Fourth byte 16-bit table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 12. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 14. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 34, 38, + 42, 46, 50, 54, 58, 62, 66, 70, + 74, 78, 82, 86, 90, 94, 98, 102, + 106, 110, 114, 118, 122, 126, 130, 134, + 138, 142, 146, 150, 154, 158, 162, 166, + 170, 174, 178, 182, 186, 190, 194, 198, + 202, 206, 210, 214, 218, 222, 226, 230, + 234, 238, 242, 246, 250, 254, 258, 262, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, + }, + { /* Fourth byte 16-bit table 16. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 125, + 130, 135, 140, 145, 150, 156, 162, 168, + 174, 180, 186, 190, 194, 198, 202, 206, + 210, 214, 218, 222, 226, 230, 234, 238, + 242, 246, 250, 254, 258, 262, 266, 270, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, + }, + { /* Fourth byte 16-bit table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 98, 104, 110, 116, 122, 126, 130, 134, + 138, 142, 146, 150, 154, 158, 162, 166, + 170, 174, 178, 182, 186, 190, 194, 198, + 202, 206, 210, 214, 218, 222, 226, 230, + 234, 238, 242, 246, 250, 254, 258, 262, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, + }, + { /* Fourth byte 16-bit table 18. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 130, 136, 140, 144, 148, 152, 156, 160, + 164, 168, 172, 176, 180, 184, 188, 192, + 196, 200, 204, 210, 216, 222, 226, 230, + 234, 238, 242, 246, 250, 254, 258, 262, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, + }, + { /* Fourth byte 16-bit table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 54, 60, 66, 72, 78, 84, 90, + 96, 102, 108, 114, 120, 126, 132, 138, + 144, 150, 156, 162, 168, 174, 180, 186, + 192, 198, 204, 210, 216, 222, 228, 234, + 240, 246, 252, 258, 264, 270, 276, 282, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, + }, + { /* Fourth byte 16-bit table 20. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 54, 60, 66, 72, 78, 84, 90, + 96, 96, 96, 102, 108, 114, 120, 126, + 132, 138, 144, 150, 156, 162, 168, 174, + 180, 186, 192, 198, 204, 210, 216, 222, + 228, 234, 240, 246, 252, 258, 264, 270, + 276, 282, 288, 294, 300, 306, 312, 318, + 324, 330, 336, 342, 348, 354, 360, 366, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, + }, + { /* Fourth byte 16-bit table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 17, 21, 25, 29, + 33, 37, 41, 45, 49, 53, 58, 62, + 66, 70, 74, 79, 83, 87, 91, 96, + 100, 104, 108, 112, 116, 121, 125, 129, + 133, 137, 141, 145, 149, 153, 157, 161, + 165, 169, 173, 177, 181, 185, 189, 193, + 197, 201, 205, 209, 213, 218, 222, 226, + 230, 235, 239, 243, 247, 251, 255, 259, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, + }, + { /* Fourth byte 16-bit table 22. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 105, 109, 113, 117, 121, 125, + 129, 134, 139, 143, 147, 151, 155, 159, + 163, 167, 171, 175, 179, 184, 188, 192, + 196, 200, 205, 209, 213, 217, 221, 225, + 229, 233, 237, 241, 246, 250, 255, 259, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, + }, + { /* Fourth byte 16-bit table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 41, 45, 49, 53, 57, 61, + 66, 70, 75, 80, 84, 88, 92, 96, + 101, 106, 110, 114, 118, 122, 126, 130, + 134, 138, 142, 146, 150, 155, 159, 163, + 167, 171, 175, 179, 183, 187, 191, 195, + 199, 203, 207, 211, 215, 219, 223, 227, + 231, 236, 240, 244, 248, 252, 256, 261, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, + }, + { /* Fourth byte 16-bit table 24. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 45, 49, 53, 57, 61, + 65, 69, 73, 77, 81, 85, 89, 93, + 97, 101, 105, 109, 113, 117, 122, 126, + 130, 134, 138, 142, 147, 151, 155, 159, + 163, 167, 171, 175, 179, 184, 188, 192, + 196, 201, 205, 209, 213, 217, 221, 225, + 230, 235, 240, 244, 249, 253, 257, 261, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, + }, + { /* Fourth byte 16-bit table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 29, + 33, 37, 41, 45, 49, 53, 58, 62, + 66, 71, 76, 80, 84, 88, 92, 96, + 100, 104, 108, 112, 117, 121, 126, 130, + 135, 139, 143, 147, 152, 156, 160, 165, + 170, 174, 178, 182, 186, 190, 194, 198, + 202, 206, 210, 214, 218, 222, 227, 231, + 236, 240, 245, 249, 254, 259, 264, 268, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, + }, + { /* Fourth byte 16-bit table 26. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 9, 14, 19, 24, 28, 32, + 36, 40, 44, 48, 52, 56, 61, 65, + 69, 73, 77, 82, 86, 91, 96, 100, + 104, 108, 112, 116, 120, 125, 130, 135, + 139, 143, 148, 152, 156, 160, 165, 169, + 173, 177, 181, 185, 190, 194, 198, 202, + 206, 210, 214, 219, 224, 228, 233, 237, + 242, 246, 250, 254, 259, 264, 268, 273, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, + }, + { /* Fourth byte 16-bit table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 9, 13, 17, 21, 25, 29, + 34, 39, 44, 49, 53, 57, 61, 65, + 69, 73, 77, 81, 85, 89, 93, 97, + 102, 106, 110, 114, 118, 122, 126, 130, + 134, 138, 142, 146, 150, 155, 160, 165, + 169, 173, 177, 181, 186, 190, 195, 199, + 203, 208, 213, 217, 221, 225, 229, 233, + 237, 241, 245, 249, 253, 257, 261, 265, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, + }, + { /* Fourth byte 16-bit table 28. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 25, 29, + 33, 37, 41, 45, 50, 55, 59, 63, + 67, 71, 75, 79, 84, 88, 92, 96, + 100, 105, 110, 114, 118, 122, 127, 131, + 135, 140, 145, 149, 153, 157, 162, 166, + 170, 174, 178, 182, 186, 190, 195, 199, + 203, 207, 212, 216, 220, 224, 228, 233, + 238, 242, 246, 250, 255, 259, 264, 268, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, + }, + { /* Fourth byte 16-bit table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + }, + { + { /* Fourth byte 16-bit table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 38, 44, 48, 52, 56, 60, 64, + 68, 72, 76, 80, 84, 90, 96, 102, + 108, 112, 116, 120, 124, 130, 136, 140, + 144, 148, 152, 156, 160, 164, 168, 172, + 176, 180, 184, 188, 192, 196, 200, 206, + 212, 216, 220, 224, 228, 232, 236, 240, + 244, 250, 256, 260, 264, 268, 272, 276, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, + }, + { /* Fourth byte 16-bit table 
1. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 54, 60, 66, + 72, 78, 84, 90, 96, 100, 104, 108, + 112, 116, 120, 124, 128, 134, 140, 144, + 148, 152, 156, 160, 164, 170, 176, 182, + 188, 194, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 262, 268, 274, 280, 284, 288, 292, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, 296, 296, 296, 296, 296, 296, 296, + 296, + }, + { /* Fourth byte 16-bit table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 107, 116, 116, 116, 116, + 116, 120, 124, 128, 132, 138, 144, 150, + 156, 162, 168, 174, 180, 186, 192, 198, + 204, 210, 216, 222, 228, 234, 240, 246, + 252, 256, 260, 264, 268, 272, 276, 282, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, + }, + { /* Fourth byte 16-bit table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 52, 56, 60, 64, 68, 72, 76, + 80, 86, 92, 98, 104, 110, 116, 122, + 128, 134, 140, 146, 152, 158, 164, 170, + 176, 182, 188, 194, 200, 204, 208, 212, + 216, 222, 228, 234, 240, 246, 252, 258, + 264, 270, 276, 280, 284, 288, 292, 296, + 300, 304, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, 308, 308, 308, 308, 308, 308, 308, + 308, + }, + { /* Fourth byte 16-bit table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 17, 24, 31, 38, 45, + 52, 57, 62, 69, 76, 83, 90, 97, + 104, 109, 114, 121, 128, 135, 142, 142, + 142, 147, 152, 159, 166, 173, 180, 180, + 180, 185, 190, 197, 204, 211, 218, 225, + 232, 237, 242, 249, 256, 263, 270, 277, + 284, 289, 294, 301, 308, 315, 322, 329, + 336, 341, 346, 353, 360, 367, 374, 381, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, 388, 388, 388, 388, 388, 388, 388, + 388, + }, + { /* Fourth byte 16-bit table 5. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 17, 24, 31, 38, 38, + 38, 43, 48, 55, 62, 69, 76, 76, + 76, 81, 86, 93, 100, 107, 114, 121, + 128, 128, 133, 133, 140, 140, 147, 147, + 154, 159, 164, 171, 178, 185, 192, 199, + 206, 211, 216, 223, 230, 237, 244, 251, + 258, 263, 268, 273, 278, 283, 288, 293, + 298, 303, 308, 313, 318, 323, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, 328, 328, 328, 328, 328, 328, 328, + 328, + }, + { /* Fourth byte 16-bit table 6. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 14, 23, 32, 41, 50, 59, + 68, 75, 82, 91, 100, 109, 118, 127, + 136, 143, 150, 159, 168, 177, 186, 195, + 204, 211, 218, 227, 236, 245, 254, 263, + 272, 279, 286, 295, 304, 313, 322, 331, + 340, 347, 354, 363, 372, 381, 390, 399, + 408, 413, 418, 425, 430, 437, 437, 442, + 449, 454, 459, 464, 469, 474, 477, 480, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, 483, 483, 483, 483, 483, 483, 483, + 483, + }, + { /* Fourth byte 16-bit table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 14, 21, 26, 33, 33, 38, + 45, 50, 55, 60, 65, 70, 82, 94, + 106, 111, 116, 123, 130, 130, 130, 135, + 142, 147, 152, 157, 162, 162, 174, 186, + 198, 203, 208, 215, 222, 227, 232, 237, + 244, 249, 254, 259, 264, 269, 280, 291, + 293, 293, 293, 300, 305, 312, 312, 317, + 324, 329, 334, 339, 344, 349, 356, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 359, 359, 359, 359, 359, 359, + 359, + }, + { /* Fourth byte 16-bit table 8. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 10, 15, 20, 25, 30, 35, + 40, 45, 50, 55, 60, 65, 70, 78, + 86, 94, 102, 110, 118, 126, 134, 142, + 150, 158, 166, 174, 182, 190, 207, 221, + 221, 226, 231, 236, 241, 246, 251, 256, + 261, 266, 271, 276, 281, 286, 291, 296, + 301, 306, 311, 316, 321, 326, 331, 336, + 341, 346, 351, 356, 361, 366, 371, 376, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, 381, 381, 381, 381, 381, 381, 381, + 381, + }, + { /* Fourth byte 16-bit table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 27, 42, 51, 66, 75, 84, + 102, 114, 123, 132, 141, 153, 165, 177, + 189, 201, 213, 225, 243, 249, 267, 285, + 300, 312, 330, 348, 360, 369, 378, 390, + 402, 417, 432, 441, 450, 462, 471, 480, + 486, 492, 501, 510, 528, 540, 555, 573, + 585, 594, 603, 621, 633, 651, 660, 675, + 684, 696, 705, 717, 732, 744, 759, 771, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, 777, 777, 777, 777, 777, 777, 777, + 777, + }, + { /* Fourth byte 16-bit table 10. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 24, 33, 45, 54, 63, 72, + 87, 99, 105, 123, 132, 147, 159, 171, + 180, 189, 201, 207, 219, 234, 240, 258, + 267, 271, 275, 279, 283, 287, 291, 295, + 299, 303, 307, 312, 317, 322, 327, 332, + 337, 342, 347, 352, 357, 362, 367, 372, + 377, 382, 385, 387, 389, 392, 394, 396, + 398, 401, 404, 406, 412, 418, 424, 430, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, 442, 442, 442, 442, 442, 442, 442, + 442, + }, + { /* Fourth byte 16-bit table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 12. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 14. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 248, 252, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, + 256, + }, + { /* Fourth byte 16-bit table 16. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 34, 38, + 42, 46, 50, 54, 58, 62, 66, 70, + 74, 78, 82, 86, 90, 94, 98, 102, + 106, 110, 114, 118, 122, 126, 130, 134, + 138, 142, 146, 150, 154, 158, 162, 166, + 170, 174, 178, 182, 186, 190, 194, 198, + 202, 206, 210, 214, 218, 222, 226, 230, + 234, 238, 242, 246, 250, 254, 258, 262, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, + }, + { /* Fourth byte 16-bit table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 125, + 130, 135, 140, 145, 150, 156, 162, 168, + 174, 180, 186, 190, 194, 198, 202, 206, + 210, 214, 218, 222, 226, 230, 234, 238, + 242, 246, 250, 254, 258, 262, 266, 270, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, + 274, + }, + { /* Fourth byte 16-bit table 18. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 98, 104, 110, 116, 122, 126, 130, 134, + 138, 142, 146, 150, 154, 158, 162, 166, + 170, 174, 178, 182, 186, 190, 194, 198, + 202, 206, 210, 214, 218, 222, 226, 230, + 234, 238, 242, 246, 250, 254, 258, 262, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, + }, + { /* Fourth byte 16-bit table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 130, 136, 140, 144, 148, 152, 156, 160, + 164, 168, 172, 176, 180, 184, 188, 192, + 196, 200, 204, 210, 216, 222, 226, 230, + 234, 238, 242, 246, 250, 254, 258, 262, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, + }, + { /* Fourth byte 16-bit table 20. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 54, 60, 66, 72, 78, 84, 90, + 96, 102, 108, 114, 120, 126, 132, 138, + 144, 150, 156, 162, 168, 174, 180, 186, + 192, 198, 204, 210, 216, 222, 228, 234, + 240, 246, 252, 258, 264, 270, 276, 282, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, 288, 288, 288, 288, 288, 288, 288, + 288, + }, + { /* Fourth byte 16-bit table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 12, 18, 24, 30, 36, 42, + 48, 54, 60, 66, 72, 78, 84, 90, + 96, 96, 96, 102, 108, 114, 120, 126, + 132, 138, 144, 150, 156, 162, 168, 174, + 180, 186, 192, 198, 204, 210, 216, 222, + 228, 234, 240, 246, 252, 258, 264, 270, + 276, 282, 288, 294, 300, 306, 312, 318, + 324, 330, 336, 342, 348, 354, 360, 366, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, 372, 372, 372, 372, 372, 372, 372, + 372, + }, + { /* Fourth byte 16-bit table 22. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 17, 21, 25, 29, + 33, 37, 41, 45, 49, 53, 58, 62, + 66, 70, 74, 79, 83, 87, 91, 96, + 100, 104, 108, 112, 116, 121, 125, 129, + 133, 137, 141, 145, 149, 153, 157, 161, + 165, 169, 173, 177, 181, 185, 189, 193, + 197, 201, 205, 209, 213, 218, 222, 226, + 230, 235, 239, 243, 247, 251, 255, 259, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, + }, + { /* Fourth byte 16-bit table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 105, 109, 113, 117, 121, 125, + 129, 134, 139, 143, 147, 151, 155, 159, + 163, 167, 171, 175, 179, 184, 188, 192, + 196, 200, 205, 209, 213, 217, 221, 225, + 229, 233, 237, 241, 246, 250, 255, 259, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 263, + 263, + }, + { /* Fourth byte 16-bit table 24. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 41, 45, 49, 53, 57, 61, + 66, 70, 75, 80, 84, 88, 92, 96, + 101, 106, 110, 114, 118, 122, 126, 130, + 134, 138, 142, 146, 150, 155, 159, 163, + 167, 171, 175, 179, 183, 187, 191, 195, + 199, 203, 207, 211, 215, 219, 223, 227, + 231, 236, 240, 244, 248, 252, 256, 261, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, + }, + { /* Fourth byte 16-bit table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 45, 49, 53, 57, 61, + 65, 69, 73, 77, 81, 85, 89, 93, + 97, 101, 105, 109, 113, 117, 122, 126, + 130, 134, 138, 142, 147, 151, 155, 159, + 163, 167, 171, 175, 179, 184, 188, 192, + 196, 201, 205, 209, 213, 217, 221, 225, + 230, 235, 240, 244, 249, 253, 257, 261, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 265, + 265, + }, + { /* Fourth byte 16-bit table 26. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 29, + 33, 37, 41, 45, 49, 53, 58, 62, + 66, 71, 76, 80, 84, 88, 92, 96, + 100, 104, 108, 112, 117, 121, 126, 130, + 135, 139, 143, 147, 152, 156, 160, 165, + 170, 174, 178, 182, 186, 190, 194, 198, + 202, 206, 210, 214, 218, 222, 227, 231, + 236, 240, 245, 249, 254, 259, 264, 268, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, + }, + { /* Fourth byte 16-bit table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 9, 14, 19, 24, 28, 32, + 36, 40, 44, 48, 52, 56, 61, 65, + 69, 73, 77, 82, 86, 91, 96, 100, + 104, 108, 112, 116, 120, 125, 130, 135, + 139, 143, 148, 152, 156, 160, 165, 169, + 173, 177, 181, 185, 190, 194, 198, 202, + 206, 210, 214, 219, 224, 228, 233, 237, + 242, 246, 250, 254, 259, 264, 268, 273, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, + }, + { /* Fourth byte 16-bit table 28. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 9, 13, 17, 21, 25, 29, + 34, 39, 44, 49, 53, 57, 61, 65, + 69, 73, 77, 81, 85, 89, 93, 97, + 102, 106, 110, 114, 118, 122, 126, 130, + 134, 138, 142, 146, 150, 155, 160, 165, + 169, 173, 177, 181, 186, 190, 195, 199, + 203, 208, 213, 217, 221, 225, 229, 233, + 237, 241, 245, 249, 253, 257, 261, 265, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, + 269, + }, + { /* Fourth byte 16-bit table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 25, 29, + 33, 37, 41, 45, 50, 55, 59, 63, + 67, 71, 75, 79, 84, 88, 92, 96, + 100, 105, 110, 114, 118, 122, 127, 131, + 135, 140, 145, 149, 153, 157, 162, 166, + 170, 174, 178, 182, 186, 190, 195, 199, + 203, 207, 212, 216, 220, 224, 228, 233, + 238, 242, 246, 250, 255, 259, 264, 268, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, + 272, + }, + }, +}; + +static const uchar_t u8_decomp_final_tbl[2][19370] = { + { + 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, + 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, + 0xCC, 0xA7, 0x31, 0x6F, 0x31, 0xE2, 0x81, 0x84, + 0x34, 0x31, 0xE2, 0x81, 0x84, 0x32, 0x33, 0xE2, + 0x81, 0x84, 0x34, 0xF6, 0x41, 0xCC, 0x80, 0xF6, + 0x41, 0xCC, 0x81, 0xF6, 0x41, 0xCC, 0x82, 0xF6, + 0x41, 0xCC, 0x83, 0xF6, 0x41, 0xCC, 0x88, 0xF6, + 0x41, 0xCC, 0x8A, 0xF6, 0x43, 0xCC, 0xA7, 0xF6, + 0x45, 0xCC, 0x80, 0xF6, 0x45, 0xCC, 0x81, 0xF6, + 0x45, 0xCC, 0x82, 0xF6, 0x45, 0xCC, 0x88, 0xF6, + 0x49, 0xCC, 0x80, 0xF6, 0x49, 0xCC, 0x81, 0xF6, + 0x49, 0xCC, 0x82, 0xF6, 0x49, 0xCC, 0x88, 0xF6, + 0x4E, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x80, 0xF6, + 0x4F, 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xF6, + 0x4F, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x88, 0xF6, + 0x55, 0xCC, 0x80, 0xF6, 0x55, 0xCC, 0x81, 0xF6, + 0x55, 0xCC, 0x82, 0xF6, 0x55, 0xCC, 0x88, 0xF6, + 
0x59, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x80, 0xF6, + 0x61, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x82, 0xF6, + 0x61, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x88, 0xF6, + 0x61, 0xCC, 0x8A, 0xF6, 0x63, 0xCC, 0xA7, 0xF6, + 0x65, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x81, 0xF6, + 0x65, 0xCC, 0x82, 0xF6, 0x65, 0xCC, 0x88, 0xF6, + 0x69, 0xCC, 0x80, 0xF6, 0x69, 0xCC, 0x81, 0xF6, + 0x69, 0xCC, 0x82, 0xF6, 0x69, 0xCC, 0x88, 0xF6, + 0x6E, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x80, 0xF6, + 0x6F, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, 0xF6, + 0x6F, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x88, 0xF6, + 0x75, 0xCC, 0x80, 0xF6, 0x75, 0xCC, 0x81, 0xF6, + 0x75, 0xCC, 0x82, 0xF6, 0x75, 0xCC, 0x88, 0xF6, + 0x79, 0xCC, 0x81, 0xF6, 0x79, 0xCC, 0x88, 0xF6, + 0x41, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x84, 0xF6, + 0x41, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0x86, 0xF6, + 0x41, 0xCC, 0xA8, 0xF6, 0x61, 0xCC, 0xA8, 0xF6, + 0x43, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0x81, 0xF6, + 0x43, 0xCC, 0x82, 0xF6, 0x63, 0xCC, 0x82, 0xF6, + 0x43, 0xCC, 0x87, 0xF6, 0x63, 0xCC, 0x87, 0xF6, + 0x43, 0xCC, 0x8C, 0xF6, 0x63, 0xCC, 0x8C, 0xF6, + 0x44, 0xCC, 0x8C, 0xF6, 0x64, 0xCC, 0x8C, 0xF6, + 0x45, 0xCC, 0x84, 0xF6, 0x65, 0xCC, 0x84, 0xF6, + 0x45, 0xCC, 0x86, 0xF6, 0x65, 0xCC, 0x86, 0xF6, + 0x45, 0xCC, 0x87, 0xF6, 0x65, 0xCC, 0x87, 0xF6, + 0x45, 0xCC, 0xA8, 0xF6, 0x65, 0xCC, 0xA8, 0xF6, + 0x45, 0xCC, 0x8C, 0xF6, 0x65, 0xCC, 0x8C, 0xF6, + 0x47, 0xCC, 0x82, 0xF6, 0x67, 0xCC, 0x82, 0xF6, + 0x47, 0xCC, 0x86, 0xF6, 0x67, 0xCC, 0x86, 0xF6, + 0x47, 0xCC, 0x87, 0xF6, 0x67, 0xCC, 0x87, 0xF6, + 0x47, 0xCC, 0xA7, 0xF6, 0x67, 0xCC, 0xA7, 0xF6, + 0x48, 0xCC, 0x82, 0xF6, 0x68, 0xCC, 0x82, 0xF6, + 0x49, 0xCC, 0x83, 0xF6, 0x69, 0xCC, 0x83, 0xF6, + 0x49, 0xCC, 0x84, 0xF6, 0x69, 0xCC, 0x84, 0xF6, + 0x49, 0xCC, 0x86, 0xF6, 0x69, 0xCC, 0x86, 0xF6, + 0x49, 0xCC, 0xA8, 0xF6, 0x69, 0xCC, 0xA8, 0xF6, + 0x49, 0xCC, 0x87, 0x49, 0x4A, 0x69, 0x6A, 0xF6, + 0x4A, 0xCC, 0x82, 0xF6, 0x6A, 0xCC, 0x82, 0xF6, + 0x4B, 0xCC, 0xA7, 0xF6, 0x6B, 0xCC, 0xA7, 0xF6, + 0x4C, 0xCC, 0x81, 0xF6, 0x6C, 0xCC, 0x81, 0xF6, + 
0x4C, 0xCC, 0xA7, 0xF6, 0x6C, 0xCC, 0xA7, 0xF6, + 0x4C, 0xCC, 0x8C, 0xF6, 0x6C, 0xCC, 0x8C, 0x4C, + 0xC2, 0xB7, 0x6C, 0xC2, 0xB7, 0xF6, 0x4E, 0xCC, + 0x81, 0xF6, 0x6E, 0xCC, 0x81, 0xF6, 0x4E, 0xCC, + 0xA7, 0xF6, 0x6E, 0xCC, 0xA7, 0xF6, 0x4E, 0xCC, + 0x8C, 0xF6, 0x6E, 0xCC, 0x8C, 0xCA, 0xBC, 0x6E, + 0xF6, 0x4F, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, 0x84, + 0xF6, 0x4F, 0xCC, 0x86, 0xF6, 0x6F, 0xCC, 0x86, + 0xF6, 0x4F, 0xCC, 0x8B, 0xF6, 0x6F, 0xCC, 0x8B, + 0xF6, 0x52, 0xCC, 0x81, 0xF6, 0x72, 0xCC, 0x81, + 0xF6, 0x52, 0xCC, 0xA7, 0xF6, 0x72, 0xCC, 0xA7, + 0xF6, 0x52, 0xCC, 0x8C, 0xF6, 0x72, 0xCC, 0x8C, + 0xF6, 0x53, 0xCC, 0x81, 0xF6, 0x73, 0xCC, 0x81, + 0xF6, 0x53, 0xCC, 0x82, 0xF6, 0x73, 0xCC, 0x82, + 0xF6, 0x53, 0xCC, 0xA7, 0xF6, 0x73, 0xCC, 0xA7, + 0xF6, 0x53, 0xCC, 0x8C, 0xF6, 0x73, 0xCC, 0x8C, + 0xF6, 0x54, 0xCC, 0xA7, 0xF6, 0x74, 0xCC, 0xA7, + 0xF6, 0x54, 0xCC, 0x8C, 0xF6, 0x74, 0xCC, 0x8C, + 0xF6, 0x55, 0xCC, 0x83, 0xF6, 0x75, 0xCC, 0x83, + 0xF6, 0x55, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x84, + 0xF6, 0x55, 0xCC, 0x86, 0xF6, 0x75, 0xCC, 0x86, + 0xF6, 0x55, 0xCC, 0x8A, 0xF6, 0x75, 0xCC, 0x8A, + 0xF6, 0x55, 0xCC, 0x8B, 0xF6, 0x75, 0xCC, 0x8B, + 0xF6, 0x55, 0xCC, 0xA8, 0xF6, 0x75, 0xCC, 0xA8, + 0xF6, 0x57, 0xCC, 0x82, 0xF6, 0x77, 0xCC, 0x82, + 0xF6, 0x59, 0xCC, 0x82, 0xF6, 0x79, 0xCC, 0x82, + 0xF6, 0x59, 0xCC, 0x88, 0xF6, 0x5A, 0xCC, 0x81, + 0xF6, 0x7A, 0xCC, 0x81, 0xF6, 0x5A, 0xCC, 0x87, + 0xF6, 0x7A, 0xCC, 0x87, 0xF6, 0x5A, 0xCC, 0x8C, + 0xF6, 0x7A, 0xCC, 0x8C, 0x73, 0xF6, 0x4F, 0xCC, + 0x9B, 0xF6, 0x6F, 0xCC, 0x9B, 0xF6, 0x55, 0xCC, + 0x9B, 0xF6, 0x75, 0xCC, 0x9B, 0x44, 0x5A, 0xCC, + 0x8C, 0x44, 0x7A, 0xCC, 0x8C, 0x64, 0x7A, 0xCC, + 0x8C, 0x4C, 0x4A, 0x4C, 0x6A, 0x6C, 0x6A, 0x4E, + 0x4A, 0x4E, 0x6A, 0x6E, 0x6A, 0xF6, 0x41, 0xCC, + 0x8C, 0xF6, 0x61, 0xCC, 0x8C, 0xF6, 0x49, 0xCC, + 0x8C, 0xF6, 0x69, 0xCC, 0x8C, 0xF6, 0x4F, 0xCC, + 0x8C, 0xF6, 0x6F, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, + 0x8C, 0xF6, 0x75, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, + 0x88, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x88, 0xCC, + 
0x84, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0x75, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x55, 0xCC, + 0x88, 0xCC, 0x8C, 0xF6, 0x75, 0xCC, 0x88, 0xCC, + 0x8C, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x80, 0xF6, + 0x75, 0xCC, 0x88, 0xCC, 0x80, 0xF6, 0x41, 0xCC, + 0x88, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x88, 0xCC, + 0x84, 0xF6, 0x41, 0xCC, 0x87, 0xCC, 0x84, 0xF6, + 0x61, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0xC3, 0x86, + 0xCC, 0x84, 0xF6, 0xC3, 0xA6, 0xCC, 0x84, 0xF6, + 0x47, 0xCC, 0x8C, 0xF6, 0x67, 0xCC, 0x8C, 0xF6, + 0x4B, 0xCC, 0x8C, 0xF6, 0x6B, 0xCC, 0x8C, 0xF6, + 0x4F, 0xCC, 0xA8, 0xF6, 0x6F, 0xCC, 0xA8, 0xF6, + 0x4F, 0xCC, 0xA8, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, + 0xA8, 0xCC, 0x84, 0xF6, 0xC6, 0xB7, 0xCC, 0x8C, + 0xF6, 0xCA, 0x92, 0xCC, 0x8C, 0xF6, 0x6A, 0xCC, + 0x8C, 0x44, 0x5A, 0x44, 0x7A, 0x64, 0x7A, 0xF6, + 0x47, 0xCC, 0x81, 0xF6, 0x67, 0xCC, 0x81, 0xF6, + 0x4E, 0xCC, 0x80, 0xF6, 0x6E, 0xCC, 0x80, 0xF6, + 0x41, 0xCC, 0x8A, 0xCC, 0x81, 0xF6, 0x61, 0xCC, + 0x8A, 0xCC, 0x81, 0xF6, 0xC3, 0x86, 0xCC, 0x81, + 0xF6, 0xC3, 0xA6, 0xCC, 0x81, 0xF6, 0xC3, 0x98, + 0xCC, 0x81, 0xF6, 0xC3, 0xB8, 0xCC, 0x81, 0xF6, + 0x41, 0xCC, 0x8F, 0xF6, 0x61, 0xCC, 0x8F, 0xF6, + 0x41, 0xCC, 0x91, 0xF6, 0x61, 0xCC, 0x91, 0xF6, + 0x45, 0xCC, 0x8F, 0xF6, 0x65, 0xCC, 0x8F, 0xF6, + 0x45, 0xCC, 0x91, 0xF6, 0x65, 0xCC, 0x91, 0xF6, + 0x49, 0xCC, 0x8F, 0xF6, 0x69, 0xCC, 0x8F, 0xF6, + 0x49, 0xCC, 0x91, 0xF6, 0x69, 0xCC, 0x91, 0xF6, + 0x4F, 0xCC, 0x8F, 0xF6, 0x6F, 0xCC, 0x8F, 0xF6, + 0x4F, 0xCC, 0x91, 0xF6, 0x6F, 0xCC, 0x91, 0xF6, + 0x52, 0xCC, 0x8F, 0xF6, 0x72, 0xCC, 0x8F, 0xF6, + 0x52, 0xCC, 0x91, 0xF6, 0x72, 0xCC, 0x91, 0xF6, + 0x55, 0xCC, 0x8F, 0xF6, 0x75, 0xCC, 0x8F, 0xF6, + 0x55, 0xCC, 0x91, 0xF6, 0x75, 0xCC, 0x91, 0xF6, + 0x53, 0xCC, 0xA6, 0xF6, 0x73, 0xCC, 0xA6, 0xF6, + 0x54, 0xCC, 0xA6, 0xF6, 0x74, 0xCC, 0xA6, 0xF6, + 0x48, 0xCC, 0x8C, 0xF6, 0x68, 0xCC, 0x8C, 0xF6, + 0x41, 0xCC, 0x87, 0xF6, 0x61, 0xCC, 0x87, 0xF6, + 0x45, 0xCC, 0xA7, 0xF6, 0x65, 0xCC, 0xA7, 0xF6, + 0x4F, 0xCC, 0x88, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, + 
0x88, 0xCC, 0x84, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, + 0x84, 0xF6, 0x6F, 0xCC, 0x83, 0xCC, 0x84, 0xF6, + 0x4F, 0xCC, 0x87, 0xF6, 0x6F, 0xCC, 0x87, 0xF6, + 0x4F, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, + 0x87, 0xCC, 0x84, 0xF6, 0x59, 0xCC, 0x84, 0xF6, + 0x79, 0xCC, 0x84, 0x68, 0xC9, 0xA6, 0x6A, 0x72, + 0xC9, 0xB9, 0xC9, 0xBB, 0xCA, 0x81, 0x77, 0x79, + 0x20, 0xCC, 0x86, 0x20, 0xCC, 0x87, 0x20, 0xCC, + 0x8A, 0x20, 0xCC, 0xA8, 0x20, 0xCC, 0x83, 0x20, + 0xCC, 0x8B, 0xC9, 0xA3, 0x6C, 0x73, 0x78, 0xCA, + 0x95, 0xF6, 0xCC, 0x80, 0xF6, 0xCC, 0x81, 0xF6, + 0xCC, 0x93, 0xF6, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0xCA, 0xB9, 0x20, 0xCD, 0x85, 0xF6, 0x3B, 0x20, + 0xCC, 0x81, 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, + 0x20, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0xCE, 0x91, + 0xCC, 0x81, 0xF6, 0xC2, 0xB7, 0xF6, 0xCE, 0x95, + 0xCC, 0x81, 0xF6, 0xCE, 0x97, 0xCC, 0x81, 0xF6, + 0xCE, 0x99, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, + 0x81, 0xF6, 0xCE, 0xA5, 0xCC, 0x81, 0xF6, 0xCE, + 0xA9, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x88, + 0xCC, 0x81, 0xF6, 0xCE, 0x99, 0xCC, 0x88, 0xF6, + 0xCE, 0xA5, 0xCC, 0x88, 0xF6, 0xCE, 0xB1, 0xCC, + 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x81, 0xF6, 0xCE, + 0xB7, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, + 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0xCE, 0xB9, 0xCC, 0x88, 0xF6, 0xCF, 0x85, 0xCC, + 0x88, 0xF6, 0xCE, 0xBF, 0xCC, 0x81, 0xF6, 0xCF, + 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, 0x81, + 0xCE, 0xB2, 0xCE, 0xB8, 0xCE, 0xA5, 0xF5, 0x05, + 0xCF, 0x92, 0xCC, 0x81, 0xCE, 0xA5, 0xCC, 0x81, + 0xF5, 0x05, 0xCF, 0x92, 0xCC, 0x88, 0xCE, 0xA5, + 0xCC, 0x88, 0xCF, 0x86, 0xCF, 0x80, 0xCE, 0xBA, + 0xCF, 0x81, 0xCF, 0x82, 0xCE, 0x98, 0xCE, 0xB5, + 0xF6, 0xD0, 0x95, 0xCC, 0x80, 0xF6, 0xD0, 0x95, + 0xCC, 0x88, 0xF6, 0xD0, 0x93, 0xCC, 0x81, 0xF6, + 0xD0, 0x86, 0xCC, 0x88, 0xF6, 0xD0, 0x9A, 0xCC, + 0x81, 0xF6, 0xD0, 0x98, 0xCC, 0x80, 0xF6, 0xD0, + 0xA3, 0xCC, 0x86, 0xF6, 0xD0, 0x98, 0xCC, 0x86, + 0xF6, 0xD0, 0xB8, 0xCC, 0x86, 0xF6, 0xD0, 0xB5, + 0xCC, 0x80, 0xF6, 0xD0, 0xB5, 0xCC, 0x88, 0xF6, + 
0xD0, 0xB3, 0xCC, 0x81, 0xF6, 0xD1, 0x96, 0xCC, + 0x88, 0xF6, 0xD0, 0xBA, 0xCC, 0x81, 0xF6, 0xD0, + 0xB8, 0xCC, 0x80, 0xF6, 0xD1, 0x83, 0xCC, 0x86, + 0xF6, 0xD1, 0xB4, 0xCC, 0x8F, 0xF6, 0xD1, 0xB5, + 0xCC, 0x8F, 0xF6, 0xD0, 0x96, 0xCC, 0x86, 0xF6, + 0xD0, 0xB6, 0xCC, 0x86, 0xF6, 0xD0, 0x90, 0xCC, + 0x86, 0xF6, 0xD0, 0xB0, 0xCC, 0x86, 0xF6, 0xD0, + 0x90, 0xCC, 0x88, 0xF6, 0xD0, 0xB0, 0xCC, 0x88, + 0xF6, 0xD0, 0x95, 0xCC, 0x86, 0xF6, 0xD0, 0xB5, + 0xCC, 0x86, 0xF6, 0xD3, 0x98, 0xCC, 0x88, 0xF6, + 0xD3, 0x99, 0xCC, 0x88, 0xF6, 0xD0, 0x96, 0xCC, + 0x88, 0xF6, 0xD0, 0xB6, 0xCC, 0x88, 0xF6, 0xD0, + 0x97, 0xCC, 0x88, 0xF6, 0xD0, 0xB7, 0xCC, 0x88, + 0xF6, 0xD0, 0x98, 0xCC, 0x84, 0xF6, 0xD0, 0xB8, + 0xCC, 0x84, 0xF6, 0xD0, 0x98, 0xCC, 0x88, 0xF6, + 0xD0, 0xB8, 0xCC, 0x88, 0xF6, 0xD0, 0x9E, 0xCC, + 0x88, 0xF6, 0xD0, 0xBE, 0xCC, 0x88, 0xF6, 0xD3, + 0xA8, 0xCC, 0x88, 0xF6, 0xD3, 0xA9, 0xCC, 0x88, + 0xF6, 0xD0, 0xAD, 0xCC, 0x88, 0xF6, 0xD1, 0x8D, + 0xCC, 0x88, 0xF6, 0xD0, 0xA3, 0xCC, 0x84, 0xF6, + 0xD1, 0x83, 0xCC, 0x84, 0xF6, 0xD0, 0xA3, 0xCC, + 0x88, 0xF6, 0xD1, 0x83, 0xCC, 0x88, 0xF6, 0xD0, + 0xA3, 0xCC, 0x8B, 0xF6, 0xD1, 0x83, 0xCC, 0x8B, + 0xF6, 0xD0, 0xA7, 0xCC, 0x88, 0xF6, 0xD1, 0x87, + 0xCC, 0x88, 0xF6, 0xD0, 0xAB, 0xCC, 0x88, 0xF6, + 0xD1, 0x8B, 0xCC, 0x88, 0xD5, 0xA5, 0xD6, 0x82, + 0xF6, 0xD8, 0xA7, 0xD9, 0x93, 0xF6, 0xD8, 0xA7, + 0xD9, 0x94, 0xF6, 0xD9, 0x88, 0xD9, 0x94, 0xF6, + 0xD8, 0xA7, 0xD9, 0x95, 0xF6, 0xD9, 0x8A, 0xD9, + 0x94, 0xD8, 0xA7, 0xD9, 0xB4, 0xD9, 0x88, 0xD9, + 0xB4, 0xDB, 0x87, 0xD9, 0xB4, 0xD9, 0x8A, 0xD9, + 0xB4, 0xF6, 0xDB, 0x95, 0xD9, 0x94, 0xF6, 0xDB, + 0x81, 0xD9, 0x94, 0xF6, 0xDB, 0x92, 0xD9, 0x94, + 0xF6, 0xE0, 0xA4, 0xA8, 0xE0, 0xA4, 0xBC, 0xF6, + 0xE0, 0xA4, 0xB0, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, + 0xA4, 0xB3, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, + 0x95, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x96, + 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x97, 0xE0, + 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x9C, 0xE0, 0xA4, + 0xBC, 0xF6, 0xE0, 0xA4, 0xA1, 0xE0, 0xA4, 0xBC, + 
0xF6, 0xE0, 0xA4, 0xA2, 0xE0, 0xA4, 0xBC, 0xF6, + 0xE0, 0xA4, 0xAB, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, + 0xA4, 0xAF, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA7, + 0x87, 0xE0, 0xA6, 0xBE, 0xF6, 0xE0, 0xA7, 0x87, + 0xE0, 0xA7, 0x97, 0xF6, 0xE0, 0xA6, 0xA1, 0xE0, + 0xA6, 0xBC, 0xF6, 0xE0, 0xA6, 0xA2, 0xE0, 0xA6, + 0xBC, 0xF6, 0xE0, 0xA6, 0xAF, 0xE0, 0xA6, 0xBC, + 0xF6, 0xE0, 0xA8, 0xB2, 0xE0, 0xA8, 0xBC, 0xF6, + 0xE0, 0xA8, 0xB8, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, + 0xA8, 0x96, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, + 0x97, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, 0x9C, + 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, 0xAB, 0xE0, + 0xA8, 0xBC, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAD, + 0x96, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAC, 0xBE, + 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAD, 0x97, 0xF6, + 0xE0, 0xAC, 0xA1, 0xE0, 0xAC, 0xBC, 0xF6, 0xE0, + 0xAC, 0xA2, 0xE0, 0xAC, 0xBC, 0xF6, 0xE0, 0xAE, + 0x92, 0xE0, 0xAF, 0x97, 0xF6, 0xE0, 0xAF, 0x86, + 0xE0, 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, 0x87, 0xE0, + 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, 0x86, 0xE0, 0xAF, + 0x97, 0xF6, 0xE0, 0xB1, 0x86, 0xE0, 0xB1, 0x96, + 0xF6, 0xE0, 0xB2, 0xBF, 0xE0, 0xB3, 0x95, 0xF6, + 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x95, 0xF6, 0xE0, + 0xB3, 0x86, 0xE0, 0xB3, 0x96, 0xF6, 0xE0, 0xB3, + 0x86, 0xE0, 0xB3, 0x82, 0xF6, 0xE0, 0xB3, 0x86, + 0xE0, 0xB3, 0x82, 0xE0, 0xB3, 0x95, 0xF6, 0xE0, + 0xB5, 0x86, 0xE0, 0xB4, 0xBE, 0xF6, 0xE0, 0xB5, + 0x87, 0xE0, 0xB4, 0xBE, 0xF6, 0xE0, 0xB5, 0x86, + 0xE0, 0xB5, 0x97, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, + 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, 0xB7, + 0x8F, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, 0xB7, 0x8F, + 0xE0, 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, + 0xB7, 0x9F, 0xE0, 0xB9, 0x8D, 0xE0, 0xB8, 0xB2, + 0xE0, 0xBB, 0x8D, 0xE0, 0xBA, 0xB2, 0xE0, 0xBA, + 0xAB, 0xE0, 0xBA, 0x99, 0xE0, 0xBA, 0xAB, 0xE0, + 0xBA, 0xA1, 0xE0, 0xBC, 0x8B, 0xF6, 0xE0, 0xBD, + 0x82, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x8C, + 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x91, 0xE0, + 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x96, 0xE0, 0xBE, + 0xB7, 0xF6, 0xE0, 0xBD, 0x9B, 0xE0, 0xBE, 0xB7, + 
0xF6, 0xE0, 0xBD, 0x80, 0xE0, 0xBE, 0xB5, 0xF6, + 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB2, 0xF6, 0xE0, + 0xBD, 0xB1, 0xE0, 0xBD, 0xB4, 0xF6, 0xE0, 0xBE, + 0xB2, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, 0xB2, 0xE0, + 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, 0xBE, + 0xB3, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, 0xB3, 0xE0, + 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, 0xBD, + 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, 0xBE, 0x92, + 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0x9C, 0xE0, + 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xA1, 0xE0, 0xBE, + 0xB7, 0xF6, 0xE0, 0xBE, 0xA6, 0xE0, 0xBE, 0xB7, + 0xF6, 0xE0, 0xBE, 0xAB, 0xE0, 0xBE, 0xB7, 0xF6, + 0xE0, 0xBE, 0x90, 0xE0, 0xBE, 0xB5, 0xF6, 0xE1, + 0x80, 0xA5, 0xE1, 0x80, 0xAE, 0xF6, 0x41, 0xCC, + 0xA5, 0xF6, 0x61, 0xCC, 0xA5, 0xF6, 0x42, 0xCC, + 0x87, 0xF6, 0x62, 0xCC, 0x87, 0xF6, 0x42, 0xCC, + 0xA3, 0xF6, 0x62, 0xCC, 0xA3, 0xF6, 0x42, 0xCC, + 0xB1, 0xF6, 0x62, 0xCC, 0xB1, 0xF6, 0x43, 0xCC, + 0xA7, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0xA7, 0xCC, + 0x81, 0xF6, 0x44, 0xCC, 0x87, 0xF6, 0x64, 0xCC, + 0x87, 0xF6, 0x44, 0xCC, 0xA3, 0xF6, 0x64, 0xCC, + 0xA3, 0xF6, 0x44, 0xCC, 0xB1, 0xF6, 0x64, 0xCC, + 0xB1, 0xF6, 0x44, 0xCC, 0xA7, 0xF6, 0x64, 0xCC, + 0xA7, 0xF6, 0x44, 0xCC, 0xAD, 0xF6, 0x64, 0xCC, + 0xAD, 0xF6, 0x45, 0xCC, 0x84, 0xCC, 0x80, 0xF6, + 0x65, 0xCC, 0x84, 0xCC, 0x80, 0xF6, 0x45, 0xCC, + 0x84, 0xCC, 0x81, 0xF6, 0x65, 0xCC, 0x84, 0xCC, + 0x81, 0xF6, 0x45, 0xCC, 0xAD, 0xF6, 0x65, 0xCC, + 0xAD, 0xF6, 0x45, 0xCC, 0xB0, 0xF6, 0x65, 0xCC, + 0xB0, 0xF6, 0x45, 0xCC, 0xA7, 0xCC, 0x86, 0xF6, + 0x65, 0xCC, 0xA7, 0xCC, 0x86, 0xF6, 0x46, 0xCC, + 0x87, 0xF6, 0x66, 0xCC, 0x87, 0xF6, 0x47, 0xCC, + 0x84, 0xF6, 0x67, 0xCC, 0x84, 0xF6, 0x48, 0xCC, + 0x87, 0xF6, 0x68, 0xCC, 0x87, 0xF6, 0x48, 0xCC, + 0xA3, 0xF6, 0x68, 0xCC, 0xA3, 0xF6, 0x48, 0xCC, + 0x88, 0xF6, 0x68, 0xCC, 0x88, 0xF6, 0x48, 0xCC, + 0xA7, 0xF6, 0x68, 0xCC, 0xA7, 0xF6, 0x48, 0xCC, + 0xAE, 0xF6, 0x68, 0xCC, 0xAE, 0xF6, 0x49, 0xCC, + 0xB0, 0xF6, 0x69, 0xCC, 0xB0, 0xF6, 0x49, 0xCC, + 0x88, 0xCC, 0x81, 0xF6, 0x69, 0xCC, 0x88, 0xCC, + 
0x81, 0xF6, 0x4B, 0xCC, 0x81, 0xF6, 0x6B, 0xCC, + 0x81, 0xF6, 0x4B, 0xCC, 0xA3, 0xF6, 0x6B, 0xCC, + 0xA3, 0xF6, 0x4B, 0xCC, 0xB1, 0xF6, 0x6B, 0xCC, + 0xB1, 0xF6, 0x4C, 0xCC, 0xA3, 0xF6, 0x6C, 0xCC, + 0xA3, 0xF6, 0x4C, 0xCC, 0xA3, 0xCC, 0x84, 0xF6, + 0x6C, 0xCC, 0xA3, 0xCC, 0x84, 0xF6, 0x4C, 0xCC, + 0xB1, 0xF6, 0x6C, 0xCC, 0xB1, 0xF6, 0x4C, 0xCC, + 0xAD, 0xF6, 0x6C, 0xCC, 0xAD, 0xF6, 0x4D, 0xCC, + 0x81, 0xF6, 0x6D, 0xCC, 0x81, 0xF6, 0x4D, 0xCC, + 0x87, 0xF6, 0x6D, 0xCC, 0x87, 0xF6, 0x4D, 0xCC, + 0xA3, 0xF6, 0x6D, 0xCC, 0xA3, 0xF6, 0x4E, 0xCC, + 0x87, 0xF6, 0x6E, 0xCC, 0x87, 0xF6, 0x4E, 0xCC, + 0xA3, 0xF6, 0x6E, 0xCC, 0xA3, 0xF6, 0x4E, 0xCC, + 0xB1, 0xF6, 0x6E, 0xCC, 0xB1, 0xF6, 0x4E, 0xCC, + 0xAD, 0xF6, 0x6E, 0xCC, 0xAD, 0xF6, 0x4F, 0xCC, + 0x83, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x83, 0xCC, + 0x81, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, 0x88, 0xF6, + 0x6F, 0xCC, 0x83, 0xCC, 0x88, 0xF6, 0x4F, 0xCC, + 0x84, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x84, 0xCC, + 0x80, 0xF6, 0x4F, 0xCC, 0x84, 0xCC, 0x81, 0xF6, + 0x6F, 0xCC, 0x84, 0xCC, 0x81, 0xF6, 0x50, 0xCC, + 0x81, 0xF6, 0x70, 0xCC, 0x81, 0xF6, 0x50, 0xCC, + 0x87, 0xF6, 0x70, 0xCC, 0x87, 0xF6, 0x52, 0xCC, + 0x87, 0xF6, 0x72, 0xCC, 0x87, 0xF6, 0x52, 0xCC, + 0xA3, 0xF6, 0x72, 0xCC, 0xA3, 0xF6, 0x52, 0xCC, + 0xA3, 0xCC, 0x84, 0xF6, 0x72, 0xCC, 0xA3, 0xCC, + 0x84, 0xF6, 0x52, 0xCC, 0xB1, 0xF6, 0x72, 0xCC, + 0xB1, 0xF6, 0x53, 0xCC, 0x87, 0xF6, 0x73, 0xCC, + 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xF6, 0x73, 0xCC, + 0xA3, 0xF6, 0x53, 0xCC, 0x81, 0xCC, 0x87, 0xF6, + 0x73, 0xCC, 0x81, 0xCC, 0x87, 0xF6, 0x53, 0xCC, + 0x8C, 0xCC, 0x87, 0xF6, 0x73, 0xCC, 0x8C, 0xCC, + 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xCC, 0x87, 0xF6, + 0x73, 0xCC, 0xA3, 0xCC, 0x87, 0xF6, 0x54, 0xCC, + 0x87, 0xF6, 0x74, 0xCC, 0x87, 0xF6, 0x54, 0xCC, + 0xA3, 0xF6, 0x74, 0xCC, 0xA3, 0xF6, 0x54, 0xCC, + 0xB1, 0xF6, 0x74, 0xCC, 0xB1, 0xF6, 0x54, 0xCC, + 0xAD, 0xF6, 0x74, 0xCC, 0xAD, 0xF6, 0x55, 0xCC, + 0xA4, 0xF6, 0x75, 0xCC, 0xA4, 0xF6, 0x55, 0xCC, + 0xB0, 0xF6, 0x75, 0xCC, 0xB0, 0xF6, 0x55, 0xCC, + 
0xAD, 0xF6, 0x75, 0xCC, 0xAD, 0xF6, 0x55, 0xCC, + 0x83, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x83, 0xCC, + 0x81, 0xF6, 0x55, 0xCC, 0x84, 0xCC, 0x88, 0xF6, + 0x75, 0xCC, 0x84, 0xCC, 0x88, 0xF6, 0x56, 0xCC, + 0x83, 0xF6, 0x76, 0xCC, 0x83, 0xF6, 0x56, 0xCC, + 0xA3, 0xF6, 0x76, 0xCC, 0xA3, 0xF6, 0x57, 0xCC, + 0x80, 0xF6, 0x77, 0xCC, 0x80, 0xF6, 0x57, 0xCC, + 0x81, 0xF6, 0x77, 0xCC, 0x81, 0xF6, 0x57, 0xCC, + 0x88, 0xF6, 0x77, 0xCC, 0x88, 0xF6, 0x57, 0xCC, + 0x87, 0xF6, 0x77, 0xCC, 0x87, 0xF6, 0x57, 0xCC, + 0xA3, 0xF6, 0x77, 0xCC, 0xA3, 0xF6, 0x58, 0xCC, + 0x87, 0xF6, 0x78, 0xCC, 0x87, 0xF6, 0x58, 0xCC, + 0x88, 0xF6, 0x78, 0xCC, 0x88, 0xF6, 0x59, 0xCC, + 0x87, 0xF6, 0x79, 0xCC, 0x87, 0xF6, 0x5A, 0xCC, + 0x82, 0xF6, 0x7A, 0xCC, 0x82, 0xF6, 0x5A, 0xCC, + 0xA3, 0xF6, 0x7A, 0xCC, 0xA3, 0xF6, 0x5A, 0xCC, + 0xB1, 0xF6, 0x7A, 0xCC, 0xB1, 0xF6, 0x68, 0xCC, + 0xB1, 0xF6, 0x74, 0xCC, 0x88, 0xF6, 0x77, 0xCC, + 0x8A, 0xF6, 0x79, 0xCC, 0x8A, 0x61, 0xCA, 0xBE, + 0xF5, 0x05, 0xC5, 0xBF, 0xCC, 0x87, 0x73, 0xCC, + 0x87, 0xF6, 0x41, 0xCC, 0xA3, 0xF6, 0x61, 0xCC, + 0xA3, 0xF6, 0x41, 0xCC, 0x89, 0xF6, 0x61, 0xCC, + 0x89, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x81, 0xF6, + 0x61, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x41, 0xCC, + 0x82, 0xCC, 0x80, 0xF6, 0x61, 0xCC, 0x82, 0xCC, + 0x80, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x89, 0xF6, + 0x61, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x41, 0xCC, + 0x82, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x82, 0xCC, + 0x83, 0xF6, 0x41, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, + 0x61, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x41, 0xCC, + 0x86, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x86, 0xCC, + 0x81, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x80, 0xF6, + 0x61, 0xCC, 0x86, 0xCC, 0x80, 0xF6, 0x41, 0xCC, + 0x86, 0xCC, 0x89, 0xF6, 0x61, 0xCC, 0x86, 0xCC, + 0x89, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x83, 0xF6, + 0x61, 0xCC, 0x86, 0xCC, 0x83, 0xF6, 0x41, 0xCC, + 0xA3, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0xA3, 0xCC, + 0x86, 0xF6, 0x45, 0xCC, 0xA3, 0xF6, 0x65, 0xCC, + 0xA3, 0xF6, 0x45, 0xCC, 0x89, 0xF6, 0x65, 0xCC, + 0x89, 0xF6, 0x45, 0xCC, 0x83, 0xF6, 0x65, 0xCC, + 
0x83, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x81, 0xF6, + 0x65, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x45, 0xCC, + 0x82, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x82, 0xCC, + 0x80, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x89, 0xF6, + 0x65, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x45, 0xCC, + 0x82, 0xCC, 0x83, 0xF6, 0x65, 0xCC, 0x82, 0xCC, + 0x83, 0xF6, 0x45, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, + 0x65, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x49, 0xCC, + 0x89, 0xF6, 0x69, 0xCC, 0x89, 0xF6, 0x49, 0xCC, + 0xA3, 0xF6, 0x69, 0xCC, 0xA3, 0xF6, 0x4F, 0xCC, + 0xA3, 0xF6, 0x6F, 0xCC, 0xA3, 0xF6, 0x4F, 0xCC, + 0x89, 0xF6, 0x6F, 0xCC, 0x89, 0xF6, 0x4F, 0xCC, + 0x82, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, 0xCC, + 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x80, 0xF6, + 0x6F, 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x4F, 0xCC, + 0x82, 0xCC, 0x89, 0xF6, 0x6F, 0xCC, 0x82, 0xCC, + 0x89, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x83, 0xF6, + 0x6F, 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, + 0xA3, 0xCC, 0x82, 0xF6, 0x6F, 0xCC, 0xA3, 0xCC, + 0x82, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x81, 0xF6, + 0x6F, 0xCC, 0x9B, 0xCC, 0x81, 0xF6, 0x4F, 0xCC, + 0x9B, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, + 0x80, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x89, 0xF6, + 0x6F, 0xCC, 0x9B, 0xCC, 0x89, 0xF6, 0x4F, 0xCC, + 0x9B, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, + 0x83, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, + 0x6F, 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, 0x55, 0xCC, + 0xA3, 0xF6, 0x75, 0xCC, 0xA3, 0xF6, 0x55, 0xCC, + 0x89, 0xF6, 0x75, 0xCC, 0x89, 0xF6, 0x55, 0xCC, + 0x9B, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x9B, 0xCC, + 0x81, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x80, 0xF6, + 0x75, 0xCC, 0x9B, 0xCC, 0x80, 0xF6, 0x55, 0xCC, + 0x9B, 0xCC, 0x89, 0xF6, 0x75, 0xCC, 0x9B, 0xCC, + 0x89, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x83, 0xF6, + 0x75, 0xCC, 0x9B, 0xCC, 0x83, 0xF6, 0x55, 0xCC, + 0x9B, 0xCC, 0xA3, 0xF6, 0x75, 0xCC, 0x9B, 0xCC, + 0xA3, 0xF6, 0x59, 0xCC, 0x80, 0xF6, 0x79, 0xCC, + 0x80, 0xF6, 0x59, 0xCC, 0xA3, 0xF6, 0x79, 0xCC, + 0xA3, 0xF6, 0x59, 0xCC, 0x89, 0xF6, 0x79, 0xCC, + 0x89, 0xF6, 0x59, 0xCC, 0x83, 0xF6, 0x79, 0xCC, + 
0x83, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xF6, 0xCE, + 0xB1, 0xCC, 0x94, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, + 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCC, + 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCC, 0x81, + 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCC, 0x81, 0xF6, + 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCE, + 0xB1, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0x91, + 0xCC, 0x93, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xF6, + 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, + 0x91, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0x91, + 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCC, + 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCC, 0x93, + 0xCD, 0x82, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCD, + 0x82, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xF6, 0xCE, + 0xB5, 0xCC, 0x94, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, + 0xCC, 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x94, 0xCC, + 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xCC, 0x81, + 0xF6, 0xCE, 0xB5, 0xCC, 0x94, 0xCC, 0x81, 0xF6, + 0xCE, 0x95, 0xCC, 0x93, 0xF6, 0xCE, 0x95, 0xCC, + 0x94, 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xCC, 0x80, + 0xF6, 0xCE, 0x95, 0xCC, 0x94, 0xCC, 0x80, 0xF6, + 0xCE, 0x95, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, + 0x95, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, + 0xCC, 0x93, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xF6, + 0xCE, 0xB7, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, + 0xB7, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xB7, + 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, + 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, 0x93, + 0xCD, 0x82, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xCD, + 0x82, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xF6, 0xCE, + 0x97, 0xCC, 0x94, 0xF6, 0xCE, 0x97, 0xCC, 0x93, + 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, + 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x81, + 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x81, 0xF6, + 0xCE, 0x97, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCE, + 0x97, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xB9, + 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, 0xF6, + 0xCE, 0xB9, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, + 0xB9, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, + 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, + 
0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x93, + 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, 0xCD, + 0x82, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xF6, 0xCE, + 0x99, 0xCC, 0x94, 0xF6, 0xCE, 0x99, 0xCC, 0x93, + 0xCC, 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x94, 0xCC, + 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xCC, 0x81, + 0xF6, 0xCE, 0x99, 0xCC, 0x94, 0xCC, 0x81, 0xF6, + 0xCE, 0x99, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCE, + 0x99, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xBF, + 0xCC, 0x93, 0xF6, 0xCE, 0xBF, 0xCC, 0x94, 0xF6, + 0xCE, 0xBF, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, + 0xBF, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xBF, + 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xBF, 0xCC, + 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, 0x93, + 0xF6, 0xCE, 0x9F, 0xCC, 0x94, 0xF6, 0xCE, 0x9F, + 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, + 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, 0x93, + 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, 0x94, 0xCC, + 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xF6, 0xCF, + 0x85, 0xCC, 0x94, 0xF6, 0xCF, 0x85, 0xCC, 0x93, + 0xCC, 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x94, 0xCC, + 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xCC, 0x81, + 0xF6, 0xCF, 0x85, 0xCC, 0x94, 0xCC, 0x81, 0xF6, + 0xCF, 0x85, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCF, + 0x85, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xA5, + 0xCC, 0x94, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCC, + 0x80, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCC, 0x81, + 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCD, 0x82, 0xF6, + 0xCF, 0x89, 0xCC, 0x93, 0xF6, 0xCF, 0x89, 0xCC, + 0x94, 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCC, 0x80, + 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x80, 0xF6, + 0xCF, 0x89, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCF, + 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCF, 0x89, + 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCF, 0x89, 0xCC, + 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, + 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xF6, 0xCE, 0xA9, + 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, + 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, + 0xCC, 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, + 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, 0x82, + 
0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x82, 0xF6, + 0xCE, 0xB1, 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, + 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x80, 0xF6, 0xCE, + 0xB5, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, 0x80, + 0xF6, 0xCE, 0xB7, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, + 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, 0xF6, + 0xCE, 0xBF, 0xCC, 0x80, 0xF6, 0xCE, 0xBF, 0xCC, + 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x80, 0xF6, 0xCF, + 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, 0x80, + 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xF6, 0xCE, 0xB1, + 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, + 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, + 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, + 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, + 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, + 0xB1, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, 0xF6, + 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xCD, 0x85, + 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCD, 0x82, 0xCD, + 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCD, 0x85, + 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCD, 0x85, 0xF6, + 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xCD, 0x85, + 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCC, 0x80, 0xCD, + 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x81, + 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, + 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, + 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, + 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, + 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x93, + 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, + 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, + 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, + 0xB7, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, 0xF6, + 0xCE, 0xB7, 0xCC, 0x93, 0xCD, 0x82, 0xCD, 0x85, + 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xCD, 0x82, 0xCD, + 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCD, 0x85, + 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCD, 0x85, 0xF6, + 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x80, 0xCD, 0x85, + 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x80, 0xCD, + 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x81, + 
0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, + 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, + 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCF, 0x89, + 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, + 0x94, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x93, + 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, + 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, + 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCF, + 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, 0xF6, + 0xCF, 0x89, 0xCC, 0x93, 0xCD, 0x82, 0xCD, 0x85, + 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCD, 0x82, 0xCD, + 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, 0x85, + 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x85, 0xF6, + 0xCE, 0xA9, 0xCC, 0x93, 0xCC, 0x80, 0xCD, 0x85, + 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, 0x80, 0xCD, + 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCC, 0x81, + 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, + 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, + 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, + 0xCC, 0x86, 0xF6, 0xCE, 0xB1, 0xCC, 0x84, 0xF6, + 0xCE, 0xB1, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, + 0xB1, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, 0x81, + 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCD, 0x82, 0xF6, + 0xCE, 0xB1, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, + 0x91, 0xCC, 0x86, 0xF6, 0xCE, 0x91, 0xCC, 0x84, + 0xF6, 0xCE, 0x91, 0xCC, 0x80, 0xF6, 0xCE, 0x91, + 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCD, 0x85, 0x20, + 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0x20, 0xCC, 0x93, + 0x20, 0xCD, 0x82, 0xF5, 0x05, 0xC2, 0xA8, 0xCD, + 0x82, 0x20, 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, + 0xB7, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, + 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x81, 0xCD, + 0x85, 0xF6, 0xCE, 0xB7, 0xCD, 0x82, 0xF6, 0xCE, + 0xB7, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x95, + 0xCC, 0x80, 0xF6, 0xCE, 0x95, 0xCC, 0x81, 0xF6, + 0xCE, 0x97, 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, + 0x81, 0xF6, 0xCE, 0x97, 0xCD, 0x85, 0xF5, 0x06, + 0xE1, 0xBE, 0xBF, 0xCC, 0x80, 0x20, 0xCC, 0x93, + 
0xCC, 0x80, 0xF5, 0x06, 0xE1, 0xBE, 0xBF, 0xCC, + 0x81, 0x20, 0xCC, 0x93, 0xCC, 0x81, 0xF5, 0x06, + 0xE1, 0xBE, 0xBF, 0xCD, 0x82, 0x20, 0xCC, 0x93, + 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x86, 0xF6, + 0xCE, 0xB9, 0xCC, 0x84, 0xF6, 0xCE, 0xB9, 0xCC, + 0x88, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, 0x88, + 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCD, 0x82, 0xF6, + 0xCE, 0xB9, 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, + 0x99, 0xCC, 0x86, 0xF6, 0xCE, 0x99, 0xCC, 0x84, + 0xF6, 0xCE, 0x99, 0xCC, 0x80, 0xF6, 0xCE, 0x99, + 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, 0xCC, + 0x80, 0x20, 0xCC, 0x94, 0xCC, 0x80, 0xF5, 0x06, + 0xE1, 0xBF, 0xBE, 0xCC, 0x81, 0x20, 0xCC, 0x94, + 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, 0xCD, + 0x82, 0x20, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCF, + 0x85, 0xCC, 0x86, 0xF6, 0xCF, 0x85, 0xCC, 0x84, + 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x80, 0xF6, + 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0xCF, + 0x81, 0xCC, 0x93, 0xF6, 0xCF, 0x81, 0xCC, 0x94, + 0xF6, 0xCF, 0x85, 0xCD, 0x82, 0xF6, 0xCF, 0x85, + 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, 0xA5, 0xCC, + 0x86, 0xF6, 0xCE, 0xA5, 0xCC, 0x84, 0xF6, 0xCE, + 0xA5, 0xCC, 0x80, 0xF6, 0xCE, 0xA5, 0xCC, 0x81, + 0xF6, 0xCE, 0xA1, 0xCC, 0x94, 0xF5, 0x05, 0xC2, + 0xA8, 0xCC, 0x80, 0x20, 0xCC, 0x88, 0xCC, 0x80, + 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, 0x20, 0xCC, + 0x88, 0xCC, 0x81, 0xF6, 0x60, 0xF6, 0xCF, 0x89, + 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCD, + 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xCD, 0x85, + 0xF6, 0xCF, 0x89, 0xCD, 0x82, 0xF6, 0xCF, 0x89, + 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x9F, 0xCC, + 0x80, 0xF6, 0xCE, 0x9F, 0xCC, 0x81, 0xF6, 0xCE, + 0xA9, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, 0x81, + 0xF6, 0xCE, 0xA9, 0xCD, 0x85, 0xF5, 0x03, 0xC2, + 0xB4, 0x20, 0xCC, 0x81, 0x20, 0xCC, 0x94, 0xF5, + 0x04, 0xE2, 0x80, 0x82, 0x20, 0xF5, 0x04, 0xE2, + 0x80, 0x83, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0xE2, 0x80, 0x90, 0x20, + 0xCC, 0xB3, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, + 0x20, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0xE2, + 
0x80, 0xB2, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, + 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0xE2, 0x80, + 0xB5, 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0x21, + 0x21, 0x20, 0xCC, 0x85, 0x3F, 0x3F, 0x3F, 0x21, + 0x21, 0x3F, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, + 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0x20, 0x30, + 0x69, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, + 0xE2, 0x88, 0x92, 0x3D, 0x28, 0x29, 0x6E, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x2B, 0xE2, 0x88, 0x92, 0x3D, 0x28, 0x29, + 0x52, 0x73, 0x61, 0x2F, 0x63, 0x61, 0x2F, 0x73, + 0x43, 0xC2, 0xB0, 0x43, 0x63, 0x2F, 0x6F, 0x63, + 0x2F, 0x75, 0xC6, 0x90, 0xC2, 0xB0, 0x46, 0x67, + 0x48, 0x48, 0x48, 0x68, 0xC4, 0xA7, 0x49, 0x49, + 0x4C, 0x6C, 0x4E, 0x4E, 0x6F, 0x50, 0x51, 0x52, + 0x52, 0x52, 0x53, 0x4D, 0x54, 0x45, 0x4C, 0x54, + 0x4D, 0x5A, 0xF6, 0xCE, 0xA9, 0x5A, 0xF6, 0x4B, + 0xF6, 0x41, 0xCC, 0x8A, 0x42, 0x43, 0x65, 0x45, + 0x46, 0x4D, 0x6F, 0xD7, 0x90, 0xD7, 0x91, 0xD7, + 0x92, 0xD7, 0x93, 0x69, 0xCE, 0xB3, 0xCE, 0x93, + 0xCE, 0xA0, 0xE2, 0x88, 0x91, 0x44, 0x64, 0x65, + 0x69, 0x6A, 0x31, 0xE2, 0x81, 0x84, 0x33, 0x32, + 0xE2, 0x81, 0x84, 0x33, 0x31, 0xE2, 0x81, 0x84, + 0x35, 0x32, 0xE2, 0x81, 0x84, 0x35, 0x33, 0xE2, + 0x81, 0x84, 0x35, 0x34, 0xE2, 0x81, 0x84, 0x35, + 0x31, 0xE2, 0x81, 0x84, 0x36, 0x35, 0xE2, 0x81, + 0x84, 0x36, 0x31, 0xE2, 0x81, 0x84, 0x38, 0x33, + 0xE2, 0x81, 0x84, 0x38, 0x35, 0xE2, 0x81, 0x84, + 0x38, 0x37, 0xE2, 0x81, 0x84, 0x38, 0x31, 0xE2, + 0x81, 0x84, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, + 0x49, 0x56, 0x56, 0x56, 0x49, 0x56, 0x49, 0x49, + 0x56, 0x49, 0x49, 0x49, 0x49, 0x58, 0x58, 0x58, + 0x49, 0x58, 0x49, 0x49, 0x4C, 0x43, 0x44, 0x4D, + 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x76, + 0x76, 0x76, 0x69, 0x76, 0x69, 0x69, 0x76, 0x69, + 0x69, 0x69, 0x69, 0x78, 0x78, 0x78, 0x69, 0x78, + 0x69, 0x69, 0x6C, 0x63, 0x64, 0x6D, 0xF6, 0xE2, + 0x86, 0x90, 0xCC, 0xB8, 0xF6, 0xE2, 0x86, 0x92, + 0xCC, 0xB8, 0xF6, 0xE2, 0x86, 0x94, 0xCC, 0xB8, + 0xF6, 0xE2, 0x87, 0x90, 0xCC, 0xB8, 0xF6, 0xE2, + 
0x87, 0x94, 0xCC, 0xB8, 0xF6, 0xE2, 0x87, 0x92, + 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0x83, 0xCC, 0xB8, + 0xF6, 0xE2, 0x88, 0x88, 0xCC, 0xB8, 0xF6, 0xE2, + 0x88, 0x8B, 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0xA3, + 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0xA5, 0xCC, 0xB8, + 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, + 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, + 0x88, 0xAE, 0xE2, 0x88, 0xAE, 0xE2, 0x88, 0xAE, + 0xE2, 0x88, 0xAE, 0xE2, 0x88, 0xAE, 0xF6, 0xE2, + 0x88, 0xBC, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x83, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x85, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0x88, 0xCC, 0xB8, 0xF6, 0x3D, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xA1, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0x8D, 0xCC, 0xB8, 0xF6, 0x3C, + 0xCC, 0xB8, 0xF6, 0x3E, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0xA4, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xA5, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB2, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0xB3, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0xB6, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB7, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBA, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0xBB, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0x82, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x83, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x86, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0x87, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0xA2, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xA8, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xA9, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0xAB, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0xBC, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBD, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x91, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0x92, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0xB2, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB3, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB4, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0xB5, 0xCC, 0xB8, 0xF6, 0xE3, + 0x80, 0x88, 0xF6, 0xE3, 0x80, 0x89, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x31, + 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, + 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, 0x31, + 0x38, 0x31, 0x39, 0x32, 0x30, 0x28, 0x31, 0x29, + 0x28, 0x32, 0x29, 0x28, 0x33, 0x29, 0x28, 0x34, + 0x29, 0x28, 0x35, 0x29, 0x28, 0x36, 0x29, 0x28, + 
0x37, 0x29, 0x28, 0x38, 0x29, 0x28, 0x39, 0x29, + 0x28, 0x31, 0x30, 0x29, 0x28, 0x31, 0x31, 0x29, + 0x28, 0x31, 0x32, 0x29, 0x28, 0x31, 0x33, 0x29, + 0x28, 0x31, 0x34, 0x29, 0x28, 0x31, 0x35, 0x29, + 0x28, 0x31, 0x36, 0x29, 0x28, 0x31, 0x37, 0x29, + 0x28, 0x31, 0x38, 0x29, 0x28, 0x31, 0x39, 0x29, + 0x28, 0x32, 0x30, 0x29, 0x31, 0x2E, 0x32, 0x2E, + 0x33, 0x2E, 0x34, 0x2E, 0x35, 0x2E, 0x36, 0x2E, + 0x37, 0x2E, 0x38, 0x2E, 0x39, 0x2E, 0x31, 0x30, + 0x2E, 0x31, 0x31, 0x2E, 0x31, 0x32, 0x2E, 0x31, + 0x33, 0x2E, 0x31, 0x34, 0x2E, 0x31, 0x35, 0x2E, + 0x31, 0x36, 0x2E, 0x31, 0x37, 0x2E, 0x31, 0x38, + 0x2E, 0x31, 0x39, 0x2E, 0x32, 0x30, 0x2E, 0x28, + 0x61, 0x29, 0x28, 0x62, 0x29, 0x28, 0x63, 0x29, + 0x28, 0x64, 0x29, 0x28, 0x65, 0x29, 0x28, 0x66, + 0x29, 0x28, 0x67, 0x29, 0x28, 0x68, 0x29, 0x28, + 0x69, 0x29, 0x28, 0x6A, 0x29, 0x28, 0x6B, 0x29, + 0x28, 0x6C, 0x29, 0x28, 0x6D, 0x29, 0x28, 0x6E, + 0x29, 0x28, 0x6F, 0x29, 0x28, 0x70, 0x29, 0x28, + 0x71, 0x29, 0x28, 0x72, 0x29, 0x28, 0x73, 0x29, + 0x28, 0x74, 0x29, 0x28, 0x75, 0x29, 0x28, 0x76, + 0x29, 0x28, 0x77, 0x29, 0x28, 0x78, 0x29, 0x28, + 0x79, 0x29, 0x28, 0x7A, 0x29, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x30, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, + 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0x3A, 0x3A, + 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0xF6, 0xE2, + 0xAB, 0x9D, 0xCC, 0xB8, 0xE6, 0xAF, 0x8D, 0xE9, + 0xBE, 0x9F, 0xE4, 0xB8, 0x80, 0xE4, 0xB8, 0xA8, + 0xE4, 0xB8, 0xB6, 0xE4, 0xB8, 0xBF, 0xE4, 0xB9, + 0x99, 0xE4, 0xBA, 0x85, 0xE4, 0xBA, 0x8C, 0xE4, + 0xBA, 0xA0, 0xE4, 0xBA, 0xBA, 0xE5, 0x84, 0xBF, + 0xE5, 0x85, 0xA5, 0xE5, 0x85, 0xAB, 0xE5, 0x86, + 0x82, 0xE5, 0x86, 0x96, 0xE5, 0x86, 0xAB, 0xE5, + 0x87, 0xA0, 0xE5, 0x87, 0xB5, 0xE5, 0x88, 0x80, + 
0xE5, 0x8A, 0x9B, 0xE5, 0x8B, 0xB9, 0xE5, 0x8C, + 0x95, 0xE5, 0x8C, 0x9A, 0xE5, 0x8C, 0xB8, 0xE5, + 0x8D, 0x81, 0xE5, 0x8D, 0x9C, 0xE5, 0x8D, 0xA9, + 0xE5, 0x8E, 0x82, 0xE5, 0x8E, 0xB6, 0xE5, 0x8F, + 0x88, 0xE5, 0x8F, 0xA3, 0xE5, 0x9B, 0x97, 0xE5, + 0x9C, 0x9F, 0xE5, 0xA3, 0xAB, 0xE5, 0xA4, 0x82, + 0xE5, 0xA4, 0x8A, 0xE5, 0xA4, 0x95, 0xE5, 0xA4, + 0xA7, 0xE5, 0xA5, 0xB3, 0xE5, 0xAD, 0x90, 0xE5, + 0xAE, 0x80, 0xE5, 0xAF, 0xB8, 0xE5, 0xB0, 0x8F, + 0xE5, 0xB0, 0xA2, 0xE5, 0xB0, 0xB8, 0xE5, 0xB1, + 0xAE, 0xE5, 0xB1, 0xB1, 0xE5, 0xB7, 0x9B, 0xE5, + 0xB7, 0xA5, 0xE5, 0xB7, 0xB1, 0xE5, 0xB7, 0xBE, + 0xE5, 0xB9, 0xB2, 0xE5, 0xB9, 0xBA, 0xE5, 0xB9, + 0xBF, 0xE5, 0xBB, 0xB4, 0xE5, 0xBB, 0xBE, 0xE5, + 0xBC, 0x8B, 0xE5, 0xBC, 0x93, 0xE5, 0xBD, 0x90, + 0xE5, 0xBD, 0xA1, 0xE5, 0xBD, 0xB3, 0xE5, 0xBF, + 0x83, 0xE6, 0x88, 0x88, 0xE6, 0x88, 0xB6, 0xE6, + 0x89, 0x8B, 0xE6, 0x94, 0xAF, 0xE6, 0x94, 0xB4, + 0xE6, 0x96, 0x87, 0xE6, 0x96, 0x97, 0xE6, 0x96, + 0xA4, 0xE6, 0x96, 0xB9, 0xE6, 0x97, 0xA0, 0xE6, + 0x97, 0xA5, 0xE6, 0x9B, 0xB0, 0xE6, 0x9C, 0x88, + 0xE6, 0x9C, 0xA8, 0xE6, 0xAC, 0xA0, 0xE6, 0xAD, + 0xA2, 0xE6, 0xAD, 0xB9, 0xE6, 0xAE, 0xB3, 0xE6, + 0xAF, 0x8B, 0xE6, 0xAF, 0x94, 0xE6, 0xAF, 0x9B, + 0xE6, 0xB0, 0x8F, 0xE6, 0xB0, 0x94, 0xE6, 0xB0, + 0xB4, 0xE7, 0x81, 0xAB, 0xE7, 0x88, 0xAA, 0xE7, + 0x88, 0xB6, 0xE7, 0x88, 0xBB, 0xE7, 0x88, 0xBF, + 0xE7, 0x89, 0x87, 0xE7, 0x89, 0x99, 0xE7, 0x89, + 0x9B, 0xE7, 0x8A, 0xAC, 0xE7, 0x8E, 0x84, 0xE7, + 0x8E, 0x89, 0xE7, 0x93, 0x9C, 0xE7, 0x93, 0xA6, + 0xE7, 0x94, 0x98, 0xE7, 0x94, 0x9F, 0xE7, 0x94, + 0xA8, 0xE7, 0x94, 0xB0, 0xE7, 0x96, 0x8B, 0xE7, + 0x96, 0x92, 0xE7, 0x99, 0xB6, 0xE7, 0x99, 0xBD, + 0xE7, 0x9A, 0xAE, 0xE7, 0x9A, 0xBF, 0xE7, 0x9B, + 0xAE, 0xE7, 0x9F, 0x9B, 0xE7, 0x9F, 0xA2, 0xE7, + 0x9F, 0xB3, 0xE7, 0xA4, 0xBA, 0xE7, 0xA6, 0xB8, + 0xE7, 0xA6, 0xBE, 0xE7, 0xA9, 0xB4, 0xE7, 0xAB, + 0x8B, 0xE7, 0xAB, 0xB9, 0xE7, 0xB1, 0xB3, 0xE7, + 0xB3, 0xB8, 0xE7, 0xBC, 0xB6, 0xE7, 0xBD, 0x91, + 0xE7, 0xBE, 0x8A, 0xE7, 0xBE, 0xBD, 0xE8, 0x80, + 
0x81, 0xE8, 0x80, 0x8C, 0xE8, 0x80, 0x92, 0xE8, + 0x80, 0xB3, 0xE8, 0x81, 0xBF, 0xE8, 0x82, 0x89, + 0xE8, 0x87, 0xA3, 0xE8, 0x87, 0xAA, 0xE8, 0x87, + 0xB3, 0xE8, 0x87, 0xBC, 0xE8, 0x88, 0x8C, 0xE8, + 0x88, 0x9B, 0xE8, 0x88, 0x9F, 0xE8, 0x89, 0xAE, + 0xE8, 0x89, 0xB2, 0xE8, 0x89, 0xB8, 0xE8, 0x99, + 0x8D, 0xE8, 0x99, 0xAB, 0xE8, 0xA1, 0x80, 0xE8, + 0xA1, 0x8C, 0xE8, 0xA1, 0xA3, 0xE8, 0xA5, 0xBE, + 0xE8, 0xA6, 0x8B, 0xE8, 0xA7, 0x92, 0xE8, 0xA8, + 0x80, 0xE8, 0xB0, 0xB7, 0xE8, 0xB1, 0x86, 0xE8, + 0xB1, 0x95, 0xE8, 0xB1, 0xB8, 0xE8, 0xB2, 0x9D, + 0xE8, 0xB5, 0xA4, 0xE8, 0xB5, 0xB0, 0xE8, 0xB6, + 0xB3, 0xE8, 0xBA, 0xAB, 0xE8, 0xBB, 0x8A, 0xE8, + 0xBE, 0x9B, 0xE8, 0xBE, 0xB0, 0xE8, 0xBE, 0xB5, + 0xE9, 0x82, 0x91, 0xE9, 0x85, 0x89, 0xE9, 0x87, + 0x86, 0xE9, 0x87, 0x8C, 0xE9, 0x87, 0x91, 0xE9, + 0x95, 0xB7, 0xE9, 0x96, 0x80, 0xE9, 0x98, 0x9C, + 0xE9, 0x9A, 0xB6, 0xE9, 0x9A, 0xB9, 0xE9, 0x9B, + 0xA8, 0xE9, 0x9D, 0x91, 0xE9, 0x9D, 0x9E, 0xE9, + 0x9D, 0xA2, 0xE9, 0x9D, 0xA9, 0xE9, 0x9F, 0x8B, + 0xE9, 0x9F, 0xAD, 0xE9, 0x9F, 0xB3, 0xE9, 0xA0, + 0x81, 0xE9, 0xA2, 0xA8, 0xE9, 0xA3, 0x9B, 0xE9, + 0xA3, 0x9F, 0xE9, 0xA6, 0x96, 0xE9, 0xA6, 0x99, + 0xE9, 0xA6, 0xAC, 0xE9, 0xAA, 0xA8, 0xE9, 0xAB, + 0x98, 0xE9, 0xAB, 0x9F, 0xE9, 0xAC, 0xA5, 0xE9, + 0xAC, 0xAF, 0xE9, 0xAC, 0xB2, 0xE9, 0xAC, 0xBC, + 0xE9, 0xAD, 0x9A, 0xE9, 0xB3, 0xA5, 0xE9, 0xB9, + 0xB5, 0xE9, 0xB9, 0xBF, 0xE9, 0xBA, 0xA5, 0xE9, + 0xBA, 0xBB, 0xE9, 0xBB, 0x83, 0xE9, 0xBB, 0x8D, + 0xE9, 0xBB, 0x91, 0xE9, 0xBB, 0xB9, 0xE9, 0xBB, + 0xBD, 0xE9, 0xBC, 0x8E, 0xE9, 0xBC, 0x93, 0xE9, + 0xBC, 0xA0, 0xE9, 0xBC, 0xBB, 0xE9, 0xBD, 0x8A, + 0xE9, 0xBD, 0x92, 0xE9, 0xBE, 0x8D, 0xE9, 0xBE, + 0x9C, 0xE9, 0xBE, 0xA0, 0x20, 0xE3, 0x80, 0x92, + 0xE5, 0x8D, 0x81, 0xE5, 0x8D, 0x84, 0xE5, 0x8D, + 0x85, 0xF6, 0xE3, 0x81, 0x8B, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x81, 0x8D, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x81, 0x8F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x81, 0x91, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, + 0x93, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x95, + 
0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x97, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x99, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x81, 0x9B, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x81, 0x9D, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x81, 0x9F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x81, 0xA1, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, + 0xA4, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA6, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA8, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, 0x9A, + 0xF6, 0xE3, 0x81, 0xB2, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x81, 0xB2, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, + 0x81, 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, + 0xB5, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x81, 0xB8, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xB8, 0xE3, + 0x82, 0x9A, 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x9A, + 0xF6, 0xE3, 0x81, 0x86, 0xE3, 0x82, 0x99, 0x20, + 0xE3, 0x82, 0x99, 0x20, 0xE3, 0x82, 0x9A, 0xF6, + 0xE3, 0x82, 0x9D, 0xE3, 0x82, 0x99, 0xE3, 0x82, + 0x88, 0xE3, 0x82, 0x8A, 0xF6, 0xE3, 0x82, 0xAB, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xAD, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xAF, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x82, 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x82, 0xB7, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, + 0xB9, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBB, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBD, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBF, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x83, 0x81, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x83, 0x84, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x83, 0x86, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x83, 0x88, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, + 0x8F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x8F, + 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x83, 0x92, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x92, 0xE3, 0x82, + 0x9A, 0xF6, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x9A, 0xF6, + 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 
0x83, 0x98, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x83, + 0x9B, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x9B, + 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x82, 0xA6, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x83, 0xAF, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x83, 0xB0, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x83, 0xB1, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x83, 0xB2, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x83, 0xBD, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xB3, + 0xE3, 0x83, 0x88, 0xE1, 0x84, 0x80, 0xE1, 0x84, + 0x81, 0xE1, 0x86, 0xAA, 0xE1, 0x84, 0x82, 0xE1, + 0x86, 0xAC, 0xE1, 0x86, 0xAD, 0xE1, 0x84, 0x83, + 0xE1, 0x84, 0x84, 0xE1, 0x84, 0x85, 0xE1, 0x86, + 0xB0, 0xE1, 0x86, 0xB1, 0xE1, 0x86, 0xB2, 0xE1, + 0x86, 0xB3, 0xE1, 0x86, 0xB4, 0xE1, 0x86, 0xB5, + 0xE1, 0x84, 0x9A, 0xE1, 0x84, 0x86, 0xE1, 0x84, + 0x87, 0xE1, 0x84, 0x88, 0xE1, 0x84, 0xA1, 0xE1, + 0x84, 0x89, 0xE1, 0x84, 0x8A, 0xE1, 0x84, 0x8B, + 0xE1, 0x84, 0x8C, 0xE1, 0x84, 0x8D, 0xE1, 0x84, + 0x8E, 0xE1, 0x84, 0x8F, 0xE1, 0x84, 0x90, 0xE1, + 0x84, 0x91, 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, + 0xE1, 0x85, 0xA2, 0xE1, 0x85, 0xA3, 0xE1, 0x85, + 0xA4, 0xE1, 0x85, 0xA5, 0xE1, 0x85, 0xA6, 0xE1, + 0x85, 0xA7, 0xE1, 0x85, 0xA8, 0xE1, 0x85, 0xA9, + 0xE1, 0x85, 0xAA, 0xE1, 0x85, 0xAB, 0xE1, 0x85, + 0xAC, 0xE1, 0x85, 0xAD, 0xE1, 0x85, 0xAE, 0xE1, + 0x85, 0xAF, 0xE1, 0x85, 0xB0, 0xE1, 0x85, 0xB1, + 0xE1, 0x85, 0xB2, 0xE1, 0x85, 0xB3, 0xE1, 0x85, + 0xB4, 0xE1, 0x85, 0xB5, 0xE1, 0x85, 0xA0, 0xE1, + 0x84, 0x94, 0xE1, 0x84, 0x95, 0xE1, 0x87, 0x87, + 0xE1, 0x87, 0x88, 0xE1, 0x87, 0x8C, 0xE1, 0x87, + 0x8E, 0xE1, 0x87, 0x93, 0xE1, 0x87, 0x97, 0xE1, + 0x87, 0x99, 0xE1, 0x84, 0x9C, 0xE1, 0x87, 0x9D, + 0xE1, 0x87, 0x9F, 0xE1, 0x84, 0x9D, 0xE1, 0x84, + 0x9E, 0xE1, 0x84, 0xA0, 0xE1, 0x84, 0xA2, 0xE1, + 0x84, 0xA3, 0xE1, 0x84, 0xA7, 0xE1, 0x84, 0xA9, + 0xE1, 0x84, 0xAB, 0xE1, 0x84, 0xAC, 0xE1, 0x84, + 0xAD, 0xE1, 0x84, 0xAE, 0xE1, 0x84, 0xAF, 0xE1, + 0x84, 0xB2, 0xE1, 0x84, 0xB6, 0xE1, 0x85, 0x80, + 0xE1, 0x85, 0x87, 0xE1, 0x85, 0x8C, 0xE1, 0x87, + 0xB1, 0xE1, 0x87, 0xB2, 0xE1, 0x85, 0x97, 0xE1, + 
0x85, 0x98, 0xE1, 0x85, 0x99, 0xE1, 0x86, 0x84, + 0xE1, 0x86, 0x85, 0xE1, 0x86, 0x88, 0xE1, 0x86, + 0x91, 0xE1, 0x86, 0x92, 0xE1, 0x86, 0x94, 0xE1, + 0x86, 0x9E, 0xE1, 0x86, 0xA1, 0xE4, 0xB8, 0x80, + 0xE4, 0xBA, 0x8C, 0xE4, 0xB8, 0x89, 0xE5, 0x9B, + 0x9B, 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, 0xAD, 0xE4, + 0xB8, 0x8B, 0xE7, 0x94, 0xB2, 0xE4, 0xB9, 0x99, + 0xE4, 0xB8, 0x99, 0xE4, 0xB8, 0x81, 0xE5, 0xA4, + 0xA9, 0xE5, 0x9C, 0xB0, 0xE4, 0xBA, 0xBA, 0x28, + 0xE1, 0x84, 0x80, 0x29, 0x28, 0xE1, 0x84, 0x82, + 0x29, 0x28, 0xE1, 0x84, 0x83, 0x29, 0x28, 0xE1, + 0x84, 0x85, 0x29, 0x28, 0xE1, 0x84, 0x86, 0x29, + 0x28, 0xE1, 0x84, 0x87, 0x29, 0x28, 0xE1, 0x84, + 0x89, 0x29, 0x28, 0xE1, 0x84, 0x8B, 0x29, 0x28, + 0xE1, 0x84, 0x8C, 0x29, 0x28, 0xE1, 0x84, 0x8E, + 0x29, 0x28, 0xE1, 0x84, 0x8F, 0x29, 0x28, 0xE1, + 0x84, 0x90, 0x29, 0x28, 0xE1, 0x84, 0x91, 0x29, + 0x28, 0xE1, 0x84, 0x92, 0x29, 0x28, 0xE1, 0x84, + 0x80, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x82, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x83, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x85, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x86, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x87, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x89, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x8B, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x8C, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x8E, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x8F, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x90, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x91, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x92, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, + 0x8C, 0xE1, 0x85, 0xAE, 0x29, 0x28, 0xE4, 0xB8, + 0x80, 0x29, 0x28, 0xE4, 0xBA, 0x8C, 0x29, 0x28, + 0xE4, 0xB8, 0x89, 0x29, 0x28, 0xE5, 0x9B, 0x9B, + 0x29, 0x28, 0xE4, 0xBA, 0x94, 0x29, 0x28, 0xE5, + 0x85, 0xAD, 0x29, 0x28, 0xE4, 0xB8, 0x83, 0x29, + 0x28, 0xE5, 0x85, 0xAB, 0x29, 0x28, 0xE4, 0xB9, + 0x9D, 0x29, 0x28, 0xE5, 0x8D, 0x81, 0x29, 0x28, + 0xE6, 0x9C, 0x88, 0x29, 0x28, 0xE7, 0x81, 0xAB, + 
0x29, 0x28, 0xE6, 0xB0, 0xB4, 0x29, 0x28, 0xE6, + 0x9C, 0xA8, 0x29, 0x28, 0xE9, 0x87, 0x91, 0x29, + 0x28, 0xE5, 0x9C, 0x9F, 0x29, 0x28, 0xE6, 0x97, + 0xA5, 0x29, 0x28, 0xE6, 0xA0, 0xAA, 0x29, 0x28, + 0xE6, 0x9C, 0x89, 0x29, 0x28, 0xE7, 0xA4, 0xBE, + 0x29, 0x28, 0xE5, 0x90, 0x8D, 0x29, 0x28, 0xE7, + 0x89, 0xB9, 0x29, 0x28, 0xE8, 0xB2, 0xA1, 0x29, + 0x28, 0xE7, 0xA5, 0x9D, 0x29, 0x28, 0xE5, 0x8A, + 0xB4, 0x29, 0x28, 0xE4, 0xBB, 0xA3, 0x29, 0x28, + 0xE5, 0x91, 0xBC, 0x29, 0x28, 0xE5, 0xAD, 0xA6, + 0x29, 0x28, 0xE7, 0x9B, 0xA3, 0x29, 0x28, 0xE4, + 0xBC, 0x81, 0x29, 0x28, 0xE8, 0xB3, 0x87, 0x29, + 0x28, 0xE5, 0x8D, 0x94, 0x29, 0x28, 0xE7, 0xA5, + 0xAD, 0x29, 0x28, 0xE4, 0xBC, 0x91, 0x29, 0x28, + 0xE8, 0x87, 0xAA, 0x29, 0x28, 0xE8, 0x87, 0xB3, + 0x29, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, + 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, + 0x38, 0x32, 0x39, 0x33, 0x30, 0x33, 0x31, 0x33, + 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, 0xE1, + 0x84, 0x80, 0xE1, 0x84, 0x82, 0xE1, 0x84, 0x83, + 0xE1, 0x84, 0x85, 0xE1, 0x84, 0x86, 0xE1, 0x84, + 0x87, 0xE1, 0x84, 0x89, 0xE1, 0x84, 0x8B, 0xE1, + 0x84, 0x8C, 0xE1, 0x84, 0x8E, 0xE1, 0x84, 0x8F, + 0xE1, 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, 0x84, + 0x92, 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xA1, 0xE1, + 0x84, 0x82, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x83, + 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x85, 0xE1, 0x85, + 0xA1, 0xE1, 0x84, 0x86, 0xE1, 0x85, 0xA1, 0xE1, + 0x84, 0x87, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x89, + 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x8B, 0xE1, 0x85, + 0xA1, 0xE1, 0x84, 0x8C, 0xE1, 0x85, 0xA1, 0xE1, + 0x84, 0x8E, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x8F, + 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x90, 0xE1, 0x85, + 0xA1, 0xE1, 0x84, 0x91, 0xE1, 0x85, 0xA1, 0xE1, + 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE4, 0xB8, 0x80, + 0xE4, 0xBA, 0x8C, 0xE4, 0xB8, 0x89, 0xE5, 0x9B, + 0x9B, 0xE4, 0xBA, 0x94, 0xE5, 0x85, 0xAD, 0xE4, + 0xB8, 0x83, 0xE5, 0x85, 0xAB, 0xE4, 0xB9, 0x9D, + 0xE5, 0x8D, 0x81, 0xE6, 0x9C, 0x88, 0xE7, 0x81, + 0xAB, 0xE6, 0xB0, 0xB4, 0xE6, 0x9C, 0xA8, 0xE9, + 
0x87, 0x91, 0xE5, 0x9C, 0x9F, 0xE6, 0x97, 0xA5, + 0xE6, 0xA0, 0xAA, 0xE6, 0x9C, 0x89, 0xE7, 0xA4, + 0xBE, 0xE5, 0x90, 0x8D, 0xE7, 0x89, 0xB9, 0xE8, + 0xB2, 0xA1, 0xE7, 0xA5, 0x9D, 0xE5, 0x8A, 0xB4, + 0xE7, 0xA7, 0x98, 0xE7, 0x94, 0xB7, 0xE5, 0xA5, + 0xB3, 0xE9, 0x81, 0xA9, 0xE5, 0x84, 0xAA, 0xE5, + 0x8D, 0xB0, 0xE6, 0xB3, 0xA8, 0xE9, 0xA0, 0x85, + 0xE4, 0xBC, 0x91, 0xE5, 0x86, 0x99, 0xE6, 0xAD, + 0xA3, 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, 0xAD, 0xE4, + 0xB8, 0x8B, 0xE5, 0xB7, 0xA6, 0xE5, 0x8F, 0xB3, + 0xE5, 0x8C, 0xBB, 0xE5, 0xAE, 0x97, 0xE5, 0xAD, + 0xA6, 0xE7, 0x9B, 0xA3, 0xE4, 0xBC, 0x81, 0xE8, + 0xB3, 0x87, 0xE5, 0x8D, 0x94, 0xE5, 0xA4, 0x9C, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, + 0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, + 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x31, 0xE6, + 0x9C, 0x88, 0x32, 0xE6, 0x9C, 0x88, 0x33, 0xE6, + 0x9C, 0x88, 0x34, 0xE6, 0x9C, 0x88, 0x35, 0xE6, + 0x9C, 0x88, 0x36, 0xE6, 0x9C, 0x88, 0x37, 0xE6, + 0x9C, 0x88, 0x38, 0xE6, 0x9C, 0x88, 0x39, 0xE6, + 0x9C, 0x88, 0x31, 0x30, 0xE6, 0x9C, 0x88, 0x31, + 0x31, 0xE6, 0x9C, 0x88, 0x31, 0x32, 0xE6, 0x9C, + 0x88, 0xE3, 0x82, 0xA2, 0xE3, 0x82, 0xA4, 0xE3, + 0x82, 0xA6, 0xE3, 0x82, 0xA8, 0xE3, 0x82, 0xAA, + 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0xAD, 0xE3, 0x82, + 0xAF, 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0xB3, 0xE3, + 0x82, 0xB5, 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0xB9, + 0xE3, 0x82, 0xBB, 0xE3, 0x82, 0xBD, 0xE3, 0x82, + 0xBF, 0xE3, 0x83, 0x81, 0xE3, 0x83, 0x84, 0xE3, + 0x83, 0x86, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x8A, + 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0x8C, 0xE3, 0x83, + 0x8D, 0xE3, 0x83, 0x8E, 0xE3, 0x83, 0x8F, 0xE3, + 0x83, 0x92, 0xE3, 0x83, 0x95, 0xE3, 0x83, 0x98, + 0xE3, 0x83, 0x9B, 0xE3, 0x83, 0x9E, 0xE3, 0x83, + 0x9F, 0xE3, 0x83, 0xA0, 0xE3, 0x83, 0xA1, 0xE3, + 0x83, 0xA2, 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xA6, + 0xE3, 0x83, 0xA8, 0xE3, 0x83, 0xA9, 0xE3, 0x83, + 0xAA, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xAC, 0xE3, + 0x83, 0xAD, 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0xB0, + 
0xE3, 0x83, 0xB1, 0xE3, 0x83, 0xB2, 0xE3, 0x82, + 0xA2, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x9A, 0xE3, + 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xA2, + 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x95, 0xE3, 0x82, + 0xA1, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xB3, 0xE3, + 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xA2, + 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xBC, 0xE3, 0x83, + 0xAB, 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0x8B, 0xE3, + 0x83, 0xB3, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, + 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0x81, 0xE3, 0x82, 0xA6, 0xE3, 0x82, 0xA9, 0xE3, + 0x83, 0xB3, 0xE3, 0x82, 0xA8, 0xE3, 0x82, 0xB9, + 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xBC, 0xE3, 0x83, + 0x88, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xA8, 0xE3, + 0x83, 0xBC, 0xE3, 0x82, 0xAB, 0xE3, 0x83, 0xBC, + 0xE3, 0x82, 0xAA, 0xE3, 0x83, 0xB3, 0xE3, 0x82, + 0xB9, 0xE3, 0x82, 0xAA, 0xE3, 0x83, 0xBC, 0xE3, + 0x83, 0xA0, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0xA4, + 0xE3, 0x83, 0xAA, 0xE3, 0x82, 0xAB, 0xE3, 0x83, + 0xA9, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, + 0x82, 0xAB, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xAA, + 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAB, 0xE3, 0x82, + 0x99, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xB3, 0xE3, + 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xB3, + 0xE3, 0x83, 0x9E, 0xE3, 0x82, 0xAD, 0xE3, 0x82, + 0x99, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, + 0x82, 0xAD, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x8B, + 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAD, 0xE3, 0x83, + 0xA5, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xBC, 0xE3, + 0x82, 0xAD, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAB, + 0xE3, 0x82, 0xBF, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0xBC, 0xE3, 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, + 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, 0x82, 0xAF, + 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA9, 0xE3, 0x83, + 0xA0, 0xE3, 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, + 0x83, 0xA1, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, + 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xAD, 0xE3, 0x83, + 0xAD, 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0x83, 0xE3, + 0x83, 0x88, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, + 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xA0, 0xE3, 0x82, + 
0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA9, 0xE3, + 0x83, 0xA0, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xB3, + 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAB, 0xE3, 0x82, + 0xBB, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xA4, 0xE3, + 0x83, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAD, + 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x8D, 0xE3, 0x82, + 0xB1, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xB9, 0xE3, + 0x82, 0xB3, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x8A, + 0xE3, 0x82, 0xB3, 0xE3, 0x83, 0xBC, 0xE3, 0x83, + 0x9B, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xB5, 0xE3, + 0x82, 0xA4, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAB, + 0xE3, 0x82, 0xB5, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0x81, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xA0, 0xE3, + 0x82, 0xB7, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xB3, + 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x82, + 0xBB, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x81, 0xE3, + 0x82, 0xBB, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, + 0xE3, 0x82, 0xBF, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0xBC, 0xE3, 0x82, 0xB9, 0xE3, 0x83, 0x86, 0xE3, + 0x82, 0x99, 0xE3, 0x82, 0xB7, 0xE3, 0x83, 0x88, + 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, + 0x88, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x8A, 0xE3, + 0x83, 0x8E, 0xE3, 0x83, 0x8E, 0xE3, 0x83, 0x83, + 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x8F, 0xE3, 0x82, + 0xA4, 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x8F, 0xE3, + 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xBB, + 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x83, + 0x8F, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, + 0x83, 0x84, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x99, + 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAC, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, + 0x82, 0xA2, 0xE3, 0x82, 0xB9, 0xE3, 0x83, 0x88, + 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, + 0x9A, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAB, 0xE3, + 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xB3, + 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0xA1, 0xE3, + 0x83, 0xA9, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, + 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x95, 0xE3, 0x82, + 0xA3, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, + 
0x83, 0x95, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x83, + 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0xA7, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x95, 0xE3, 0x83, 0xA9, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0xAF, + 0xE3, 0x82, 0xBF, 0xE3, 0x83, 0xBC, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, + 0x82, 0xBD, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x9A, + 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0x92, 0xE3, 0x83, + 0x98, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x84, 0xE3, + 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xB3, + 0xE3, 0x82, 0xB9, 0xE3, 0x83, 0x98, 0xE3, 0x82, + 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xB7, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x99, + 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xBF, 0xE3, 0x83, + 0x9B, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xA4, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x9B, + 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, + 0x88, 0xE3, 0x83, 0x9B, 0xE3, 0x83, 0xB3, 0xE3, + 0x83, 0x9B, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xB3, + 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0x9B, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, + 0x83, 0x9B, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xB3, + 0xE3, 0x83, 0x9E, 0xE3, 0x82, 0xA4, 0xE3, 0x82, + 0xAF, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0x9E, 0xE3, + 0x82, 0xA4, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x9E, + 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x8F, 0xE3, 0x83, + 0x9E, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xAF, 0xE3, + 0x83, 0x9E, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xB7, + 0xE3, 0x83, 0xA7, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0x9F, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAD, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0x9F, 0xE3, 0x83, 0xAA, + 0xE3, 0x83, 0x9F, 0xE3, 0x83, 0xAA, 0xE3, 0x83, + 0x8F, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, + 0x83, 0xAB, 0xE3, 0x83, 0xA1, 0xE3, 0x82, 0xAB, + 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA1, 0xE3, 0x82, + 0xAB, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x88, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xBC, + 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, 0xE3, 0x83, + 0xA4, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xBC, + 
0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xA6, 0xE3, 0x82, + 0xA2, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0xAA, 0xE3, + 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, + 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xA9, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, + 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xBC, + 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xA0, 0xE3, + 0x83, 0xAC, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, + 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0xB3, 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0x83, 0xE3, + 0x83, 0x88, 0x30, 0xE7, 0x82, 0xB9, 0x31, 0xE7, + 0x82, 0xB9, 0x32, 0xE7, 0x82, 0xB9, 0x33, 0xE7, + 0x82, 0xB9, 0x34, 0xE7, 0x82, 0xB9, 0x35, 0xE7, + 0x82, 0xB9, 0x36, 0xE7, 0x82, 0xB9, 0x37, 0xE7, + 0x82, 0xB9, 0x38, 0xE7, 0x82, 0xB9, 0x39, 0xE7, + 0x82, 0xB9, 0x31, 0x30, 0xE7, 0x82, 0xB9, 0x31, + 0x31, 0xE7, 0x82, 0xB9, 0x31, 0x32, 0xE7, 0x82, + 0xB9, 0x31, 0x33, 0xE7, 0x82, 0xB9, 0x31, 0x34, + 0xE7, 0x82, 0xB9, 0x31, 0x35, 0xE7, 0x82, 0xB9, + 0x31, 0x36, 0xE7, 0x82, 0xB9, 0x31, 0x37, 0xE7, + 0x82, 0xB9, 0x31, 0x38, 0xE7, 0x82, 0xB9, 0x31, + 0x39, 0xE7, 0x82, 0xB9, 0x32, 0x30, 0xE7, 0x82, + 0xB9, 0x32, 0x31, 0xE7, 0x82, 0xB9, 0x32, 0x32, + 0xE7, 0x82, 0xB9, 0x32, 0x33, 0xE7, 0x82, 0xB9, + 0x32, 0x34, 0xE7, 0x82, 0xB9, 0x68, 0x50, 0x61, + 0x64, 0x61, 0x41, 0x55, 0x62, 0x61, 0x72, 0x6F, + 0x56, 0x70, 0x63, 0xE5, 0xB9, 0xB3, 0xE6, 0x88, + 0x90, 0xE6, 0x98, 0xAD, 0xE5, 0x92, 0x8C, 0xE5, + 0xA4, 0xA7, 0xE6, 0xAD, 0xA3, 0xE6, 0x98, 0x8E, + 0xE6, 0xB2, 0xBB, 0xE6, 0xA0, 0xAA, 0xE5, 0xBC, + 0x8F, 0xE4, 0xBC, 0x9A, 0xE7, 0xA4, 0xBE, 0x70, + 0x41, 0x6E, 0x41, 0xCE, 0xBC, 0x41, 0x6D, 0x41, + 0x6B, 0x41, 0x4B, 0x42, 0x4D, 0x42, 0x47, 0x42, + 0x63, 0x61, 0x6C, 0x6B, 0x63, 0x61, 0x6C, 0x70, + 0x46, 0x6E, 0x46, 0xCE, 0xBC, 0x46, 0xCE, 0xBC, + 0x67, 0x6D, 0x67, 0x6B, 0x67, 0x48, 0x7A, 0x6B, + 0x48, 0x7A, 0x4D, 0x48, 0x7A, 0x47, 0x48, 0x7A, + 0x54, 0x48, 0x7A, 0xCE, 0xBC, 0x6C, 0x6D, 0x6C, + 0x64, 0x6C, 0x6B, 0x6C, 0x66, 0x6D, 0x6E, 0x6D, + 
0xCE, 0xBC, 0x6D, 0x6D, 0x6D, 0x63, 0x6D, 0x6B, + 0x6D, 0x6D, 0x6D, 0x32, 0x63, 0x6D, 0x32, 0x6D, + 0x32, 0x6B, 0x6D, 0x32, 0x6D, 0x6D, 0x33, 0x63, + 0x6D, 0x33, 0x6D, 0x33, 0x6B, 0x6D, 0x33, 0x6D, + 0xE2, 0x88, 0x95, 0x73, 0x6D, 0xE2, 0x88, 0x95, + 0x73, 0x32, 0x50, 0x61, 0x6B, 0x50, 0x61, 0x4D, + 0x50, 0x61, 0x47, 0x50, 0x61, 0x72, 0x61, 0x64, + 0x72, 0x61, 0x64, 0xE2, 0x88, 0x95, 0x73, 0x72, + 0x61, 0x64, 0xE2, 0x88, 0x95, 0x73, 0x32, 0x70, + 0x73, 0x6E, 0x73, 0xCE, 0xBC, 0x73, 0x6D, 0x73, + 0x70, 0x56, 0x6E, 0x56, 0xCE, 0xBC, 0x56, 0x6D, + 0x56, 0x6B, 0x56, 0x4D, 0x56, 0x70, 0x57, 0x6E, + 0x57, 0xCE, 0xBC, 0x57, 0x6D, 0x57, 0x6B, 0x57, + 0x4D, 0x57, 0x6B, 0xCE, 0xA9, 0x4D, 0xCE, 0xA9, + 0x61, 0x2E, 0x6D, 0x2E, 0x42, 0x71, 0x63, 0x63, + 0x63, 0x64, 0x43, 0xE2, 0x88, 0x95, 0x6B, 0x67, + 0x43, 0x6F, 0x2E, 0x64, 0x42, 0x47, 0x79, 0x68, + 0x61, 0x48, 0x50, 0x69, 0x6E, 0x4B, 0x4B, 0x4B, + 0x4D, 0x6B, 0x74, 0x6C, 0x6D, 0x6C, 0x6E, 0x6C, + 0x6F, 0x67, 0x6C, 0x78, 0x6D, 0x62, 0x6D, 0x69, + 0x6C, 0x6D, 0x6F, 0x6C, 0x50, 0x48, 0x70, 0x2E, + 0x6D, 0x2E, 0x50, 0x50, 0x4D, 0x50, 0x52, 0x73, + 0x72, 0x53, 0x76, 0x57, 0x62, 0x31, 0xE6, 0x97, + 0xA5, 0x32, 0xE6, 0x97, 0xA5, 0x33, 0xE6, 0x97, + 0xA5, 0x34, 0xE6, 0x97, 0xA5, 0x35, 0xE6, 0x97, + 0xA5, 0x36, 0xE6, 0x97, 0xA5, 0x37, 0xE6, 0x97, + 0xA5, 0x38, 0xE6, 0x97, 0xA5, 0x39, 0xE6, 0x97, + 0xA5, 0x31, 0x30, 0xE6, 0x97, 0xA5, 0x31, 0x31, + 0xE6, 0x97, 0xA5, 0x31, 0x32, 0xE6, 0x97, 0xA5, + 0x31, 0x33, 0xE6, 0x97, 0xA5, 0x31, 0x34, 0xE6, + 0x97, 0xA5, 0x31, 0x35, 0xE6, 0x97, 0xA5, 0x31, + 0x36, 0xE6, 0x97, 0xA5, 0x31, 0x37, 0xE6, 0x97, + 0xA5, 0x31, 0x38, 0xE6, 0x97, 0xA5, 0x31, 0x39, + 0xE6, 0x97, 0xA5, 0x32, 0x30, 0xE6, 0x97, 0xA5, + 0x32, 0x31, 0xE6, 0x97, 0xA5, 0x32, 0x32, 0xE6, + 0x97, 0xA5, 0x32, 0x33, 0xE6, 0x97, 0xA5, 0x32, + 0x34, 0xE6, 0x97, 0xA5, 0x32, 0x35, 0xE6, 0x97, + 0xA5, 0x32, 0x36, 0xE6, 0x97, 0xA5, 0x32, 0x37, + 0xE6, 0x97, 0xA5, 0x32, 0x38, 0xE6, 0x97, 0xA5, + 0x32, 0x39, 0xE6, 0x97, 0xA5, 0x33, 0x30, 0xE6, + 
0x97, 0xA5, 0x33, 0x31, 0xE6, 0x97, 0xA5, 0xF6, + 0xE8, 0xB1, 0x88, 0xF6, 0xE6, 0x9B, 0xB4, 0xF6, + 0xE8, 0xBB, 0x8A, 0xF6, 0xE8, 0xB3, 0x88, 0xF6, + 0xE6, 0xBB, 0x91, 0xF6, 0xE4, 0xB8, 0xB2, 0xF6, + 0xE5, 0x8F, 0xA5, 0xF6, 0xE9, 0xBE, 0x9C, 0xF6, + 0xE9, 0xBE, 0x9C, 0xF6, 0xE5, 0xA5, 0x91, 0xF6, + 0xE9, 0x87, 0x91, 0xF6, 0xE5, 0x96, 0x87, 0xF6, + 0xE5, 0xA5, 0x88, 0xF6, 0xE6, 0x87, 0xB6, 0xF6, + 0xE7, 0x99, 0xA9, 0xF6, 0xE7, 0xBE, 0x85, 0xF6, + 0xE8, 0x98, 0xBF, 0xF6, 0xE8, 0x9E, 0xBA, 0xF6, + 0xE8, 0xA3, 0xB8, 0xF6, 0xE9, 0x82, 0x8F, 0xF6, + 0xE6, 0xA8, 0x82, 0xF6, 0xE6, 0xB4, 0x9B, 0xF6, + 0xE7, 0x83, 0x99, 0xF6, 0xE7, 0x8F, 0x9E, 0xF6, + 0xE8, 0x90, 0xBD, 0xF6, 0xE9, 0x85, 0xAA, 0xF6, + 0xE9, 0xA7, 0xB1, 0xF6, 0xE4, 0xBA, 0x82, 0xF6, + 0xE5, 0x8D, 0xB5, 0xF6, 0xE6, 0xAC, 0x84, 0xF6, + 0xE7, 0x88, 0x9B, 0xF6, 0xE8, 0x98, 0xAD, 0xF6, + 0xE9, 0xB8, 0x9E, 0xF6, 0xE5, 0xB5, 0x90, 0xF6, + 0xE6, 0xBF, 0xAB, 0xF6, 0xE8, 0x97, 0x8D, 0xF6, + 0xE8, 0xA5, 0xA4, 0xF6, 0xE6, 0x8B, 0x89, 0xF6, + 0xE8, 0x87, 0x98, 0xF6, 0xE8, 0xA0, 0x9F, 0xF6, + 0xE5, 0xBB, 0x8A, 0xF6, 0xE6, 0x9C, 0x97, 0xF6, + 0xE6, 0xB5, 0xAA, 0xF6, 0xE7, 0x8B, 0xBC, 0xF6, + 0xE9, 0x83, 0x8E, 0xF6, 0xE4, 0xBE, 0x86, 0xF6, + 0xE5, 0x86, 0xB7, 0xF6, 0xE5, 0x8B, 0x9E, 0xF6, + 0xE6, 0x93, 0x84, 0xF6, 0xE6, 0xAB, 0x93, 0xF6, + 0xE7, 0x88, 0x90, 0xF6, 0xE7, 0x9B, 0xA7, 0xF6, + 0xE8, 0x80, 0x81, 0xF6, 0xE8, 0x98, 0x86, 0xF6, + 0xE8, 0x99, 0x9C, 0xF6, 0xE8, 0xB7, 0xAF, 0xF6, + 0xE9, 0x9C, 0xB2, 0xF6, 0xE9, 0xAD, 0xAF, 0xF6, + 0xE9, 0xB7, 0xBA, 0xF6, 0xE7, 0xA2, 0x8C, 0xF6, + 0xE7, 0xA5, 0xBF, 0xF6, 0xE7, 0xB6, 0xA0, 0xF6, + 0xE8, 0x8F, 0x89, 0xF6, 0xE9, 0x8C, 0x84, 0xF6, + 0xE9, 0xB9, 0xBF, 0xF6, 0xE8, 0xAB, 0x96, 0xF6, + 0xE5, 0xA3, 0x9F, 0xF6, 0xE5, 0xBC, 0x84, 0xF6, + 0xE7, 0xB1, 0xA0, 0xF6, 0xE8, 0x81, 0xBE, 0xF6, + 0xE7, 0x89, 0xA2, 0xF6, 0xE7, 0xA3, 0x8A, 0xF6, + 0xE8, 0xB3, 0x82, 0xF6, 0xE9, 0x9B, 0xB7, 0xF6, + 0xE5, 0xA3, 0x98, 0xF6, 0xE5, 0xB1, 0xA2, 0xF6, + 0xE6, 0xA8, 0x93, 0xF6, 0xE6, 0xB7, 0x9A, 0xF6, + 
0xE6, 0xBC, 0x8F, 0xF6, 0xE7, 0xB4, 0xAF, 0xF6, + 0xE7, 0xB8, 0xB7, 0xF6, 0xE9, 0x99, 0x8B, 0xF6, + 0xE5, 0x8B, 0x92, 0xF6, 0xE8, 0x82, 0x8B, 0xF6, + 0xE5, 0x87, 0x9C, 0xF6, 0xE5, 0x87, 0x8C, 0xF6, + 0xE7, 0xA8, 0x9C, 0xF6, 0xE7, 0xB6, 0xBE, 0xF6, + 0xE8, 0x8F, 0xB1, 0xF6, 0xE9, 0x99, 0xB5, 0xF6, + 0xE8, 0xAE, 0x80, 0xF6, 0xE6, 0x8B, 0x8F, 0xF6, + 0xE6, 0xA8, 0x82, 0xF6, 0xE8, 0xAB, 0xBE, 0xF6, + 0xE4, 0xB8, 0xB9, 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, + 0xE6, 0x80, 0x92, 0xF6, 0xE7, 0x8E, 0x87, 0xF6, + 0xE7, 0x95, 0xB0, 0xF6, 0xE5, 0x8C, 0x97, 0xF6, + 0xE7, 0xA3, 0xBB, 0xF6, 0xE4, 0xBE, 0xBF, 0xF6, + 0xE5, 0xBE, 0xA9, 0xF6, 0xE4, 0xB8, 0x8D, 0xF6, + 0xE6, 0xB3, 0x8C, 0xF6, 0xE6, 0x95, 0xB8, 0xF6, + 0xE7, 0xB4, 0xA2, 0xF6, 0xE5, 0x8F, 0x83, 0xF6, + 0xE5, 0xA1, 0x9E, 0xF6, 0xE7, 0x9C, 0x81, 0xF6, + 0xE8, 0x91, 0x89, 0xF6, 0xE8, 0xAA, 0xAA, 0xF6, + 0xE6, 0xAE, 0xBA, 0xF6, 0xE8, 0xBE, 0xB0, 0xF6, + 0xE6, 0xB2, 0x88, 0xF6, 0xE6, 0x8B, 0xBE, 0xF6, + 0xE8, 0x8B, 0xA5, 0xF6, 0xE6, 0x8E, 0xA0, 0xF6, + 0xE7, 0x95, 0xA5, 0xF6, 0xE4, 0xBA, 0xAE, 0xF6, + 0xE5, 0x85, 0xA9, 0xF6, 0xE5, 0x87, 0x89, 0xF6, + 0xE6, 0xA2, 0x81, 0xF6, 0xE7, 0xB3, 0xA7, 0xF6, + 0xE8, 0x89, 0xAF, 0xF6, 0xE8, 0xAB, 0x92, 0xF6, + 0xE9, 0x87, 0x8F, 0xF6, 0xE5, 0x8B, 0xB5, 0xF6, + 0xE5, 0x91, 0x82, 0xF6, 0xE5, 0xA5, 0xB3, 0xF6, + 0xE5, 0xBB, 0xAC, 0xF6, 0xE6, 0x97, 0x85, 0xF6, + 0xE6, 0xBF, 0xBE, 0xF6, 0xE7, 0xA4, 0xAA, 0xF6, + 0xE9, 0x96, 0xAD, 0xF6, 0xE9, 0xA9, 0xAA, 0xF6, + 0xE9, 0xBA, 0x97, 0xF6, 0xE9, 0xBB, 0x8E, 0xF6, + 0xE5, 0x8A, 0x9B, 0xF6, 0xE6, 0x9B, 0x86, 0xF6, + 0xE6, 0xAD, 0xB7, 0xF6, 0xE8, 0xBD, 0xA2, 0xF6, + 0xE5, 0xB9, 0xB4, 0xF6, 0xE6, 0x86, 0x90, 0xF6, + 0xE6, 0x88, 0x80, 0xF6, 0xE6, 0x92, 0x9A, 0xF6, + 0xE6, 0xBC, 0xA3, 0xF6, 0xE7, 0x85, 0x89, 0xF6, + 0xE7, 0x92, 0x89, 0xF6, 0xE7, 0xA7, 0x8A, 0xF6, + 0xE7, 0xB7, 0xB4, 0xF6, 0xE8, 0x81, 0xAF, 0xF6, + 0xE8, 0xBC, 0xA6, 0xF6, 0xE8, 0x93, 0xAE, 0xF6, + 0xE9, 0x80, 0xA3, 0xF6, 0xE9, 0x8D, 0x8A, 0xF6, + 0xE5, 0x88, 0x97, 0xF6, 0xE5, 0x8A, 0xA3, 0xF6, + 
0xE5, 0x92, 0xBD, 0xF6, 0xE7, 0x83, 0x88, 0xF6, + 0xE8, 0xA3, 0x82, 0xF6, 0xE8, 0xAA, 0xAA, 0xF6, + 0xE5, 0xBB, 0x89, 0xF6, 0xE5, 0xBF, 0xB5, 0xF6, + 0xE6, 0x8D, 0xBB, 0xF6, 0xE6, 0xAE, 0xAE, 0xF6, + 0xE7, 0xB0, 0xBE, 0xF6, 0xE7, 0x8D, 0xB5, 0xF6, + 0xE4, 0xBB, 0xA4, 0xF6, 0xE5, 0x9B, 0xB9, 0xF6, + 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, 0xB6, 0xBA, 0xF6, + 0xE6, 0x80, 0x9C, 0xF6, 0xE7, 0x8E, 0xB2, 0xF6, + 0xE7, 0x91, 0xA9, 0xF6, 0xE7, 0xBE, 0x9A, 0xF6, + 0xE8, 0x81, 0x86, 0xF6, 0xE9, 0x88, 0xB4, 0xF6, + 0xE9, 0x9B, 0xB6, 0xF6, 0xE9, 0x9D, 0x88, 0xF6, + 0xE9, 0xA0, 0x98, 0xF6, 0xE4, 0xBE, 0x8B, 0xF6, + 0xE7, 0xA6, 0xAE, 0xF6, 0xE9, 0x86, 0xB4, 0xF6, + 0xE9, 0x9A, 0xB8, 0xF6, 0xE6, 0x83, 0xA1, 0xF6, + 0xE4, 0xBA, 0x86, 0xF6, 0xE5, 0x83, 0x9A, 0xF6, + 0xE5, 0xAF, 0xAE, 0xF6, 0xE5, 0xB0, 0xBF, 0xF6, + 0xE6, 0x96, 0x99, 0xF6, 0xE6, 0xA8, 0x82, 0xF6, + 0xE7, 0x87, 0x8E, 0xF6, 0xE7, 0x99, 0x82, 0xF6, + 0xE8, 0x93, 0xBC, 0xF6, 0xE9, 0x81, 0xBC, 0xF6, + 0xE9, 0xBE, 0x8D, 0xF6, 0xE6, 0x9A, 0x88, 0xF6, + 0xE9, 0x98, 0xAE, 0xF6, 0xE5, 0x8A, 0x89, 0xF6, + 0xE6, 0x9D, 0xBB, 0xF6, 0xE6, 0x9F, 0xB3, 0xF6, + 0xE6, 0xB5, 0x81, 0xF6, 0xE6, 0xBA, 0x9C, 0xF6, + 0xE7, 0x90, 0x89, 0xF6, 0xE7, 0x95, 0x99, 0xF6, + 0xE7, 0xA1, 0xAB, 0xF6, 0xE7, 0xB4, 0x90, 0xF6, + 0xE9, 0xA1, 0x9E, 0xF6, 0xE5, 0x85, 0xAD, 0xF6, + 0xE6, 0x88, 0xAE, 0xF6, 0xE9, 0x99, 0xB8, 0xF6, + 0xE5, 0x80, 0xAB, 0xF6, 0xE5, 0xB4, 0x99, 0xF6, + 0xE6, 0xB7, 0xAA, 0xF6, 0xE8, 0xBC, 0xAA, 0xF6, + 0xE5, 0xBE, 0x8B, 0xF6, 0xE6, 0x85, 0x84, 0xF6, + 0xE6, 0xA0, 0x97, 0xF6, 0xE7, 0x8E, 0x87, 0xF6, + 0xE9, 0x9A, 0x86, 0xF6, 0xE5, 0x88, 0xA9, 0xF6, + 0xE5, 0x90, 0x8F, 0xF6, 0xE5, 0xB1, 0xA5, 0xF6, + 0xE6, 0x98, 0x93, 0xF6, 0xE6, 0x9D, 0x8E, 0xF6, + 0xE6, 0xA2, 0xA8, 0xF6, 0xE6, 0xB3, 0xA5, 0xF6, + 0xE7, 0x90, 0x86, 0xF6, 0xE7, 0x97, 0xA2, 0xF6, + 0xE7, 0xBD, 0xB9, 0xF6, 0xE8, 0xA3, 0x8F, 0xF6, + 0xE8, 0xA3, 0xA1, 0xF6, 0xE9, 0x87, 0x8C, 0xF6, + 0xE9, 0x9B, 0xA2, 0xF6, 0xE5, 0x8C, 0xBF, 0xF6, + 0xE6, 0xBA, 0xBA, 0xF6, 0xE5, 0x90, 0x9D, 0xF6, + 
0xE7, 0x87, 0x90, 0xF6, 0xE7, 0x92, 0x98, 0xF6, + 0xE8, 0x97, 0xBA, 0xF6, 0xE9, 0x9A, 0xA3, 0xF6, + 0xE9, 0xB1, 0x97, 0xF6, 0xE9, 0xBA, 0x9F, 0xF6, + 0xE6, 0x9E, 0x97, 0xF6, 0xE6, 0xB7, 0x8B, 0xF6, + 0xE8, 0x87, 0xA8, 0xF6, 0xE7, 0xAB, 0x8B, 0xF6, + 0xE7, 0xAC, 0xA0, 0xF6, 0xE7, 0xB2, 0x92, 0xF6, + 0xE7, 0x8B, 0x80, 0xF6, 0xE7, 0x82, 0x99, 0xF6, + 0xE8, 0xAD, 0x98, 0xF6, 0xE4, 0xBB, 0x80, 0xF6, + 0xE8, 0x8C, 0xB6, 0xF6, 0xE5, 0x88, 0xBA, 0xF6, + 0xE5, 0x88, 0x87, 0xF6, 0xE5, 0xBA, 0xA6, 0xF6, + 0xE6, 0x8B, 0x93, 0xF6, 0xE7, 0xB3, 0x96, 0xF6, + 0xE5, 0xAE, 0x85, 0xF6, 0xE6, 0xB4, 0x9E, 0xF6, + 0xE6, 0x9A, 0xB4, 0xF6, 0xE8, 0xBC, 0xBB, 0xF6, + 0xE8, 0xA1, 0x8C, 0xF6, 0xE9, 0x99, 0x8D, 0xF6, + 0xE8, 0xA6, 0x8B, 0xF6, 0xE5, 0xBB, 0x93, 0xF6, + 0xE5, 0x85, 0x80, 0xF6, 0xE5, 0x97, 0x80, 0xF6, + 0xE5, 0xA1, 0x9A, 0xF6, 0xE6, 0x99, 0xB4, 0xF6, + 0xE5, 0x87, 0x9E, 0xF6, 0xE7, 0x8C, 0xAA, 0xF6, + 0xE7, 0x9B, 0x8A, 0xF6, 0xE7, 0xA4, 0xBC, 0xF6, + 0xE7, 0xA5, 0x9E, 0xF6, 0xE7, 0xA5, 0xA5, 0xF6, + 0xE7, 0xA6, 0x8F, 0xF6, 0xE9, 0x9D, 0x96, 0xF6, + 0xE7, 0xB2, 0xBE, 0xF6, 0xE7, 0xBE, 0xBD, 0xF6, + 0xE8, 0x98, 0x92, 0xF6, 0xE8, 0xAB, 0xB8, 0xF6, + 0xE9, 0x80, 0xB8, 0xF6, 0xE9, 0x83, 0xBD, 0xF6, + 0xE9, 0xA3, 0xAF, 0xF6, 0xE9, 0xA3, 0xBC, 0xF6, + 0xE9, 0xA4, 0xA8, 0xF6, 0xE9, 0xB6, 0xB4, 0xF6, + 0xE4, 0xBE, 0xAE, 0xF6, 0xE5, 0x83, 0xA7, 0xF6, + 0xE5, 0x85, 0x8D, 0xF6, 0xE5, 0x8B, 0x89, 0xF6, + 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, 0x8D, 0x91, 0xF6, + 0xE5, 0x96, 0x9D, 0xF6, 0xE5, 0x98, 0x86, 0xF6, + 0xE5, 0x99, 0xA8, 0xF6, 0xE5, 0xA1, 0x80, 0xF6, + 0xE5, 0xA2, 0xA8, 0xF6, 0xE5, 0xB1, 0xA4, 0xF6, + 0xE5, 0xB1, 0xAE, 0xF6, 0xE6, 0x82, 0x94, 0xF6, + 0xE6, 0x85, 0xA8, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, + 0xE6, 0x87, 0xB2, 0xF6, 0xE6, 0x95, 0x8F, 0xF6, + 0xE6, 0x97, 0xA2, 0xF6, 0xE6, 0x9A, 0x91, 0xF6, + 0xE6, 0xA2, 0x85, 0xF6, 0xE6, 0xB5, 0xB7, 0xF6, + 0xE6, 0xB8, 0x9A, 0xF6, 0xE6, 0xBC, 0xA2, 0xF6, + 0xE7, 0x85, 0xAE, 0xF6, 0xE7, 0x88, 0xAB, 0xF6, + 0xE7, 0x90, 0xA2, 0xF6, 0xE7, 0xA2, 0x91, 0xF6, + 
0xE7, 0xA4, 0xBE, 0xF6, 0xE7, 0xA5, 0x89, 0xF6, + 0xE7, 0xA5, 0x88, 0xF6, 0xE7, 0xA5, 0x90, 0xF6, + 0xE7, 0xA5, 0x96, 0xF6, 0xE7, 0xA5, 0x9D, 0xF6, + 0xE7, 0xA6, 0x8D, 0xF6, 0xE7, 0xA6, 0x8E, 0xF6, + 0xE7, 0xA9, 0x80, 0xF6, 0xE7, 0xAA, 0x81, 0xF6, + 0xE7, 0xAF, 0x80, 0xF6, 0xE7, 0xB7, 0xB4, 0xF6, + 0xE7, 0xB8, 0x89, 0xF6, 0xE7, 0xB9, 0x81, 0xF6, + 0xE7, 0xBD, 0xB2, 0xF6, 0xE8, 0x80, 0x85, 0xF6, + 0xE8, 0x87, 0xAD, 0xF6, 0xE8, 0x89, 0xB9, 0xF6, + 0xE8, 0x89, 0xB9, 0xF6, 0xE8, 0x91, 0x97, 0xF6, + 0xE8, 0xA4, 0x90, 0xF6, 0xE8, 0xA6, 0x96, 0xF6, + 0xE8, 0xAC, 0x81, 0xF6, 0xE8, 0xAC, 0xB9, 0xF6, + 0xE8, 0xB3, 0x93, 0xF6, 0xE8, 0xB4, 0x88, 0xF6, + 0xE8, 0xBE, 0xB6, 0xF6, 0xE9, 0x80, 0xB8, 0xF6, + 0xE9, 0x9B, 0xA3, 0xF6, 0xE9, 0x9F, 0xBF, 0xF6, + 0xE9, 0xA0, 0xBB, 0x66, 0x66, 0x66, 0x69, 0x66, + 0x6C, 0x66, 0x66, 0x69, 0x66, 0x66, 0x6C, 0x73, + 0x74, 0x73, 0x74, 0xD5, 0xB4, 0xD5, 0xB6, 0xD5, + 0xB4, 0xD5, 0xA5, 0xD5, 0xB4, 0xD5, 0xAB, 0xD5, + 0xBE, 0xD5, 0xB6, 0xD5, 0xB4, 0xD5, 0xAD, 0xF6, + 0xD7, 0x99, 0xD6, 0xB4, 0xF6, 0xD7, 0xB2, 0xD6, + 0xB7, 0xD7, 0xA2, 0xD7, 0x90, 0xD7, 0x93, 0xD7, + 0x94, 0xD7, 0x9B, 0xD7, 0x9C, 0xD7, 0x9D, 0xD7, + 0xA8, 0xD7, 0xAA, 0x2B, 0xF6, 0xD7, 0xA9, 0xD7, + 0x81, 0xF6, 0xD7, 0xA9, 0xD7, 0x82, 0xF6, 0xD7, + 0xA9, 0xD6, 0xBC, 0xD7, 0x81, 0xF6, 0xD7, 0xA9, + 0xD6, 0xBC, 0xD7, 0x82, 0xF6, 0xD7, 0x90, 0xD6, + 0xB7, 0xF6, 0xD7, 0x90, 0xD6, 0xB8, 0xF6, 0xD7, + 0x90, 0xD6, 0xBC, 0xF6, 0xD7, 0x91, 0xD6, 0xBC, + 0xF6, 0xD7, 0x92, 0xD6, 0xBC, 0xF6, 0xD7, 0x93, + 0xD6, 0xBC, 0xF6, 0xD7, 0x94, 0xD6, 0xBC, 0xF6, + 0xD7, 0x95, 0xD6, 0xBC, 0xF6, 0xD7, 0x96, 0xD6, + 0xBC, 0xF6, 0xD7, 0x98, 0xD6, 0xBC, 0xF6, 0xD7, + 0x99, 0xD6, 0xBC, 0xF6, 0xD7, 0x9A, 0xD6, 0xBC, + 0xF6, 0xD7, 0x9B, 0xD6, 0xBC, 0xF6, 0xD7, 0x9C, + 0xD6, 0xBC, 0xF6, 0xD7, 0x9E, 0xD6, 0xBC, 0xF6, + 0xD7, 0xA0, 0xD6, 0xBC, 0xF6, 0xD7, 0xA1, 0xD6, + 0xBC, 0xF6, 0xD7, 0xA3, 0xD6, 0xBC, 0xF6, 0xD7, + 0xA4, 0xD6, 0xBC, 0xF6, 0xD7, 0xA6, 0xD6, 0xBC, + 0xF6, 0xD7, 0xA7, 0xD6, 0xBC, 0xF6, 0xD7, 0xA8, + 
0xD6, 0xBC, 0xF6, 0xD7, 0xA9, 0xD6, 0xBC, 0xF6, + 0xD7, 0xAA, 0xD6, 0xBC, 0xF6, 0xD7, 0x95, 0xD6, + 0xB9, 0xF6, 0xD7, 0x91, 0xD6, 0xBF, 0xF6, 0xD7, + 0x9B, 0xD6, 0xBF, 0xF6, 0xD7, 0xA4, 0xD6, 0xBF, + 0xD7, 0x90, 0xD7, 0x9C, 0xD9, 0xB1, 0xD9, 0xB1, + 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, 0xBB, + 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, 0xBE, + 0xDA, 0x80, 0xDA, 0x80, 0xDA, 0x80, 0xDA, 0x80, + 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, 0xBA, + 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, 0xBF, + 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, 0xB9, + 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, 0xA4, + 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, 0xA6, + 0xDA, 0x84, 0xDA, 0x84, 0xDA, 0x84, 0xDA, 0x84, + 0xDA, 0x83, 0xDA, 0x83, 0xDA, 0x83, 0xDA, 0x83, + 0xDA, 0x86, 0xDA, 0x86, 0xDA, 0x86, 0xDA, 0x86, + 0xDA, 0x87, 0xDA, 0x87, 0xDA, 0x87, 0xDA, 0x87, + 0xDA, 0x8D, 0xDA, 0x8D, 0xDA, 0x8C, 0xDA, 0x8C, + 0xDA, 0x8E, 0xDA, 0x8E, 0xDA, 0x88, 0xDA, 0x88, + 0xDA, 0x98, 0xDA, 0x98, 0xDA, 0x91, 0xDA, 0x91, + 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, 0xA9, + 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, 0xAF, + 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, 0xB3, + 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, 0xB1, + 0xDA, 0xBA, 0xDA, 0xBA, 0xDA, 0xBB, 0xDA, 0xBB, + 0xDA, 0xBB, 0xDA, 0xBB, 0xDB, 0x95, 0xD9, 0x94, + 0xDB, 0x95, 0xD9, 0x94, 0xDB, 0x81, 0xDB, 0x81, + 0xDB, 0x81, 0xDB, 0x81, 0xDA, 0xBE, 0xDA, 0xBE, + 0xDA, 0xBE, 0xDA, 0xBE, 0xDB, 0x92, 0xDB, 0x92, + 0xDB, 0x92, 0xD9, 0x94, 0xDB, 0x92, 0xD9, 0x94, + 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, 0xAD, + 0xDB, 0x87, 0xDB, 0x87, 0xDB, 0x86, 0xDB, 0x86, + 0xDB, 0x88, 0xDB, 0x88, 0xDB, 0x87, 0xD9, 0xB4, + 0xDB, 0x8B, 0xDB, 0x8B, 0xDB, 0x85, 0xDB, 0x85, + 0xDB, 0x89, 0xDB, 0x89, 0xDB, 0x90, 0xDB, 0x90, + 0xDB, 0x90, 0xDB, 0x90, 0xD9, 0x89, 0xD9, 0x89, + 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x8A, + 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x8A, 0xD9, 0x94, + 0xDB, 0x95, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x95, + 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x8A, + 
0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x8A, 0xD9, 0x94, + 0xDB, 0x87, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x87, + 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x86, 0xD9, 0x8A, + 0xD9, 0x94, 0xDB, 0x86, 0xD9, 0x8A, 0xD9, 0x94, + 0xDB, 0x88, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x88, + 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x90, 0xD9, 0x8A, + 0xD9, 0x94, 0xDB, 0x90, 0xD9, 0x8A, 0xD9, 0x94, + 0xDB, 0x90, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x89, + 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x89, 0xDB, 0x8C, 0xDB, 0x8C, + 0xDB, 0x8C, 0xDB, 0x8C, 0xD9, 0x8A, 0xD9, 0x94, + 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAD, + 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x89, 0xD9, 0x8A, 0xD9, 0x94, + 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, 0xA8, + 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, 0xA8, + 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x89, 0xD8, 0xA8, + 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD8, 0xAA, + 0xD8, 0xAD, 0xD8, 0xAA, 0xD8, 0xAE, 0xD8, 0xAA, + 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x89, 0xD8, 0xAA, + 0xD9, 0x8A, 0xD8, 0xAB, 0xD8, 0xAC, 0xD8, 0xAB, + 0xD9, 0x85, 0xD8, 0xAB, 0xD9, 0x89, 0xD8, 0xAB, + 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, + 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, 0xAD, + 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD8, 0xAE, + 0xD8, 0xAD, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB3, + 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, + 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xB5, + 0xD8, 0xAD, 0xD8, 0xB5, 0xD9, 0x85, 0xD8, 0xB6, + 0xD8, 0xAC, 0xD8, 0xB6, 0xD8, 0xAD, 0xD8, 0xB6, + 0xD8, 0xAE, 0xD8, 0xB6, 0xD9, 0x85, 0xD8, 0xB7, + 0xD8, 0xAD, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xB8, + 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, 0xB9, + 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, 0xBA, + 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, 0x81, + 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, 0x81, + 0xD9, 0x85, 0xD9, 0x81, 0xD9, 0x89, 0xD9, 0x81, + 0xD9, 0x8A, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, 0x82, + 0xD9, 0x85, 0xD9, 0x82, 0xD9, 0x89, 0xD9, 0x82, + 0xD9, 0x8A, 0xD9, 0x83, 0xD8, 0xA7, 0xD9, 0x83, + 
0xD8, 0xAC, 0xD9, 0x83, 0xD8, 0xAD, 0xD9, 0x83, + 0xD8, 0xAE, 0xD9, 0x83, 0xD9, 0x84, 0xD9, 0x83, + 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x89, 0xD9, 0x83, + 0xD9, 0x8A, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x84, + 0xD8, 0xAD, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x84, + 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x89, 0xD9, 0x84, + 0xD9, 0x8A, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x85, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, 0x85, + 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x89, 0xD9, 0x85, + 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x86, + 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, 0x86, + 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x89, 0xD9, 0x86, + 0xD9, 0x8A, 0xD9, 0x87, 0xD8, 0xAC, 0xD9, 0x87, + 0xD9, 0x85, 0xD9, 0x87, 0xD9, 0x89, 0xD9, 0x87, + 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, 0x8A, + 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x89, 0xD9, 0x8A, + 0xD9, 0x8A, 0xD8, 0xB0, 0xD9, 0xB0, 0xD8, 0xB1, + 0xD9, 0xB0, 0xD9, 0x89, 0xD9, 0xB0, 0x20, 0xD9, + 0x8C, 0xD9, 0x91, 0x20, 0xD9, 0x8D, 0xD9, 0x91, + 0x20, 0xD9, 0x8E, 0xD9, 0x91, 0x20, 0xD9, 0x8F, + 0xD9, 0x91, 0x20, 0xD9, 0x90, 0xD9, 0x91, 0x20, + 0xD9, 0x91, 0xD9, 0xB0, 0xD9, 0x8A, 0xD9, 0x94, + 0xD8, 0xB1, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xB2, + 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x86, 0xD9, 0x8A, 0xD9, 0x94, + 0xD9, 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x8A, + 0xD8, 0xA8, 0xD8, 0xB1, 0xD8, 0xA8, 0xD8, 0xB2, + 0xD8, 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x86, + 0xD8, 0xA8, 0xD9, 0x89, 0xD8, 0xA8, 0xD9, 0x8A, + 0xD8, 0xAA, 0xD8, 0xB1, 0xD8, 0xAA, 0xD8, 0xB2, + 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x86, + 0xD8, 0xAA, 0xD9, 0x89, 0xD8, 0xAA, 0xD9, 0x8A, + 0xD8, 0xAB, 0xD8, 0xB1, 0xD8, 0xAB, 0xD8, 0xB2, + 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAB, 0xD9, 0x86, + 0xD8, 0xAB, 0xD9, 0x89, 0xD8, 0xAB, 0xD9, 0x8A, + 0xD9, 0x81, 0xD9, 0x89, 0xD9, 0x81, 0xD9, 0x8A, + 0xD9, 0x82, 0xD9, 0x89, 0xD9, 0x82, 0xD9, 0x8A, + 0xD9, 0x83, 0xD8, 0xA7, 0xD9, 0x83, 0xD9, 0x84, + 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x89, + 
0xD9, 0x83, 0xD9, 0x8A, 0xD9, 0x84, 0xD9, 0x85, + 0xD9, 0x84, 0xD9, 0x89, 0xD9, 0x84, 0xD9, 0x8A, + 0xD9, 0x85, 0xD8, 0xA7, 0xD9, 0x85, 0xD9, 0x85, + 0xD9, 0x86, 0xD8, 0xB1, 0xD9, 0x86, 0xD8, 0xB2, + 0xD9, 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x86, + 0xD9, 0x86, 0xD9, 0x89, 0xD9, 0x86, 0xD9, 0x8A, + 0xD9, 0x89, 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, 0xB1, + 0xD9, 0x8A, 0xD8, 0xB2, 0xD9, 0x8A, 0xD9, 0x85, + 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x8A, 0xD9, 0x89, + 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x94, + 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAD, + 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAE, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x94, + 0xD9, 0x87, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, 0xA8, + 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, 0xA8, + 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x87, 0xD8, 0xAA, + 0xD8, 0xAC, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAA, + 0xD8, 0xAE, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, + 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAC, + 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, 0xAD, + 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, + 0xD8, 0xAC, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB3, + 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, + 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xB5, + 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAE, 0xD8, 0xB5, + 0xD9, 0x85, 0xD8, 0xB6, 0xD8, 0xAC, 0xD8, 0xB6, + 0xD8, 0xAD, 0xD8, 0xB6, 0xD8, 0xAE, 0xD8, 0xB6, + 0xD9, 0x85, 0xD8, 0xB7, 0xD8, 0xAD, 0xD8, 0xB8, + 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, 0xB9, + 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, 0xBA, + 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, 0x81, + 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, 0x81, + 0xD9, 0x85, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, 0x82, + 0xD9, 0x85, 0xD9, 0x83, 0xD8, 0xAC, 0xD9, 0x83, + 0xD8, 0xAD, 0xD9, 0x83, 0xD8, 0xAE, 0xD9, 0x83, + 0xD9, 0x84, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x84, + 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x84, + 0xD8, 0xAE, 0xD9, 0x84, 0xD9, 0x85, 0xD9, 0x84, + 0xD9, 0x87, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x85, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, 0x85, + 
0xD9, 0x85, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x86, + 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, 0x86, + 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, 0x87, + 0xD8, 0xAC, 0xD9, 0x87, 0xD9, 0x85, 0xD9, 0x87, + 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, 0x8A, + 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x94, + 0xD9, 0x87, 0xD8, 0xA8, 0xD9, 0x85, 0xD8, 0xA8, + 0xD9, 0x87, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, + 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAB, + 0xD9, 0x87, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xB3, + 0xD9, 0x87, 0xD8, 0xB4, 0xD9, 0x85, 0xD8, 0xB4, + 0xD9, 0x87, 0xD9, 0x83, 0xD9, 0x84, 0xD9, 0x83, + 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x85, 0xD9, 0x86, + 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, 0x8A, + 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x80, + 0xD9, 0x8E, 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x8F, + 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x90, 0xD9, 0x91, + 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, 0x8A, + 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, 0x8A, + 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x8A, + 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, 0x8A, + 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, 0x8A, + 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, 0x8A, + 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, 0x8A, + 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x8A, + 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, 0x8A, + 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, + 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, + 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, 0xB1, + 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, 0xB1, + 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, 0x8A, + 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, 0x8A, + 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x8A, + 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, 0x8A, + 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, 0x8A, + 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, 0x8A, + 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x8A, + 
0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, 0x8A, + 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x8A, + 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, 0x8A, + 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, + 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, + 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, 0xB1, + 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, 0xB1, + 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, + 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, + 0xD8, 0xB3, 0xD9, 0x87, 0xD8, 0xB4, 0xD9, 0x87, + 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, 0xAC, + 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, 0xD8, 0xAE, + 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, + 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB7, 0xD9, 0x85, + 0xD8, 0xB8, 0xD9, 0x85, 0xD8, 0xA7, 0xD9, 0x8B, + 0xD8, 0xA7, 0xD9, 0x8B, 0xD8, 0xAA, 0xD8, 0xAC, + 0xD9, 0x85, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAC, + 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, 0xAA, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAA, 0xD8, 0xAE, + 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAC, + 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAA, + 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, 0x85, + 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, 0xAD, + 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xAD, + 0xD9, 0x85, 0xD9, 0x89, 0xD8, 0xB3, 0xD8, 0xAD, + 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAC, 0xD8, 0xAD, + 0xD8, 0xB3, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xB3, + 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, 0x85, + 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xAC, + 0xD8, 0xB3, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB3, + 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB5, 0xD8, 0xAD, + 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAD, 0xD8, 0xAD, + 0xD8, 0xB5, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB4, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB4, 0xD8, 0xAD, + 0xD9, 0x85, 0xD8, 0xB4, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD8, 0xB4, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xB4, + 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, + 0xD9, 0x85, 0xD8, 0xB4, 0xD9, 0x85, 0xD9, 0x85, + 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xB6, + 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB6, 0xD8, 0xAE, + 
0xD9, 0x85, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xAD, + 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xB7, + 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB7, 0xD9, 0x85, + 0xD9, 0x8A, 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, 0x85, + 0xD8, 0xB9, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB9, + 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, 0x85, + 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x85, 0xD9, 0x85, + 0xD8, 0xBA, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xBA, + 0xD9, 0x85, 0xD9, 0x89, 0xD9, 0x81, 0xD8, 0xAE, + 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, 0x85, + 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x82, + 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x84, 0xD8, 0xAD, + 0xD9, 0x85, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x8A, + 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x89, 0xD9, 0x84, + 0xD8, 0xAC, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAC, + 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x85, + 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x85, 0xD9, 0x84, + 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x84, 0xD9, 0x85, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAC, + 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x85, + 0xD8, 0xAD, 0xD9, 0x8A, 0xD9, 0x85, 0xD8, 0xAC, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x85, + 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, 0x85, + 0xD8, 0xAE, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xAC, + 0xD8, 0xAE, 0xD9, 0x87, 0xD9, 0x85, 0xD8, 0xAC, + 0xD9, 0x87, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x86, + 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x86, 0xD8, 0xAD, + 0xD9, 0x89, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x85, + 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, 0x86, + 0xD8, 0xAC, 0xD9, 0x89, 0xD9, 0x86, 0xD9, 0x85, + 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x85, 0xD9, 0x89, + 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x8A, + 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xA8, 0xD8, 0xAE, + 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAA, + 0xD8, 0xAE, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAE, + 0xD9, 0x89, 0xD8, 0xAA, 0xD9, 0x85, 0xD9, 0x8A, + 0xD8, 0xAA, 0xD9, 0x85, 0xD9, 0x89, 0xD8, 0xAC, + 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, 0xAD, + 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, 0x89, + 
0xD8, 0xB3, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xB5, + 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xB4, 0xD8, 0xAD, + 0xD9, 0x8A, 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, 0x8A, + 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x84, + 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAD, + 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x85, + 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x82, 0xD9, 0x85, + 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAD, 0xD9, 0x8A, + 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x84, + 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, 0x85, + 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x8A, + 0xD9, 0x86, 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, 0x85, + 0xD8, 0xAE, 0xD9, 0x8A, 0xD9, 0x84, 0xD8, 0xAC, + 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x85, + 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, 0x86, + 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, 0xAD, + 0xD9, 0x8A, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x81, + 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, 0xAD, + 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x85, + 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, 0xB5, + 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, 0xAE, + 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x8A, + 0xD8, 0xB5, 0xD9, 0x84, 0xDB, 0x92, 0xD9, 0x82, + 0xD9, 0x84, 0xDB, 0x92, 0xD8, 0xA7, 0xD9, 0x84, + 0xD9, 0x84, 0xD9, 0x87, 0xD8, 0xA7, 0xD9, 0x83, + 0xD8, 0xA8, 0xD8, 0xB1, 0xD9, 0x85, 0xD8, 0xAD, + 0xD9, 0x85, 0xD8, 0xAF, 0xD8, 0xB5, 0xD9, 0x84, + 0xD8, 0xB9, 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xB3, + 0xD9, 0x88, 0xD9, 0x84, 0xD8, 0xB9, 0xD9, 0x84, + 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x88, 0xD8, 0xB3, + 0xD9, 0x84, 0xD9, 0x85, 0xD8, 0xB5, 0xD9, 0x84, + 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, + 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x84, 0xD9, + 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, + 0xD9, 0x87, 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, + 0x84, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x84, 0x20, + 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x84, + 0xD9, 0x87, 0xD8, 0xB1, 0xDB, 0x8C, 0xD8, 0xA7, + 
0xD9, 0x84, 0x2E, 0x2E, 0xE2, 0x80, 0x94, 0xE2, + 0x80, 0x93, 0x5F, 0x5F, 0x28, 0x29, 0x7B, 0x7D, + 0xE3, 0x80, 0x94, 0xE3, 0x80, 0x95, 0xE3, 0x80, + 0x90, 0xE3, 0x80, 0x91, 0xE3, 0x80, 0x8A, 0xE3, + 0x80, 0x8B, 0xE3, 0x80, 0x88, 0xE3, 0x80, 0x89, + 0xE3, 0x80, 0x8C, 0xE3, 0x80, 0x8D, 0xE3, 0x80, + 0x8E, 0xE3, 0x80, 0x8F, 0x20, 0xCC, 0x85, 0x20, + 0xCC, 0x85, 0x20, 0xCC, 0x85, 0x20, 0xCC, 0x85, + 0x5F, 0x5F, 0x5F, 0x2C, 0xE3, 0x80, 0x81, 0x2E, + 0x3B, 0x3A, 0x3F, 0x21, 0xE2, 0x80, 0x94, 0x28, + 0x29, 0x7B, 0x7D, 0xE3, 0x80, 0x94, 0xE3, 0x80, + 0x95, 0x23, 0x26, 0x2A, 0x2B, 0x2D, 0x3C, 0x3E, + 0x3D, 0x5C, 0x24, 0x25, 0x40, 0x20, 0xD9, 0x8B, + 0xD9, 0x80, 0xD9, 0x8B, 0x20, 0xD9, 0x8C, 0x20, + 0xD9, 0x8D, 0x20, 0xD9, 0x8E, 0xD9, 0x80, 0xD9, + 0x8E, 0x20, 0xD9, 0x8F, 0xD9, 0x80, 0xD9, 0x8F, + 0x20, 0xD9, 0x90, 0xD9, 0x80, 0xD9, 0x90, 0x20, + 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x91, 0x20, 0xD9, + 0x92, 0xD9, 0x80, 0xD9, 0x92, 0xD8, 0xA1, 0xD8, + 0xA7, 0xD9, 0x93, 0xD8, 0xA7, 0xD9, 0x93, 0xD8, + 0xA7, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, + 0x88, 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x94, 0xD8, + 0xA7, 0xD9, 0x95, 0xD8, 0xA7, 0xD9, 0x95, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, + 0xA7, 0xD8, 0xA7, 0xD8, 0xA8, 0xD8, 0xA8, 0xD8, + 0xA8, 0xD8, 0xA8, 0xD8, 0xA9, 0xD8, 0xA9, 0xD8, + 0xAA, 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, + 0xAB, 0xD8, 0xAB, 0xD8, 0xAB, 0xD8, 0xAB, 0xD8, + 0xAC, 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, + 0xAD, 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, + 0xAE, 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, + 0xAF, 0xD8, 0xAF, 0xD8, 0xB0, 0xD8, 0xB0, 0xD8, + 0xB1, 0xD8, 0xB1, 0xD8, 0xB2, 0xD8, 0xB2, 0xD8, + 0xB3, 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, + 0xB4, 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, + 0xB5, 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, + 0xB6, 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, + 0xB7, 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, + 0xB8, 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, + 
0xB9, 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, + 0xBA, 0xD8, 0xBA, 0xD8, 0xBA, 0xD8, 0xBA, 0xD9, + 0x81, 0xD9, 0x81, 0xD9, 0x81, 0xD9, 0x81, 0xD9, + 0x82, 0xD9, 0x82, 0xD9, 0x82, 0xD9, 0x82, 0xD9, + 0x83, 0xD9, 0x83, 0xD9, 0x83, 0xD9, 0x83, 0xD9, + 0x84, 0xD9, 0x84, 0xD9, 0x84, 0xD9, 0x84, 0xD9, + 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, + 0x86, 0xD9, 0x86, 0xD9, 0x86, 0xD9, 0x86, 0xD9, + 0x87, 0xD9, 0x87, 0xD9, 0x87, 0xD9, 0x87, 0xD9, + 0x88, 0xD9, 0x88, 0xD9, 0x89, 0xD9, 0x89, 0xD9, + 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, + 0x84, 0xD8, 0xA7, 0xD9, 0x93, 0xD9, 0x84, 0xD8, + 0xA7, 0xD9, 0x93, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, + 0x94, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, + 0x84, 0xD8, 0xA7, 0xD9, 0x95, 0xD9, 0x84, 0xD8, + 0xA7, 0xD9, 0x95, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, + 0x84, 0xD8, 0xA7, 0x21, 0x22, 0x23, 0x24, 0x25, + 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, + 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, + 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, + 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, + 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, + 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, + 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, + 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, + 0x7E, 0xE2, 0xA6, 0x85, 0xE2, 0xA6, 0x86, 0xE3, + 0x80, 0x82, 0xE3, 0x80, 0x8C, 0xE3, 0x80, 0x8D, + 0xE3, 0x80, 0x81, 0xE3, 0x83, 0xBB, 0xE3, 0x83, + 0xB2, 0xE3, 0x82, 0xA1, 0xE3, 0x82, 0xA3, 0xE3, + 0x82, 0xA5, 0xE3, 0x82, 0xA7, 0xE3, 0x82, 0xA9, + 0xE3, 0x83, 0xA3, 0xE3, 0x83, 0xA5, 0xE3, 0x83, + 0xA7, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0xBC, 0xE3, + 0x82, 0xA2, 0xE3, 0x82, 0xA4, 0xE3, 0x82, 0xA6, + 0xE3, 0x82, 0xA8, 0xE3, 0x82, 0xAA, 0xE3, 0x82, + 0xAB, 0xE3, 0x82, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, + 0x82, 0xB1, 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0xB5, + 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0xB9, 0xE3, 0x82, + 
0xBB, 0xE3, 0x82, 0xBD, 0xE3, 0x82, 0xBF, 0xE3, + 0x83, 0x81, 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x86, + 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x8A, 0xE3, 0x83, + 0x8B, 0xE3, 0x83, 0x8C, 0xE3, 0x83, 0x8D, 0xE3, + 0x83, 0x8E, 0xE3, 0x83, 0x8F, 0xE3, 0x83, 0x92, + 0xE3, 0x83, 0x95, 0xE3, 0x83, 0x98, 0xE3, 0x83, + 0x9B, 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0x9F, 0xE3, + 0x83, 0xA0, 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xA2, + 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xA6, 0xE3, 0x83, + 0xA8, 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xAA, 0xE3, + 0x83, 0xAB, 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xAD, + 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0xB3, 0xE3, 0x82, + 0x99, 0xE3, 0x82, 0x9A, 0xE1, 0x85, 0xA0, 0xE1, + 0x84, 0x80, 0xE1, 0x84, 0x81, 0xE1, 0x86, 0xAA, + 0xE1, 0x84, 0x82, 0xE1, 0x86, 0xAC, 0xE1, 0x86, + 0xAD, 0xE1, 0x84, 0x83, 0xE1, 0x84, 0x84, 0xE1, + 0x84, 0x85, 0xE1, 0x86, 0xB0, 0xE1, 0x86, 0xB1, + 0xE1, 0x86, 0xB2, 0xE1, 0x86, 0xB3, 0xE1, 0x86, + 0xB4, 0xE1, 0x86, 0xB5, 0xE1, 0x84, 0x9A, 0xE1, + 0x84, 0x86, 0xE1, 0x84, 0x87, 0xE1, 0x84, 0x88, + 0xE1, 0x84, 0xA1, 0xE1, 0x84, 0x89, 0xE1, 0x84, + 0x8A, 0xE1, 0x84, 0x8B, 0xE1, 0x84, 0x8C, 0xE1, + 0x84, 0x8D, 0xE1, 0x84, 0x8E, 0xE1, 0x84, 0x8F, + 0xE1, 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, 0x84, + 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x85, 0xA2, 0xE1, + 0x85, 0xA3, 0xE1, 0x85, 0xA4, 0xE1, 0x85, 0xA5, + 0xE1, 0x85, 0xA6, 0xE1, 0x85, 0xA7, 0xE1, 0x85, + 0xA8, 0xE1, 0x85, 0xA9, 0xE1, 0x85, 0xAA, 0xE1, + 0x85, 0xAB, 0xE1, 0x85, 0xAC, 0xE1, 0x85, 0xAD, + 0xE1, 0x85, 0xAE, 0xE1, 0x85, 0xAF, 0xE1, 0x85, + 0xB0, 0xE1, 0x85, 0xB1, 0xE1, 0x85, 0xB2, 0xE1, + 0x85, 0xB3, 0xE1, 0x85, 0xB4, 0xE1, 0x85, 0xB5, + 0xC2, 0xA2, 0xC2, 0xA3, 0xC2, 0xAC, 0x20, 0xCC, + 0x84, 0xC2, 0xA6, 0xC2, 0xA5, 0xE2, 0x82, 0xA9, + 0xE2, 0x94, 0x82, 0xE2, 0x86, 0x90, 0xE2, 0x86, + 0x91, 0xE2, 0x86, 0x92, 0xE2, 0x86, 0x93, 0xE2, + 0x96, 0xA0, 0xE2, 0x97, 0x8B, 0xF6, 0xF0, 0x9D, + 0x85, 0x97, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, 0xF0, + 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, + 0xF0, 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, + 
0xF0, 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x85, + 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, + 0xAF, 0xF6, 0xF0, 0x9D, 0x85, 0x98, 0xF0, 0x9D, + 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xB0, 0xF6, 0xF0, + 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, + 0x9D, 0x85, 0xB1, 0xF6, 0xF0, 0x9D, 0x85, 0x98, + 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xB2, + 0xF6, 0xF0, 0x9D, 0x86, 0xB9, 0xF0, 0x9D, 0x85, + 0xA5, 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, + 0x85, 0xA5, 0xF6, 0xF0, 0x9D, 0x86, 0xB9, 0xF0, + 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAE, 0xF6, + 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, 0xA5, + 0xF0, 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x86, + 0xB9, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, + 0xAF, 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, + 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAF, 0x41, 0x42, + 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, + 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, + 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, + 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, + 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, + 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x43, 0x44, 0x47, 0x4A, 0x4B, 0x4E, + 0x4F, 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x66, + 0x68, 0x69, 0x6A, 0x6B, 0x6D, 0x6E, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 
0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, + 0x45, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, + 0x4F, 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, + 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, + 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, 0x45, + 0x46, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4F, + 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 
0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, + 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, + 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, + 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, + 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, + 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, + 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, + 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, + 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, + 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, + 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, + 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, + 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, + 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, + 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, + 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, + 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, + 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, + 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, + 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, + 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, + 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, + 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, + 
0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, + 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, + 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, + 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, + 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, + 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, + 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, + 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, + 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, + 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, + 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, + 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, + 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, + 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, + 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, + 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, + 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, + 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, + 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, + 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, + 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, + 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, + 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, + 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, + 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, + 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, + 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, + 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, + 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, + 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, + 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, + 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, + 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, + 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, + 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, + 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, + 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, + 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, + 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, + 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, + 
0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, + 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, + 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, + 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, + 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, + 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, + 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, + 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, + 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, + 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, + 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, 0x80, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0xF6, 0xE4, 0xB8, 0xBD, 0xF6, 0xE4, 0xB8, + 0xB8, 0xF6, 0xE4, 0xB9, 0x81, 0xF6, 0xF0, 0xA0, + 0x84, 0xA2, 0xF6, 0xE4, 0xBD, 0xA0, 0xF6, 0xE4, + 0xBE, 0xAE, 0xF6, 0xE4, 0xBE, 0xBB, 0xF6, 0xE5, + 0x80, 0x82, 0xF6, 0xE5, 0x81, 0xBA, 0xF6, 0xE5, + 0x82, 0x99, 0xF6, 0xE5, 0x83, 0xA7, 0xF6, 0xE5, + 0x83, 0x8F, 0xF6, 0xE3, 0x92, 0x9E, 0xF6, 0xF0, + 0xA0, 0x98, 0xBA, 0xF6, 0xE5, 0x85, 0x8D, 0xF6, + 0xE5, 0x85, 0x94, 0xF6, 0xE5, 0x85, 0xA4, 0xF6, + 0xE5, 0x85, 0xB7, 0xF6, 0xF0, 0xA0, 0x94, 0x9C, + 0xF6, 0xE3, 0x92, 0xB9, 0xF6, 0xE5, 0x85, 0xA7, + 0xF6, 0xE5, 0x86, 0x8D, 0xF6, 0xF0, 0xA0, 0x95, + 0x8B, 0xF6, 0xE5, 0x86, 0x97, 0xF6, 0xE5, 0x86, + 0xA4, 0xF6, 0xE4, 0xBB, 0x8C, 0xF6, 0xE5, 0x86, + 0xAC, 0xF6, 0xE5, 0x86, 0xB5, 0xF6, 0xF0, 0xA9, + 0x87, 0x9F, 0xF6, 0xE5, 0x87, 0xB5, 0xF6, 0xE5, + 0x88, 0x83, 0xF6, 0xE3, 0x93, 0x9F, 0xF6, 0xE5, + 0x88, 0xBB, 0xF6, 0xE5, 0x89, 0x86, 0xF6, 0xE5, + 0x89, 0xB2, 0xF6, 0xE5, 0x89, 0xB7, 0xF6, 0xE3, + 0x94, 0x95, 0xF6, 0xE5, 0x8B, 0x87, 0xF6, 0xE5, + 0x8B, 0x89, 0xF6, 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, + 0x8B, 0xBA, 0xF6, 0xE5, 0x8C, 0x85, 0xF6, 0xE5, + 0x8C, 0x86, 0xF6, 0xE5, 0x8C, 0x97, 0xF6, 0xE5, + 
0x8D, 0x89, 0xF6, 0xE5, 0x8D, 0x91, 0xF6, 0xE5, + 0x8D, 0x9A, 0xF6, 0xE5, 0x8D, 0xB3, 0xF6, 0xE5, + 0x8D, 0xBD, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xE5, + 0x8D, 0xBF, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xF0, + 0xA0, 0xA8, 0xAC, 0xF6, 0xE7, 0x81, 0xB0, 0xF6, + 0xE5, 0x8F, 0x8A, 0xF6, 0xE5, 0x8F, 0x9F, 0xF6, + 0xF0, 0xA0, 0xAD, 0xA3, 0xF6, 0xE5, 0x8F, 0xAB, + 0xF6, 0xE5, 0x8F, 0xB1, 0xF6, 0xE5, 0x90, 0x86, + 0xF6, 0xE5, 0x92, 0x9E, 0xF6, 0xE5, 0x90, 0xB8, + 0xF6, 0xE5, 0x91, 0x88, 0xF6, 0xE5, 0x91, 0xA8, + 0xF6, 0xE5, 0x92, 0xA2, 0xF6, 0xE5, 0x93, 0xB6, + 0xF6, 0xE5, 0x94, 0x90, 0xF6, 0xE5, 0x95, 0x93, + 0xF6, 0xE5, 0x95, 0xA3, 0xF6, 0xE5, 0x96, 0x84, + 0xF6, 0xE5, 0x96, 0x84, 0xF6, 0xE5, 0x96, 0x99, + 0xF6, 0xE5, 0x96, 0xAB, 0xF6, 0xE5, 0x96, 0xB3, + 0xF6, 0xE5, 0x97, 0x82, 0xF6, 0xE5, 0x9C, 0x96, + 0xF6, 0xE5, 0x98, 0x86, 0xF6, 0xE5, 0x9C, 0x97, + 0xF6, 0xE5, 0x99, 0x91, 0xF6, 0xE5, 0x99, 0xB4, + 0xF6, 0xE5, 0x88, 0x87, 0xF6, 0xE5, 0xA3, 0xAE, + 0xF6, 0xE5, 0x9F, 0x8E, 0xF6, 0xE5, 0x9F, 0xB4, + 0xF6, 0xE5, 0xA0, 0x8D, 0xF6, 0xE5, 0x9E, 0x8B, + 0xF6, 0xE5, 0xA0, 0xB2, 0xF6, 0xE5, 0xA0, 0xB1, + 0xF6, 0xE5, 0xA2, 0xAC, 0xF6, 0xF0, 0xA1, 0x93, + 0xA4, 0xF6, 0xE5, 0xA3, 0xB2, 0xF6, 0xE5, 0xA3, + 0xB7, 0xF6, 0xE5, 0xA4, 0x86, 0xF6, 0xE5, 0xA4, + 0x9A, 0xF6, 0xE5, 0xA4, 0xA2, 0xF6, 0xE5, 0xA5, + 0xA2, 0xF6, 0xF0, 0xA1, 0x9A, 0xA8, 0xF6, 0xF0, + 0xA1, 0x9B, 0xAA, 0xF6, 0xE5, 0xA7, 0xAC, 0xF6, + 0xE5, 0xA8, 0x9B, 0xF6, 0xE5, 0xA8, 0xA7, 0xF6, + 0xE5, 0xA7, 0x98, 0xF6, 0xE5, 0xA9, 0xA6, 0xF6, + 0xE3, 0x9B, 0xAE, 0xF6, 0xE3, 0x9B, 0xBC, 0xF6, + 0xE5, 0xAC, 0x88, 0xF6, 0xE5, 0xAC, 0xBE, 0xF6, + 0xE5, 0xAC, 0xBE, 0xF6, 0xF0, 0xA1, 0xA7, 0x88, + 0xF6, 0xE5, 0xAF, 0x83, 0xF6, 0xE5, 0xAF, 0x98, + 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, 0xAF, 0xB3, + 0xF6, 0xF0, 0xA1, 0xAC, 0x98, 0xF6, 0xE5, 0xAF, + 0xBF, 0xF6, 0xE5, 0xB0, 0x86, 0xF6, 0xE5, 0xBD, + 0x93, 0xF6, 0xE5, 0xB0, 0xA2, 0xF6, 0xE3, 0x9E, + 0x81, 0xF6, 0xE5, 0xB1, 0xA0, 0xF6, 0xE5, 0xB1, + 0xAE, 0xF6, 0xE5, 0xB3, 0x80, 0xF6, 0xE5, 0xB2, + 
0x8D, 0xF6, 0xF0, 0xA1, 0xB7, 0xA4, 0xF6, 0xE5, + 0xB5, 0x83, 0xF6, 0xF0, 0xA1, 0xB7, 0xA6, 0xF6, + 0xE5, 0xB5, 0xAE, 0xF6, 0xE5, 0xB5, 0xAB, 0xF6, + 0xE5, 0xB5, 0xBC, 0xF6, 0xE5, 0xB7, 0xA1, 0xF6, + 0xE5, 0xB7, 0xA2, 0xF6, 0xE3, 0xA0, 0xAF, 0xF6, + 0xE5, 0xB7, 0xBD, 0xF6, 0xE5, 0xB8, 0xA8, 0xF6, + 0xE5, 0xB8, 0xBD, 0xF6, 0xE5, 0xB9, 0xA9, 0xF6, + 0xE3, 0xA1, 0xA2, 0xF6, 0xF0, 0xA2, 0x86, 0x83, + 0xF6, 0xE3, 0xA1, 0xBC, 0xF6, 0xE5, 0xBA, 0xB0, + 0xF6, 0xE5, 0xBA, 0xB3, 0xF6, 0xE5, 0xBA, 0xB6, + 0xF6, 0xE5, 0xBB, 0x8A, 0xF6, 0xF0, 0xAA, 0x8E, + 0x92, 0xF6, 0xE5, 0xBB, 0xBE, 0xF6, 0xF0, 0xA2, + 0x8C, 0xB1, 0xF6, 0xF0, 0xA2, 0x8C, 0xB1, 0xF6, + 0xE8, 0x88, 0x81, 0xF6, 0xE5, 0xBC, 0xA2, 0xF6, + 0xE5, 0xBC, 0xA2, 0xF6, 0xE3, 0xA3, 0x87, 0xF6, + 0xF0, 0xA3, 0x8A, 0xB8, 0xF6, 0xF0, 0xA6, 0x87, + 0x9A, 0xF6, 0xE5, 0xBD, 0xA2, 0xF6, 0xE5, 0xBD, + 0xAB, 0xF6, 0xE3, 0xA3, 0xA3, 0xF6, 0xE5, 0xBE, + 0x9A, 0xF6, 0xE5, 0xBF, 0x8D, 0xF6, 0xE5, 0xBF, + 0x97, 0xF6, 0xE5, 0xBF, 0xB9, 0xF6, 0xE6, 0x82, + 0x81, 0xF6, 0xE3, 0xA4, 0xBA, 0xF6, 0xE3, 0xA4, + 0x9C, 0xF6, 0xE6, 0x82, 0x94, 0xF6, 0xF0, 0xA2, + 0x9B, 0x94, 0xF6, 0xE6, 0x83, 0x87, 0xF6, 0xE6, + 0x85, 0x88, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, + 0x85, 0x8E, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, + 0x85, 0xBA, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, 0xE6, + 0x86, 0xB2, 0xF6, 0xE6, 0x86, 0xA4, 0xF6, 0xE6, + 0x86, 0xAF, 0xF6, 0xE6, 0x87, 0x9E, 0xF6, 0xE6, + 0x87, 0xB2, 0xF6, 0xE6, 0x87, 0xB6, 0xF6, 0xE6, + 0x88, 0x90, 0xF6, 0xE6, 0x88, 0x9B, 0xF6, 0xE6, + 0x89, 0x9D, 0xF6, 0xE6, 0x8A, 0xB1, 0xF6, 0xE6, + 0x8B, 0x94, 0xF6, 0xE6, 0x8D, 0x90, 0xF6, 0xF0, + 0xA2, 0xAC, 0x8C, 0xF6, 0xE6, 0x8C, 0xBD, 0xF6, + 0xE6, 0x8B, 0xBC, 0xF6, 0xE6, 0x8D, 0xA8, 0xF6, + 0xE6, 0x8E, 0x83, 0xF6, 0xE6, 0x8F, 0xA4, 0xF6, + 0xF0, 0xA2, 0xAF, 0xB1, 0xF6, 0xE6, 0x90, 0xA2, + 0xF6, 0xE6, 0x8F, 0x85, 0xF6, 0xE6, 0x8E, 0xA9, + 0xF6, 0xE3, 0xA8, 0xAE, 0xF6, 0xE6, 0x91, 0xA9, + 0xF6, 0xE6, 0x91, 0xBE, 0xF6, 0xE6, 0x92, 0x9D, + 0xF6, 0xE6, 0x91, 0xB7, 0xF6, 0xE3, 0xA9, 0xAC, + 
0xF6, 0xE6, 0x95, 0x8F, 0xF6, 0xE6, 0x95, 0xAC, + 0xF6, 0xF0, 0xA3, 0x80, 0x8A, 0xF6, 0xE6, 0x97, + 0xA3, 0xF6, 0xE6, 0x9B, 0xB8, 0xF6, 0xE6, 0x99, + 0x89, 0xF6, 0xE3, 0xAC, 0x99, 0xF6, 0xE6, 0x9A, + 0x91, 0xF6, 0xE3, 0xAC, 0x88, 0xF6, 0xE3, 0xAB, + 0xA4, 0xF6, 0xE5, 0x86, 0x92, 0xF6, 0xE5, 0x86, + 0x95, 0xF6, 0xE6, 0x9C, 0x80, 0xF6, 0xE6, 0x9A, + 0x9C, 0xF6, 0xE8, 0x82, 0xAD, 0xF6, 0xE4, 0x8F, + 0x99, 0xF6, 0xE6, 0x9C, 0x97, 0xF6, 0xE6, 0x9C, + 0x9B, 0xF6, 0xE6, 0x9C, 0xA1, 0xF6, 0xE6, 0x9D, + 0x9E, 0xF6, 0xE6, 0x9D, 0x93, 0xF6, 0xF0, 0xA3, + 0x8F, 0x83, 0xF6, 0xE3, 0xAD, 0x89, 0xF6, 0xE6, + 0x9F, 0xBA, 0xF6, 0xE6, 0x9E, 0x85, 0xF6, 0xE6, + 0xA1, 0x92, 0xF6, 0xE6, 0xA2, 0x85, 0xF6, 0xF0, + 0xA3, 0x91, 0xAD, 0xF6, 0xE6, 0xA2, 0x8E, 0xF6, + 0xE6, 0xA0, 0x9F, 0xF6, 0xE6, 0xA4, 0x94, 0xF6, + 0xE3, 0xAE, 0x9D, 0xF6, 0xE6, 0xA5, 0x82, 0xF6, + 0xE6, 0xA6, 0xA3, 0xF6, 0xE6, 0xA7, 0xAA, 0xF6, + 0xE6, 0xAA, 0xA8, 0xF6, 0xF0, 0xA3, 0x9A, 0xA3, + 0xF6, 0xE6, 0xAB, 0x9B, 0xF6, 0xE3, 0xB0, 0x98, + 0xF6, 0xE6, 0xAC, 0xA1, 0xF6, 0xF0, 0xA3, 0xA2, + 0xA7, 0xF6, 0xE6, 0xAD, 0x94, 0xF6, 0xE3, 0xB1, + 0x8E, 0xF6, 0xE6, 0xAD, 0xB2, 0xF6, 0xE6, 0xAE, + 0x9F, 0xF6, 0xE6, 0xAE, 0xBA, 0xF6, 0xE6, 0xAE, + 0xBB, 0xF6, 0xF0, 0xA3, 0xAA, 0x8D, 0xF6, 0xF0, + 0xA1, 0xB4, 0x8B, 0xF6, 0xF0, 0xA3, 0xAB, 0xBA, + 0xF6, 0xE6, 0xB1, 0x8E, 0xF6, 0xF0, 0xA3, 0xB2, + 0xBC, 0xF6, 0xE6, 0xB2, 0xBF, 0xF6, 0xE6, 0xB3, + 0x8D, 0xF6, 0xE6, 0xB1, 0xA7, 0xF6, 0xE6, 0xB4, + 0x96, 0xF6, 0xE6, 0xB4, 0xBE, 0xF6, 0xE6, 0xB5, + 0xB7, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, 0xB5, + 0xA9, 0xF6, 0xE6, 0xB5, 0xB8, 0xF6, 0xE6, 0xB6, + 0x85, 0xF6, 0xF0, 0xA3, 0xB4, 0x9E, 0xF6, 0xE6, + 0xB4, 0xB4, 0xF6, 0xE6, 0xB8, 0xAF, 0xF6, 0xE6, + 0xB9, 0xAE, 0xF6, 0xE3, 0xB4, 0xB3, 0xF6, 0xE6, + 0xBB, 0x8B, 0xF6, 0xE6, 0xBB, 0x87, 0xF6, 0xF0, + 0xA3, 0xBB, 0x91, 0xF6, 0xE6, 0xB7, 0xB9, 0xF6, + 0xE6, 0xBD, 0xAE, 0xF6, 0xF0, 0xA3, 0xBD, 0x9E, + 0xF6, 0xF0, 0xA3, 0xBE, 0x8E, 0xF6, 0xE6, 0xBF, + 0x86, 0xF6, 0xE7, 0x80, 0xB9, 0xF6, 0xE7, 0x80, + 
0x9E, 0xF6, 0xE7, 0x80, 0x9B, 0xF6, 0xE3, 0xB6, + 0x96, 0xF6, 0xE7, 0x81, 0x8A, 0xF6, 0xE7, 0x81, + 0xBD, 0xF6, 0xE7, 0x81, 0xB7, 0xF6, 0xE7, 0x82, + 0xAD, 0xF6, 0xF0, 0xA0, 0x94, 0xA5, 0xF6, 0xE7, + 0x85, 0x85, 0xF6, 0xF0, 0xA4, 0x89, 0xA3, 0xF6, + 0xE7, 0x86, 0x9C, 0xF6, 0xF0, 0xA4, 0x8E, 0xAB, + 0xF6, 0xE7, 0x88, 0xA8, 0xF6, 0xE7, 0x88, 0xB5, + 0xF6, 0xE7, 0x89, 0x90, 0xF6, 0xF0, 0xA4, 0x98, + 0x88, 0xF6, 0xE7, 0x8A, 0x80, 0xF6, 0xE7, 0x8A, + 0x95, 0xF6, 0xF0, 0xA4, 0x9C, 0xB5, 0xF6, 0xF0, + 0xA4, 0xA0, 0x94, 0xF6, 0xE7, 0x8D, 0xBA, 0xF6, + 0xE7, 0x8E, 0x8B, 0xF6, 0xE3, 0xBA, 0xAC, 0xF6, + 0xE7, 0x8E, 0xA5, 0xF6, 0xE3, 0xBA, 0xB8, 0xF6, + 0xE3, 0xBA, 0xB8, 0xF6, 0xE7, 0x91, 0x87, 0xF6, + 0xE7, 0x91, 0x9C, 0xF6, 0xE7, 0x91, 0xB1, 0xF6, + 0xE7, 0x92, 0x85, 0xF6, 0xE7, 0x93, 0x8A, 0xF6, + 0xE3, 0xBC, 0x9B, 0xF6, 0xE7, 0x94, 0xA4, 0xF6, + 0xF0, 0xA4, 0xB0, 0xB6, 0xF6, 0xE7, 0x94, 0xBE, + 0xF6, 0xF0, 0xA4, 0xB2, 0x92, 0xF6, 0xE7, 0x95, + 0xB0, 0xF6, 0xF0, 0xA2, 0x86, 0x9F, 0xF6, 0xE7, + 0x98, 0x90, 0xF6, 0xF0, 0xA4, 0xBE, 0xA1, 0xF6, + 0xF0, 0xA4, 0xBE, 0xB8, 0xF6, 0xF0, 0xA5, 0x81, + 0x84, 0xF6, 0xE3, 0xBF, 0xBC, 0xF6, 0xE4, 0x80, + 0x88, 0xF6, 0xE7, 0x9B, 0xB4, 0xF6, 0xF0, 0xA5, + 0x83, 0xB3, 0xF6, 0xF0, 0xA5, 0x83, 0xB2, 0xF6, + 0xF0, 0xA5, 0x84, 0x99, 0xF6, 0xF0, 0xA5, 0x84, + 0xB3, 0xF6, 0xE7, 0x9C, 0x9E, 0xF6, 0xE7, 0x9C, + 0x9F, 0xF6, 0xE7, 0x9C, 0x9F, 0xF6, 0xE7, 0x9D, + 0x8A, 0xF6, 0xE4, 0x80, 0xB9, 0xF6, 0xE7, 0x9E, + 0x8B, 0xF6, 0xE4, 0x81, 0x86, 0xF6, 0xE4, 0x82, + 0x96, 0xF6, 0xF0, 0xA5, 0x90, 0x9D, 0xF6, 0xE7, + 0xA1, 0x8E, 0xF6, 0xE7, 0xA2, 0x8C, 0xF6, 0xE7, + 0xA3, 0x8C, 0xF6, 0xE4, 0x83, 0xA3, 0xF6, 0xF0, + 0xA5, 0x98, 0xA6, 0xF6, 0xE7, 0xA5, 0x96, 0xF6, + 0xF0, 0xA5, 0x9A, 0x9A, 0xF6, 0xF0, 0xA5, 0x9B, + 0x85, 0xF6, 0xE7, 0xA6, 0x8F, 0xF6, 0xE7, 0xA7, + 0xAB, 0xF6, 0xE4, 0x84, 0xAF, 0xF6, 0xE7, 0xA9, + 0x80, 0xF6, 0xE7, 0xA9, 0x8A, 0xF6, 0xE7, 0xA9, + 0x8F, 0xF6, 0xF0, 0xA5, 0xA5, 0xBC, 0xF6, 0xF0, + 0xA5, 0xAA, 0xA7, 0xF6, 0xF0, 0xA5, 0xAA, 0xA7, + 
0xF6, 0xE7, 0xAB, 0xAE, 0xF6, 0xE4, 0x88, 0x82, + 0xF6, 0xF0, 0xA5, 0xAE, 0xAB, 0xF6, 0xE7, 0xAF, + 0x86, 0xF6, 0xE7, 0xAF, 0x89, 0xF6, 0xE4, 0x88, + 0xA7, 0xF6, 0xF0, 0xA5, 0xB2, 0x80, 0xF6, 0xE7, + 0xB3, 0x92, 0xF6, 0xE4, 0x8A, 0xA0, 0xF6, 0xE7, + 0xB3, 0xA8, 0xF6, 0xE7, 0xB3, 0xA3, 0xF6, 0xE7, + 0xB4, 0x80, 0xF6, 0xF0, 0xA5, 0xBE, 0x86, 0xF6, + 0xE7, 0xB5, 0xA3, 0xF6, 0xE4, 0x8C, 0x81, 0xF6, + 0xE7, 0xB7, 0x87, 0xF6, 0xE7, 0xB8, 0x82, 0xF6, + 0xE7, 0xB9, 0x85, 0xF6, 0xE4, 0x8C, 0xB4, 0xF6, + 0xF0, 0xA6, 0x88, 0xA8, 0xF6, 0xF0, 0xA6, 0x89, + 0x87, 0xF6, 0xE4, 0x8D, 0x99, 0xF6, 0xF0, 0xA6, + 0x8B, 0x99, 0xF6, 0xE7, 0xBD, 0xBA, 0xF6, 0xF0, + 0xA6, 0x8C, 0xBE, 0xF6, 0xE7, 0xBE, 0x95, 0xF6, + 0xE7, 0xBF, 0xBA, 0xF6, 0xE8, 0x80, 0x85, 0xF6, + 0xF0, 0xA6, 0x93, 0x9A, 0xF6, 0xF0, 0xA6, 0x94, + 0xA3, 0xF6, 0xE8, 0x81, 0xA0, 0xF6, 0xF0, 0xA6, + 0x96, 0xA8, 0xF6, 0xE8, 0x81, 0xB0, 0xF6, 0xF0, + 0xA3, 0x8D, 0x9F, 0xF6, 0xE4, 0x8F, 0x95, 0xF6, + 0xE8, 0x82, 0xB2, 0xF6, 0xE8, 0x84, 0x83, 0xF6, + 0xE4, 0x90, 0x8B, 0xF6, 0xE8, 0x84, 0xBE, 0xF6, + 0xE5, 0xAA, 0xB5, 0xF6, 0xF0, 0xA6, 0x9E, 0xA7, + 0xF6, 0xF0, 0xA6, 0x9E, 0xB5, 0xF6, 0xF0, 0xA3, + 0x8E, 0x93, 0xF6, 0xF0, 0xA3, 0x8E, 0x9C, 0xF6, + 0xE8, 0x88, 0x81, 0xF6, 0xE8, 0x88, 0x84, 0xF6, + 0xE8, 0xBE, 0x9E, 0xF6, 0xE4, 0x91, 0xAB, 0xF6, + 0xE8, 0x8A, 0x91, 0xF6, 0xE8, 0x8A, 0x8B, 0xF6, + 0xE8, 0x8A, 0x9D, 0xF6, 0xE5, 0x8A, 0xB3, 0xF6, + 0xE8, 0x8A, 0xB1, 0xF6, 0xE8, 0x8A, 0xB3, 0xF6, + 0xE8, 0x8A, 0xBD, 0xF6, 0xE8, 0x8B, 0xA6, 0xF6, + 0xF0, 0xA6, 0xAC, 0xBC, 0xF6, 0xE8, 0x8B, 0xA5, + 0xF6, 0xE8, 0x8C, 0x9D, 0xF6, 0xE8, 0x8D, 0xA3, + 0xF6, 0xE8, 0x8E, 0xAD, 0xF6, 0xE8, 0x8C, 0xA3, + 0xF6, 0xE8, 0x8E, 0xBD, 0xF6, 0xE8, 0x8F, 0xA7, + 0xF6, 0xE8, 0x91, 0x97, 0xF6, 0xE8, 0x8D, 0x93, + 0xF6, 0xE8, 0x8F, 0x8A, 0xF6, 0xE8, 0x8F, 0x8C, + 0xF6, 0xE8, 0x8F, 0x9C, 0xF6, 0xF0, 0xA6, 0xB0, + 0xB6, 0xF6, 0xF0, 0xA6, 0xB5, 0xAB, 0xF6, 0xF0, + 0xA6, 0xB3, 0x95, 0xF6, 0xE4, 0x94, 0xAB, 0xF6, + 0xE8, 0x93, 0xB1, 0xF6, 0xE8, 0x93, 0xB3, 0xF6, + 
0xE8, 0x94, 0x96, 0xF6, 0xF0, 0xA7, 0x8F, 0x8A, + 0xF6, 0xE8, 0x95, 0xA4, 0xF6, 0xF0, 0xA6, 0xBC, + 0xAC, 0xF6, 0xE4, 0x95, 0x9D, 0xF6, 0xE4, 0x95, + 0xA1, 0xF6, 0xF0, 0xA6, 0xBE, 0xB1, 0xF6, 0xF0, + 0xA7, 0x83, 0x92, 0xF6, 0xE4, 0x95, 0xAB, 0xF6, + 0xE8, 0x99, 0x90, 0xF6, 0xE8, 0x99, 0x9C, 0xF6, + 0xE8, 0x99, 0xA7, 0xF6, 0xE8, 0x99, 0xA9, 0xF6, + 0xE8, 0x9A, 0xA9, 0xF6, 0xE8, 0x9A, 0x88, 0xF6, + 0xE8, 0x9C, 0x8E, 0xF6, 0xE8, 0x9B, 0xA2, 0xF6, + 0xE8, 0x9D, 0xB9, 0xF6, 0xE8, 0x9C, 0xA8, 0xF6, + 0xE8, 0x9D, 0xAB, 0xF6, 0xE8, 0x9E, 0x86, 0xF6, + 0xE4, 0x97, 0x97, 0xF6, 0xE8, 0x9F, 0xA1, 0xF6, + 0xE8, 0xA0, 0x81, 0xF6, 0xE4, 0x97, 0xB9, 0xF6, + 0xE8, 0xA1, 0xA0, 0xF6, 0xE8, 0xA1, 0xA3, 0xF6, + 0xF0, 0xA7, 0x99, 0xA7, 0xF6, 0xE8, 0xA3, 0x97, + 0xF6, 0xE8, 0xA3, 0x9E, 0xF6, 0xE4, 0x98, 0xB5, + 0xF6, 0xE8, 0xA3, 0xBA, 0xF6, 0xE3, 0x92, 0xBB, + 0xF6, 0xF0, 0xA7, 0xA2, 0xAE, 0xF6, 0xF0, 0xA7, + 0xA5, 0xA6, 0xF6, 0xE4, 0x9A, 0xBE, 0xF6, 0xE4, + 0x9B, 0x87, 0xF6, 0xE8, 0xAA, 0xA0, 0xF6, 0xE8, + 0xAB, 0xAD, 0xF6, 0xE8, 0xAE, 0x8A, 0xF6, 0xE8, + 0xB1, 0x95, 0xF6, 0xF0, 0xA7, 0xB2, 0xA8, 0xF6, + 0xE8, 0xB2, 0xAB, 0xF6, 0xE8, 0xB3, 0x81, 0xF6, + 0xE8, 0xB4, 0x9B, 0xF6, 0xE8, 0xB5, 0xB7, 0xF6, + 0xF0, 0xA7, 0xBC, 0xAF, 0xF6, 0xF0, 0xA0, 0xA0, + 0x84, 0xF6, 0xE8, 0xB7, 0x8B, 0xF6, 0xE8, 0xB6, + 0xBC, 0xF6, 0xE8, 0xB7, 0xB0, 0xF6, 0xF0, 0xA0, + 0xA3, 0x9E, 0xF6, 0xE8, 0xBB, 0x94, 0xF6, 0xE8, + 0xBC, 0xB8, 0xF6, 0xF0, 0xA8, 0x97, 0x92, 0xF6, + 0xF0, 0xA8, 0x97, 0xAD, 0xF6, 0xE9, 0x82, 0x94, + 0xF6, 0xE9, 0x83, 0xB1, 0xF6, 0xE9, 0x84, 0x91, + 0xF6, 0xF0, 0xA8, 0x9C, 0xAE, 0xF6, 0xE9, 0x84, + 0x9B, 0xF6, 0xE9, 0x88, 0xB8, 0xF6, 0xE9, 0x8B, + 0x97, 0xF6, 0xE9, 0x8B, 0x98, 0xF6, 0xE9, 0x89, + 0xBC, 0xF6, 0xE9, 0x8F, 0xB9, 0xF6, 0xE9, 0x90, + 0x95, 0xF6, 0xF0, 0xA8, 0xAF, 0xBA, 0xF6, 0xE9, + 0x96, 0x8B, 0xF6, 0xE4, 0xA6, 0x95, 0xF6, 0xE9, + 0x96, 0xB7, 0xF6, 0xF0, 0xA8, 0xB5, 0xB7, 0xF6, + 0xE4, 0xA7, 0xA6, 0xF6, 0xE9, 0x9B, 0x83, 0xF6, + 0xE5, 0xB6, 0xB2, 0xF6, 0xE9, 0x9C, 0xA3, 0xF6, + 
0xF0, 0xA9, 0x85, 0x85, 0xF6, 0xF0, 0xA9, 0x88, + 0x9A, 0xF6, 0xE4, 0xA9, 0xAE, 0xF6, 0xE4, 0xA9, + 0xB6, 0xF6, 0xE9, 0x9F, 0xA0, 0xF6, 0xF0, 0xA9, + 0x90, 0x8A, 0xF6, 0xE4, 0xAA, 0xB2, 0xF6, 0xF0, + 0xA9, 0x92, 0x96, 0xF6, 0xE9, 0xA0, 0x8B, 0xF6, + 0xE9, 0xA0, 0x8B, 0xF6, 0xE9, 0xA0, 0xA9, 0xF6, + 0xF0, 0xA9, 0x96, 0xB6, 0xF6, 0xE9, 0xA3, 0xA2, + 0xF6, 0xE4, 0xAC, 0xB3, 0xF6, 0xE9, 0xA4, 0xA9, + 0xF6, 0xE9, 0xA6, 0xA7, 0xF6, 0xE9, 0xA7, 0x82, + 0xF6, 0xE9, 0xA7, 0xBE, 0xF6, 0xE4, 0xAF, 0x8E, + 0xF6, 0xF0, 0xA9, 0xAC, 0xB0, 0xF6, 0xE9, 0xAC, + 0x92, 0xF6, 0xE9, 0xB1, 0x80, 0xF6, 0xE9, 0xB3, + 0xBD, 0xF6, 0xE4, 0xB3, 0x8E, 0xF6, 0xE4, 0xB3, + 0xAD, 0xF6, 0xE9, 0xB5, 0xA7, 0xF6, 0xF0, 0xAA, + 0x83, 0x8E, 0xF6, 0xE4, 0xB3, 0xB8, 0xF6, 0xF0, + 0xAA, 0x84, 0x85, 0xF6, 0xF0, 0xAA, 0x88, 0x8E, + 0xF6, 0xF0, 0xAA, 0x8A, 0x91, 0xF6, 0xE9, 0xBA, + 0xBB, 0xF6, 0xE4, 0xB5, 0x96, 0xF6, 0xE9, 0xBB, + 0xB9, 0xF6, 0xE9, 0xBB, 0xBE, 0xF6, 0xE9, 0xBC, + 0x85, 0xF6, 0xE9, 0xBC, 0x8F, 0xF6, 0xE9, 0xBC, + 0x96, 0xF6, 0xE9, 0xBC, 0xBB, 0xF6, 0xF0, 0xAA, + 0x98, 0x80, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, + }, + { + 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, + 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, + 0xCC, 0xA7, 0x31, 0x6F, 0x31, 0xE2, 0x81, 0x84, + 0x34, 0x31, 0xE2, 0x81, 0x84, 0x32, 0x33, 0xE2, + 0x81, 0x84, 0x34, 0xF6, 0x41, 0xCC, 0x80, 
0xF6, + 0x41, 0xCC, 0x81, 0xF6, 0x41, 0xCC, 0x82, 0xF6, + 0x41, 0xCC, 0x83, 0xF6, 0x41, 0xCC, 0x88, 0xF6, + 0x41, 0xCC, 0x8A, 0xF6, 0x43, 0xCC, 0xA7, 0xF6, + 0x45, 0xCC, 0x80, 0xF6, 0x45, 0xCC, 0x81, 0xF6, + 0x45, 0xCC, 0x82, 0xF6, 0x45, 0xCC, 0x88, 0xF6, + 0x49, 0xCC, 0x80, 0xF6, 0x49, 0xCC, 0x81, 0xF6, + 0x49, 0xCC, 0x82, 0xF6, 0x49, 0xCC, 0x88, 0xF6, + 0x4E, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x80, 0xF6, + 0x4F, 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xF6, + 0x4F, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x88, 0xF6, + 0x55, 0xCC, 0x80, 0xF6, 0x55, 0xCC, 0x81, 0xF6, + 0x55, 0xCC, 0x82, 0xF6, 0x55, 0xCC, 0x88, 0xF6, + 0x59, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x80, 0xF6, + 0x61, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x82, 0xF6, + 0x61, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x88, 0xF6, + 0x61, 0xCC, 0x8A, 0xF6, 0x63, 0xCC, 0xA7, 0xF6, + 0x65, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x81, 0xF6, + 0x65, 0xCC, 0x82, 0xF6, 0x65, 0xCC, 0x88, 0xF6, + 0x69, 0xCC, 0x80, 0xF6, 0x69, 0xCC, 0x81, 0xF6, + 0x69, 0xCC, 0x82, 0xF6, 0x69, 0xCC, 0x88, 0xF6, + 0x6E, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x80, 0xF6, + 0x6F, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, 0xF6, + 0x6F, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x88, 0xF6, + 0x75, 0xCC, 0x80, 0xF6, 0x75, 0xCC, 0x81, 0xF6, + 0x75, 0xCC, 0x82, 0xF6, 0x75, 0xCC, 0x88, 0xF6, + 0x79, 0xCC, 0x81, 0xF6, 0x79, 0xCC, 0x88, 0xF6, + 0x41, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x84, 0xF6, + 0x41, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0x86, 0xF6, + 0x41, 0xCC, 0xA8, 0xF6, 0x61, 0xCC, 0xA8, 0xF6, + 0x43, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0x81, 0xF6, + 0x43, 0xCC, 0x82, 0xF6, 0x63, 0xCC, 0x82, 0xF6, + 0x43, 0xCC, 0x87, 0xF6, 0x63, 0xCC, 0x87, 0xF6, + 0x43, 0xCC, 0x8C, 0xF6, 0x63, 0xCC, 0x8C, 0xF6, + 0x44, 0xCC, 0x8C, 0xF6, 0x64, 0xCC, 0x8C, 0xF6, + 0x45, 0xCC, 0x84, 0xF6, 0x65, 0xCC, 0x84, 0xF6, + 0x45, 0xCC, 0x86, 0xF6, 0x65, 0xCC, 0x86, 0xF6, + 0x45, 0xCC, 0x87, 0xF6, 0x65, 0xCC, 0x87, 0xF6, + 0x45, 0xCC, 0xA8, 0xF6, 0x65, 0xCC, 0xA8, 0xF6, + 0x45, 0xCC, 0x8C, 0xF6, 0x65, 0xCC, 0x8C, 0xF6, + 0x47, 0xCC, 0x82, 0xF6, 0x67, 0xCC, 0x82, 
0xF6, + 0x47, 0xCC, 0x86, 0xF6, 0x67, 0xCC, 0x86, 0xF6, + 0x47, 0xCC, 0x87, 0xF6, 0x67, 0xCC, 0x87, 0xF6, + 0x47, 0xCC, 0xA7, 0xF6, 0x67, 0xCC, 0xA7, 0xF6, + 0x48, 0xCC, 0x82, 0xF6, 0x68, 0xCC, 0x82, 0xF6, + 0x49, 0xCC, 0x83, 0xF6, 0x69, 0xCC, 0x83, 0xF6, + 0x49, 0xCC, 0x84, 0xF6, 0x69, 0xCC, 0x84, 0xF6, + 0x49, 0xCC, 0x86, 0xF6, 0x69, 0xCC, 0x86, 0xF6, + 0x49, 0xCC, 0xA8, 0xF6, 0x69, 0xCC, 0xA8, 0xF6, + 0x49, 0xCC, 0x87, 0x49, 0x4A, 0x69, 0x6A, 0xF6, + 0x4A, 0xCC, 0x82, 0xF6, 0x6A, 0xCC, 0x82, 0xF6, + 0x4B, 0xCC, 0xA7, 0xF6, 0x6B, 0xCC, 0xA7, 0xF6, + 0x4C, 0xCC, 0x81, 0xF6, 0x6C, 0xCC, 0x81, 0xF6, + 0x4C, 0xCC, 0xA7, 0xF6, 0x6C, 0xCC, 0xA7, 0xF6, + 0x4C, 0xCC, 0x8C, 0xF6, 0x6C, 0xCC, 0x8C, 0x4C, + 0xC2, 0xB7, 0x6C, 0xC2, 0xB7, 0xF6, 0x4E, 0xCC, + 0x81, 0xF6, 0x6E, 0xCC, 0x81, 0xF6, 0x4E, 0xCC, + 0xA7, 0xF6, 0x6E, 0xCC, 0xA7, 0xF6, 0x4E, 0xCC, + 0x8C, 0xF6, 0x6E, 0xCC, 0x8C, 0xCA, 0xBC, 0x6E, + 0xF6, 0x4F, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, 0x84, + 0xF6, 0x4F, 0xCC, 0x86, 0xF6, 0x6F, 0xCC, 0x86, + 0xF6, 0x4F, 0xCC, 0x8B, 0xF6, 0x6F, 0xCC, 0x8B, + 0xF6, 0x52, 0xCC, 0x81, 0xF6, 0x72, 0xCC, 0x81, + 0xF6, 0x52, 0xCC, 0xA7, 0xF6, 0x72, 0xCC, 0xA7, + 0xF6, 0x52, 0xCC, 0x8C, 0xF6, 0x72, 0xCC, 0x8C, + 0xF6, 0x53, 0xCC, 0x81, 0xF6, 0x73, 0xCC, 0x81, + 0xF6, 0x53, 0xCC, 0x82, 0xF6, 0x73, 0xCC, 0x82, + 0xF6, 0x53, 0xCC, 0xA7, 0xF6, 0x73, 0xCC, 0xA7, + 0xF6, 0x53, 0xCC, 0x8C, 0xF6, 0x73, 0xCC, 0x8C, + 0xF6, 0x54, 0xCC, 0xA7, 0xF6, 0x74, 0xCC, 0xA7, + 0xF6, 0x54, 0xCC, 0x8C, 0xF6, 0x74, 0xCC, 0x8C, + 0xF6, 0x55, 0xCC, 0x83, 0xF6, 0x75, 0xCC, 0x83, + 0xF6, 0x55, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x84, + 0xF6, 0x55, 0xCC, 0x86, 0xF6, 0x75, 0xCC, 0x86, + 0xF6, 0x55, 0xCC, 0x8A, 0xF6, 0x75, 0xCC, 0x8A, + 0xF6, 0x55, 0xCC, 0x8B, 0xF6, 0x75, 0xCC, 0x8B, + 0xF6, 0x55, 0xCC, 0xA8, 0xF6, 0x75, 0xCC, 0xA8, + 0xF6, 0x57, 0xCC, 0x82, 0xF6, 0x77, 0xCC, 0x82, + 0xF6, 0x59, 0xCC, 0x82, 0xF6, 0x79, 0xCC, 0x82, + 0xF6, 0x59, 0xCC, 0x88, 0xF6, 0x5A, 0xCC, 0x81, + 0xF6, 0x7A, 0xCC, 0x81, 0xF6, 0x5A, 0xCC, 
0x87, + 0xF6, 0x7A, 0xCC, 0x87, 0xF6, 0x5A, 0xCC, 0x8C, + 0xF6, 0x7A, 0xCC, 0x8C, 0x73, 0xF6, 0x4F, 0xCC, + 0x9B, 0xF6, 0x6F, 0xCC, 0x9B, 0xF6, 0x55, 0xCC, + 0x9B, 0xF6, 0x75, 0xCC, 0x9B, 0x44, 0x5A, 0xCC, + 0x8C, 0x44, 0x7A, 0xCC, 0x8C, 0x64, 0x7A, 0xCC, + 0x8C, 0x4C, 0x4A, 0x4C, 0x6A, 0x6C, 0x6A, 0x4E, + 0x4A, 0x4E, 0x6A, 0x6E, 0x6A, 0xF6, 0x41, 0xCC, + 0x8C, 0xF6, 0x61, 0xCC, 0x8C, 0xF6, 0x49, 0xCC, + 0x8C, 0xF6, 0x69, 0xCC, 0x8C, 0xF6, 0x4F, 0xCC, + 0x8C, 0xF6, 0x6F, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, + 0x8C, 0xF6, 0x75, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, + 0x88, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x88, 0xCC, + 0x84, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0x75, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x55, 0xCC, + 0x88, 0xCC, 0x8C, 0xF6, 0x75, 0xCC, 0x88, 0xCC, + 0x8C, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x80, 0xF6, + 0x75, 0xCC, 0x88, 0xCC, 0x80, 0xF6, 0x41, 0xCC, + 0x88, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x88, 0xCC, + 0x84, 0xF6, 0x41, 0xCC, 0x87, 0xCC, 0x84, 0xF6, + 0x61, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0xC3, 0x86, + 0xCC, 0x84, 0xF6, 0xC3, 0xA6, 0xCC, 0x84, 0xF6, + 0x47, 0xCC, 0x8C, 0xF6, 0x67, 0xCC, 0x8C, 0xF6, + 0x4B, 0xCC, 0x8C, 0xF6, 0x6B, 0xCC, 0x8C, 0xF6, + 0x4F, 0xCC, 0xA8, 0xF6, 0x6F, 0xCC, 0xA8, 0xF6, + 0x4F, 0xCC, 0xA8, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, + 0xA8, 0xCC, 0x84, 0xF6, 0xC6, 0xB7, 0xCC, 0x8C, + 0xF6, 0xCA, 0x92, 0xCC, 0x8C, 0xF6, 0x6A, 0xCC, + 0x8C, 0x44, 0x5A, 0x44, 0x7A, 0x64, 0x7A, 0xF6, + 0x47, 0xCC, 0x81, 0xF6, 0x67, 0xCC, 0x81, 0xF6, + 0x4E, 0xCC, 0x80, 0xF6, 0x6E, 0xCC, 0x80, 0xF6, + 0x41, 0xCC, 0x8A, 0xCC, 0x81, 0xF6, 0x61, 0xCC, + 0x8A, 0xCC, 0x81, 0xF6, 0xC3, 0x86, 0xCC, 0x81, + 0xF6, 0xC3, 0xA6, 0xCC, 0x81, 0xF6, 0xC3, 0x98, + 0xCC, 0x81, 0xF6, 0xC3, 0xB8, 0xCC, 0x81, 0xF6, + 0x41, 0xCC, 0x8F, 0xF6, 0x61, 0xCC, 0x8F, 0xF6, + 0x41, 0xCC, 0x91, 0xF6, 0x61, 0xCC, 0x91, 0xF6, + 0x45, 0xCC, 0x8F, 0xF6, 0x65, 0xCC, 0x8F, 0xF6, + 0x45, 0xCC, 0x91, 0xF6, 0x65, 0xCC, 0x91, 0xF6, + 0x49, 0xCC, 0x8F, 0xF6, 0x69, 0xCC, 0x8F, 0xF6, + 0x49, 0xCC, 0x91, 0xF6, 0x69, 0xCC, 0x91, 
0xF6, + 0x4F, 0xCC, 0x8F, 0xF6, 0x6F, 0xCC, 0x8F, 0xF6, + 0x4F, 0xCC, 0x91, 0xF6, 0x6F, 0xCC, 0x91, 0xF6, + 0x52, 0xCC, 0x8F, 0xF6, 0x72, 0xCC, 0x8F, 0xF6, + 0x52, 0xCC, 0x91, 0xF6, 0x72, 0xCC, 0x91, 0xF6, + 0x55, 0xCC, 0x8F, 0xF6, 0x75, 0xCC, 0x8F, 0xF6, + 0x55, 0xCC, 0x91, 0xF6, 0x75, 0xCC, 0x91, 0xF6, + 0x53, 0xCC, 0xA6, 0xF6, 0x73, 0xCC, 0xA6, 0xF6, + 0x54, 0xCC, 0xA6, 0xF6, 0x74, 0xCC, 0xA6, 0xF6, + 0x48, 0xCC, 0x8C, 0xF6, 0x68, 0xCC, 0x8C, 0xF6, + 0x41, 0xCC, 0x87, 0xF6, 0x61, 0xCC, 0x87, 0xF6, + 0x45, 0xCC, 0xA7, 0xF6, 0x65, 0xCC, 0xA7, 0xF6, + 0x4F, 0xCC, 0x88, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, + 0x88, 0xCC, 0x84, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, + 0x84, 0xF6, 0x6F, 0xCC, 0x83, 0xCC, 0x84, 0xF6, + 0x4F, 0xCC, 0x87, 0xF6, 0x6F, 0xCC, 0x87, 0xF6, + 0x4F, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, + 0x87, 0xCC, 0x84, 0xF6, 0x59, 0xCC, 0x84, 0xF6, + 0x79, 0xCC, 0x84, 0x68, 0xC9, 0xA6, 0x6A, 0x72, + 0xC9, 0xB9, 0xC9, 0xBB, 0xCA, 0x81, 0x77, 0x79, + 0x20, 0xCC, 0x86, 0x20, 0xCC, 0x87, 0x20, 0xCC, + 0x8A, 0x20, 0xCC, 0xA8, 0x20, 0xCC, 0x83, 0x20, + 0xCC, 0x8B, 0xC9, 0xA3, 0x6C, 0x73, 0x78, 0xCA, + 0x95, 0xF6, 0xCC, 0x80, 0xF6, 0xCC, 0x81, 0xF6, + 0xCC, 0x93, 0xF6, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0xCA, 0xB9, 0x20, 0xCD, 0x85, 0xF6, 0x3B, 0x20, + 0xCC, 0x81, 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, + 0x20, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0xCE, 0x91, + 0xCC, 0x81, 0xF6, 0xC2, 0xB7, 0xF6, 0xCE, 0x95, + 0xCC, 0x81, 0xF6, 0xCE, 0x97, 0xCC, 0x81, 0xF6, + 0xCE, 0x99, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, + 0x81, 0xF6, 0xCE, 0xA5, 0xCC, 0x81, 0xF6, 0xCE, + 0xA9, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x88, + 0xCC, 0x81, 0xF6, 0xCE, 0x99, 0xCC, 0x88, 0xF6, + 0xCE, 0xA5, 0xCC, 0x88, 0xF6, 0xCE, 0xB1, 0xCC, + 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x81, 0xF6, 0xCE, + 0xB7, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, + 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0xCE, 0xB9, 0xCC, 0x88, 0xF6, 0xCF, 0x85, 0xCC, + 0x88, 0xF6, 0xCE, 0xBF, 0xCC, 0x81, 0xF6, 0xCF, + 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, 
0x81, + 0xCE, 0xB2, 0xCE, 0xB8, 0xCE, 0xA5, 0xF5, 0x05, + 0xCF, 0x92, 0xCC, 0x81, 0xCE, 0xA5, 0xCC, 0x81, + 0xF5, 0x05, 0xCF, 0x92, 0xCC, 0x88, 0xCE, 0xA5, + 0xCC, 0x88, 0xCF, 0x86, 0xCF, 0x80, 0xCE, 0xBA, + 0xCF, 0x81, 0xCF, 0x82, 0xCE, 0x98, 0xCE, 0xB5, + 0xCE, 0xA3, 0xF6, 0xD0, 0x95, 0xCC, 0x80, 0xF6, + 0xD0, 0x95, 0xCC, 0x88, 0xF6, 0xD0, 0x93, 0xCC, + 0x81, 0xF6, 0xD0, 0x86, 0xCC, 0x88, 0xF6, 0xD0, + 0x9A, 0xCC, 0x81, 0xF6, 0xD0, 0x98, 0xCC, 0x80, + 0xF6, 0xD0, 0xA3, 0xCC, 0x86, 0xF6, 0xD0, 0x98, + 0xCC, 0x86, 0xF6, 0xD0, 0xB8, 0xCC, 0x86, 0xF6, + 0xD0, 0xB5, 0xCC, 0x80, 0xF6, 0xD0, 0xB5, 0xCC, + 0x88, 0xF6, 0xD0, 0xB3, 0xCC, 0x81, 0xF6, 0xD1, + 0x96, 0xCC, 0x88, 0xF6, 0xD0, 0xBA, 0xCC, 0x81, + 0xF6, 0xD0, 0xB8, 0xCC, 0x80, 0xF6, 0xD1, 0x83, + 0xCC, 0x86, 0xF6, 0xD1, 0xB4, 0xCC, 0x8F, 0xF6, + 0xD1, 0xB5, 0xCC, 0x8F, 0xF6, 0xD0, 0x96, 0xCC, + 0x86, 0xF6, 0xD0, 0xB6, 0xCC, 0x86, 0xF6, 0xD0, + 0x90, 0xCC, 0x86, 0xF6, 0xD0, 0xB0, 0xCC, 0x86, + 0xF6, 0xD0, 0x90, 0xCC, 0x88, 0xF6, 0xD0, 0xB0, + 0xCC, 0x88, 0xF6, 0xD0, 0x95, 0xCC, 0x86, 0xF6, + 0xD0, 0xB5, 0xCC, 0x86, 0xF6, 0xD3, 0x98, 0xCC, + 0x88, 0xF6, 0xD3, 0x99, 0xCC, 0x88, 0xF6, 0xD0, + 0x96, 0xCC, 0x88, 0xF6, 0xD0, 0xB6, 0xCC, 0x88, + 0xF6, 0xD0, 0x97, 0xCC, 0x88, 0xF6, 0xD0, 0xB7, + 0xCC, 0x88, 0xF6, 0xD0, 0x98, 0xCC, 0x84, 0xF6, + 0xD0, 0xB8, 0xCC, 0x84, 0xF6, 0xD0, 0x98, 0xCC, + 0x88, 0xF6, 0xD0, 0xB8, 0xCC, 0x88, 0xF6, 0xD0, + 0x9E, 0xCC, 0x88, 0xF6, 0xD0, 0xBE, 0xCC, 0x88, + 0xF6, 0xD3, 0xA8, 0xCC, 0x88, 0xF6, 0xD3, 0xA9, + 0xCC, 0x88, 0xF6, 0xD0, 0xAD, 0xCC, 0x88, 0xF6, + 0xD1, 0x8D, 0xCC, 0x88, 0xF6, 0xD0, 0xA3, 0xCC, + 0x84, 0xF6, 0xD1, 0x83, 0xCC, 0x84, 0xF6, 0xD0, + 0xA3, 0xCC, 0x88, 0xF6, 0xD1, 0x83, 0xCC, 0x88, + 0xF6, 0xD0, 0xA3, 0xCC, 0x8B, 0xF6, 0xD1, 0x83, + 0xCC, 0x8B, 0xF6, 0xD0, 0xA7, 0xCC, 0x88, 0xF6, + 0xD1, 0x87, 0xCC, 0x88, 0xF6, 0xD0, 0xAB, 0xCC, + 0x88, 0xF6, 0xD1, 0x8B, 0xCC, 0x88, 0xD5, 0xA5, + 0xD6, 0x82, 0xF6, 0xD8, 0xA7, 0xD9, 0x93, 0xF6, + 0xD8, 0xA7, 0xD9, 0x94, 0xF6, 0xD9, 0x88, 
0xD9, + 0x94, 0xF6, 0xD8, 0xA7, 0xD9, 0x95, 0xF6, 0xD9, + 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0xB4, 0xD9, + 0x88, 0xD9, 0xB4, 0xDB, 0x87, 0xD9, 0xB4, 0xD9, + 0x8A, 0xD9, 0xB4, 0xF6, 0xDB, 0x95, 0xD9, 0x94, + 0xF6, 0xDB, 0x81, 0xD9, 0x94, 0xF6, 0xDB, 0x92, + 0xD9, 0x94, 0xF6, 0xE0, 0xA4, 0xA8, 0xE0, 0xA4, + 0xBC, 0xF6, 0xE0, 0xA4, 0xB0, 0xE0, 0xA4, 0xBC, + 0xF6, 0xE0, 0xA4, 0xB3, 0xE0, 0xA4, 0xBC, 0xF6, + 0xE0, 0xA4, 0x95, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, + 0xA4, 0x96, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, + 0x97, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x9C, + 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0xA1, 0xE0, + 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0xA2, 0xE0, 0xA4, + 0xBC, 0xF6, 0xE0, 0xA4, 0xAB, 0xE0, 0xA4, 0xBC, + 0xF6, 0xE0, 0xA4, 0xAF, 0xE0, 0xA4, 0xBC, 0xF6, + 0xE0, 0xA7, 0x87, 0xE0, 0xA6, 0xBE, 0xF6, 0xE0, + 0xA7, 0x87, 0xE0, 0xA7, 0x97, 0xF6, 0xE0, 0xA6, + 0xA1, 0xE0, 0xA6, 0xBC, 0xF6, 0xE0, 0xA6, 0xA2, + 0xE0, 0xA6, 0xBC, 0xF6, 0xE0, 0xA6, 0xAF, 0xE0, + 0xA6, 0xBC, 0xF6, 0xE0, 0xA8, 0xB2, 0xE0, 0xA8, + 0xBC, 0xF6, 0xE0, 0xA8, 0xB8, 0xE0, 0xA8, 0xBC, + 0xF6, 0xE0, 0xA8, 0x96, 0xE0, 0xA8, 0xBC, 0xF6, + 0xE0, 0xA8, 0x97, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, + 0xA8, 0x9C, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, + 0xAB, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xAD, 0x87, + 0xE0, 0xAD, 0x96, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, + 0xAC, 0xBE, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAD, + 0x97, 0xF6, 0xE0, 0xAC, 0xA1, 0xE0, 0xAC, 0xBC, + 0xF6, 0xE0, 0xAC, 0xA2, 0xE0, 0xAC, 0xBC, 0xF6, + 0xE0, 0xAE, 0x92, 0xE0, 0xAF, 0x97, 0xF6, 0xE0, + 0xAF, 0x86, 0xE0, 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, + 0x87, 0xE0, 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, 0x86, + 0xE0, 0xAF, 0x97, 0xF6, 0xE0, 0xB1, 0x86, 0xE0, + 0xB1, 0x96, 0xF6, 0xE0, 0xB2, 0xBF, 0xE0, 0xB3, + 0x95, 0xF6, 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x95, + 0xF6, 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x96, 0xF6, + 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x82, 0xF6, 0xE0, + 0xB3, 0x86, 0xE0, 0xB3, 0x82, 0xE0, 0xB3, 0x95, + 0xF6, 0xE0, 0xB5, 0x86, 0xE0, 0xB4, 0xBE, 0xF6, + 0xE0, 0xB5, 0x87, 0xE0, 0xB4, 0xBE, 0xF6, 
0xE0, + 0xB5, 0x86, 0xE0, 0xB5, 0x97, 0xF6, 0xE0, 0xB7, + 0x99, 0xE0, 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, 0x99, + 0xE0, 0xB7, 0x8F, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, + 0xB7, 0x8F, 0xE0, 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, + 0x99, 0xE0, 0xB7, 0x9F, 0xE0, 0xB9, 0x8D, 0xE0, + 0xB8, 0xB2, 0xE0, 0xBB, 0x8D, 0xE0, 0xBA, 0xB2, + 0xE0, 0xBA, 0xAB, 0xE0, 0xBA, 0x99, 0xE0, 0xBA, + 0xAB, 0xE0, 0xBA, 0xA1, 0xE0, 0xBC, 0x8B, 0xF6, + 0xE0, 0xBD, 0x82, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, + 0xBD, 0x8C, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, + 0x91, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x96, + 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x9B, 0xE0, + 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x80, 0xE0, 0xBE, + 0xB5, 0xF6, 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB2, + 0xF6, 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB4, 0xF6, + 0xE0, 0xBE, 0xB2, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, + 0xB2, 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, + 0xE0, 0xBE, 0xB3, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, + 0xB3, 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, + 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, + 0xBE, 0x92, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, + 0x9C, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xA1, + 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xA6, 0xE0, + 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xAB, 0xE0, 0xBE, + 0xB7, 0xF6, 0xE0, 0xBE, 0x90, 0xE0, 0xBE, 0xB5, + 0xF6, 0xE1, 0x80, 0xA5, 0xE1, 0x80, 0xAE, 0xE1, + 0x83, 0x9C, 0xF6, 0xE1, 0xAC, 0x85, 0xE1, 0xAC, + 0xB5, 0xF6, 0xE1, 0xAC, 0x87, 0xE1, 0xAC, 0xB5, + 0xF6, 0xE1, 0xAC, 0x89, 0xE1, 0xAC, 0xB5, 0xF6, + 0xE1, 0xAC, 0x8B, 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, + 0xAC, 0x8D, 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, + 0x91, 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, 0xBA, + 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, 0xBC, 0xE1, + 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, 0xBE, 0xE1, 0xAC, + 0xB5, 0xF6, 0xE1, 0xAC, 0xBF, 0xE1, 0xAC, 0xB5, + 0xF6, 0xE1, 0xAD, 0x82, 0xE1, 0xAC, 0xB5, 0x41, + 0xC3, 0x86, 0x42, 0x44, 0x45, 0xC6, 0x8E, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0xC8, 0xA2, 0x50, 0x52, 0x54, 0x55, 0x57, 0x61, + 0xC9, 0x90, 0xC9, 0x91, 0xE1, 0xB4, 0x82, 
0x62, + 0x64, 0x65, 0xC9, 0x99, 0xC9, 0x9B, 0xC9, 0x9C, + 0x67, 0x6B, 0x6D, 0xC5, 0x8B, 0x6F, 0xC9, 0x94, + 0xE1, 0xB4, 0x96, 0xE1, 0xB4, 0x97, 0x70, 0x74, + 0x75, 0xE1, 0xB4, 0x9D, 0xC9, 0xAF, 0x76, 0xE1, + 0xB4, 0xA5, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, + 0xCF, 0x86, 0xCF, 0x87, 0x69, 0x72, 0x75, 0x76, + 0xCE, 0xB2, 0xCE, 0xB3, 0xCF, 0x81, 0xCF, 0x86, + 0xCF, 0x87, 0xD0, 0xBD, 0xC9, 0x92, 0x63, 0xC9, + 0x95, 0xC3, 0xB0, 0xC9, 0x9C, 0x66, 0xC9, 0x9F, + 0xC9, 0xA1, 0xC9, 0xA5, 0xC9, 0xA8, 0xC9, 0xA9, + 0xC9, 0xAA, 0xE1, 0xB5, 0xBB, 0xCA, 0x9D, 0xC9, + 0xAD, 0xE1, 0xB6, 0x85, 0xCA, 0x9F, 0xC9, 0xB1, + 0xC9, 0xB0, 0xC9, 0xB2, 0xC9, 0xB3, 0xC9, 0xB4, + 0xC9, 0xB5, 0xC9, 0xB8, 0xCA, 0x82, 0xCA, 0x83, + 0xC6, 0xAB, 0xCA, 0x89, 0xCA, 0x8A, 0xE1, 0xB4, + 0x9C, 0xCA, 0x8B, 0xCA, 0x8C, 0x7A, 0xCA, 0x90, + 0xCA, 0x91, 0xCA, 0x92, 0xCE, 0xB8, 0xF6, 0x41, + 0xCC, 0xA5, 0xF6, 0x61, 0xCC, 0xA5, 0xF6, 0x42, + 0xCC, 0x87, 0xF6, 0x62, 0xCC, 0x87, 0xF6, 0x42, + 0xCC, 0xA3, 0xF6, 0x62, 0xCC, 0xA3, 0xF6, 0x42, + 0xCC, 0xB1, 0xF6, 0x62, 0xCC, 0xB1, 0xF6, 0x43, + 0xCC, 0xA7, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0xA7, + 0xCC, 0x81, 0xF6, 0x44, 0xCC, 0x87, 0xF6, 0x64, + 0xCC, 0x87, 0xF6, 0x44, 0xCC, 0xA3, 0xF6, 0x64, + 0xCC, 0xA3, 0xF6, 0x44, 0xCC, 0xB1, 0xF6, 0x64, + 0xCC, 0xB1, 0xF6, 0x44, 0xCC, 0xA7, 0xF6, 0x64, + 0xCC, 0xA7, 0xF6, 0x44, 0xCC, 0xAD, 0xF6, 0x64, + 0xCC, 0xAD, 0xF6, 0x45, 0xCC, 0x84, 0xCC, 0x80, + 0xF6, 0x65, 0xCC, 0x84, 0xCC, 0x80, 0xF6, 0x45, + 0xCC, 0x84, 0xCC, 0x81, 0xF6, 0x65, 0xCC, 0x84, + 0xCC, 0x81, 0xF6, 0x45, 0xCC, 0xAD, 0xF6, 0x65, + 0xCC, 0xAD, 0xF6, 0x45, 0xCC, 0xB0, 0xF6, 0x65, + 0xCC, 0xB0, 0xF6, 0x45, 0xCC, 0xA7, 0xCC, 0x86, + 0xF6, 0x65, 0xCC, 0xA7, 0xCC, 0x86, 0xF6, 0x46, + 0xCC, 0x87, 0xF6, 0x66, 0xCC, 0x87, 0xF6, 0x47, + 0xCC, 0x84, 0xF6, 0x67, 0xCC, 0x84, 0xF6, 0x48, + 0xCC, 0x87, 0xF6, 0x68, 0xCC, 0x87, 0xF6, 0x48, + 0xCC, 0xA3, 0xF6, 0x68, 0xCC, 0xA3, 0xF6, 0x48, + 0xCC, 0x88, 0xF6, 0x68, 0xCC, 0x88, 0xF6, 0x48, + 0xCC, 0xA7, 0xF6, 0x68, 0xCC, 0xA7, 0xF6, 
0x48, + 0xCC, 0xAE, 0xF6, 0x68, 0xCC, 0xAE, 0xF6, 0x49, + 0xCC, 0xB0, 0xF6, 0x69, 0xCC, 0xB0, 0xF6, 0x49, + 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x69, 0xCC, 0x88, + 0xCC, 0x81, 0xF6, 0x4B, 0xCC, 0x81, 0xF6, 0x6B, + 0xCC, 0x81, 0xF6, 0x4B, 0xCC, 0xA3, 0xF6, 0x6B, + 0xCC, 0xA3, 0xF6, 0x4B, 0xCC, 0xB1, 0xF6, 0x6B, + 0xCC, 0xB1, 0xF6, 0x4C, 0xCC, 0xA3, 0xF6, 0x6C, + 0xCC, 0xA3, 0xF6, 0x4C, 0xCC, 0xA3, 0xCC, 0x84, + 0xF6, 0x6C, 0xCC, 0xA3, 0xCC, 0x84, 0xF6, 0x4C, + 0xCC, 0xB1, 0xF6, 0x6C, 0xCC, 0xB1, 0xF6, 0x4C, + 0xCC, 0xAD, 0xF6, 0x6C, 0xCC, 0xAD, 0xF6, 0x4D, + 0xCC, 0x81, 0xF6, 0x6D, 0xCC, 0x81, 0xF6, 0x4D, + 0xCC, 0x87, 0xF6, 0x6D, 0xCC, 0x87, 0xF6, 0x4D, + 0xCC, 0xA3, 0xF6, 0x6D, 0xCC, 0xA3, 0xF6, 0x4E, + 0xCC, 0x87, 0xF6, 0x6E, 0xCC, 0x87, 0xF6, 0x4E, + 0xCC, 0xA3, 0xF6, 0x6E, 0xCC, 0xA3, 0xF6, 0x4E, + 0xCC, 0xB1, 0xF6, 0x6E, 0xCC, 0xB1, 0xF6, 0x4E, + 0xCC, 0xAD, 0xF6, 0x6E, 0xCC, 0xAD, 0xF6, 0x4F, + 0xCC, 0x83, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x83, + 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, 0x88, + 0xF6, 0x6F, 0xCC, 0x83, 0xCC, 0x88, 0xF6, 0x4F, + 0xCC, 0x84, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x84, + 0xCC, 0x80, 0xF6, 0x4F, 0xCC, 0x84, 0xCC, 0x81, + 0xF6, 0x6F, 0xCC, 0x84, 0xCC, 0x81, 0xF6, 0x50, + 0xCC, 0x81, 0xF6, 0x70, 0xCC, 0x81, 0xF6, 0x50, + 0xCC, 0x87, 0xF6, 0x70, 0xCC, 0x87, 0xF6, 0x52, + 0xCC, 0x87, 0xF6, 0x72, 0xCC, 0x87, 0xF6, 0x52, + 0xCC, 0xA3, 0xF6, 0x72, 0xCC, 0xA3, 0xF6, 0x52, + 0xCC, 0xA3, 0xCC, 0x84, 0xF6, 0x72, 0xCC, 0xA3, + 0xCC, 0x84, 0xF6, 0x52, 0xCC, 0xB1, 0xF6, 0x72, + 0xCC, 0xB1, 0xF6, 0x53, 0xCC, 0x87, 0xF6, 0x73, + 0xCC, 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xF6, 0x73, + 0xCC, 0xA3, 0xF6, 0x53, 0xCC, 0x81, 0xCC, 0x87, + 0xF6, 0x73, 0xCC, 0x81, 0xCC, 0x87, 0xF6, 0x53, + 0xCC, 0x8C, 0xCC, 0x87, 0xF6, 0x73, 0xCC, 0x8C, + 0xCC, 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xCC, 0x87, + 0xF6, 0x73, 0xCC, 0xA3, 0xCC, 0x87, 0xF6, 0x54, + 0xCC, 0x87, 0xF6, 0x74, 0xCC, 0x87, 0xF6, 0x54, + 0xCC, 0xA3, 0xF6, 0x74, 0xCC, 0xA3, 0xF6, 0x54, + 0xCC, 0xB1, 0xF6, 0x74, 0xCC, 0xB1, 0xF6, 
0x54, + 0xCC, 0xAD, 0xF6, 0x74, 0xCC, 0xAD, 0xF6, 0x55, + 0xCC, 0xA4, 0xF6, 0x75, 0xCC, 0xA4, 0xF6, 0x55, + 0xCC, 0xB0, 0xF6, 0x75, 0xCC, 0xB0, 0xF6, 0x55, + 0xCC, 0xAD, 0xF6, 0x75, 0xCC, 0xAD, 0xF6, 0x55, + 0xCC, 0x83, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x83, + 0xCC, 0x81, 0xF6, 0x55, 0xCC, 0x84, 0xCC, 0x88, + 0xF6, 0x75, 0xCC, 0x84, 0xCC, 0x88, 0xF6, 0x56, + 0xCC, 0x83, 0xF6, 0x76, 0xCC, 0x83, 0xF6, 0x56, + 0xCC, 0xA3, 0xF6, 0x76, 0xCC, 0xA3, 0xF6, 0x57, + 0xCC, 0x80, 0xF6, 0x77, 0xCC, 0x80, 0xF6, 0x57, + 0xCC, 0x81, 0xF6, 0x77, 0xCC, 0x81, 0xF6, 0x57, + 0xCC, 0x88, 0xF6, 0x77, 0xCC, 0x88, 0xF6, 0x57, + 0xCC, 0x87, 0xF6, 0x77, 0xCC, 0x87, 0xF6, 0x57, + 0xCC, 0xA3, 0xF6, 0x77, 0xCC, 0xA3, 0xF6, 0x58, + 0xCC, 0x87, 0xF6, 0x78, 0xCC, 0x87, 0xF6, 0x58, + 0xCC, 0x88, 0xF6, 0x78, 0xCC, 0x88, 0xF6, 0x59, + 0xCC, 0x87, 0xF6, 0x79, 0xCC, 0x87, 0xF6, 0x5A, + 0xCC, 0x82, 0xF6, 0x7A, 0xCC, 0x82, 0xF6, 0x5A, + 0xCC, 0xA3, 0xF6, 0x7A, 0xCC, 0xA3, 0xF6, 0x5A, + 0xCC, 0xB1, 0xF6, 0x7A, 0xCC, 0xB1, 0xF6, 0x68, + 0xCC, 0xB1, 0xF6, 0x74, 0xCC, 0x88, 0xF6, 0x77, + 0xCC, 0x8A, 0xF6, 0x79, 0xCC, 0x8A, 0x61, 0xCA, + 0xBE, 0xF5, 0x05, 0xC5, 0xBF, 0xCC, 0x87, 0x73, + 0xCC, 0x87, 0xF6, 0x41, 0xCC, 0xA3, 0xF6, 0x61, + 0xCC, 0xA3, 0xF6, 0x41, 0xCC, 0x89, 0xF6, 0x61, + 0xCC, 0x89, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x81, + 0xF6, 0x61, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x41, + 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x61, 0xCC, 0x82, + 0xCC, 0x80, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x89, + 0xF6, 0x61, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x41, + 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x82, + 0xCC, 0x83, 0xF6, 0x41, 0xCC, 0xA3, 0xCC, 0x82, + 0xF6, 0x61, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x41, + 0xCC, 0x86, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x86, + 0xCC, 0x81, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x80, + 0xF6, 0x61, 0xCC, 0x86, 0xCC, 0x80, 0xF6, 0x41, + 0xCC, 0x86, 0xCC, 0x89, 0xF6, 0x61, 0xCC, 0x86, + 0xCC, 0x89, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x83, + 0xF6, 0x61, 0xCC, 0x86, 0xCC, 0x83, 0xF6, 0x41, + 0xCC, 0xA3, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 
0xA3, + 0xCC, 0x86, 0xF6, 0x45, 0xCC, 0xA3, 0xF6, 0x65, + 0xCC, 0xA3, 0xF6, 0x45, 0xCC, 0x89, 0xF6, 0x65, + 0xCC, 0x89, 0xF6, 0x45, 0xCC, 0x83, 0xF6, 0x65, + 0xCC, 0x83, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x81, + 0xF6, 0x65, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x45, + 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x82, + 0xCC, 0x80, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x89, + 0xF6, 0x65, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x45, + 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x65, 0xCC, 0x82, + 0xCC, 0x83, 0xF6, 0x45, 0xCC, 0xA3, 0xCC, 0x82, + 0xF6, 0x65, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x49, + 0xCC, 0x89, 0xF6, 0x69, 0xCC, 0x89, 0xF6, 0x49, + 0xCC, 0xA3, 0xF6, 0x69, 0xCC, 0xA3, 0xF6, 0x4F, + 0xCC, 0xA3, 0xF6, 0x6F, 0xCC, 0xA3, 0xF6, 0x4F, + 0xCC, 0x89, 0xF6, 0x6F, 0xCC, 0x89, 0xF6, 0x4F, + 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, + 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x80, + 0xF6, 0x6F, 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x4F, + 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x6F, 0xCC, 0x82, + 0xCC, 0x89, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x83, + 0xF6, 0x6F, 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x4F, + 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x6F, 0xCC, 0xA3, + 0xCC, 0x82, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x81, + 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, 0x81, 0xF6, 0x4F, + 0xCC, 0x9B, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x9B, + 0xCC, 0x80, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x89, + 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, 0x89, 0xF6, 0x4F, + 0xCC, 0x9B, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x9B, + 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0xA3, + 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, 0x55, + 0xCC, 0xA3, 0xF6, 0x75, 0xCC, 0xA3, 0xF6, 0x55, + 0xCC, 0x89, 0xF6, 0x75, 0xCC, 0x89, 0xF6, 0x55, + 0xCC, 0x9B, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x9B, + 0xCC, 0x81, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x80, + 0xF6, 0x75, 0xCC, 0x9B, 0xCC, 0x80, 0xF6, 0x55, + 0xCC, 0x9B, 0xCC, 0x89, 0xF6, 0x75, 0xCC, 0x9B, + 0xCC, 0x89, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x83, + 0xF6, 0x75, 0xCC, 0x9B, 0xCC, 0x83, 0xF6, 0x55, + 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, 0x75, 0xCC, 0x9B, + 0xCC, 0xA3, 0xF6, 0x59, 0xCC, 0x80, 0xF6, 
0x79, + 0xCC, 0x80, 0xF6, 0x59, 0xCC, 0xA3, 0xF6, 0x79, + 0xCC, 0xA3, 0xF6, 0x59, 0xCC, 0x89, 0xF6, 0x79, + 0xCC, 0x89, 0xF6, 0x59, 0xCC, 0x83, 0xF6, 0x79, + 0xCC, 0x83, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xF6, + 0xCE, 0xB1, 0xCC, 0x94, 0xF6, 0xCE, 0xB1, 0xCC, + 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, + 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCC, + 0x81, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCC, 0x81, + 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xF6, + 0xCE, 0xB1, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, + 0x91, 0xCC, 0x93, 0xF6, 0xCE, 0x91, 0xCC, 0x94, + 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xF6, + 0xCE, 0x91, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, + 0x91, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0x91, + 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCC, + 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0x91, 0xCC, 0x94, + 0xCD, 0x82, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xF6, + 0xCE, 0xB5, 0xCC, 0x94, 0xF6, 0xCE, 0xB5, 0xCC, + 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x94, + 0xCC, 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xCC, + 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x94, 0xCC, 0x81, + 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xF6, 0xCE, 0x95, + 0xCC, 0x94, 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xCC, + 0x80, 0xF6, 0xCE, 0x95, 0xCC, 0x94, 0xCC, 0x80, + 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xCC, 0x81, 0xF6, + 0xCE, 0x95, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, + 0xB7, 0xCC, 0x93, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, + 0xF6, 0xCE, 0xB7, 0xCC, 0x93, 0xCC, 0x80, 0xF6, + 0xCE, 0xB7, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, + 0xB7, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, + 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, + 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, + 0xCD, 0x82, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xF6, + 0xCE, 0x97, 0xCC, 0x94, 0xF6, 0xCE, 0x97, 0xCC, + 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x94, + 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, + 0x81, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x81, + 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCD, 0x82, 0xF6, + 0xCE, 0x97, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, + 0xB9, 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0xCC, 
0x94, + 0xF6, 0xCE, 0xB9, 0xCC, 0x93, 0xCC, 0x80, 0xF6, + 0xCE, 0xB9, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, + 0xB9, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, + 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, + 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, + 0xCD, 0x82, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xF6, + 0xCE, 0x99, 0xCC, 0x94, 0xF6, 0xCE, 0x99, 0xCC, + 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x94, + 0xCC, 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xCC, + 0x81, 0xF6, 0xCE, 0x99, 0xCC, 0x94, 0xCC, 0x81, + 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xCD, 0x82, 0xF6, + 0xCE, 0x99, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, + 0xBF, 0xCC, 0x93, 0xF6, 0xCE, 0xBF, 0xCC, 0x94, + 0xF6, 0xCE, 0xBF, 0xCC, 0x93, 0xCC, 0x80, 0xF6, + 0xCE, 0xBF, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, + 0xBF, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xBF, + 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, + 0x93, 0xF6, 0xCE, 0x9F, 0xCC, 0x94, 0xF6, 0xCE, + 0x9F, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, + 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, + 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, 0x94, + 0xCC, 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xF6, + 0xCF, 0x85, 0xCC, 0x94, 0xF6, 0xCF, 0x85, 0xCC, + 0x93, 0xCC, 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x94, + 0xCC, 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xCC, + 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x94, 0xCC, 0x81, + 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xCD, 0x82, 0xF6, + 0xCF, 0x85, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, + 0xA5, 0xCC, 0x94, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, + 0xCC, 0x80, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCC, + 0x81, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCD, 0x82, + 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xF6, 0xCF, 0x89, + 0xCC, 0x94, 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCC, + 0x80, 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x80, + 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCC, 0x81, 0xF6, + 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCF, + 0x89, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCF, 0x89, + 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xA9, 0xCC, + 0x93, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xF6, 0xCE, + 0xA9, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 
0xA9, + 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, + 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, + 0xCC, 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, + 0x82, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x82, + 0xF6, 0xCE, 0xB1, 0xCC, 0x80, 0xF6, 0xCE, 0xB1, + 0xCC, 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x80, 0xF6, + 0xCE, 0xB5, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, + 0x80, 0xF6, 0xCE, 0xB7, 0xCC, 0x81, 0xF6, 0xCE, + 0xB9, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, + 0xF6, 0xCE, 0xBF, 0xCC, 0x80, 0xF6, 0xCE, 0xBF, + 0xCC, 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x80, 0xF6, + 0xCF, 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, + 0x80, 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xF6, 0xCE, + 0xB1, 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, + 0xCC, 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, + 0x93, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, + 0xCC, 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, + 0xB1, 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, + 0xCE, 0xB1, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, + 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xCD, + 0x85, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCD, 0x82, + 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCD, + 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCD, 0x85, + 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xCD, + 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCC, 0x80, + 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, + 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, + 0x93, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x91, + 0xCC, 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, + 0xB7, 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, + 0xCC, 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, + 0x93, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, + 0xCC, 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, + 0xB7, 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, + 0xCE, 0xB7, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, + 0xF6, 0xCE, 0xB7, 0xCC, 0x93, 0xCD, 0x82, 0xCD, + 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xCD, 0x82, + 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCD, + 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCD, 
0x85, + 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x80, 0xCD, + 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x80, + 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, + 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, + 0x93, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x97, + 0xCC, 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCF, + 0x89, 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCF, 0x89, + 0xCC, 0x94, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, + 0x93, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, + 0xCC, 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, + 0x89, 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, + 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, + 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCD, 0x82, 0xCD, + 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCD, 0x82, + 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, + 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x85, + 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCC, 0x80, 0xCD, + 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, 0x80, + 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, + 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, + 0x93, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, + 0xCC, 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, + 0xB1, 0xCC, 0x86, 0xF6, 0xCE, 0xB1, 0xCC, 0x84, + 0xF6, 0xCE, 0xB1, 0xCC, 0x80, 0xCD, 0x85, 0xF6, + 0xCE, 0xB1, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, + 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCD, 0x82, + 0xF6, 0xCE, 0xB1, 0xCD, 0x82, 0xCD, 0x85, 0xF6, + 0xCE, 0x91, 0xCC, 0x86, 0xF6, 0xCE, 0x91, 0xCC, + 0x84, 0xF6, 0xCE, 0x91, 0xCC, 0x80, 0xF6, 0xCE, + 0x91, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCD, 0x85, + 0x20, 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0x20, 0xCC, + 0x93, 0x20, 0xCD, 0x82, 0xF5, 0x05, 0xC2, 0xA8, + 0xCD, 0x82, 0x20, 0xCC, 0x88, 0xCD, 0x82, 0xF6, + 0xCE, 0xB7, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, + 0xB7, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x81, + 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCD, 0x82, 0xF6, + 0xCE, 0xB7, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, + 0x95, 0xCC, 0x80, 0xF6, 0xCE, 0x95, 0xCC, 
0x81, + 0xF6, 0xCE, 0x97, 0xCC, 0x80, 0xF6, 0xCE, 0x97, + 0xCC, 0x81, 0xF6, 0xCE, 0x97, 0xCD, 0x85, 0xF5, + 0x06, 0xE1, 0xBE, 0xBF, 0xCC, 0x80, 0x20, 0xCC, + 0x93, 0xCC, 0x80, 0xF5, 0x06, 0xE1, 0xBE, 0xBF, + 0xCC, 0x81, 0x20, 0xCC, 0x93, 0xCC, 0x81, 0xF5, + 0x06, 0xE1, 0xBE, 0xBF, 0xCD, 0x82, 0x20, 0xCC, + 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x86, + 0xF6, 0xCE, 0xB9, 0xCC, 0x84, 0xF6, 0xCE, 0xB9, + 0xCC, 0x88, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, + 0x88, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCD, 0x82, + 0xF6, 0xCE, 0xB9, 0xCC, 0x88, 0xCD, 0x82, 0xF6, + 0xCE, 0x99, 0xCC, 0x86, 0xF6, 0xCE, 0x99, 0xCC, + 0x84, 0xF6, 0xCE, 0x99, 0xCC, 0x80, 0xF6, 0xCE, + 0x99, 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, + 0xCC, 0x80, 0x20, 0xCC, 0x94, 0xCC, 0x80, 0xF5, + 0x06, 0xE1, 0xBF, 0xBE, 0xCC, 0x81, 0x20, 0xCC, + 0x94, 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, + 0xCD, 0x82, 0x20, 0xCC, 0x94, 0xCD, 0x82, 0xF6, + 0xCF, 0x85, 0xCC, 0x86, 0xF6, 0xCF, 0x85, 0xCC, + 0x84, 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x80, + 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, + 0xCF, 0x81, 0xCC, 0x93, 0xF6, 0xCF, 0x81, 0xCC, + 0x94, 0xF6, 0xCF, 0x85, 0xCD, 0x82, 0xF6, 0xCF, + 0x85, 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, 0xA5, + 0xCC, 0x86, 0xF6, 0xCE, 0xA5, 0xCC, 0x84, 0xF6, + 0xCE, 0xA5, 0xCC, 0x80, 0xF6, 0xCE, 0xA5, 0xCC, + 0x81, 0xF6, 0xCE, 0xA1, 0xCC, 0x94, 0xF5, 0x05, + 0xC2, 0xA8, 0xCC, 0x80, 0x20, 0xCC, 0x88, 0xCC, + 0x80, 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, 0x20, + 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x60, 0xF6, 0xCF, + 0x89, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, + 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xCD, + 0x85, 0xF6, 0xCF, 0x89, 0xCD, 0x82, 0xF6, 0xCF, + 0x89, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x9F, + 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, 0x81, 0xF6, + 0xCE, 0xA9, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, + 0x81, 0xF6, 0xCE, 0xA9, 0xCD, 0x85, 0xF5, 0x03, + 0xC2, 0xB4, 0x20, 0xCC, 0x81, 0x20, 0xCC, 0x94, + 0xF5, 0x04, 0xE2, 0x80, 0x82, 0x20, 0xF5, 0x04, + 0xE2, 0x80, 0x83, 0x20, 0x20, 0x20, 0x20, 
0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xE2, 0x80, 0x90, + 0x20, 0xCC, 0xB3, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, + 0x2E, 0x20, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, + 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0xE2, 0x80, + 0xB2, 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0xE2, + 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, + 0x21, 0x21, 0x20, 0xCC, 0x85, 0x3F, 0x3F, 0x3F, + 0x21, 0x21, 0x3F, 0xE2, 0x80, 0xB2, 0xE2, 0x80, + 0xB2, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0x20, + 0x30, 0x69, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x2B, 0xE2, 0x88, 0x92, 0x3D, 0x28, 0x29, 0x6E, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x2B, 0xE2, 0x88, 0x92, 0x3D, 0x28, + 0x29, 0x61, 0x65, 0x6F, 0x78, 0xC9, 0x99, 0x52, + 0x73, 0x61, 0x2F, 0x63, 0x61, 0x2F, 0x73, 0x43, + 0xC2, 0xB0, 0x43, 0x63, 0x2F, 0x6F, 0x63, 0x2F, + 0x75, 0xC6, 0x90, 0xC2, 0xB0, 0x46, 0x67, 0x48, + 0x48, 0x48, 0x68, 0xC4, 0xA7, 0x49, 0x49, 0x4C, + 0x6C, 0x4E, 0x4E, 0x6F, 0x50, 0x51, 0x52, 0x52, + 0x52, 0x53, 0x4D, 0x54, 0x45, 0x4C, 0x54, 0x4D, + 0x5A, 0xF6, 0xCE, 0xA9, 0x5A, 0xF6, 0x4B, 0xF6, + 0x41, 0xCC, 0x8A, 0x42, 0x43, 0x65, 0x45, 0x46, + 0x4D, 0x6F, 0xD7, 0x90, 0xD7, 0x91, 0xD7, 0x92, + 0xD7, 0x93, 0x69, 0x46, 0x41, 0x58, 0xCF, 0x80, + 0xCE, 0xB3, 0xCE, 0x93, 0xCE, 0xA0, 0xE2, 0x88, + 0x91, 0x44, 0x64, 0x65, 0x69, 0x6A, 0x31, 0xE2, + 0x81, 0x84, 0x33, 0x32, 0xE2, 0x81, 0x84, 0x33, + 0x31, 0xE2, 0x81, 0x84, 0x35, 0x32, 0xE2, 0x81, + 0x84, 0x35, 0x33, 0xE2, 0x81, 0x84, 0x35, 0x34, + 0xE2, 0x81, 0x84, 0x35, 0x31, 0xE2, 0x81, 0x84, + 0x36, 0x35, 0xE2, 0x81, 0x84, 0x36, 0x31, 0xE2, + 0x81, 0x84, 0x38, 0x33, 0xE2, 0x81, 0x84, 0x38, + 0x35, 0xE2, 0x81, 0x84, 0x38, 0x37, 0xE2, 0x81, + 0x84, 0x38, 0x31, 0xE2, 0x81, 0x84, 0x49, 0x49, + 0x49, 0x49, 0x49, 0x49, 0x49, 0x56, 0x56, 0x56, + 0x49, 0x56, 0x49, 0x49, 0x56, 0x49, 0x49, 0x49, + 0x49, 0x58, 0x58, 0x58, 0x49, 0x58, 0x49, 0x49, + 0x4C, 0x43, 0x44, 0x4D, 0x69, 0x69, 0x69, 0x69, + 0x69, 0x69, 0x69, 0x76, 0x76, 0x76, 0x69, 0x76, + 0x69, 0x69, 0x76, 0x69, 0x69, 0x69, 0x69, 
0x78, + 0x78, 0x78, 0x69, 0x78, 0x69, 0x69, 0x6C, 0x63, + 0x64, 0x6D, 0xF6, 0xE2, 0x86, 0x90, 0xCC, 0xB8, + 0xF6, 0xE2, 0x86, 0x92, 0xCC, 0xB8, 0xF6, 0xE2, + 0x86, 0x94, 0xCC, 0xB8, 0xF6, 0xE2, 0x87, 0x90, + 0xCC, 0xB8, 0xF6, 0xE2, 0x87, 0x94, 0xCC, 0xB8, + 0xF6, 0xE2, 0x87, 0x92, 0xCC, 0xB8, 0xF6, 0xE2, + 0x88, 0x83, 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0x88, + 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0x8B, 0xCC, 0xB8, + 0xF6, 0xE2, 0x88, 0xA3, 0xCC, 0xB8, 0xF6, 0xE2, + 0x88, 0xA5, 0xCC, 0xB8, 0xE2, 0x88, 0xAB, 0xE2, + 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, + 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAE, 0xE2, 0x88, + 0xAE, 0xE2, 0x88, 0xAE, 0xE2, 0x88, 0xAE, 0xE2, + 0x88, 0xAE, 0xF6, 0xE2, 0x88, 0xBC, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0x83, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0x85, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x88, + 0xCC, 0xB8, 0xF6, 0x3D, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0xA1, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x8D, + 0xCC, 0xB8, 0xF6, 0x3C, 0xCC, 0xB8, 0xF6, 0x3E, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xA4, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0xA5, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0xB2, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB3, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB6, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0xB7, 0xCC, 0xB8, 0xF6, 0xE2, + 0x89, 0xBA, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBB, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x82, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0x83, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0x86, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x87, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xA2, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0xA8, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0xA9, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xAB, + 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBC, 0xCC, 0xB8, + 0xF6, 0xE2, 0x89, 0xBD, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0x91, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x92, + 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB2, 0xCC, 0xB8, + 0xF6, 0xE2, 0x8A, 0xB3, 0xCC, 0xB8, 0xF6, 0xE2, + 0x8A, 0xB4, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB5, + 0xCC, 0xB8, 0xF6, 0xE3, 0x80, 0x88, 0xF6, 0xE3, + 0x80, 0x89, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x31, 0x30, 0x31, 0x31, 
0x31, + 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, + 0x36, 0x31, 0x37, 0x31, 0x38, 0x31, 0x39, 0x32, + 0x30, 0x28, 0x31, 0x29, 0x28, 0x32, 0x29, 0x28, + 0x33, 0x29, 0x28, 0x34, 0x29, 0x28, 0x35, 0x29, + 0x28, 0x36, 0x29, 0x28, 0x37, 0x29, 0x28, 0x38, + 0x29, 0x28, 0x39, 0x29, 0x28, 0x31, 0x30, 0x29, + 0x28, 0x31, 0x31, 0x29, 0x28, 0x31, 0x32, 0x29, + 0x28, 0x31, 0x33, 0x29, 0x28, 0x31, 0x34, 0x29, + 0x28, 0x31, 0x35, 0x29, 0x28, 0x31, 0x36, 0x29, + 0x28, 0x31, 0x37, 0x29, 0x28, 0x31, 0x38, 0x29, + 0x28, 0x31, 0x39, 0x29, 0x28, 0x32, 0x30, 0x29, + 0x31, 0x2E, 0x32, 0x2E, 0x33, 0x2E, 0x34, 0x2E, + 0x35, 0x2E, 0x36, 0x2E, 0x37, 0x2E, 0x38, 0x2E, + 0x39, 0x2E, 0x31, 0x30, 0x2E, 0x31, 0x31, 0x2E, + 0x31, 0x32, 0x2E, 0x31, 0x33, 0x2E, 0x31, 0x34, + 0x2E, 0x31, 0x35, 0x2E, 0x31, 0x36, 0x2E, 0x31, + 0x37, 0x2E, 0x31, 0x38, 0x2E, 0x31, 0x39, 0x2E, + 0x32, 0x30, 0x2E, 0x28, 0x61, 0x29, 0x28, 0x62, + 0x29, 0x28, 0x63, 0x29, 0x28, 0x64, 0x29, 0x28, + 0x65, 0x29, 0x28, 0x66, 0x29, 0x28, 0x67, 0x29, + 0x28, 0x68, 0x29, 0x28, 0x69, 0x29, 0x28, 0x6A, + 0x29, 0x28, 0x6B, 0x29, 0x28, 0x6C, 0x29, 0x28, + 0x6D, 0x29, 0x28, 0x6E, 0x29, 0x28, 0x6F, 0x29, + 0x28, 0x70, 0x29, 0x28, 0x71, 0x29, 0x28, 0x72, + 0x29, 0x28, 0x73, 0x29, 0x28, 0x74, 0x29, 0x28, + 0x75, 0x29, 0x28, 0x76, 0x29, 0x28, 0x77, 0x29, + 0x28, 0x78, 0x29, 0x28, 0x79, 0x29, 0x28, 0x7A, + 0x29, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0xE2, 0x88, + 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, + 0x88, 0xAB, 0x3A, 0x3A, 0x3D, 0x3D, 0x3D, 0x3D, + 0x3D, 0x3D, 0xF6, 0xE2, 0xAB, 0x9D, 0xCC, 0xB8, + 0xE2, 0xB5, 0xA1, 0xE6, 0xAF, 0x8D, 0xE9, 0xBE, + 0x9F, 0xE4, 0xB8, 0x80, 0xE4, 0xB8, 0xA8, 0xE4, + 0xB8, 0xB6, 0xE4, 0xB8, 0xBF, 0xE4, 0xB9, 
0x99, + 0xE4, 0xBA, 0x85, 0xE4, 0xBA, 0x8C, 0xE4, 0xBA, + 0xA0, 0xE4, 0xBA, 0xBA, 0xE5, 0x84, 0xBF, 0xE5, + 0x85, 0xA5, 0xE5, 0x85, 0xAB, 0xE5, 0x86, 0x82, + 0xE5, 0x86, 0x96, 0xE5, 0x86, 0xAB, 0xE5, 0x87, + 0xA0, 0xE5, 0x87, 0xB5, 0xE5, 0x88, 0x80, 0xE5, + 0x8A, 0x9B, 0xE5, 0x8B, 0xB9, 0xE5, 0x8C, 0x95, + 0xE5, 0x8C, 0x9A, 0xE5, 0x8C, 0xB8, 0xE5, 0x8D, + 0x81, 0xE5, 0x8D, 0x9C, 0xE5, 0x8D, 0xA9, 0xE5, + 0x8E, 0x82, 0xE5, 0x8E, 0xB6, 0xE5, 0x8F, 0x88, + 0xE5, 0x8F, 0xA3, 0xE5, 0x9B, 0x97, 0xE5, 0x9C, + 0x9F, 0xE5, 0xA3, 0xAB, 0xE5, 0xA4, 0x82, 0xE5, + 0xA4, 0x8A, 0xE5, 0xA4, 0x95, 0xE5, 0xA4, 0xA7, + 0xE5, 0xA5, 0xB3, 0xE5, 0xAD, 0x90, 0xE5, 0xAE, + 0x80, 0xE5, 0xAF, 0xB8, 0xE5, 0xB0, 0x8F, 0xE5, + 0xB0, 0xA2, 0xE5, 0xB0, 0xB8, 0xE5, 0xB1, 0xAE, + 0xE5, 0xB1, 0xB1, 0xE5, 0xB7, 0x9B, 0xE5, 0xB7, + 0xA5, 0xE5, 0xB7, 0xB1, 0xE5, 0xB7, 0xBE, 0xE5, + 0xB9, 0xB2, 0xE5, 0xB9, 0xBA, 0xE5, 0xB9, 0xBF, + 0xE5, 0xBB, 0xB4, 0xE5, 0xBB, 0xBE, 0xE5, 0xBC, + 0x8B, 0xE5, 0xBC, 0x93, 0xE5, 0xBD, 0x90, 0xE5, + 0xBD, 0xA1, 0xE5, 0xBD, 0xB3, 0xE5, 0xBF, 0x83, + 0xE6, 0x88, 0x88, 0xE6, 0x88, 0xB6, 0xE6, 0x89, + 0x8B, 0xE6, 0x94, 0xAF, 0xE6, 0x94, 0xB4, 0xE6, + 0x96, 0x87, 0xE6, 0x96, 0x97, 0xE6, 0x96, 0xA4, + 0xE6, 0x96, 0xB9, 0xE6, 0x97, 0xA0, 0xE6, 0x97, + 0xA5, 0xE6, 0x9B, 0xB0, 0xE6, 0x9C, 0x88, 0xE6, + 0x9C, 0xA8, 0xE6, 0xAC, 0xA0, 0xE6, 0xAD, 0xA2, + 0xE6, 0xAD, 0xB9, 0xE6, 0xAE, 0xB3, 0xE6, 0xAF, + 0x8B, 0xE6, 0xAF, 0x94, 0xE6, 0xAF, 0x9B, 0xE6, + 0xB0, 0x8F, 0xE6, 0xB0, 0x94, 0xE6, 0xB0, 0xB4, + 0xE7, 0x81, 0xAB, 0xE7, 0x88, 0xAA, 0xE7, 0x88, + 0xB6, 0xE7, 0x88, 0xBB, 0xE7, 0x88, 0xBF, 0xE7, + 0x89, 0x87, 0xE7, 0x89, 0x99, 0xE7, 0x89, 0x9B, + 0xE7, 0x8A, 0xAC, 0xE7, 0x8E, 0x84, 0xE7, 0x8E, + 0x89, 0xE7, 0x93, 0x9C, 0xE7, 0x93, 0xA6, 0xE7, + 0x94, 0x98, 0xE7, 0x94, 0x9F, 0xE7, 0x94, 0xA8, + 0xE7, 0x94, 0xB0, 0xE7, 0x96, 0x8B, 0xE7, 0x96, + 0x92, 0xE7, 0x99, 0xB6, 0xE7, 0x99, 0xBD, 0xE7, + 0x9A, 0xAE, 0xE7, 0x9A, 0xBF, 0xE7, 0x9B, 0xAE, + 0xE7, 0x9F, 0x9B, 0xE7, 0x9F, 0xA2, 0xE7, 
0x9F, + 0xB3, 0xE7, 0xA4, 0xBA, 0xE7, 0xA6, 0xB8, 0xE7, + 0xA6, 0xBE, 0xE7, 0xA9, 0xB4, 0xE7, 0xAB, 0x8B, + 0xE7, 0xAB, 0xB9, 0xE7, 0xB1, 0xB3, 0xE7, 0xB3, + 0xB8, 0xE7, 0xBC, 0xB6, 0xE7, 0xBD, 0x91, 0xE7, + 0xBE, 0x8A, 0xE7, 0xBE, 0xBD, 0xE8, 0x80, 0x81, + 0xE8, 0x80, 0x8C, 0xE8, 0x80, 0x92, 0xE8, 0x80, + 0xB3, 0xE8, 0x81, 0xBF, 0xE8, 0x82, 0x89, 0xE8, + 0x87, 0xA3, 0xE8, 0x87, 0xAA, 0xE8, 0x87, 0xB3, + 0xE8, 0x87, 0xBC, 0xE8, 0x88, 0x8C, 0xE8, 0x88, + 0x9B, 0xE8, 0x88, 0x9F, 0xE8, 0x89, 0xAE, 0xE8, + 0x89, 0xB2, 0xE8, 0x89, 0xB8, 0xE8, 0x99, 0x8D, + 0xE8, 0x99, 0xAB, 0xE8, 0xA1, 0x80, 0xE8, 0xA1, + 0x8C, 0xE8, 0xA1, 0xA3, 0xE8, 0xA5, 0xBE, 0xE8, + 0xA6, 0x8B, 0xE8, 0xA7, 0x92, 0xE8, 0xA8, 0x80, + 0xE8, 0xB0, 0xB7, 0xE8, 0xB1, 0x86, 0xE8, 0xB1, + 0x95, 0xE8, 0xB1, 0xB8, 0xE8, 0xB2, 0x9D, 0xE8, + 0xB5, 0xA4, 0xE8, 0xB5, 0xB0, 0xE8, 0xB6, 0xB3, + 0xE8, 0xBA, 0xAB, 0xE8, 0xBB, 0x8A, 0xE8, 0xBE, + 0x9B, 0xE8, 0xBE, 0xB0, 0xE8, 0xBE, 0xB5, 0xE9, + 0x82, 0x91, 0xE9, 0x85, 0x89, 0xE9, 0x87, 0x86, + 0xE9, 0x87, 0x8C, 0xE9, 0x87, 0x91, 0xE9, 0x95, + 0xB7, 0xE9, 0x96, 0x80, 0xE9, 0x98, 0x9C, 0xE9, + 0x9A, 0xB6, 0xE9, 0x9A, 0xB9, 0xE9, 0x9B, 0xA8, + 0xE9, 0x9D, 0x91, 0xE9, 0x9D, 0x9E, 0xE9, 0x9D, + 0xA2, 0xE9, 0x9D, 0xA9, 0xE9, 0x9F, 0x8B, 0xE9, + 0x9F, 0xAD, 0xE9, 0x9F, 0xB3, 0xE9, 0xA0, 0x81, + 0xE9, 0xA2, 0xA8, 0xE9, 0xA3, 0x9B, 0xE9, 0xA3, + 0x9F, 0xE9, 0xA6, 0x96, 0xE9, 0xA6, 0x99, 0xE9, + 0xA6, 0xAC, 0xE9, 0xAA, 0xA8, 0xE9, 0xAB, 0x98, + 0xE9, 0xAB, 0x9F, 0xE9, 0xAC, 0xA5, 0xE9, 0xAC, + 0xAF, 0xE9, 0xAC, 0xB2, 0xE9, 0xAC, 0xBC, 0xE9, + 0xAD, 0x9A, 0xE9, 0xB3, 0xA5, 0xE9, 0xB9, 0xB5, + 0xE9, 0xB9, 0xBF, 0xE9, 0xBA, 0xA5, 0xE9, 0xBA, + 0xBB, 0xE9, 0xBB, 0x83, 0xE9, 0xBB, 0x8D, 0xE9, + 0xBB, 0x91, 0xE9, 0xBB, 0xB9, 0xE9, 0xBB, 0xBD, + 0xE9, 0xBC, 0x8E, 0xE9, 0xBC, 0x93, 0xE9, 0xBC, + 0xA0, 0xE9, 0xBC, 0xBB, 0xE9, 0xBD, 0x8A, 0xE9, + 0xBD, 0x92, 0xE9, 0xBE, 0x8D, 0xE9, 0xBE, 0x9C, + 0xE9, 0xBE, 0xA0, 0x20, 0xE3, 0x80, 0x92, 0xE5, + 0x8D, 0x81, 0xE5, 0x8D, 0x84, 0xE5, 0x8D, 
0x85, + 0xF6, 0xE3, 0x81, 0x8B, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x81, 0x8D, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x81, 0x8F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, + 0x91, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x93, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x95, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x97, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x81, 0x99, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x81, 0x9B, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x81, 0x9D, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x81, 0x9F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, + 0xA1, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA4, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA6, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA8, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, 0x9A, 0xF6, + 0xE3, 0x81, 0xB2, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x81, 0xB2, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x81, + 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xB5, + 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x81, 0xB8, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xB8, 0xE3, 0x82, + 0x9A, 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x9A, 0xF6, + 0xE3, 0x81, 0x86, 0xE3, 0x82, 0x99, 0x20, 0xE3, + 0x82, 0x99, 0x20, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, + 0x82, 0x9D, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0x88, + 0xE3, 0x82, 0x8A, 0xF6, 0xE3, 0x82, 0xAB, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xAD, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x82, 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, + 0xB7, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xB9, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBB, 0xE3, + 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBD, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x82, 0xBF, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x83, 0x81, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x83, 0x84, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x83, 0x86, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, + 0x88, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x8F, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x8F, 
0xE3, + 0x82, 0x9A, 0xF6, 0xE3, 0x83, 0x92, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x9A, + 0xF6, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, + 0x83, 0x98, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, + 0x98, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x83, 0x9B, + 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x9B, 0xE3, + 0x82, 0x9A, 0xF6, 0xE3, 0x82, 0xA6, 0xE3, 0x82, + 0x99, 0xF6, 0xE3, 0x83, 0xAF, 0xE3, 0x82, 0x99, + 0xF6, 0xE3, 0x83, 0xB0, 0xE3, 0x82, 0x99, 0xF6, + 0xE3, 0x83, 0xB1, 0xE3, 0x82, 0x99, 0xF6, 0xE3, + 0x83, 0xB2, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, + 0xBD, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xB3, 0xE3, + 0x83, 0x88, 0xE1, 0x84, 0x80, 0xE1, 0x84, 0x81, + 0xE1, 0x86, 0xAA, 0xE1, 0x84, 0x82, 0xE1, 0x86, + 0xAC, 0xE1, 0x86, 0xAD, 0xE1, 0x84, 0x83, 0xE1, + 0x84, 0x84, 0xE1, 0x84, 0x85, 0xE1, 0x86, 0xB0, + 0xE1, 0x86, 0xB1, 0xE1, 0x86, 0xB2, 0xE1, 0x86, + 0xB3, 0xE1, 0x86, 0xB4, 0xE1, 0x86, 0xB5, 0xE1, + 0x84, 0x9A, 0xE1, 0x84, 0x86, 0xE1, 0x84, 0x87, + 0xE1, 0x84, 0x88, 0xE1, 0x84, 0xA1, 0xE1, 0x84, + 0x89, 0xE1, 0x84, 0x8A, 0xE1, 0x84, 0x8B, 0xE1, + 0x84, 0x8C, 0xE1, 0x84, 0x8D, 0xE1, 0x84, 0x8E, + 0xE1, 0x84, 0x8F, 0xE1, 0x84, 0x90, 0xE1, 0x84, + 0x91, 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, + 0x85, 0xA2, 0xE1, 0x85, 0xA3, 0xE1, 0x85, 0xA4, + 0xE1, 0x85, 0xA5, 0xE1, 0x85, 0xA6, 0xE1, 0x85, + 0xA7, 0xE1, 0x85, 0xA8, 0xE1, 0x85, 0xA9, 0xE1, + 0x85, 0xAA, 0xE1, 0x85, 0xAB, 0xE1, 0x85, 0xAC, + 0xE1, 0x85, 0xAD, 0xE1, 0x85, 0xAE, 0xE1, 0x85, + 0xAF, 0xE1, 0x85, 0xB0, 0xE1, 0x85, 0xB1, 0xE1, + 0x85, 0xB2, 0xE1, 0x85, 0xB3, 0xE1, 0x85, 0xB4, + 0xE1, 0x85, 0xB5, 0xE1, 0x85, 0xA0, 0xE1, 0x84, + 0x94, 0xE1, 0x84, 0x95, 0xE1, 0x87, 0x87, 0xE1, + 0x87, 0x88, 0xE1, 0x87, 0x8C, 0xE1, 0x87, 0x8E, + 0xE1, 0x87, 0x93, 0xE1, 0x87, 0x97, 0xE1, 0x87, + 0x99, 0xE1, 0x84, 0x9C, 0xE1, 0x87, 0x9D, 0xE1, + 0x87, 0x9F, 0xE1, 0x84, 0x9D, 0xE1, 0x84, 0x9E, + 0xE1, 0x84, 0xA0, 0xE1, 0x84, 0xA2, 0xE1, 0x84, + 0xA3, 0xE1, 0x84, 0xA7, 0xE1, 0x84, 0xA9, 
0xE1, + 0x84, 0xAB, 0xE1, 0x84, 0xAC, 0xE1, 0x84, 0xAD, + 0xE1, 0x84, 0xAE, 0xE1, 0x84, 0xAF, 0xE1, 0x84, + 0xB2, 0xE1, 0x84, 0xB6, 0xE1, 0x85, 0x80, 0xE1, + 0x85, 0x87, 0xE1, 0x85, 0x8C, 0xE1, 0x87, 0xB1, + 0xE1, 0x87, 0xB2, 0xE1, 0x85, 0x97, 0xE1, 0x85, + 0x98, 0xE1, 0x85, 0x99, 0xE1, 0x86, 0x84, 0xE1, + 0x86, 0x85, 0xE1, 0x86, 0x88, 0xE1, 0x86, 0x91, + 0xE1, 0x86, 0x92, 0xE1, 0x86, 0x94, 0xE1, 0x86, + 0x9E, 0xE1, 0x86, 0xA1, 0xE4, 0xB8, 0x80, 0xE4, + 0xBA, 0x8C, 0xE4, 0xB8, 0x89, 0xE5, 0x9B, 0x9B, + 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, 0xAD, 0xE4, 0xB8, + 0x8B, 0xE7, 0x94, 0xB2, 0xE4, 0xB9, 0x99, 0xE4, + 0xB8, 0x99, 0xE4, 0xB8, 0x81, 0xE5, 0xA4, 0xA9, + 0xE5, 0x9C, 0xB0, 0xE4, 0xBA, 0xBA, 0x28, 0xE1, + 0x84, 0x80, 0x29, 0x28, 0xE1, 0x84, 0x82, 0x29, + 0x28, 0xE1, 0x84, 0x83, 0x29, 0x28, 0xE1, 0x84, + 0x85, 0x29, 0x28, 0xE1, 0x84, 0x86, 0x29, 0x28, + 0xE1, 0x84, 0x87, 0x29, 0x28, 0xE1, 0x84, 0x89, + 0x29, 0x28, 0xE1, 0x84, 0x8B, 0x29, 0x28, 0xE1, + 0x84, 0x8C, 0x29, 0x28, 0xE1, 0x84, 0x8E, 0x29, + 0x28, 0xE1, 0x84, 0x8F, 0x29, 0x28, 0xE1, 0x84, + 0x90, 0x29, 0x28, 0xE1, 0x84, 0x91, 0x29, 0x28, + 0xE1, 0x84, 0x92, 0x29, 0x28, 0xE1, 0x84, 0x80, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x82, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x83, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x85, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x86, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x87, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x89, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8B, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8C, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8E, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8F, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x90, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x91, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x92, + 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8C, + 0xE1, 0x85, 0xAE, 0x29, 0x28, 0xE1, 0x84, 0x8B, + 0xE1, 0x85, 0xA9, 0xE1, 0x84, 0x8C, 0xE1, 0x85, + 0xA5, 0xE1, 0x86, 0xAB, 0x29, 0x28, 0xE1, 
0x84, + 0x8B, 0xE1, 0x85, 0xA9, 0xE1, 0x84, 0x92, 0xE1, + 0x85, 0xAE, 0x29, 0x28, 0xE4, 0xB8, 0x80, 0x29, + 0x28, 0xE4, 0xBA, 0x8C, 0x29, 0x28, 0xE4, 0xB8, + 0x89, 0x29, 0x28, 0xE5, 0x9B, 0x9B, 0x29, 0x28, + 0xE4, 0xBA, 0x94, 0x29, 0x28, 0xE5, 0x85, 0xAD, + 0x29, 0x28, 0xE4, 0xB8, 0x83, 0x29, 0x28, 0xE5, + 0x85, 0xAB, 0x29, 0x28, 0xE4, 0xB9, 0x9D, 0x29, + 0x28, 0xE5, 0x8D, 0x81, 0x29, 0x28, 0xE6, 0x9C, + 0x88, 0x29, 0x28, 0xE7, 0x81, 0xAB, 0x29, 0x28, + 0xE6, 0xB0, 0xB4, 0x29, 0x28, 0xE6, 0x9C, 0xA8, + 0x29, 0x28, 0xE9, 0x87, 0x91, 0x29, 0x28, 0xE5, + 0x9C, 0x9F, 0x29, 0x28, 0xE6, 0x97, 0xA5, 0x29, + 0x28, 0xE6, 0xA0, 0xAA, 0x29, 0x28, 0xE6, 0x9C, + 0x89, 0x29, 0x28, 0xE7, 0xA4, 0xBE, 0x29, 0x28, + 0xE5, 0x90, 0x8D, 0x29, 0x28, 0xE7, 0x89, 0xB9, + 0x29, 0x28, 0xE8, 0xB2, 0xA1, 0x29, 0x28, 0xE7, + 0xA5, 0x9D, 0x29, 0x28, 0xE5, 0x8A, 0xB4, 0x29, + 0x28, 0xE4, 0xBB, 0xA3, 0x29, 0x28, 0xE5, 0x91, + 0xBC, 0x29, 0x28, 0xE5, 0xAD, 0xA6, 0x29, 0x28, + 0xE7, 0x9B, 0xA3, 0x29, 0x28, 0xE4, 0xBC, 0x81, + 0x29, 0x28, 0xE8, 0xB3, 0x87, 0x29, 0x28, 0xE5, + 0x8D, 0x94, 0x29, 0x28, 0xE7, 0xA5, 0xAD, 0x29, + 0x28, 0xE4, 0xBC, 0x91, 0x29, 0x28, 0xE8, 0x87, + 0xAA, 0x29, 0x28, 0xE8, 0x87, 0xB3, 0x29, 0x50, + 0x54, 0x45, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, + 0x32, 0x38, 0x32, 0x39, 0x33, 0x30, 0x33, 0x31, + 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0xE1, 0x84, 0x80, 0xE1, 0x84, 0x82, 0xE1, 0x84, + 0x83, 0xE1, 0x84, 0x85, 0xE1, 0x84, 0x86, 0xE1, + 0x84, 0x87, 0xE1, 0x84, 0x89, 0xE1, 0x84, 0x8B, + 0xE1, 0x84, 0x8C, 0xE1, 0x84, 0x8E, 0xE1, 0x84, + 0x8F, 0xE1, 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, + 0x84, 0x92, 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xA1, + 0xE1, 0x84, 0x82, 0xE1, 0x85, 0xA1, 0xE1, 0x84, + 0x83, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x85, 0xE1, + 0x85, 0xA1, 0xE1, 0x84, 0x86, 0xE1, 0x85, 0xA1, + 0xE1, 0x84, 0x87, 0xE1, 0x85, 0xA1, 0xE1, 0x84, + 0x89, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x8B, 0xE1, + 0x85, 0xA1, 0xE1, 0x84, 0x8C, 0xE1, 0x85, 
0xA1, + 0xE1, 0x84, 0x8E, 0xE1, 0x85, 0xA1, 0xE1, 0x84, + 0x8F, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x90, 0xE1, + 0x85, 0xA1, 0xE1, 0x84, 0x91, 0xE1, 0x85, 0xA1, + 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x84, + 0x8E, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xB7, 0xE1, + 0x84, 0x80, 0xE1, 0x85, 0xA9, 0xE1, 0x84, 0x8C, + 0xE1, 0x85, 0xAE, 0xE1, 0x84, 0x8B, 0xE1, 0x85, + 0xB4, 0xE1, 0x84, 0x8B, 0xE1, 0x85, 0xAE, 0xE4, + 0xB8, 0x80, 0xE4, 0xBA, 0x8C, 0xE4, 0xB8, 0x89, + 0xE5, 0x9B, 0x9B, 0xE4, 0xBA, 0x94, 0xE5, 0x85, + 0xAD, 0xE4, 0xB8, 0x83, 0xE5, 0x85, 0xAB, 0xE4, + 0xB9, 0x9D, 0xE5, 0x8D, 0x81, 0xE6, 0x9C, 0x88, + 0xE7, 0x81, 0xAB, 0xE6, 0xB0, 0xB4, 0xE6, 0x9C, + 0xA8, 0xE9, 0x87, 0x91, 0xE5, 0x9C, 0x9F, 0xE6, + 0x97, 0xA5, 0xE6, 0xA0, 0xAA, 0xE6, 0x9C, 0x89, + 0xE7, 0xA4, 0xBE, 0xE5, 0x90, 0x8D, 0xE7, 0x89, + 0xB9, 0xE8, 0xB2, 0xA1, 0xE7, 0xA5, 0x9D, 0xE5, + 0x8A, 0xB4, 0xE7, 0xA7, 0x98, 0xE7, 0x94, 0xB7, + 0xE5, 0xA5, 0xB3, 0xE9, 0x81, 0xA9, 0xE5, 0x84, + 0xAA, 0xE5, 0x8D, 0xB0, 0xE6, 0xB3, 0xA8, 0xE9, + 0xA0, 0x85, 0xE4, 0xBC, 0x91, 0xE5, 0x86, 0x99, + 0xE6, 0xAD, 0xA3, 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, + 0xAD, 0xE4, 0xB8, 0x8B, 0xE5, 0xB7, 0xA6, 0xE5, + 0x8F, 0xB3, 0xE5, 0x8C, 0xBB, 0xE5, 0xAE, 0x97, + 0xE5, 0xAD, 0xA6, 0xE7, 0x9B, 0xA3, 0xE4, 0xBC, + 0x81, 0xE8, 0xB3, 0x87, 0xE5, 0x8D, 0x94, 0xE5, + 0xA4, 0x9C, 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, + 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, 0x34, 0x32, + 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, + 0x34, 0x37, 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, + 0x31, 0xE6, 0x9C, 0x88, 0x32, 0xE6, 0x9C, 0x88, + 0x33, 0xE6, 0x9C, 0x88, 0x34, 0xE6, 0x9C, 0x88, + 0x35, 0xE6, 0x9C, 0x88, 0x36, 0xE6, 0x9C, 0x88, + 0x37, 0xE6, 0x9C, 0x88, 0x38, 0xE6, 0x9C, 0x88, + 0x39, 0xE6, 0x9C, 0x88, 0x31, 0x30, 0xE6, 0x9C, + 0x88, 0x31, 0x31, 0xE6, 0x9C, 0x88, 0x31, 0x32, + 0xE6, 0x9C, 0x88, 0x48, 0x67, 0x65, 0x72, 0x67, + 0x65, 0x56, 0x4C, 0x54, 0x44, 0xE3, 0x82, 0xA2, + 0xE3, 0x82, 0xA4, 0xE3, 0x82, 0xA6, 0xE3, 0x82, + 0xA8, 0xE3, 0x82, 0xAA, 0xE3, 0x82, 0xAB, 
0xE3, + 0x82, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0xB1, + 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0xB5, 0xE3, 0x82, + 0xB7, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xBB, 0xE3, + 0x82, 0xBD, 0xE3, 0x82, 0xBF, 0xE3, 0x83, 0x81, + 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x86, 0xE3, 0x83, + 0x88, 0xE3, 0x83, 0x8A, 0xE3, 0x83, 0x8B, 0xE3, + 0x83, 0x8C, 0xE3, 0x83, 0x8D, 0xE3, 0x83, 0x8E, + 0xE3, 0x83, 0x8F, 0xE3, 0x83, 0x92, 0xE3, 0x83, + 0x95, 0xE3, 0x83, 0x98, 0xE3, 0x83, 0x9B, 0xE3, + 0x83, 0x9E, 0xE3, 0x83, 0x9F, 0xE3, 0x83, 0xA0, + 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xA2, 0xE3, 0x83, + 0xA4, 0xE3, 0x83, 0xA6, 0xE3, 0x83, 0xA8, 0xE3, + 0x83, 0xA9, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xAB, + 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xAD, 0xE3, 0x83, + 0xAF, 0xE3, 0x83, 0xB0, 0xE3, 0x83, 0xB1, 0xE3, + 0x83, 0xB2, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0x8F, + 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x83, + 0x88, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xAB, 0xE3, + 0x83, 0x95, 0xE3, 0x82, 0xA1, 0xE3, 0x82, 0xA2, + 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x98, 0xE3, 0x82, + 0x9A, 0xE3, 0x82, 0xA2, 0xE3, 0x82, 0xA2, 0xE3, + 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xA4, + 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0xB3, 0xE3, 0x82, + 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xA4, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0x81, 0xE3, 0x82, 0xA6, + 0xE3, 0x82, 0xA9, 0xE3, 0x83, 0xB3, 0xE3, 0x82, + 0xA8, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xAF, 0xE3, + 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, + 0xE3, 0x82, 0xA8, 0xE3, 0x83, 0xBC, 0xE3, 0x82, + 0xAB, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAA, 0xE3, + 0x83, 0xB3, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xAA, + 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xA0, 0xE3, 0x82, + 0xAB, 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xAA, 0xE3, + 0x82, 0xAB, 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0x83, + 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xAB, 0xE3, 0x83, + 0xAD, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xBC, 0xE3, + 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAD, + 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xAB, 0xE3, 0x82, + 0x99, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x9E, 0xE3, + 0x82, 0xAD, 0xE3, 0x82, 0x99, 0xE3, 0x82, 
0xAB, + 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xAD, 0xE3, 0x82, + 0x99, 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0xBC, 0xE3, + 0x82, 0xAD, 0xE3, 0x83, 0xA5, 0xE3, 0x83, 0xAA, + 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAD, 0xE3, 0x82, + 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xBF, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAD, + 0xE3, 0x83, 0xAD, 0xE3, 0x82, 0xAD, 0xE3, 0x83, + 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, 0xE3, + 0x83, 0xA9, 0xE3, 0x83, 0xA0, 0xE3, 0x82, 0xAD, + 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xA1, 0xE3, 0x83, + 0xBC, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, 0xE3, + 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xAF, + 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, 0x82, + 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA9, 0xE3, + 0x83, 0xA0, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, + 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xA0, 0xE3, 0x83, + 0x88, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xAF, 0xE3, + 0x83, 0xAB, 0xE3, 0x82, 0xBB, 0xE3, 0x82, 0x99, + 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xAD, 0xE3, 0x82, + 0xAF, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xBC, 0xE3, + 0x83, 0x8D, 0xE3, 0x82, 0xB1, 0xE3, 0x83, 0xBC, + 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xB3, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x8A, 0xE3, 0x82, 0xB3, 0xE3, + 0x83, 0xBC, 0xE3, 0x83, 0x9B, 0xE3, 0x82, 0x9A, + 0xE3, 0x82, 0xB5, 0xE3, 0x82, 0xA4, 0xE3, 0x82, + 0xAF, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xB5, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0x81, 0xE3, 0x83, 0xBC, + 0xE3, 0x83, 0xA0, 0xE3, 0x82, 0xB7, 0xE3, 0x83, + 0xAA, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xAF, 0xE3, + 0x82, 0x99, 0xE3, 0x82, 0xBB, 0xE3, 0x83, 0xB3, + 0xE3, 0x83, 0x81, 0xE3, 0x82, 0xBB, 0xE3, 0x83, + 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xBF, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xB9, + 0xE3, 0x83, 0x86, 0xE3, 0x82, 0x99, 0xE3, 0x82, + 0xB7, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, + 0x83, 0xAB, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xB3, + 0xE3, 0x83, 0x8A, 0xE3, 0x83, 0x8E, 0xE3, 0x83, + 0x8E, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, + 0x83, 0x8F, 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0x84, + 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x9A, 0xE3, 
0x83, + 0xBC, 0xE3, 0x82, 0xBB, 0xE3, 0x83, 0xB3, 0xE3, + 0x83, 0x88, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x9A, + 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x84, 0xE3, 0x83, + 0x8F, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, + 0x83, 0xAC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, + 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xA2, 0xE3, 0x82, + 0xB9, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, 0xE3, + 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xAF, + 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, + 0x9A, 0xE3, 0x82, 0xB3, 0xE3, 0x83, 0x92, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x95, + 0xE3, 0x82, 0xA1, 0xE3, 0x83, 0xA9, 0xE3, 0x83, + 0x83, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, + 0x83, 0x95, 0xE3, 0x82, 0xA3, 0xE3, 0x83, 0xBC, + 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x95, 0xE3, 0x82, + 0x99, 0xE3, 0x83, 0x83, 0xE3, 0x82, 0xB7, 0xE3, + 0x82, 0xA7, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x95, + 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0x98, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0xBF, 0xE3, + 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x98, + 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xBD, 0xE3, 0x83, + 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0x8B, 0xE3, + 0x83, 0x92, 0xE3, 0x83, 0x98, 0xE3, 0x83, 0xAB, + 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x98, 0xE3, 0x82, + 0x9A, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xB9, 0xE3, + 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, + 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0x98, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, + 0x82, 0xBF, 0xE3, 0x83, 0x9B, 0xE3, 0x82, 0x9A, + 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0x88, 0xE3, 0x83, 0x9B, 0xE3, 0x82, 0x99, 0xE3, + 0x83, 0xAB, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x9B, + 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x9B, 0xE3, 0x82, + 0x9A, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0x9B, 0xE3, 0x83, 0xBC, + 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x9B, 0xE3, 0x83, + 0xBC, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x9E, 0xE3, + 0x82, 0xA4, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAD, + 0xE3, 0x83, 0x9E, 0xE3, 0x82, 0xA4, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0x83, 
0xE3, + 0x83, 0x8F, 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0xAB, + 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0x9E, 0xE3, 0x83, + 0xB3, 0xE3, 0x82, 0xB7, 0xE3, 0x83, 0xA7, 0xE3, + 0x83, 0xB3, 0xE3, 0x83, 0x9F, 0xE3, 0x82, 0xAF, + 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0x9F, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0x9F, 0xE3, + 0x83, 0xAA, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x99, + 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, + 0xA1, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, + 0x83, 0xA1, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0x99, + 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xB3, 0xE3, 0x83, + 0xA1, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, + 0x83, 0xAB, 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xBC, + 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, 0x83, + 0xA4, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, + 0x83, 0xA6, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xB3, + 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0x83, 0xE3, 0x83, + 0x88, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xAA, 0xE3, + 0x83, 0xA9, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, + 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x95, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xAC, + 0xE3, 0x83, 0xA0, 0xE3, 0x83, 0xAC, 0xE3, 0x83, + 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xB1, 0xE3, + 0x82, 0x99, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0xAF, + 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0x30, 0xE7, + 0x82, 0xB9, 0x31, 0xE7, 0x82, 0xB9, 0x32, 0xE7, + 0x82, 0xB9, 0x33, 0xE7, 0x82, 0xB9, 0x34, 0xE7, + 0x82, 0xB9, 0x35, 0xE7, 0x82, 0xB9, 0x36, 0xE7, + 0x82, 0xB9, 0x37, 0xE7, 0x82, 0xB9, 0x38, 0xE7, + 0x82, 0xB9, 0x39, 0xE7, 0x82, 0xB9, 0x31, 0x30, + 0xE7, 0x82, 0xB9, 0x31, 0x31, 0xE7, 0x82, 0xB9, + 0x31, 0x32, 0xE7, 0x82, 0xB9, 0x31, 0x33, 0xE7, + 0x82, 0xB9, 0x31, 0x34, 0xE7, 0x82, 0xB9, 0x31, + 0x35, 0xE7, 0x82, 0xB9, 0x31, 0x36, 0xE7, 0x82, + 0xB9, 0x31, 0x37, 0xE7, 0x82, 0xB9, 0x31, 0x38, + 0xE7, 0x82, 0xB9, 0x31, 0x39, 0xE7, 0x82, 0xB9, + 0x32, 0x30, 0xE7, 0x82, 0xB9, 0x32, 0x31, 0xE7, + 0x82, 0xB9, 0x32, 0x32, 0xE7, 0x82, 0xB9, 0x32, + 0x33, 0xE7, 0x82, 0xB9, 0x32, 0x34, 0xE7, 
0x82, + 0xB9, 0x68, 0x50, 0x61, 0x64, 0x61, 0x41, 0x55, + 0x62, 0x61, 0x72, 0x6F, 0x56, 0x70, 0x63, 0x64, + 0x6D, 0x64, 0x6D, 0x32, 0x64, 0x6D, 0x33, 0x49, + 0x55, 0xE5, 0xB9, 0xB3, 0xE6, 0x88, 0x90, 0xE6, + 0x98, 0xAD, 0xE5, 0x92, 0x8C, 0xE5, 0xA4, 0xA7, + 0xE6, 0xAD, 0xA3, 0xE6, 0x98, 0x8E, 0xE6, 0xB2, + 0xBB, 0xE6, 0xA0, 0xAA, 0xE5, 0xBC, 0x8F, 0xE4, + 0xBC, 0x9A, 0xE7, 0xA4, 0xBE, 0x70, 0x41, 0x6E, + 0x41, 0xCE, 0xBC, 0x41, 0x6D, 0x41, 0x6B, 0x41, + 0x4B, 0x42, 0x4D, 0x42, 0x47, 0x42, 0x63, 0x61, + 0x6C, 0x6B, 0x63, 0x61, 0x6C, 0x70, 0x46, 0x6E, + 0x46, 0xCE, 0xBC, 0x46, 0xCE, 0xBC, 0x67, 0x6D, + 0x67, 0x6B, 0x67, 0x48, 0x7A, 0x6B, 0x48, 0x7A, + 0x4D, 0x48, 0x7A, 0x47, 0x48, 0x7A, 0x54, 0x48, + 0x7A, 0xCE, 0xBC, 0x6C, 0x6D, 0x6C, 0x64, 0x6C, + 0x6B, 0x6C, 0x66, 0x6D, 0x6E, 0x6D, 0xCE, 0xBC, + 0x6D, 0x6D, 0x6D, 0x63, 0x6D, 0x6B, 0x6D, 0x6D, + 0x6D, 0x32, 0x63, 0x6D, 0x32, 0x6D, 0x32, 0x6B, + 0x6D, 0x32, 0x6D, 0x6D, 0x33, 0x63, 0x6D, 0x33, + 0x6D, 0x33, 0x6B, 0x6D, 0x33, 0x6D, 0xE2, 0x88, + 0x95, 0x73, 0x6D, 0xE2, 0x88, 0x95, 0x73, 0x32, + 0x50, 0x61, 0x6B, 0x50, 0x61, 0x4D, 0x50, 0x61, + 0x47, 0x50, 0x61, 0x72, 0x61, 0x64, 0x72, 0x61, + 0x64, 0xE2, 0x88, 0x95, 0x73, 0x72, 0x61, 0x64, + 0xE2, 0x88, 0x95, 0x73, 0x32, 0x70, 0x73, 0x6E, + 0x73, 0xCE, 0xBC, 0x73, 0x6D, 0x73, 0x70, 0x56, + 0x6E, 0x56, 0xCE, 0xBC, 0x56, 0x6D, 0x56, 0x6B, + 0x56, 0x4D, 0x56, 0x70, 0x57, 0x6E, 0x57, 0xCE, + 0xBC, 0x57, 0x6D, 0x57, 0x6B, 0x57, 0x4D, 0x57, + 0x6B, 0xCE, 0xA9, 0x4D, 0xCE, 0xA9, 0x61, 0x2E, + 0x6D, 0x2E, 0x42, 0x71, 0x63, 0x63, 0x63, 0x64, + 0x43, 0xE2, 0x88, 0x95, 0x6B, 0x67, 0x43, 0x6F, + 0x2E, 0x64, 0x42, 0x47, 0x79, 0x68, 0x61, 0x48, + 0x50, 0x69, 0x6E, 0x4B, 0x4B, 0x4B, 0x4D, 0x6B, + 0x74, 0x6C, 0x6D, 0x6C, 0x6E, 0x6C, 0x6F, 0x67, + 0x6C, 0x78, 0x6D, 0x62, 0x6D, 0x69, 0x6C, 0x6D, + 0x6F, 0x6C, 0x50, 0x48, 0x70, 0x2E, 0x6D, 0x2E, + 0x50, 0x50, 0x4D, 0x50, 0x52, 0x73, 0x72, 0x53, + 0x76, 0x57, 0x62, 0x56, 0xE2, 0x88, 0x95, 0x6D, + 0x41, 0xE2, 0x88, 0x95, 0x6D, 0x31, 0xE6, 
0x97, + 0xA5, 0x32, 0xE6, 0x97, 0xA5, 0x33, 0xE6, 0x97, + 0xA5, 0x34, 0xE6, 0x97, 0xA5, 0x35, 0xE6, 0x97, + 0xA5, 0x36, 0xE6, 0x97, 0xA5, 0x37, 0xE6, 0x97, + 0xA5, 0x38, 0xE6, 0x97, 0xA5, 0x39, 0xE6, 0x97, + 0xA5, 0x31, 0x30, 0xE6, 0x97, 0xA5, 0x31, 0x31, + 0xE6, 0x97, 0xA5, 0x31, 0x32, 0xE6, 0x97, 0xA5, + 0x31, 0x33, 0xE6, 0x97, 0xA5, 0x31, 0x34, 0xE6, + 0x97, 0xA5, 0x31, 0x35, 0xE6, 0x97, 0xA5, 0x31, + 0x36, 0xE6, 0x97, 0xA5, 0x31, 0x37, 0xE6, 0x97, + 0xA5, 0x31, 0x38, 0xE6, 0x97, 0xA5, 0x31, 0x39, + 0xE6, 0x97, 0xA5, 0x32, 0x30, 0xE6, 0x97, 0xA5, + 0x32, 0x31, 0xE6, 0x97, 0xA5, 0x32, 0x32, 0xE6, + 0x97, 0xA5, 0x32, 0x33, 0xE6, 0x97, 0xA5, 0x32, + 0x34, 0xE6, 0x97, 0xA5, 0x32, 0x35, 0xE6, 0x97, + 0xA5, 0x32, 0x36, 0xE6, 0x97, 0xA5, 0x32, 0x37, + 0xE6, 0x97, 0xA5, 0x32, 0x38, 0xE6, 0x97, 0xA5, + 0x32, 0x39, 0xE6, 0x97, 0xA5, 0x33, 0x30, 0xE6, + 0x97, 0xA5, 0x33, 0x31, 0xE6, 0x97, 0xA5, 0x67, + 0x61, 0x6C, 0xF6, 0xE8, 0xB1, 0x88, 0xF6, 0xE6, + 0x9B, 0xB4, 0xF6, 0xE8, 0xBB, 0x8A, 0xF6, 0xE8, + 0xB3, 0x88, 0xF6, 0xE6, 0xBB, 0x91, 0xF6, 0xE4, + 0xB8, 0xB2, 0xF6, 0xE5, 0x8F, 0xA5, 0xF6, 0xE9, + 0xBE, 0x9C, 0xF6, 0xE9, 0xBE, 0x9C, 0xF6, 0xE5, + 0xA5, 0x91, 0xF6, 0xE9, 0x87, 0x91, 0xF6, 0xE5, + 0x96, 0x87, 0xF6, 0xE5, 0xA5, 0x88, 0xF6, 0xE6, + 0x87, 0xB6, 0xF6, 0xE7, 0x99, 0xA9, 0xF6, 0xE7, + 0xBE, 0x85, 0xF6, 0xE8, 0x98, 0xBF, 0xF6, 0xE8, + 0x9E, 0xBA, 0xF6, 0xE8, 0xA3, 0xB8, 0xF6, 0xE9, + 0x82, 0x8F, 0xF6, 0xE6, 0xA8, 0x82, 0xF6, 0xE6, + 0xB4, 0x9B, 0xF6, 0xE7, 0x83, 0x99, 0xF6, 0xE7, + 0x8F, 0x9E, 0xF6, 0xE8, 0x90, 0xBD, 0xF6, 0xE9, + 0x85, 0xAA, 0xF6, 0xE9, 0xA7, 0xB1, 0xF6, 0xE4, + 0xBA, 0x82, 0xF6, 0xE5, 0x8D, 0xB5, 0xF6, 0xE6, + 0xAC, 0x84, 0xF6, 0xE7, 0x88, 0x9B, 0xF6, 0xE8, + 0x98, 0xAD, 0xF6, 0xE9, 0xB8, 0x9E, 0xF6, 0xE5, + 0xB5, 0x90, 0xF6, 0xE6, 0xBF, 0xAB, 0xF6, 0xE8, + 0x97, 0x8D, 0xF6, 0xE8, 0xA5, 0xA4, 0xF6, 0xE6, + 0x8B, 0x89, 0xF6, 0xE8, 0x87, 0x98, 0xF6, 0xE8, + 0xA0, 0x9F, 0xF6, 0xE5, 0xBB, 0x8A, 0xF6, 0xE6, + 0x9C, 0x97, 0xF6, 0xE6, 0xB5, 0xAA, 0xF6, 
0xE7, + 0x8B, 0xBC, 0xF6, 0xE9, 0x83, 0x8E, 0xF6, 0xE4, + 0xBE, 0x86, 0xF6, 0xE5, 0x86, 0xB7, 0xF6, 0xE5, + 0x8B, 0x9E, 0xF6, 0xE6, 0x93, 0x84, 0xF6, 0xE6, + 0xAB, 0x93, 0xF6, 0xE7, 0x88, 0x90, 0xF6, 0xE7, + 0x9B, 0xA7, 0xF6, 0xE8, 0x80, 0x81, 0xF6, 0xE8, + 0x98, 0x86, 0xF6, 0xE8, 0x99, 0x9C, 0xF6, 0xE8, + 0xB7, 0xAF, 0xF6, 0xE9, 0x9C, 0xB2, 0xF6, 0xE9, + 0xAD, 0xAF, 0xF6, 0xE9, 0xB7, 0xBA, 0xF6, 0xE7, + 0xA2, 0x8C, 0xF6, 0xE7, 0xA5, 0xBF, 0xF6, 0xE7, + 0xB6, 0xA0, 0xF6, 0xE8, 0x8F, 0x89, 0xF6, 0xE9, + 0x8C, 0x84, 0xF6, 0xE9, 0xB9, 0xBF, 0xF6, 0xE8, + 0xAB, 0x96, 0xF6, 0xE5, 0xA3, 0x9F, 0xF6, 0xE5, + 0xBC, 0x84, 0xF6, 0xE7, 0xB1, 0xA0, 0xF6, 0xE8, + 0x81, 0xBE, 0xF6, 0xE7, 0x89, 0xA2, 0xF6, 0xE7, + 0xA3, 0x8A, 0xF6, 0xE8, 0xB3, 0x82, 0xF6, 0xE9, + 0x9B, 0xB7, 0xF6, 0xE5, 0xA3, 0x98, 0xF6, 0xE5, + 0xB1, 0xA2, 0xF6, 0xE6, 0xA8, 0x93, 0xF6, 0xE6, + 0xB7, 0x9A, 0xF6, 0xE6, 0xBC, 0x8F, 0xF6, 0xE7, + 0xB4, 0xAF, 0xF6, 0xE7, 0xB8, 0xB7, 0xF6, 0xE9, + 0x99, 0x8B, 0xF6, 0xE5, 0x8B, 0x92, 0xF6, 0xE8, + 0x82, 0x8B, 0xF6, 0xE5, 0x87, 0x9C, 0xF6, 0xE5, + 0x87, 0x8C, 0xF6, 0xE7, 0xA8, 0x9C, 0xF6, 0xE7, + 0xB6, 0xBE, 0xF6, 0xE8, 0x8F, 0xB1, 0xF6, 0xE9, + 0x99, 0xB5, 0xF6, 0xE8, 0xAE, 0x80, 0xF6, 0xE6, + 0x8B, 0x8F, 0xF6, 0xE6, 0xA8, 0x82, 0xF6, 0xE8, + 0xAB, 0xBE, 0xF6, 0xE4, 0xB8, 0xB9, 0xF6, 0xE5, + 0xAF, 0xA7, 0xF6, 0xE6, 0x80, 0x92, 0xF6, 0xE7, + 0x8E, 0x87, 0xF6, 0xE7, 0x95, 0xB0, 0xF6, 0xE5, + 0x8C, 0x97, 0xF6, 0xE7, 0xA3, 0xBB, 0xF6, 0xE4, + 0xBE, 0xBF, 0xF6, 0xE5, 0xBE, 0xA9, 0xF6, 0xE4, + 0xB8, 0x8D, 0xF6, 0xE6, 0xB3, 0x8C, 0xF6, 0xE6, + 0x95, 0xB8, 0xF6, 0xE7, 0xB4, 0xA2, 0xF6, 0xE5, + 0x8F, 0x83, 0xF6, 0xE5, 0xA1, 0x9E, 0xF6, 0xE7, + 0x9C, 0x81, 0xF6, 0xE8, 0x91, 0x89, 0xF6, 0xE8, + 0xAA, 0xAA, 0xF6, 0xE6, 0xAE, 0xBA, 0xF6, 0xE8, + 0xBE, 0xB0, 0xF6, 0xE6, 0xB2, 0x88, 0xF6, 0xE6, + 0x8B, 0xBE, 0xF6, 0xE8, 0x8B, 0xA5, 0xF6, 0xE6, + 0x8E, 0xA0, 0xF6, 0xE7, 0x95, 0xA5, 0xF6, 0xE4, + 0xBA, 0xAE, 0xF6, 0xE5, 0x85, 0xA9, 0xF6, 0xE5, + 0x87, 0x89, 0xF6, 0xE6, 0xA2, 0x81, 0xF6, 
0xE7, + 0xB3, 0xA7, 0xF6, 0xE8, 0x89, 0xAF, 0xF6, 0xE8, + 0xAB, 0x92, 0xF6, 0xE9, 0x87, 0x8F, 0xF6, 0xE5, + 0x8B, 0xB5, 0xF6, 0xE5, 0x91, 0x82, 0xF6, 0xE5, + 0xA5, 0xB3, 0xF6, 0xE5, 0xBB, 0xAC, 0xF6, 0xE6, + 0x97, 0x85, 0xF6, 0xE6, 0xBF, 0xBE, 0xF6, 0xE7, + 0xA4, 0xAA, 0xF6, 0xE9, 0x96, 0xAD, 0xF6, 0xE9, + 0xA9, 0xAA, 0xF6, 0xE9, 0xBA, 0x97, 0xF6, 0xE9, + 0xBB, 0x8E, 0xF6, 0xE5, 0x8A, 0x9B, 0xF6, 0xE6, + 0x9B, 0x86, 0xF6, 0xE6, 0xAD, 0xB7, 0xF6, 0xE8, + 0xBD, 0xA2, 0xF6, 0xE5, 0xB9, 0xB4, 0xF6, 0xE6, + 0x86, 0x90, 0xF6, 0xE6, 0x88, 0x80, 0xF6, 0xE6, + 0x92, 0x9A, 0xF6, 0xE6, 0xBC, 0xA3, 0xF6, 0xE7, + 0x85, 0x89, 0xF6, 0xE7, 0x92, 0x89, 0xF6, 0xE7, + 0xA7, 0x8A, 0xF6, 0xE7, 0xB7, 0xB4, 0xF6, 0xE8, + 0x81, 0xAF, 0xF6, 0xE8, 0xBC, 0xA6, 0xF6, 0xE8, + 0x93, 0xAE, 0xF6, 0xE9, 0x80, 0xA3, 0xF6, 0xE9, + 0x8D, 0x8A, 0xF6, 0xE5, 0x88, 0x97, 0xF6, 0xE5, + 0x8A, 0xA3, 0xF6, 0xE5, 0x92, 0xBD, 0xF6, 0xE7, + 0x83, 0x88, 0xF6, 0xE8, 0xA3, 0x82, 0xF6, 0xE8, + 0xAA, 0xAA, 0xF6, 0xE5, 0xBB, 0x89, 0xF6, 0xE5, + 0xBF, 0xB5, 0xF6, 0xE6, 0x8D, 0xBB, 0xF6, 0xE6, + 0xAE, 0xAE, 0xF6, 0xE7, 0xB0, 0xBE, 0xF6, 0xE7, + 0x8D, 0xB5, 0xF6, 0xE4, 0xBB, 0xA4, 0xF6, 0xE5, + 0x9B, 0xB9, 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, + 0xB6, 0xBA, 0xF6, 0xE6, 0x80, 0x9C, 0xF6, 0xE7, + 0x8E, 0xB2, 0xF6, 0xE7, 0x91, 0xA9, 0xF6, 0xE7, + 0xBE, 0x9A, 0xF6, 0xE8, 0x81, 0x86, 0xF6, 0xE9, + 0x88, 0xB4, 0xF6, 0xE9, 0x9B, 0xB6, 0xF6, 0xE9, + 0x9D, 0x88, 0xF6, 0xE9, 0xA0, 0x98, 0xF6, 0xE4, + 0xBE, 0x8B, 0xF6, 0xE7, 0xA6, 0xAE, 0xF6, 0xE9, + 0x86, 0xB4, 0xF6, 0xE9, 0x9A, 0xB8, 0xF6, 0xE6, + 0x83, 0xA1, 0xF6, 0xE4, 0xBA, 0x86, 0xF6, 0xE5, + 0x83, 0x9A, 0xF6, 0xE5, 0xAF, 0xAE, 0xF6, 0xE5, + 0xB0, 0xBF, 0xF6, 0xE6, 0x96, 0x99, 0xF6, 0xE6, + 0xA8, 0x82, 0xF6, 0xE7, 0x87, 0x8E, 0xF6, 0xE7, + 0x99, 0x82, 0xF6, 0xE8, 0x93, 0xBC, 0xF6, 0xE9, + 0x81, 0xBC, 0xF6, 0xE9, 0xBE, 0x8D, 0xF6, 0xE6, + 0x9A, 0x88, 0xF6, 0xE9, 0x98, 0xAE, 0xF6, 0xE5, + 0x8A, 0x89, 0xF6, 0xE6, 0x9D, 0xBB, 0xF6, 0xE6, + 0x9F, 0xB3, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 
0xE6, + 0xBA, 0x9C, 0xF6, 0xE7, 0x90, 0x89, 0xF6, 0xE7, + 0x95, 0x99, 0xF6, 0xE7, 0xA1, 0xAB, 0xF6, 0xE7, + 0xB4, 0x90, 0xF6, 0xE9, 0xA1, 0x9E, 0xF6, 0xE5, + 0x85, 0xAD, 0xF6, 0xE6, 0x88, 0xAE, 0xF6, 0xE9, + 0x99, 0xB8, 0xF6, 0xE5, 0x80, 0xAB, 0xF6, 0xE5, + 0xB4, 0x99, 0xF6, 0xE6, 0xB7, 0xAA, 0xF6, 0xE8, + 0xBC, 0xAA, 0xF6, 0xE5, 0xBE, 0x8B, 0xF6, 0xE6, + 0x85, 0x84, 0xF6, 0xE6, 0xA0, 0x97, 0xF6, 0xE7, + 0x8E, 0x87, 0xF6, 0xE9, 0x9A, 0x86, 0xF6, 0xE5, + 0x88, 0xA9, 0xF6, 0xE5, 0x90, 0x8F, 0xF6, 0xE5, + 0xB1, 0xA5, 0xF6, 0xE6, 0x98, 0x93, 0xF6, 0xE6, + 0x9D, 0x8E, 0xF6, 0xE6, 0xA2, 0xA8, 0xF6, 0xE6, + 0xB3, 0xA5, 0xF6, 0xE7, 0x90, 0x86, 0xF6, 0xE7, + 0x97, 0xA2, 0xF6, 0xE7, 0xBD, 0xB9, 0xF6, 0xE8, + 0xA3, 0x8F, 0xF6, 0xE8, 0xA3, 0xA1, 0xF6, 0xE9, + 0x87, 0x8C, 0xF6, 0xE9, 0x9B, 0xA2, 0xF6, 0xE5, + 0x8C, 0xBF, 0xF6, 0xE6, 0xBA, 0xBA, 0xF6, 0xE5, + 0x90, 0x9D, 0xF6, 0xE7, 0x87, 0x90, 0xF6, 0xE7, + 0x92, 0x98, 0xF6, 0xE8, 0x97, 0xBA, 0xF6, 0xE9, + 0x9A, 0xA3, 0xF6, 0xE9, 0xB1, 0x97, 0xF6, 0xE9, + 0xBA, 0x9F, 0xF6, 0xE6, 0x9E, 0x97, 0xF6, 0xE6, + 0xB7, 0x8B, 0xF6, 0xE8, 0x87, 0xA8, 0xF6, 0xE7, + 0xAB, 0x8B, 0xF6, 0xE7, 0xAC, 0xA0, 0xF6, 0xE7, + 0xB2, 0x92, 0xF6, 0xE7, 0x8B, 0x80, 0xF6, 0xE7, + 0x82, 0x99, 0xF6, 0xE8, 0xAD, 0x98, 0xF6, 0xE4, + 0xBB, 0x80, 0xF6, 0xE8, 0x8C, 0xB6, 0xF6, 0xE5, + 0x88, 0xBA, 0xF6, 0xE5, 0x88, 0x87, 0xF6, 0xE5, + 0xBA, 0xA6, 0xF6, 0xE6, 0x8B, 0x93, 0xF6, 0xE7, + 0xB3, 0x96, 0xF6, 0xE5, 0xAE, 0x85, 0xF6, 0xE6, + 0xB4, 0x9E, 0xF6, 0xE6, 0x9A, 0xB4, 0xF6, 0xE8, + 0xBC, 0xBB, 0xF6, 0xE8, 0xA1, 0x8C, 0xF6, 0xE9, + 0x99, 0x8D, 0xF6, 0xE8, 0xA6, 0x8B, 0xF6, 0xE5, + 0xBB, 0x93, 0xF6, 0xE5, 0x85, 0x80, 0xF6, 0xE5, + 0x97, 0x80, 0xF6, 0xE5, 0xA1, 0x9A, 0xF6, 0xE6, + 0x99, 0xB4, 0xF6, 0xE5, 0x87, 0x9E, 0xF6, 0xE7, + 0x8C, 0xAA, 0xF6, 0xE7, 0x9B, 0x8A, 0xF6, 0xE7, + 0xA4, 0xBC, 0xF6, 0xE7, 0xA5, 0x9E, 0xF6, 0xE7, + 0xA5, 0xA5, 0xF6, 0xE7, 0xA6, 0x8F, 0xF6, 0xE9, + 0x9D, 0x96, 0xF6, 0xE7, 0xB2, 0xBE, 0xF6, 0xE7, + 0xBE, 0xBD, 0xF6, 0xE8, 0x98, 0x92, 0xF6, 
0xE8, + 0xAB, 0xB8, 0xF6, 0xE9, 0x80, 0xB8, 0xF6, 0xE9, + 0x83, 0xBD, 0xF6, 0xE9, 0xA3, 0xAF, 0xF6, 0xE9, + 0xA3, 0xBC, 0xF6, 0xE9, 0xA4, 0xA8, 0xF6, 0xE9, + 0xB6, 0xB4, 0xF6, 0xE4, 0xBE, 0xAE, 0xF6, 0xE5, + 0x83, 0xA7, 0xF6, 0xE5, 0x85, 0x8D, 0xF6, 0xE5, + 0x8B, 0x89, 0xF6, 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, + 0x8D, 0x91, 0xF6, 0xE5, 0x96, 0x9D, 0xF6, 0xE5, + 0x98, 0x86, 0xF6, 0xE5, 0x99, 0xA8, 0xF6, 0xE5, + 0xA1, 0x80, 0xF6, 0xE5, 0xA2, 0xA8, 0xF6, 0xE5, + 0xB1, 0xA4, 0xF6, 0xE5, 0xB1, 0xAE, 0xF6, 0xE6, + 0x82, 0x94, 0xF6, 0xE6, 0x85, 0xA8, 0xF6, 0xE6, + 0x86, 0x8E, 0xF6, 0xE6, 0x87, 0xB2, 0xF6, 0xE6, + 0x95, 0x8F, 0xF6, 0xE6, 0x97, 0xA2, 0xF6, 0xE6, + 0x9A, 0x91, 0xF6, 0xE6, 0xA2, 0x85, 0xF6, 0xE6, + 0xB5, 0xB7, 0xF6, 0xE6, 0xB8, 0x9A, 0xF6, 0xE6, + 0xBC, 0xA2, 0xF6, 0xE7, 0x85, 0xAE, 0xF6, 0xE7, + 0x88, 0xAB, 0xF6, 0xE7, 0x90, 0xA2, 0xF6, 0xE7, + 0xA2, 0x91, 0xF6, 0xE7, 0xA4, 0xBE, 0xF6, 0xE7, + 0xA5, 0x89, 0xF6, 0xE7, 0xA5, 0x88, 0xF6, 0xE7, + 0xA5, 0x90, 0xF6, 0xE7, 0xA5, 0x96, 0xF6, 0xE7, + 0xA5, 0x9D, 0xF6, 0xE7, 0xA6, 0x8D, 0xF6, 0xE7, + 0xA6, 0x8E, 0xF6, 0xE7, 0xA9, 0x80, 0xF6, 0xE7, + 0xAA, 0x81, 0xF6, 0xE7, 0xAF, 0x80, 0xF6, 0xE7, + 0xB7, 0xB4, 0xF6, 0xE7, 0xB8, 0x89, 0xF6, 0xE7, + 0xB9, 0x81, 0xF6, 0xE7, 0xBD, 0xB2, 0xF6, 0xE8, + 0x80, 0x85, 0xF6, 0xE8, 0x87, 0xAD, 0xF6, 0xE8, + 0x89, 0xB9, 0xF6, 0xE8, 0x89, 0xB9, 0xF6, 0xE8, + 0x91, 0x97, 0xF6, 0xE8, 0xA4, 0x90, 0xF6, 0xE8, + 0xA6, 0x96, 0xF6, 0xE8, 0xAC, 0x81, 0xF6, 0xE8, + 0xAC, 0xB9, 0xF6, 0xE8, 0xB3, 0x93, 0xF6, 0xE8, + 0xB4, 0x88, 0xF6, 0xE8, 0xBE, 0xB6, 0xF6, 0xE9, + 0x80, 0xB8, 0xF6, 0xE9, 0x9B, 0xA3, 0xF6, 0xE9, + 0x9F, 0xBF, 0xF6, 0xE9, 0xA0, 0xBB, 0xF6, 0xE4, + 0xB8, 0xA6, 0xF6, 0xE5, 0x86, 0xB5, 0xF6, 0xE5, + 0x85, 0xA8, 0xF6, 0xE4, 0xBE, 0x80, 0xF6, 0xE5, + 0x85, 0x85, 0xF6, 0xE5, 0x86, 0x80, 0xF6, 0xE5, + 0x8B, 0x87, 0xF6, 0xE5, 0x8B, 0xBA, 0xF6, 0xE5, + 0x96, 0x9D, 0xF6, 0xE5, 0x95, 0x95, 0xF6, 0xE5, + 0x96, 0x99, 0xF6, 0xE5, 0x97, 0xA2, 0xF6, 0xE5, + 0xA1, 0x9A, 0xF6, 0xE5, 0xA2, 0xB3, 0xF6, 
0xE5, + 0xA5, 0x84, 0xF6, 0xE5, 0xA5, 0x94, 0xF6, 0xE5, + 0xA9, 0xA2, 0xF6, 0xE5, 0xAC, 0xA8, 0xF6, 0xE5, + 0xBB, 0x92, 0xF6, 0xE5, 0xBB, 0x99, 0xF6, 0xE5, + 0xBD, 0xA9, 0xF6, 0xE5, 0xBE, 0xAD, 0xF6, 0xE6, + 0x83, 0x98, 0xF6, 0xE6, 0x85, 0x8E, 0xF6, 0xE6, + 0x84, 0x88, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, 0xE6, + 0x85, 0xA0, 0xF6, 0xE6, 0x87, 0xB2, 0xF6, 0xE6, + 0x88, 0xB4, 0xF6, 0xE6, 0x8F, 0x84, 0xF6, 0xE6, + 0x90, 0x9C, 0xF6, 0xE6, 0x91, 0x92, 0xF6, 0xE6, + 0x95, 0x96, 0xF6, 0xE6, 0x99, 0xB4, 0xF6, 0xE6, + 0x9C, 0x97, 0xF6, 0xE6, 0x9C, 0x9B, 0xF6, 0xE6, + 0x9D, 0x96, 0xF6, 0xE6, 0xAD, 0xB9, 0xF6, 0xE6, + 0xAE, 0xBA, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, + 0xBB, 0x9B, 0xF6, 0xE6, 0xBB, 0x8B, 0xF6, 0xE6, + 0xBC, 0xA2, 0xF6, 0xE7, 0x80, 0x9E, 0xF6, 0xE7, + 0x85, 0xAE, 0xF6, 0xE7, 0x9E, 0xA7, 0xF6, 0xE7, + 0x88, 0xB5, 0xF6, 0xE7, 0x8A, 0xAF, 0xF6, 0xE7, + 0x8C, 0xAA, 0xF6, 0xE7, 0x91, 0xB1, 0xF6, 0xE7, + 0x94, 0x86, 0xF6, 0xE7, 0x94, 0xBB, 0xF6, 0xE7, + 0x98, 0x9D, 0xF6, 0xE7, 0x98, 0x9F, 0xF6, 0xE7, + 0x9B, 0x8A, 0xF6, 0xE7, 0x9B, 0x9B, 0xF6, 0xE7, + 0x9B, 0xB4, 0xF6, 0xE7, 0x9D, 0x8A, 0xF6, 0xE7, + 0x9D, 0x80, 0xF6, 0xE7, 0xA3, 0x8C, 0xF6, 0xE7, + 0xAA, 0xB1, 0xF6, 0xE7, 0xAF, 0x80, 0xF6, 0xE7, + 0xB1, 0xBB, 0xF6, 0xE7, 0xB5, 0x9B, 0xF6, 0xE7, + 0xB7, 0xB4, 0xF6, 0xE7, 0xBC, 0xBE, 0xF6, 0xE8, + 0x80, 0x85, 0xF6, 0xE8, 0x8D, 0x92, 0xF6, 0xE8, + 0x8F, 0xAF, 0xF6, 0xE8, 0x9D, 0xB9, 0xF6, 0xE8, + 0xA5, 0x81, 0xF6, 0xE8, 0xA6, 0x86, 0xF6, 0xE8, + 0xA6, 0x96, 0xF6, 0xE8, 0xAA, 0xBF, 0xF6, 0xE8, + 0xAB, 0xB8, 0xF6, 0xE8, 0xAB, 0x8B, 0xF6, 0xE8, + 0xAC, 0x81, 0xF6, 0xE8, 0xAB, 0xBE, 0xF6, 0xE8, + 0xAB, 0xAD, 0xF6, 0xE8, 0xAC, 0xB9, 0xF6, 0xE8, + 0xAE, 0x8A, 0xF6, 0xE8, 0xB4, 0x88, 0xF6, 0xE8, + 0xBC, 0xB8, 0xF6, 0xE9, 0x81, 0xB2, 0xF6, 0xE9, + 0x86, 0x99, 0xF6, 0xE9, 0x89, 0xB6, 0xF6, 0xE9, + 0x99, 0xBC, 0xF6, 0xE9, 0x9B, 0xA3, 0xF6, 0xE9, + 0x9D, 0x96, 0xF6, 0xE9, 0x9F, 0x9B, 0xF6, 0xE9, + 0x9F, 0xBF, 0xF6, 0xE9, 0xA0, 0x8B, 0xF6, 0xE9, + 0xA0, 0xBB, 0xF6, 0xE9, 0xAC, 0x92, 0xF6, 
0xE9, + 0xBE, 0x9C, 0xF6, 0xF0, 0xA2, 0xA1, 0x8A, 0xF6, + 0xF0, 0xA2, 0xA1, 0x84, 0xF6, 0xF0, 0xA3, 0x8F, + 0x95, 0xF6, 0xE3, 0xAE, 0x9D, 0xF6, 0xE4, 0x80, + 0x98, 0xF6, 0xE4, 0x80, 0xB9, 0xF6, 0xF0, 0xA5, + 0x89, 0x89, 0xF6, 0xF0, 0xA5, 0xB3, 0x90, 0xF6, + 0xF0, 0xA7, 0xBB, 0x93, 0xF6, 0xE9, 0xBD, 0x83, + 0xF6, 0xE9, 0xBE, 0x8E, 0x66, 0x66, 0x66, 0x69, + 0x66, 0x6C, 0x66, 0x66, 0x69, 0x66, 0x66, 0x6C, + 0x73, 0x74, 0x73, 0x74, 0xD5, 0xB4, 0xD5, 0xB6, + 0xD5, 0xB4, 0xD5, 0xA5, 0xD5, 0xB4, 0xD5, 0xAB, + 0xD5, 0xBE, 0xD5, 0xB6, 0xD5, 0xB4, 0xD5, 0xAD, + 0xF6, 0xD7, 0x99, 0xD6, 0xB4, 0xF6, 0xD7, 0xB2, + 0xD6, 0xB7, 0xD7, 0xA2, 0xD7, 0x90, 0xD7, 0x93, + 0xD7, 0x94, 0xD7, 0x9B, 0xD7, 0x9C, 0xD7, 0x9D, + 0xD7, 0xA8, 0xD7, 0xAA, 0x2B, 0xF6, 0xD7, 0xA9, + 0xD7, 0x81, 0xF6, 0xD7, 0xA9, 0xD7, 0x82, 0xF6, + 0xD7, 0xA9, 0xD6, 0xBC, 0xD7, 0x81, 0xF6, 0xD7, + 0xA9, 0xD6, 0xBC, 0xD7, 0x82, 0xF6, 0xD7, 0x90, + 0xD6, 0xB7, 0xF6, 0xD7, 0x90, 0xD6, 0xB8, 0xF6, + 0xD7, 0x90, 0xD6, 0xBC, 0xF6, 0xD7, 0x91, 0xD6, + 0xBC, 0xF6, 0xD7, 0x92, 0xD6, 0xBC, 0xF6, 0xD7, + 0x93, 0xD6, 0xBC, 0xF6, 0xD7, 0x94, 0xD6, 0xBC, + 0xF6, 0xD7, 0x95, 0xD6, 0xBC, 0xF6, 0xD7, 0x96, + 0xD6, 0xBC, 0xF6, 0xD7, 0x98, 0xD6, 0xBC, 0xF6, + 0xD7, 0x99, 0xD6, 0xBC, 0xF6, 0xD7, 0x9A, 0xD6, + 0xBC, 0xF6, 0xD7, 0x9B, 0xD6, 0xBC, 0xF6, 0xD7, + 0x9C, 0xD6, 0xBC, 0xF6, 0xD7, 0x9E, 0xD6, 0xBC, + 0xF6, 0xD7, 0xA0, 0xD6, 0xBC, 0xF6, 0xD7, 0xA1, + 0xD6, 0xBC, 0xF6, 0xD7, 0xA3, 0xD6, 0xBC, 0xF6, + 0xD7, 0xA4, 0xD6, 0xBC, 0xF6, 0xD7, 0xA6, 0xD6, + 0xBC, 0xF6, 0xD7, 0xA7, 0xD6, 0xBC, 0xF6, 0xD7, + 0xA8, 0xD6, 0xBC, 0xF6, 0xD7, 0xA9, 0xD6, 0xBC, + 0xF6, 0xD7, 0xAA, 0xD6, 0xBC, 0xF6, 0xD7, 0x95, + 0xD6, 0xB9, 0xF6, 0xD7, 0x91, 0xD6, 0xBF, 0xF6, + 0xD7, 0x9B, 0xD6, 0xBF, 0xF6, 0xD7, 0xA4, 0xD6, + 0xBF, 0xD7, 0x90, 0xD7, 0x9C, 0xD9, 0xB1, 0xD9, + 0xB1, 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, + 0xBB, 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, + 0xBE, 0xDA, 0x80, 0xDA, 0x80, 0xDA, 0x80, 0xDA, + 0x80, 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, 0xBA, 
0xD9, + 0xBA, 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, + 0xBF, 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, + 0xB9, 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, + 0xA4, 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, + 0xA6, 0xDA, 0x84, 0xDA, 0x84, 0xDA, 0x84, 0xDA, + 0x84, 0xDA, 0x83, 0xDA, 0x83, 0xDA, 0x83, 0xDA, + 0x83, 0xDA, 0x86, 0xDA, 0x86, 0xDA, 0x86, 0xDA, + 0x86, 0xDA, 0x87, 0xDA, 0x87, 0xDA, 0x87, 0xDA, + 0x87, 0xDA, 0x8D, 0xDA, 0x8D, 0xDA, 0x8C, 0xDA, + 0x8C, 0xDA, 0x8E, 0xDA, 0x8E, 0xDA, 0x88, 0xDA, + 0x88, 0xDA, 0x98, 0xDA, 0x98, 0xDA, 0x91, 0xDA, + 0x91, 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, + 0xA9, 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, + 0xAF, 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, + 0xB3, 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, + 0xB1, 0xDA, 0xBA, 0xDA, 0xBA, 0xDA, 0xBB, 0xDA, + 0xBB, 0xDA, 0xBB, 0xDA, 0xBB, 0xDB, 0x95, 0xD9, + 0x94, 0xDB, 0x95, 0xD9, 0x94, 0xDB, 0x81, 0xDB, + 0x81, 0xDB, 0x81, 0xDB, 0x81, 0xDA, 0xBE, 0xDA, + 0xBE, 0xDA, 0xBE, 0xDA, 0xBE, 0xDB, 0x92, 0xDB, + 0x92, 0xDB, 0x92, 0xD9, 0x94, 0xDB, 0x92, 0xD9, + 0x94, 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, + 0xAD, 0xDB, 0x87, 0xDB, 0x87, 0xDB, 0x86, 0xDB, + 0x86, 0xDB, 0x88, 0xDB, 0x88, 0xDB, 0x87, 0xD9, + 0xB4, 0xDB, 0x8B, 0xDB, 0x8B, 0xDB, 0x85, 0xDB, + 0x85, 0xDB, 0x89, 0xDB, 0x89, 0xDB, 0x90, 0xDB, + 0x90, 0xDB, 0x90, 0xDB, 0x90, 0xD9, 0x89, 0xD9, + 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, + 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x8A, 0xD9, + 0x94, 0xDB, 0x95, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, + 0x95, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x88, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x8A, 0xD9, + 0x94, 0xDB, 0x87, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, + 0x87, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x86, 0xD9, + 0x8A, 0xD9, 0x94, 0xDB, 0x86, 0xD9, 0x8A, 0xD9, + 0x94, 0xDB, 0x88, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, + 0x88, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x90, 0xD9, + 0x8A, 0xD9, 0x94, 0xDB, 0x90, 0xD9, 0x8A, 0xD9, + 0x94, 0xDB, 0x90, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, + 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x89, 
0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xDB, 0x8C, 0xDB, + 0x8C, 0xDB, 0x8C, 0xDB, 0x8C, 0xD9, 0x8A, 0xD9, + 0x94, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, + 0xAD, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xD9, 0x8A, 0xD9, + 0x94, 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, + 0xA8, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, + 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x89, 0xD8, + 0xA8, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD8, + 0xAA, 0xD8, 0xAD, 0xD8, 0xAA, 0xD8, 0xAE, 0xD8, + 0xAA, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x89, 0xD8, + 0xAA, 0xD9, 0x8A, 0xD8, 0xAB, 0xD8, 0xAC, 0xD8, + 0xAB, 0xD9, 0x85, 0xD8, 0xAB, 0xD9, 0x89, 0xD8, + 0xAB, 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, + 0xAC, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, + 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD8, + 0xAE, 0xD8, 0xAD, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, + 0xB3, 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, + 0xB3, 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, + 0xB5, 0xD8, 0xAD, 0xD8, 0xB5, 0xD9, 0x85, 0xD8, + 0xB6, 0xD8, 0xAC, 0xD8, 0xB6, 0xD8, 0xAD, 0xD8, + 0xB6, 0xD8, 0xAE, 0xD8, 0xB6, 0xD9, 0x85, 0xD8, + 0xB7, 0xD8, 0xAD, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, + 0xB8, 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, + 0xB9, 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, + 0xBA, 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, + 0x81, 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, + 0x81, 0xD9, 0x85, 0xD9, 0x81, 0xD9, 0x89, 0xD9, + 0x81, 0xD9, 0x8A, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, + 0x82, 0xD9, 0x85, 0xD9, 0x82, 0xD9, 0x89, 0xD9, + 0x82, 0xD9, 0x8A, 0xD9, 0x83, 0xD8, 0xA7, 0xD9, + 0x83, 0xD8, 0xAC, 0xD9, 0x83, 0xD8, 0xAD, 0xD9, + 0x83, 0xD8, 0xAE, 0xD9, 0x83, 0xD9, 0x84, 0xD9, + 0x83, 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x89, 0xD9, + 0x83, 0xD9, 0x8A, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, + 0x84, 0xD8, 0xAD, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, + 0x84, 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x89, 0xD9, + 0x84, 0xD9, 0x8A, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, + 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, + 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x89, 
0xD9, + 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, + 0x86, 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, + 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x89, 0xD9, + 0x86, 0xD9, 0x8A, 0xD9, 0x87, 0xD8, 0xAC, 0xD9, + 0x87, 0xD9, 0x85, 0xD9, 0x87, 0xD9, 0x89, 0xD9, + 0x87, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, + 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x89, 0xD9, + 0x8A, 0xD9, 0x8A, 0xD8, 0xB0, 0xD9, 0xB0, 0xD8, + 0xB1, 0xD9, 0xB0, 0xD9, 0x89, 0xD9, 0xB0, 0x20, + 0xD9, 0x8C, 0xD9, 0x91, 0x20, 0xD9, 0x8D, 0xD9, + 0x91, 0x20, 0xD9, 0x8E, 0xD9, 0x91, 0x20, 0xD9, + 0x8F, 0xD9, 0x91, 0x20, 0xD9, 0x90, 0xD9, 0x91, + 0x20, 0xD9, 0x91, 0xD9, 0xB0, 0xD9, 0x8A, 0xD9, + 0x94, 0xD8, 0xB1, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, + 0xB2, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x86, 0xD9, 0x8A, 0xD9, + 0x94, 0xD9, 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, + 0x8A, 0xD8, 0xA8, 0xD8, 0xB1, 0xD8, 0xA8, 0xD8, + 0xB2, 0xD8, 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, + 0x86, 0xD8, 0xA8, 0xD9, 0x89, 0xD8, 0xA8, 0xD9, + 0x8A, 0xD8, 0xAA, 0xD8, 0xB1, 0xD8, 0xAA, 0xD8, + 0xB2, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, + 0x86, 0xD8, 0xAA, 0xD9, 0x89, 0xD8, 0xAA, 0xD9, + 0x8A, 0xD8, 0xAB, 0xD8, 0xB1, 0xD8, 0xAB, 0xD8, + 0xB2, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAB, 0xD9, + 0x86, 0xD8, 0xAB, 0xD9, 0x89, 0xD8, 0xAB, 0xD9, + 0x8A, 0xD9, 0x81, 0xD9, 0x89, 0xD9, 0x81, 0xD9, + 0x8A, 0xD9, 0x82, 0xD9, 0x89, 0xD9, 0x82, 0xD9, + 0x8A, 0xD9, 0x83, 0xD8, 0xA7, 0xD9, 0x83, 0xD9, + 0x84, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x83, 0xD9, + 0x89, 0xD9, 0x83, 0xD9, 0x8A, 0xD9, 0x84, 0xD9, + 0x85, 0xD9, 0x84, 0xD9, 0x89, 0xD9, 0x84, 0xD9, + 0x8A, 0xD9, 0x85, 0xD8, 0xA7, 0xD9, 0x85, 0xD9, + 0x85, 0xD9, 0x86, 0xD8, 0xB1, 0xD9, 0x86, 0xD8, + 0xB2, 0xD9, 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, + 0x86, 0xD9, 0x86, 0xD9, 0x89, 0xD9, 0x86, 0xD9, + 0x8A, 0xD9, 0x89, 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, + 0xB1, 0xD9, 0x8A, 0xD8, 0xB2, 0xD9, 0x8A, 0xD9, + 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x8A, 
0xD9, + 0x89, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, + 0x94, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, + 0xAD, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAE, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, + 0x94, 0xD9, 0x87, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, + 0xA8, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, + 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x87, 0xD8, + 0xAA, 0xD8, 0xAC, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, + 0xAA, 0xD8, 0xAE, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, + 0xAA, 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, + 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, + 0xAD, 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, + 0xAE, 0xD8, 0xAC, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, + 0xB3, 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, + 0xB3, 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, + 0xB5, 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAE, 0xD8, + 0xB5, 0xD9, 0x85, 0xD8, 0xB6, 0xD8, 0xAC, 0xD8, + 0xB6, 0xD8, 0xAD, 0xD8, 0xB6, 0xD8, 0xAE, 0xD8, + 0xB6, 0xD9, 0x85, 0xD8, 0xB7, 0xD8, 0xAD, 0xD8, + 0xB8, 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, + 0xB9, 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, + 0xBA, 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, + 0x81, 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, + 0x81, 0xD9, 0x85, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, + 0x82, 0xD9, 0x85, 0xD9, 0x83, 0xD8, 0xAC, 0xD9, + 0x83, 0xD8, 0xAD, 0xD9, 0x83, 0xD8, 0xAE, 0xD9, + 0x83, 0xD9, 0x84, 0xD9, 0x83, 0xD9, 0x85, 0xD9, + 0x84, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, + 0x84, 0xD8, 0xAE, 0xD9, 0x84, 0xD9, 0x85, 0xD9, + 0x84, 0xD9, 0x87, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, + 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, + 0x85, 0xD9, 0x85, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, + 0x86, 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, + 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, + 0x87, 0xD8, 0xAC, 0xD9, 0x87, 0xD9, 0x85, 0xD9, + 0x87, 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, + 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, + 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, + 0x94, 0xD9, 0x87, 0xD8, 0xA8, 0xD9, 0x85, 
0xD8, + 0xA8, 0xD9, 0x87, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, + 0xAA, 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, + 0xAB, 0xD9, 0x87, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, + 0xB3, 0xD9, 0x87, 0xD8, 0xB4, 0xD9, 0x85, 0xD8, + 0xB4, 0xD9, 0x87, 0xD9, 0x83, 0xD9, 0x84, 0xD9, + 0x83, 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x85, 0xD9, + 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, + 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, + 0x80, 0xD9, 0x8E, 0xD9, 0x91, 0xD9, 0x80, 0xD9, + 0x8F, 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x90, 0xD9, + 0x91, 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, + 0x8A, 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, + 0x8A, 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, + 0x8A, 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, + 0x8A, 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, + 0x8A, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, + 0x8A, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, + 0x8A, 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, + 0x8A, 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, + 0x8A, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, + 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, + 0x85, 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, + 0xB1, 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, + 0xB1, 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, + 0x8A, 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, + 0x8A, 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, + 0x8A, 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, + 0x8A, 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, + 0x8A, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, + 0x8A, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, + 0x8A, 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, + 0x8A, 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, + 0x8A, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, + 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, + 0x85, 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, + 0xB1, 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, + 0xB1, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, + 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 
0xD9, + 0x85, 0xD8, 0xB3, 0xD9, 0x87, 0xD8, 0xB4, 0xD9, + 0x87, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, + 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, 0xD8, + 0xAE, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, + 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB7, 0xD9, + 0x85, 0xD8, 0xB8, 0xD9, 0x85, 0xD8, 0xA7, 0xD9, + 0x8B, 0xD8, 0xA7, 0xD9, 0x8B, 0xD8, 0xAA, 0xD8, + 0xAC, 0xD9, 0x85, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, + 0xAC, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, + 0xAA, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAA, 0xD8, + 0xAE, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, + 0xAC, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, + 0xAA, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, + 0x85, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, + 0xAD, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, + 0xAD, 0xD9, 0x85, 0xD9, 0x89, 0xD8, 0xB3, 0xD8, + 0xAD, 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAC, 0xD8, + 0xAD, 0xD8, 0xB3, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, + 0xB3, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, + 0x85, 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, + 0xAC, 0xD8, 0xB3, 0xD9, 0x85, 0xD9, 0x85, 0xD8, + 0xB3, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB5, 0xD8, + 0xAD, 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAD, 0xD8, + 0xAD, 0xD8, 0xB5, 0xD9, 0x85, 0xD9, 0x85, 0xD8, + 0xB4, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB4, 0xD8, + 0xAD, 0xD9, 0x85, 0xD8, 0xB4, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xB4, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, + 0xB4, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, + 0x85, 0xD9, 0x85, 0xD8, 0xB4, 0xD9, 0x85, 0xD9, + 0x85, 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, + 0xB6, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB6, 0xD8, + 0xAE, 0xD9, 0x85, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, + 0xAD, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, + 0xB7, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB7, 0xD9, + 0x85, 0xD9, 0x8A, 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, + 0x85, 0xD8, 0xB9, 0xD9, 0x85, 0xD9, 0x85, 0xD8, + 0xB9, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, + 0x85, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x85, 0xD9, + 0x85, 0xD8, 0xBA, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, + 0xBA, 0xD9, 0x85, 0xD9, 0x89, 0xD9, 0x81, 
0xD8, + 0xAE, 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, + 0x85, 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, + 0x82, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x84, 0xD8, + 0xAD, 0xD9, 0x85, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, + 0x8A, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x89, 0xD9, + 0x84, 0xD8, 0xAC, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, + 0xAC, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, + 0x85, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x85, 0xD9, + 0x84, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x84, 0xD9, + 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, + 0xAC, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, + 0x85, 0xD8, 0xAD, 0xD9, 0x8A, 0xD9, 0x85, 0xD8, + 0xAC, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, + 0x85, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, + 0x85, 0xD8, 0xAE, 0xD9, 0x85, 0xD9, 0x85, 0xD8, + 0xAC, 0xD8, 0xAE, 0xD9, 0x87, 0xD9, 0x85, 0xD8, + 0xAC, 0xD9, 0x87, 0xD9, 0x85, 0xD9, 0x85, 0xD9, + 0x86, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x86, 0xD8, + 0xAD, 0xD9, 0x89, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, + 0x85, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, + 0x86, 0xD8, 0xAC, 0xD9, 0x89, 0xD9, 0x86, 0xD9, + 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x85, 0xD9, + 0x89, 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x85, 0xD9, + 0x8A, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xA8, 0xD8, + 0xAE, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, + 0xAA, 0xD8, 0xAE, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, + 0xAE, 0xD9, 0x89, 0xD8, 0xAA, 0xD9, 0x85, 0xD9, + 0x8A, 0xD8, 0xAA, 0xD9, 0x85, 0xD9, 0x89, 0xD8, + 0xAC, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, + 0xAD, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, + 0x89, 0xD8, 0xB3, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, + 0xB5, 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xB4, 0xD8, + 0xAD, 0xD9, 0x8A, 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, + 0x8A, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, + 0x84, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, + 0xAD, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, + 0x85, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x82, 0xD9, + 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAD, 
0xD9, + 0x8A, 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, + 0x84, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, + 0x85, 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, + 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, + 0x85, 0xD8, 0xAE, 0xD9, 0x8A, 0xD9, 0x84, 0xD8, + 0xAC, 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x85, 0xD9, + 0x85, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, + 0x86, 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, + 0xAD, 0xD9, 0x8A, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, + 0x81, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, + 0xAD, 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, + 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, + 0xB5, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, + 0xAE, 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, + 0x8A, 0xD8, 0xB5, 0xD9, 0x84, 0xDB, 0x92, 0xD9, + 0x82, 0xD9, 0x84, 0xDB, 0x92, 0xD8, 0xA7, 0xD9, + 0x84, 0xD9, 0x84, 0xD9, 0x87, 0xD8, 0xA7, 0xD9, + 0x83, 0xD8, 0xA8, 0xD8, 0xB1, 0xD9, 0x85, 0xD8, + 0xAD, 0xD9, 0x85, 0xD8, 0xAF, 0xD8, 0xB5, 0xD9, + 0x84, 0xD8, 0xB9, 0xD9, 0x85, 0xD8, 0xB1, 0xD8, + 0xB3, 0xD9, 0x88, 0xD9, 0x84, 0xD8, 0xB9, 0xD9, + 0x84, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x88, 0xD8, + 0xB3, 0xD9, 0x84, 0xD9, 0x85, 0xD8, 0xB5, 0xD9, + 0x84, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x84, 0xD9, + 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x84, + 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, + 0x8A, 0xD9, 0x87, 0x20, 0xD9, 0x88, 0xD8, 0xB3, + 0xD9, 0x84, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x84, + 0x20, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, + 0x84, 0xD9, 0x87, 0xD8, 0xB1, 0xDB, 0x8C, 0xD8, + 0xA7, 0xD9, 0x84, 0x2C, 0xE3, 0x80, 0x81, 0xE3, + 0x80, 0x82, 0x3A, 0x3B, 0x21, 0x3F, 0xE3, 0x80, + 0x96, 0xE3, 0x80, 0x97, 0x2E, 0x2E, 0x2E, 0x2E, + 0x2E, 0xE2, 0x80, 0x94, 0xE2, 0x80, 0x93, 0x5F, + 0x5F, 0x28, 0x29, 0x7B, 0x7D, 0xE3, 0x80, 0x94, + 0xE3, 0x80, 0x95, 0xE3, 0x80, 0x90, 0xE3, 0x80, + 0x91, 0xE3, 0x80, 0x8A, 0xE3, 0x80, 0x8B, 0xE3, + 0x80, 0x88, 0xE3, 0x80, 0x89, 0xE3, 0x80, 0x8C, + 0xE3, 0x80, 0x8D, 0xE3, 0x80, 0x8E, 0xE3, 
0x80, + 0x8F, 0x5B, 0x5D, 0x20, 0xCC, 0x85, 0x20, 0xCC, + 0x85, 0x20, 0xCC, 0x85, 0x20, 0xCC, 0x85, 0x5F, + 0x5F, 0x5F, 0x2C, 0xE3, 0x80, 0x81, 0x2E, 0x3B, + 0x3A, 0x3F, 0x21, 0xE2, 0x80, 0x94, 0x28, 0x29, + 0x7B, 0x7D, 0xE3, 0x80, 0x94, 0xE3, 0x80, 0x95, + 0x23, 0x26, 0x2A, 0x2B, 0x2D, 0x3C, 0x3E, 0x3D, + 0x5C, 0x24, 0x25, 0x40, 0x20, 0xD9, 0x8B, 0xD9, + 0x80, 0xD9, 0x8B, 0x20, 0xD9, 0x8C, 0x20, 0xD9, + 0x8D, 0x20, 0xD9, 0x8E, 0xD9, 0x80, 0xD9, 0x8E, + 0x20, 0xD9, 0x8F, 0xD9, 0x80, 0xD9, 0x8F, 0x20, + 0xD9, 0x90, 0xD9, 0x80, 0xD9, 0x90, 0x20, 0xD9, + 0x91, 0xD9, 0x80, 0xD9, 0x91, 0x20, 0xD9, 0x92, + 0xD9, 0x80, 0xD9, 0x92, 0xD8, 0xA1, 0xD8, 0xA7, + 0xD9, 0x93, 0xD8, 0xA7, 0xD9, 0x93, 0xD8, 0xA7, + 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, 0x88, + 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x94, 0xD8, 0xA7, + 0xD9, 0x95, 0xD8, 0xA7, 0xD9, 0x95, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x8A, + 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xA7, + 0xD8, 0xA7, 0xD8, 0xA8, 0xD8, 0xA8, 0xD8, 0xA8, + 0xD8, 0xA8, 0xD8, 0xA9, 0xD8, 0xA9, 0xD8, 0xAA, + 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, 0xAB, + 0xD8, 0xAB, 0xD8, 0xAB, 0xD8, 0xAB, 0xD8, 0xAC, + 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, 0xAD, + 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, 0xAE, + 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, 0xAF, + 0xD8, 0xAF, 0xD8, 0xB0, 0xD8, 0xB0, 0xD8, 0xB1, + 0xD8, 0xB1, 0xD8, 0xB2, 0xD8, 0xB2, 0xD8, 0xB3, + 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, 0xB4, + 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, 0xB5, + 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, 0xB6, + 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, 0xB7, + 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, 0xB8, + 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, 0xB9, + 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, 0xBA, + 0xD8, 0xBA, 0xD8, 0xBA, 0xD8, 0xBA, 0xD9, 0x81, + 0xD9, 0x81, 0xD9, 0x81, 0xD9, 0x81, 0xD9, 0x82, + 0xD9, 0x82, 0xD9, 0x82, 0xD9, 0x82, 0xD9, 0x83, + 0xD9, 0x83, 0xD9, 0x83, 0xD9, 0x83, 0xD9, 0x84, + 0xD9, 0x84, 0xD9, 0x84, 0xD9, 0x84, 0xD9, 
0x85, + 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x86, + 0xD9, 0x86, 0xD9, 0x86, 0xD9, 0x86, 0xD9, 0x87, + 0xD9, 0x87, 0xD9, 0x87, 0xD9, 0x87, 0xD9, 0x88, + 0xD9, 0x88, 0xD9, 0x89, 0xD9, 0x89, 0xD9, 0x8A, + 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x84, + 0xD8, 0xA7, 0xD9, 0x93, 0xD9, 0x84, 0xD8, 0xA7, + 0xD9, 0x93, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x94, + 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, 0x84, + 0xD8, 0xA7, 0xD9, 0x95, 0xD9, 0x84, 0xD8, 0xA7, + 0xD9, 0x95, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x84, + 0xD8, 0xA7, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, + 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, + 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, + 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, + 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, + 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, + 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, + 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, + 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, + 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, + 0xE2, 0xA6, 0x85, 0xE2, 0xA6, 0x86, 0xE3, 0x80, + 0x82, 0xE3, 0x80, 0x8C, 0xE3, 0x80, 0x8D, 0xE3, + 0x80, 0x81, 0xE3, 0x83, 0xBB, 0xE3, 0x83, 0xB2, + 0xE3, 0x82, 0xA1, 0xE3, 0x82, 0xA3, 0xE3, 0x82, + 0xA5, 0xE3, 0x82, 0xA7, 0xE3, 0x82, 0xA9, 0xE3, + 0x83, 0xA3, 0xE3, 0x83, 0xA5, 0xE3, 0x83, 0xA7, + 0xE3, 0x83, 0x83, 0xE3, 0x83, 0xBC, 0xE3, 0x82, + 0xA2, 0xE3, 0x82, 0xA4, 0xE3, 0x82, 0xA6, 0xE3, + 0x82, 0xA8, 0xE3, 0x82, 0xAA, 0xE3, 0x82, 0xAB, + 0xE3, 0x82, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x82, + 0xB1, 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0xB5, 0xE3, + 0x82, 0xB7, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xBB, + 0xE3, 0x82, 0xBD, 0xE3, 0x82, 0xBF, 0xE3, 0x83, + 0x81, 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x86, 0xE3, + 0x83, 0x88, 0xE3, 0x83, 0x8A, 0xE3, 0x83, 0x8B, + 0xE3, 0x83, 0x8C, 0xE3, 0x83, 0x8D, 0xE3, 0x83, + 0x8E, 0xE3, 0x83, 0x8F, 0xE3, 0x83, 0x92, 0xE3, + 0x83, 0x95, 0xE3, 0x83, 0x98, 0xE3, 0x83, 
0x9B, + 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0x9F, 0xE3, 0x83, + 0xA0, 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xA2, 0xE3, + 0x83, 0xA4, 0xE3, 0x83, 0xA6, 0xE3, 0x83, 0xA8, + 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xAA, 0xE3, 0x83, + 0xAB, 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xAD, 0xE3, + 0x83, 0xAF, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0x99, + 0xE3, 0x82, 0x9A, 0xE1, 0x85, 0xA0, 0xE1, 0x84, + 0x80, 0xE1, 0x84, 0x81, 0xE1, 0x86, 0xAA, 0xE1, + 0x84, 0x82, 0xE1, 0x86, 0xAC, 0xE1, 0x86, 0xAD, + 0xE1, 0x84, 0x83, 0xE1, 0x84, 0x84, 0xE1, 0x84, + 0x85, 0xE1, 0x86, 0xB0, 0xE1, 0x86, 0xB1, 0xE1, + 0x86, 0xB2, 0xE1, 0x86, 0xB3, 0xE1, 0x86, 0xB4, + 0xE1, 0x86, 0xB5, 0xE1, 0x84, 0x9A, 0xE1, 0x84, + 0x86, 0xE1, 0x84, 0x87, 0xE1, 0x84, 0x88, 0xE1, + 0x84, 0xA1, 0xE1, 0x84, 0x89, 0xE1, 0x84, 0x8A, + 0xE1, 0x84, 0x8B, 0xE1, 0x84, 0x8C, 0xE1, 0x84, + 0x8D, 0xE1, 0x84, 0x8E, 0xE1, 0x84, 0x8F, 0xE1, + 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, 0x84, 0x92, + 0xE1, 0x85, 0xA1, 0xE1, 0x85, 0xA2, 0xE1, 0x85, + 0xA3, 0xE1, 0x85, 0xA4, 0xE1, 0x85, 0xA5, 0xE1, + 0x85, 0xA6, 0xE1, 0x85, 0xA7, 0xE1, 0x85, 0xA8, + 0xE1, 0x85, 0xA9, 0xE1, 0x85, 0xAA, 0xE1, 0x85, + 0xAB, 0xE1, 0x85, 0xAC, 0xE1, 0x85, 0xAD, 0xE1, + 0x85, 0xAE, 0xE1, 0x85, 0xAF, 0xE1, 0x85, 0xB0, + 0xE1, 0x85, 0xB1, 0xE1, 0x85, 0xB2, 0xE1, 0x85, + 0xB3, 0xE1, 0x85, 0xB4, 0xE1, 0x85, 0xB5, 0xC2, + 0xA2, 0xC2, 0xA3, 0xC2, 0xAC, 0x20, 0xCC, 0x84, + 0xC2, 0xA6, 0xC2, 0xA5, 0xE2, 0x82, 0xA9, 0xE2, + 0x94, 0x82, 0xE2, 0x86, 0x90, 0xE2, 0x86, 0x91, + 0xE2, 0x86, 0x92, 0xE2, 0x86, 0x93, 0xE2, 0x96, + 0xA0, 0xE2, 0x97, 0x8B, 0xF6, 0xF0, 0x9D, 0x85, + 0x97, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, 0xF0, 0x9D, + 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, 0xF0, + 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, + 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x85, 0x98, + 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAF, + 0xF6, 0xF0, 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, + 0xA5, 0xF0, 0x9D, 0x85, 0xB0, 0xF6, 0xF0, 0x9D, + 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, + 0x85, 0xB1, 0xF6, 0xF0, 0x9D, 0x85, 0x98, 
0xF0, + 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xB2, 0xF6, + 0xF0, 0x9D, 0x86, 0xB9, 0xF0, 0x9D, 0x85, 0xA5, + 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, + 0xA5, 0xF6, 0xF0, 0x9D, 0x86, 0xB9, 0xF0, 0x9D, + 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAE, 0xF6, 0xF0, + 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, + 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x86, 0xB9, + 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAF, + 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, + 0xA5, 0xF0, 0x9D, 0x85, 0xAF, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, + 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, 0x44, + 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, + 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, + 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, + 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, + 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, + 0x41, 0x43, 0x44, 0x47, 0x4A, 0x4B, 0x4E, 0x4F, + 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x66, 0x68, + 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 
0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, + 0x45, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, + 0x4F, 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, + 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, + 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, 0x45, + 0x46, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4F, + 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 
0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, + 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, + 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7A, 0xC4, 0xB1, 0xC8, 0xB7, 0xCE, 0x91, 0xCE, + 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, + 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, + 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, + 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, + 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, + 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, + 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, + 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, + 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, + 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, + 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, + 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, + 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, + 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, + 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, + 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, + 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, + 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, + 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, + 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, + 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, + 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, + 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, + 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, + 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, + 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, + 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, + 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, 0xB5, 
0xCE, + 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, + 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, + 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, + 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, + 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, + 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, + 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, + 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, + 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, + 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, + 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, + 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, + 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, + 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, + 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, + 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, + 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, + 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, + 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, + 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, + 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, + 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, + 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, + 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, + 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, + 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, + 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, + 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, + 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, + 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, + 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, + 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, + 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, + 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, + 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, + 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, + 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, + 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, + 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, + 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 
0xBB, + 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, + 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, + 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, + 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, + 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, + 0x81, 0xCF, 0x80, 0xCF, 0x9C, 0xCF, 0x9D, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0xF6, 0xE4, 0xB8, 0xBD, 0xF6, 0xE4, 0xB8, + 0xB8, 0xF6, 0xE4, 0xB9, 0x81, 0xF6, 0xF0, 0xA0, + 0x84, 0xA2, 0xF6, 0xE4, 0xBD, 0xA0, 0xF6, 0xE4, + 0xBE, 0xAE, 0xF6, 0xE4, 0xBE, 0xBB, 0xF6, 0xE5, + 0x80, 0x82, 0xF6, 0xE5, 0x81, 0xBA, 0xF6, 0xE5, + 0x82, 0x99, 0xF6, 0xE5, 0x83, 0xA7, 0xF6, 0xE5, + 0x83, 0x8F, 0xF6, 0xE3, 0x92, 0x9E, 0xF6, 0xF0, + 0xA0, 0x98, 0xBA, 0xF6, 0xE5, 0x85, 0x8D, 0xF6, + 0xE5, 0x85, 0x94, 0xF6, 0xE5, 0x85, 0xA4, 0xF6, + 0xE5, 0x85, 0xB7, 0xF6, 0xF0, 0xA0, 0x94, 0x9C, + 0xF6, 0xE3, 0x92, 0xB9, 0xF6, 0xE5, 0x85, 0xA7, + 0xF6, 0xE5, 0x86, 0x8D, 0xF6, 0xF0, 0xA0, 0x95, + 0x8B, 0xF6, 0xE5, 0x86, 0x97, 0xF6, 0xE5, 0x86, + 0xA4, 0xF6, 0xE4, 0xBB, 0x8C, 0xF6, 0xE5, 0x86, + 0xAC, 0xF6, 0xE5, 0x86, 0xB5, 0xF6, 0xF0, 0xA9, + 0x87, 0x9F, 0xF6, 0xE5, 0x87, 0xB5, 0xF6, 0xE5, + 0x88, 0x83, 0xF6, 0xE3, 0x93, 0x9F, 0xF6, 0xE5, + 0x88, 0xBB, 0xF6, 0xE5, 0x89, 0x86, 0xF6, 0xE5, + 0x89, 0xB2, 0xF6, 0xE5, 0x89, 0xB7, 0xF6, 0xE3, + 0x94, 0x95, 0xF6, 0xE5, 0x8B, 0x87, 0xF6, 0xE5, + 0x8B, 0x89, 0xF6, 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, + 0x8B, 0xBA, 0xF6, 0xE5, 0x8C, 0x85, 0xF6, 0xE5, + 0x8C, 0x86, 0xF6, 0xE5, 0x8C, 0x97, 0xF6, 0xE5, + 0x8D, 0x89, 0xF6, 0xE5, 0x8D, 0x91, 0xF6, 0xE5, + 0x8D, 0x9A, 0xF6, 0xE5, 0x8D, 0xB3, 0xF6, 0xE5, + 0x8D, 0xBD, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xE5, + 0x8D, 0xBF, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xF0, + 0xA0, 0xA8, 0xAC, 0xF6, 0xE7, 0x81, 0xB0, 
0xF6, + 0xE5, 0x8F, 0x8A, 0xF6, 0xE5, 0x8F, 0x9F, 0xF6, + 0xF0, 0xA0, 0xAD, 0xA3, 0xF6, 0xE5, 0x8F, 0xAB, + 0xF6, 0xE5, 0x8F, 0xB1, 0xF6, 0xE5, 0x90, 0x86, + 0xF6, 0xE5, 0x92, 0x9E, 0xF6, 0xE5, 0x90, 0xB8, + 0xF6, 0xE5, 0x91, 0x88, 0xF6, 0xE5, 0x91, 0xA8, + 0xF6, 0xE5, 0x92, 0xA2, 0xF6, 0xE5, 0x93, 0xB6, + 0xF6, 0xE5, 0x94, 0x90, 0xF6, 0xE5, 0x95, 0x93, + 0xF6, 0xE5, 0x95, 0xA3, 0xF6, 0xE5, 0x96, 0x84, + 0xF6, 0xE5, 0x96, 0x84, 0xF6, 0xE5, 0x96, 0x99, + 0xF6, 0xE5, 0x96, 0xAB, 0xF6, 0xE5, 0x96, 0xB3, + 0xF6, 0xE5, 0x97, 0x82, 0xF6, 0xE5, 0x9C, 0x96, + 0xF6, 0xE5, 0x98, 0x86, 0xF6, 0xE5, 0x9C, 0x97, + 0xF6, 0xE5, 0x99, 0x91, 0xF6, 0xE5, 0x99, 0xB4, + 0xF6, 0xE5, 0x88, 0x87, 0xF6, 0xE5, 0xA3, 0xAE, + 0xF6, 0xE5, 0x9F, 0x8E, 0xF6, 0xE5, 0x9F, 0xB4, + 0xF6, 0xE5, 0xA0, 0x8D, 0xF6, 0xE5, 0x9E, 0x8B, + 0xF6, 0xE5, 0xA0, 0xB2, 0xF6, 0xE5, 0xA0, 0xB1, + 0xF6, 0xE5, 0xA2, 0xAC, 0xF6, 0xF0, 0xA1, 0x93, + 0xA4, 0xF6, 0xE5, 0xA3, 0xB2, 0xF6, 0xE5, 0xA3, + 0xB7, 0xF6, 0xE5, 0xA4, 0x86, 0xF6, 0xE5, 0xA4, + 0x9A, 0xF6, 0xE5, 0xA4, 0xA2, 0xF6, 0xE5, 0xA5, + 0xA2, 0xF6, 0xF0, 0xA1, 0x9A, 0xA8, 0xF6, 0xF0, + 0xA1, 0x9B, 0xAA, 0xF6, 0xE5, 0xA7, 0xAC, 0xF6, + 0xE5, 0xA8, 0x9B, 0xF6, 0xE5, 0xA8, 0xA7, 0xF6, + 0xE5, 0xA7, 0x98, 0xF6, 0xE5, 0xA9, 0xA6, 0xF6, + 0xE3, 0x9B, 0xAE, 0xF6, 0xE3, 0x9B, 0xBC, 0xF6, + 0xE5, 0xAC, 0x88, 0xF6, 0xE5, 0xAC, 0xBE, 0xF6, + 0xE5, 0xAC, 0xBE, 0xF6, 0xF0, 0xA1, 0xA7, 0x88, + 0xF6, 0xE5, 0xAF, 0x83, 0xF6, 0xE5, 0xAF, 0x98, + 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, 0xAF, 0xB3, + 0xF6, 0xF0, 0xA1, 0xAC, 0x98, 0xF6, 0xE5, 0xAF, + 0xBF, 0xF6, 0xE5, 0xB0, 0x86, 0xF6, 0xE5, 0xBD, + 0x93, 0xF6, 0xE5, 0xB0, 0xA2, 0xF6, 0xE3, 0x9E, + 0x81, 0xF6, 0xE5, 0xB1, 0xA0, 0xF6, 0xE5, 0xB1, + 0xAE, 0xF6, 0xE5, 0xB3, 0x80, 0xF6, 0xE5, 0xB2, + 0x8D, 0xF6, 0xF0, 0xA1, 0xB7, 0xA4, 0xF6, 0xE5, + 0xB5, 0x83, 0xF6, 0xF0, 0xA1, 0xB7, 0xA6, 0xF6, + 0xE5, 0xB5, 0xAE, 0xF6, 0xE5, 0xB5, 0xAB, 0xF6, + 0xE5, 0xB5, 0xBC, 0xF6, 0xE5, 0xB7, 0xA1, 0xF6, + 0xE5, 0xB7, 0xA2, 0xF6, 0xE3, 0xA0, 0xAF, 
0xF6, + 0xE5, 0xB7, 0xBD, 0xF6, 0xE5, 0xB8, 0xA8, 0xF6, + 0xE5, 0xB8, 0xBD, 0xF6, 0xE5, 0xB9, 0xA9, 0xF6, + 0xE3, 0xA1, 0xA2, 0xF6, 0xF0, 0xA2, 0x86, 0x83, + 0xF6, 0xE3, 0xA1, 0xBC, 0xF6, 0xE5, 0xBA, 0xB0, + 0xF6, 0xE5, 0xBA, 0xB3, 0xF6, 0xE5, 0xBA, 0xB6, + 0xF6, 0xE5, 0xBB, 0x8A, 0xF6, 0xF0, 0xAA, 0x8E, + 0x92, 0xF6, 0xE5, 0xBB, 0xBE, 0xF6, 0xF0, 0xA2, + 0x8C, 0xB1, 0xF6, 0xF0, 0xA2, 0x8C, 0xB1, 0xF6, + 0xE8, 0x88, 0x81, 0xF6, 0xE5, 0xBC, 0xA2, 0xF6, + 0xE5, 0xBC, 0xA2, 0xF6, 0xE3, 0xA3, 0x87, 0xF6, + 0xF0, 0xA3, 0x8A, 0xB8, 0xF6, 0xF0, 0xA6, 0x87, + 0x9A, 0xF6, 0xE5, 0xBD, 0xA2, 0xF6, 0xE5, 0xBD, + 0xAB, 0xF6, 0xE3, 0xA3, 0xA3, 0xF6, 0xE5, 0xBE, + 0x9A, 0xF6, 0xE5, 0xBF, 0x8D, 0xF6, 0xE5, 0xBF, + 0x97, 0xF6, 0xE5, 0xBF, 0xB9, 0xF6, 0xE6, 0x82, + 0x81, 0xF6, 0xE3, 0xA4, 0xBA, 0xF6, 0xE3, 0xA4, + 0x9C, 0xF6, 0xE6, 0x82, 0x94, 0xF6, 0xF0, 0xA2, + 0x9B, 0x94, 0xF6, 0xE6, 0x83, 0x87, 0xF6, 0xE6, + 0x85, 0x88, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, + 0x85, 0x8E, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, + 0x85, 0xBA, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, 0xE6, + 0x86, 0xB2, 0xF6, 0xE6, 0x86, 0xA4, 0xF6, 0xE6, + 0x86, 0xAF, 0xF6, 0xE6, 0x87, 0x9E, 0xF6, 0xE6, + 0x87, 0xB2, 0xF6, 0xE6, 0x87, 0xB6, 0xF6, 0xE6, + 0x88, 0x90, 0xF6, 0xE6, 0x88, 0x9B, 0xF6, 0xE6, + 0x89, 0x9D, 0xF6, 0xE6, 0x8A, 0xB1, 0xF6, 0xE6, + 0x8B, 0x94, 0xF6, 0xE6, 0x8D, 0x90, 0xF6, 0xF0, + 0xA2, 0xAC, 0x8C, 0xF6, 0xE6, 0x8C, 0xBD, 0xF6, + 0xE6, 0x8B, 0xBC, 0xF6, 0xE6, 0x8D, 0xA8, 0xF6, + 0xE6, 0x8E, 0x83, 0xF6, 0xE6, 0x8F, 0xA4, 0xF6, + 0xF0, 0xA2, 0xAF, 0xB1, 0xF6, 0xE6, 0x90, 0xA2, + 0xF6, 0xE6, 0x8F, 0x85, 0xF6, 0xE6, 0x8E, 0xA9, + 0xF6, 0xE3, 0xA8, 0xAE, 0xF6, 0xE6, 0x91, 0xA9, + 0xF6, 0xE6, 0x91, 0xBE, 0xF6, 0xE6, 0x92, 0x9D, + 0xF6, 0xE6, 0x91, 0xB7, 0xF6, 0xE3, 0xA9, 0xAC, + 0xF6, 0xE6, 0x95, 0x8F, 0xF6, 0xE6, 0x95, 0xAC, + 0xF6, 0xF0, 0xA3, 0x80, 0x8A, 0xF6, 0xE6, 0x97, + 0xA3, 0xF6, 0xE6, 0x9B, 0xB8, 0xF6, 0xE6, 0x99, + 0x89, 0xF6, 0xE3, 0xAC, 0x99, 0xF6, 0xE6, 0x9A, + 0x91, 0xF6, 0xE3, 0xAC, 0x88, 0xF6, 0xE3, 
0xAB, + 0xA4, 0xF6, 0xE5, 0x86, 0x92, 0xF6, 0xE5, 0x86, + 0x95, 0xF6, 0xE6, 0x9C, 0x80, 0xF6, 0xE6, 0x9A, + 0x9C, 0xF6, 0xE8, 0x82, 0xAD, 0xF6, 0xE4, 0x8F, + 0x99, 0xF6, 0xE6, 0x9C, 0x97, 0xF6, 0xE6, 0x9C, + 0x9B, 0xF6, 0xE6, 0x9C, 0xA1, 0xF6, 0xE6, 0x9D, + 0x9E, 0xF6, 0xE6, 0x9D, 0x93, 0xF6, 0xF0, 0xA3, + 0x8F, 0x83, 0xF6, 0xE3, 0xAD, 0x89, 0xF6, 0xE6, + 0x9F, 0xBA, 0xF6, 0xE6, 0x9E, 0x85, 0xF6, 0xE6, + 0xA1, 0x92, 0xF6, 0xE6, 0xA2, 0x85, 0xF6, 0xF0, + 0xA3, 0x91, 0xAD, 0xF6, 0xE6, 0xA2, 0x8E, 0xF6, + 0xE6, 0xA0, 0x9F, 0xF6, 0xE6, 0xA4, 0x94, 0xF6, + 0xE3, 0xAE, 0x9D, 0xF6, 0xE6, 0xA5, 0x82, 0xF6, + 0xE6, 0xA6, 0xA3, 0xF6, 0xE6, 0xA7, 0xAA, 0xF6, + 0xE6, 0xAA, 0xA8, 0xF6, 0xF0, 0xA3, 0x9A, 0xA3, + 0xF6, 0xE6, 0xAB, 0x9B, 0xF6, 0xE3, 0xB0, 0x98, + 0xF6, 0xE6, 0xAC, 0xA1, 0xF6, 0xF0, 0xA3, 0xA2, + 0xA7, 0xF6, 0xE6, 0xAD, 0x94, 0xF6, 0xE3, 0xB1, + 0x8E, 0xF6, 0xE6, 0xAD, 0xB2, 0xF6, 0xE6, 0xAE, + 0x9F, 0xF6, 0xE6, 0xAE, 0xBA, 0xF6, 0xE6, 0xAE, + 0xBB, 0xF6, 0xF0, 0xA3, 0xAA, 0x8D, 0xF6, 0xF0, + 0xA1, 0xB4, 0x8B, 0xF6, 0xF0, 0xA3, 0xAB, 0xBA, + 0xF6, 0xE6, 0xB1, 0x8E, 0xF6, 0xF0, 0xA3, 0xB2, + 0xBC, 0xF6, 0xE6, 0xB2, 0xBF, 0xF6, 0xE6, 0xB3, + 0x8D, 0xF6, 0xE6, 0xB1, 0xA7, 0xF6, 0xE6, 0xB4, + 0x96, 0xF6, 0xE6, 0xB4, 0xBE, 0xF6, 0xE6, 0xB5, + 0xB7, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, 0xB5, + 0xA9, 0xF6, 0xE6, 0xB5, 0xB8, 0xF6, 0xE6, 0xB6, + 0x85, 0xF6, 0xF0, 0xA3, 0xB4, 0x9E, 0xF6, 0xE6, + 0xB4, 0xB4, 0xF6, 0xE6, 0xB8, 0xAF, 0xF6, 0xE6, + 0xB9, 0xAE, 0xF6, 0xE3, 0xB4, 0xB3, 0xF6, 0xE6, + 0xBB, 0x8B, 0xF6, 0xE6, 0xBB, 0x87, 0xF6, 0xF0, + 0xA3, 0xBB, 0x91, 0xF6, 0xE6, 0xB7, 0xB9, 0xF6, + 0xE6, 0xBD, 0xAE, 0xF6, 0xF0, 0xA3, 0xBD, 0x9E, + 0xF6, 0xF0, 0xA3, 0xBE, 0x8E, 0xF6, 0xE6, 0xBF, + 0x86, 0xF6, 0xE7, 0x80, 0xB9, 0xF6, 0xE7, 0x80, + 0x9E, 0xF6, 0xE7, 0x80, 0x9B, 0xF6, 0xE3, 0xB6, + 0x96, 0xF6, 0xE7, 0x81, 0x8A, 0xF6, 0xE7, 0x81, + 0xBD, 0xF6, 0xE7, 0x81, 0xB7, 0xF6, 0xE7, 0x82, + 0xAD, 0xF6, 0xF0, 0xA0, 0x94, 0xA5, 0xF6, 0xE7, + 0x85, 0x85, 0xF6, 0xF0, 0xA4, 0x89, 0xA3, 
0xF6, + 0xE7, 0x86, 0x9C, 0xF6, 0xF0, 0xA4, 0x8E, 0xAB, + 0xF6, 0xE7, 0x88, 0xA8, 0xF6, 0xE7, 0x88, 0xB5, + 0xF6, 0xE7, 0x89, 0x90, 0xF6, 0xF0, 0xA4, 0x98, + 0x88, 0xF6, 0xE7, 0x8A, 0x80, 0xF6, 0xE7, 0x8A, + 0x95, 0xF6, 0xF0, 0xA4, 0x9C, 0xB5, 0xF6, 0xF0, + 0xA4, 0xA0, 0x94, 0xF6, 0xE7, 0x8D, 0xBA, 0xF6, + 0xE7, 0x8E, 0x8B, 0xF6, 0xE3, 0xBA, 0xAC, 0xF6, + 0xE7, 0x8E, 0xA5, 0xF6, 0xE3, 0xBA, 0xB8, 0xF6, + 0xE3, 0xBA, 0xB8, 0xF6, 0xE7, 0x91, 0x87, 0xF6, + 0xE7, 0x91, 0x9C, 0xF6, 0xE7, 0x91, 0xB1, 0xF6, + 0xE7, 0x92, 0x85, 0xF6, 0xE7, 0x93, 0x8A, 0xF6, + 0xE3, 0xBC, 0x9B, 0xF6, 0xE7, 0x94, 0xA4, 0xF6, + 0xF0, 0xA4, 0xB0, 0xB6, 0xF6, 0xE7, 0x94, 0xBE, + 0xF6, 0xF0, 0xA4, 0xB2, 0x92, 0xF6, 0xE7, 0x95, + 0xB0, 0xF6, 0xF0, 0xA2, 0x86, 0x9F, 0xF6, 0xE7, + 0x98, 0x90, 0xF6, 0xF0, 0xA4, 0xBE, 0xA1, 0xF6, + 0xF0, 0xA4, 0xBE, 0xB8, 0xF6, 0xF0, 0xA5, 0x81, + 0x84, 0xF6, 0xE3, 0xBF, 0xBC, 0xF6, 0xE4, 0x80, + 0x88, 0xF6, 0xE7, 0x9B, 0xB4, 0xF6, 0xF0, 0xA5, + 0x83, 0xB3, 0xF6, 0xF0, 0xA5, 0x83, 0xB2, 0xF6, + 0xF0, 0xA5, 0x84, 0x99, 0xF6, 0xF0, 0xA5, 0x84, + 0xB3, 0xF6, 0xE7, 0x9C, 0x9E, 0xF6, 0xE7, 0x9C, + 0x9F, 0xF6, 0xE7, 0x9C, 0x9F, 0xF6, 0xE7, 0x9D, + 0x8A, 0xF6, 0xE4, 0x80, 0xB9, 0xF6, 0xE7, 0x9E, + 0x8B, 0xF6, 0xE4, 0x81, 0x86, 0xF6, 0xE4, 0x82, + 0x96, 0xF6, 0xF0, 0xA5, 0x90, 0x9D, 0xF6, 0xE7, + 0xA1, 0x8E, 0xF6, 0xE7, 0xA2, 0x8C, 0xF6, 0xE7, + 0xA3, 0x8C, 0xF6, 0xE4, 0x83, 0xA3, 0xF6, 0xF0, + 0xA5, 0x98, 0xA6, 0xF6, 0xE7, 0xA5, 0x96, 0xF6, + 0xF0, 0xA5, 0x9A, 0x9A, 0xF6, 0xF0, 0xA5, 0x9B, + 0x85, 0xF6, 0xE7, 0xA6, 0x8F, 0xF6, 0xE7, 0xA7, + 0xAB, 0xF6, 0xE4, 0x84, 0xAF, 0xF6, 0xE7, 0xA9, + 0x80, 0xF6, 0xE7, 0xA9, 0x8A, 0xF6, 0xE7, 0xA9, + 0x8F, 0xF6, 0xF0, 0xA5, 0xA5, 0xBC, 0xF6, 0xF0, + 0xA5, 0xAA, 0xA7, 0xF6, 0xF0, 0xA5, 0xAA, 0xA7, + 0xF6, 0xE7, 0xAB, 0xAE, 0xF6, 0xE4, 0x88, 0x82, + 0xF6, 0xF0, 0xA5, 0xAE, 0xAB, 0xF6, 0xE7, 0xAF, + 0x86, 0xF6, 0xE7, 0xAF, 0x89, 0xF6, 0xE4, 0x88, + 0xA7, 0xF6, 0xF0, 0xA5, 0xB2, 0x80, 0xF6, 0xE7, + 0xB3, 0x92, 0xF6, 0xE4, 0x8A, 0xA0, 0xF6, 
0xE7, + 0xB3, 0xA8, 0xF6, 0xE7, 0xB3, 0xA3, 0xF6, 0xE7, + 0xB4, 0x80, 0xF6, 0xF0, 0xA5, 0xBE, 0x86, 0xF6, + 0xE7, 0xB5, 0xA3, 0xF6, 0xE4, 0x8C, 0x81, 0xF6, + 0xE7, 0xB7, 0x87, 0xF6, 0xE7, 0xB8, 0x82, 0xF6, + 0xE7, 0xB9, 0x85, 0xF6, 0xE4, 0x8C, 0xB4, 0xF6, + 0xF0, 0xA6, 0x88, 0xA8, 0xF6, 0xF0, 0xA6, 0x89, + 0x87, 0xF6, 0xE4, 0x8D, 0x99, 0xF6, 0xF0, 0xA6, + 0x8B, 0x99, 0xF6, 0xE7, 0xBD, 0xBA, 0xF6, 0xF0, + 0xA6, 0x8C, 0xBE, 0xF6, 0xE7, 0xBE, 0x95, 0xF6, + 0xE7, 0xBF, 0xBA, 0xF6, 0xE8, 0x80, 0x85, 0xF6, + 0xF0, 0xA6, 0x93, 0x9A, 0xF6, 0xF0, 0xA6, 0x94, + 0xA3, 0xF6, 0xE8, 0x81, 0xA0, 0xF6, 0xF0, 0xA6, + 0x96, 0xA8, 0xF6, 0xE8, 0x81, 0xB0, 0xF6, 0xF0, + 0xA3, 0x8D, 0x9F, 0xF6, 0xE4, 0x8F, 0x95, 0xF6, + 0xE8, 0x82, 0xB2, 0xF6, 0xE8, 0x84, 0x83, 0xF6, + 0xE4, 0x90, 0x8B, 0xF6, 0xE8, 0x84, 0xBE, 0xF6, + 0xE5, 0xAA, 0xB5, 0xF6, 0xF0, 0xA6, 0x9E, 0xA7, + 0xF6, 0xF0, 0xA6, 0x9E, 0xB5, 0xF6, 0xF0, 0xA3, + 0x8E, 0x93, 0xF6, 0xF0, 0xA3, 0x8E, 0x9C, 0xF6, + 0xE8, 0x88, 0x81, 0xF6, 0xE8, 0x88, 0x84, 0xF6, + 0xE8, 0xBE, 0x9E, 0xF6, 0xE4, 0x91, 0xAB, 0xF6, + 0xE8, 0x8A, 0x91, 0xF6, 0xE8, 0x8A, 0x8B, 0xF6, + 0xE8, 0x8A, 0x9D, 0xF6, 0xE5, 0x8A, 0xB3, 0xF6, + 0xE8, 0x8A, 0xB1, 0xF6, 0xE8, 0x8A, 0xB3, 0xF6, + 0xE8, 0x8A, 0xBD, 0xF6, 0xE8, 0x8B, 0xA6, 0xF6, + 0xF0, 0xA6, 0xAC, 0xBC, 0xF6, 0xE8, 0x8B, 0xA5, + 0xF6, 0xE8, 0x8C, 0x9D, 0xF6, 0xE8, 0x8D, 0xA3, + 0xF6, 0xE8, 0x8E, 0xAD, 0xF6, 0xE8, 0x8C, 0xA3, + 0xF6, 0xE8, 0x8E, 0xBD, 0xF6, 0xE8, 0x8F, 0xA7, + 0xF6, 0xE8, 0x91, 0x97, 0xF6, 0xE8, 0x8D, 0x93, + 0xF6, 0xE8, 0x8F, 0x8A, 0xF6, 0xE8, 0x8F, 0x8C, + 0xF6, 0xE8, 0x8F, 0x9C, 0xF6, 0xF0, 0xA6, 0xB0, + 0xB6, 0xF6, 0xF0, 0xA6, 0xB5, 0xAB, 0xF6, 0xF0, + 0xA6, 0xB3, 0x95, 0xF6, 0xE4, 0x94, 0xAB, 0xF6, + 0xE8, 0x93, 0xB1, 0xF6, 0xE8, 0x93, 0xB3, 0xF6, + 0xE8, 0x94, 0x96, 0xF6, 0xF0, 0xA7, 0x8F, 0x8A, + 0xF6, 0xE8, 0x95, 0xA4, 0xF6, 0xF0, 0xA6, 0xBC, + 0xAC, 0xF6, 0xE4, 0x95, 0x9D, 0xF6, 0xE4, 0x95, + 0xA1, 0xF6, 0xF0, 0xA6, 0xBE, 0xB1, 0xF6, 0xF0, + 0xA7, 0x83, 0x92, 0xF6, 0xE4, 0x95, 0xAB, 
0xF6, + 0xE8, 0x99, 0x90, 0xF6, 0xE8, 0x99, 0x9C, 0xF6, + 0xE8, 0x99, 0xA7, 0xF6, 0xE8, 0x99, 0xA9, 0xF6, + 0xE8, 0x9A, 0xA9, 0xF6, 0xE8, 0x9A, 0x88, 0xF6, + 0xE8, 0x9C, 0x8E, 0xF6, 0xE8, 0x9B, 0xA2, 0xF6, + 0xE8, 0x9D, 0xB9, 0xF6, 0xE8, 0x9C, 0xA8, 0xF6, + 0xE8, 0x9D, 0xAB, 0xF6, 0xE8, 0x9E, 0x86, 0xF6, + 0xE4, 0x97, 0x97, 0xF6, 0xE8, 0x9F, 0xA1, 0xF6, + 0xE8, 0xA0, 0x81, 0xF6, 0xE4, 0x97, 0xB9, 0xF6, + 0xE8, 0xA1, 0xA0, 0xF6, 0xE8, 0xA1, 0xA3, 0xF6, + 0xF0, 0xA7, 0x99, 0xA7, 0xF6, 0xE8, 0xA3, 0x97, + 0xF6, 0xE8, 0xA3, 0x9E, 0xF6, 0xE4, 0x98, 0xB5, + 0xF6, 0xE8, 0xA3, 0xBA, 0xF6, 0xE3, 0x92, 0xBB, + 0xF6, 0xF0, 0xA7, 0xA2, 0xAE, 0xF6, 0xF0, 0xA7, + 0xA5, 0xA6, 0xF6, 0xE4, 0x9A, 0xBE, 0xF6, 0xE4, + 0x9B, 0x87, 0xF6, 0xE8, 0xAA, 0xA0, 0xF6, 0xE8, + 0xAB, 0xAD, 0xF6, 0xE8, 0xAE, 0x8A, 0xF6, 0xE8, + 0xB1, 0x95, 0xF6, 0xF0, 0xA7, 0xB2, 0xA8, 0xF6, + 0xE8, 0xB2, 0xAB, 0xF6, 0xE8, 0xB3, 0x81, 0xF6, + 0xE8, 0xB4, 0x9B, 0xF6, 0xE8, 0xB5, 0xB7, 0xF6, + 0xF0, 0xA7, 0xBC, 0xAF, 0xF6, 0xF0, 0xA0, 0xA0, + 0x84, 0xF6, 0xE8, 0xB7, 0x8B, 0xF6, 0xE8, 0xB6, + 0xBC, 0xF6, 0xE8, 0xB7, 0xB0, 0xF6, 0xF0, 0xA0, + 0xA3, 0x9E, 0xF6, 0xE8, 0xBB, 0x94, 0xF6, 0xE8, + 0xBC, 0xB8, 0xF6, 0xF0, 0xA8, 0x97, 0x92, 0xF6, + 0xF0, 0xA8, 0x97, 0xAD, 0xF6, 0xE9, 0x82, 0x94, + 0xF6, 0xE9, 0x83, 0xB1, 0xF6, 0xE9, 0x84, 0x91, + 0xF6, 0xF0, 0xA8, 0x9C, 0xAE, 0xF6, 0xE9, 0x84, + 0x9B, 0xF6, 0xE9, 0x88, 0xB8, 0xF6, 0xE9, 0x8B, + 0x97, 0xF6, 0xE9, 0x8B, 0x98, 0xF6, 0xE9, 0x89, + 0xBC, 0xF6, 0xE9, 0x8F, 0xB9, 0xF6, 0xE9, 0x90, + 0x95, 0xF6, 0xF0, 0xA8, 0xAF, 0xBA, 0xF6, 0xE9, + 0x96, 0x8B, 0xF6, 0xE4, 0xA6, 0x95, 0xF6, 0xE9, + 0x96, 0xB7, 0xF6, 0xF0, 0xA8, 0xB5, 0xB7, 0xF6, + 0xE4, 0xA7, 0xA6, 0xF6, 0xE9, 0x9B, 0x83, 0xF6, + 0xE5, 0xB6, 0xB2, 0xF6, 0xE9, 0x9C, 0xA3, 0xF6, + 0xF0, 0xA9, 0x85, 0x85, 0xF6, 0xF0, 0xA9, 0x88, + 0x9A, 0xF6, 0xE4, 0xA9, 0xAE, 0xF6, 0xE4, 0xA9, + 0xB6, 0xF6, 0xE9, 0x9F, 0xA0, 0xF6, 0xF0, 0xA9, + 0x90, 0x8A, 0xF6, 0xE4, 0xAA, 0xB2, 0xF6, 0xF0, + 0xA9, 0x92, 0x96, 0xF6, 0xE9, 0xA0, 0x8B, 
0xF6, + 0xE9, 0xA0, 0x8B, 0xF6, 0xE9, 0xA0, 0xA9, 0xF6, + 0xF0, 0xA9, 0x96, 0xB6, 0xF6, 0xE9, 0xA3, 0xA2, + 0xF6, 0xE4, 0xAC, 0xB3, 0xF6, 0xE9, 0xA4, 0xA9, + 0xF6, 0xE9, 0xA6, 0xA7, 0xF6, 0xE9, 0xA7, 0x82, + 0xF6, 0xE9, 0xA7, 0xBE, 0xF6, 0xE4, 0xAF, 0x8E, + 0xF6, 0xF0, 0xA9, 0xAC, 0xB0, 0xF6, 0xE9, 0xAC, + 0x92, 0xF6, 0xE9, 0xB1, 0x80, 0xF6, 0xE9, 0xB3, + 0xBD, 0xF6, 0xE4, 0xB3, 0x8E, 0xF6, 0xE4, 0xB3, + 0xAD, 0xF6, 0xE9, 0xB5, 0xA7, 0xF6, 0xF0, 0xAA, + 0x83, 0x8E, 0xF6, 0xE4, 0xB3, 0xB8, 0xF6, 0xF0, + 0xAA, 0x84, 0x85, 0xF6, 0xF0, 0xAA, 0x88, 0x8E, + 0xF6, 0xF0, 0xAA, 0x8A, 0x91, 0xF6, 0xE9, 0xBA, + 0xBB, 0xF6, 0xE4, 0xB5, 0x96, 0xF6, 0xE9, 0xBB, + 0xB9, 0xF6, 0xE9, 0xBB, 0xBE, 0xF6, 0xE9, 0xBC, + 0x85, 0xF6, 0xE9, 0xBC, 0x8F, 0xF6, 0xE9, 0xBC, + 0x96, 0xF6, 0xE9, 0xBC, 0xBB, 0xF6, 0xF0, 0xAA, + 0x98, 0x80, + }, +}; + +static const uchar_t u8_case_common_b2_tbl[2][2][256] = { + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, 1, 2, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 3, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + N_, 
N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 4, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + { + { + 0, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, 
N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, 1, 2, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, 3, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + { + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + 4, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + N_, N_, N_, N_, N_, N_, N_, N_, + }, + + }, + +}; + +static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { + { + { /* Third byte table 0. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 0, 0 }, + { 1, 60 }, { 2, 123 }, { 3, 185 }, { 4, 257 }, + { 5, 321 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 6, 373 }, { 7, 439 }, + { 8, 465 }, { 9, 561 }, { 10, 593 }, { 11, 649 }, + { 12, 703 }, { 13, 749 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 1. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 14, 795 }, { 15, 891 }, { 16, 987 }, { 17, 1068 }, + { 18, 1155 }, { 19, 1245 }, { 20, 1299 }, { 21, 1386 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 22, 1443 }, { 23, 1448 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 24, 1496 }, { 25, 1526 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 
N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 3. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 26, 1574 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 4. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 27, 1652 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 
N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + }, + { + { /* Third byte table 0. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 
N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 0, 0 }, + { 1, 60 }, { 2, 123 }, { 3, 185 }, { 4, 257 }, + { 5, 321 }, { 6, 383 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 7, 401 }, { 8, 467 }, + { 9, 505 }, { 10, 601 }, { 11, 633 }, { 12, 689 }, + { 13, 753 }, { 14, 803 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 15, 849 }, { 16, 945 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 
N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 17, 963 }, { 18, 1059 }, { 19, 1155 }, { 20, 1236 }, + { 21, 1323 }, { 22, 1413 }, { 23, 1467 }, { 24, 1554 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 25, 1611 }, { 26, 1619 }, { 27, 1667 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 28, 1670 }, { 29, 1700 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 
N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 30, 1748 }, { 31, 1889 }, { 32, 1911 }, { 33, 2007 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 34, 2061 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 4. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + 
{ N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 35, 2139 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + }, +}; + +static const uchar_t 
u8_tolower_b4_tbl[2][36][257] = { + { + { /* Fourth byte table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 46, 48, 50, 52, 54, 56, 58, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, + }, + { /* Fourth byte table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 49, 49, 51, 51, 53, 53, 55, + 55, 55, 57, 57, 59, 59, 61, 61, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 8, 10, 10, 12, 12, 14, + 14, 16, 16, 18, 18, 20, 20, 22, + 22, 24, 24, 26, 26, 28, 28, 30, + 30, 32, 32, 34, 34, 36, 36, 38, + 38, 40, 40, 42, 42, 44, 44, 46, + 46, 48, 48, 50, 50, 52, 52, 54, + 54, 56, 58, 58, 60, 60, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 4, 4, 6, 6, 8, + 10, 10, 12, 14, 16, 16, 16, 18, + 20, 22, 24, 24, 26, 28, 28, 30, + 32, 34, 34, 34, 34, 36, 38, 38, + 40, 42, 42, 44, 44, 46, 46, 48, + 50, 50, 52, 52, 52, 54, 54, 56, + 58, 58, 60, 62, 64, 64, 66, 66, + 68, 70, 70, 70, 70, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, + }, + { /* Fourth byte table 4. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 4, 4, + 6, 8, 8, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 28, 30, + 30, 32, 32, 34, 34, 36, 36, 38, + 38, 40, 40, 42, 42, 44, 44, 46, + 46, 46, 48, 50, 50, 52, 52, 54, + 56, 58, 58, 60, 60, 62, 62, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 5. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 50, 50, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, + }, + { /* Fourth byte table 6. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, + 2, 4, 6, 8, 8, 10, 10, 12, + 14, 14, 16, 18, 20, 22, 24, 26, + 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 48, 50, 52, 54, 56, + 58, 60, 62, 64, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, + }, + { /* Fourth byte table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 24, 24, 24, 24, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, + }, + { /* Fourth byte table 8. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 50, 50, 52, 52, 54, 54, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 14, 16, 16, 18, 18, 20, 20, 22, + 22, 24, 24, 26, 26, 28, 28, 30, + 30, 32, 32, 34, 34, 36, 36, 38, + 38, 40, 40, 42, 42, 44, 44, 46, + 46, 48, 48, 50, 50, 52, 52, 52, + 52, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 18, 20, 22, 24, 26, 28, + 30, 32, 34, 36, 38, 40, 42, 44, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 90, 90, 93, 93, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 90, 90, 93, 93, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 36, 36, 39, 39, 42, 42, 45, + 45, 48, 48, 51, 51, 54, 54, 57, + 57, 60, 60, 63, 63, 66, 66, 69, + 69, 72, 72, 75, 75, 78, 78, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 45, 48, 51, 54, 57, 60, 63, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 69, 72, 75, 78, 81, 84, 87, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 21, 21, 24, 24, 27, 27, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 33, 36, 39, 42, 45, 48, 51, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 75, 78, 81, 84, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 18, 21, 24, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 30, 33, 36, 39, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 45, 48, 51, 54, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, + 2, 2, 2, 3, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, + 6, 9, 12, 15, 18, 21, 24, 27, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 6, 9, 12, 15, 18, + 21, 24, 27, 30, 33, 36, 39, 42, + 45, 48, 51, 54, 57, 60, 63, 66, + 69, 72, 75, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 152, 152, + 152, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 33. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 34. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 35. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + }, + { + { /* Fourth byte table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 46, 48, 50, 52, 54, 56, 58, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, + 60, + }, + { /* Fourth byte table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 49, 49, 51, 51, 53, 53, 55, + 55, 55, 57, 57, 59, 59, 61, 61, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 8, 10, 10, 12, 12, 14, + 14, 16, 16, 18, 18, 20, 20, 22, + 22, 24, 24, 26, 26, 28, 28, 30, + 30, 32, 32, 34, 34, 36, 36, 38, + 38, 40, 40, 42, 42, 44, 44, 46, + 46, 48, 48, 50, 50, 52, 52, 54, + 54, 56, 58, 58, 60, 60, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 4, 4, 6, 6, 8, + 10, 10, 12, 14, 16, 16, 16, 18, + 20, 22, 24, 24, 26, 28, 28, 30, + 32, 34, 34, 34, 34, 36, 38, 38, + 40, 42, 42, 44, 44, 46, 46, 48, + 50, 50, 52, 52, 52, 54, 54, 56, + 58, 58, 60, 62, 64, 64, 66, 66, + 68, 70, 70, 70, 70, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, + }, + { /* Fourth byte table 4. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 4, 4, + 6, 8, 8, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 28, 30, + 30, 32, 32, 34, 34, 36, 36, 38, + 38, 40, 40, 42, 42, 44, 44, 46, + 46, 46, 48, 50, 50, 52, 52, 54, + 56, 58, 58, 60, 60, 62, 62, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 5. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 50, 50, 52, 52, 52, 52, 52, + 52, 52, 52, 55, 57, 57, 59, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 6. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 6, 8, 10, + 10, 12, 12, 14, 14, 16, 16, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, + }, + { /* Fourth byte table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, + 2, 4, 6, 8, 8, 10, 10, 12, + 14, 14, 16, 18, 20, 22, 24, 26, + 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 48, 50, 52, 54, 56, + 58, 60, 62, 64, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, + }, + { /* Fourth byte table 8. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 24, 24, 24, 24, 26, 26, 26, + 28, 28, 30, 32, 32, 32, 34, 36, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 50, 50, 52, 52, 54, 54, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 4, 6, 6, 8, 8, + 10, 10, 12, 12, 14, 14, 16, 16, + 16, 18, 18, 20, 20, 22, 22, 24, + 24, 26, 26, 28, 28, 30, 30, 32, + 32, 34, 34, 36, 36, 38, 38, 40, + 40, 42, 42, 44, 44, 46, 46, 48, + 48, 50, 50, 52, 52, 54, 54, 56, + 56, 58, 58, 60, 60, 62, 62, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 12, 12, 14, 14, 16, + 16, 18, 18, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 22, 24, 26, 28, 30, 32, + 34, 36, 38, 40, 42, 44, 46, 48, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, + }, + { /* Fourth byte table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 90, 90, 93, 93, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 90, 90, 93, 93, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 36, 36, 39, 39, 42, 42, 45, + 45, 48, 48, 51, 51, 54, 54, 57, + 57, 60, 60, 63, 63, 66, 66, 69, + 69, 72, 72, 75, 75, 78, 78, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 45, 48, 51, 54, 57, 60, 63, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 69, 72, 75, 78, 81, 84, 87, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 21, 21, 24, 24, 27, 27, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 33, 36, 39, 42, 45, 48, 51, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 75, 78, 81, 84, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 18, 21, 24, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 30, 33, 36, 39, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 45, 48, 51, 54, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, + 2, 2, 2, 3, 5, 5, 5, 5, + 5, 5, 5, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, + 6, 9, 12, 15, 18, 21, 24, 27, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 123, 126, 129, 132, 135, 138, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 
141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, + 141, + }, + { /* Fourth byte table 31. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 5, 8, 10, 10, 10, + 13, 13, 16, 16, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, + }, + { /* Fourth byte table 32. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 57, 57, 60, + 60, 63, 63, 66, 66, 69, 69, 72, + 72, 75, 75, 78, 78, 81, 81, 84, + 84, 87, 87, 90, 90, 93, 93, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 33. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 6, 6, 9, 9, 12, + 12, 15, 15, 18, 18, 21, 21, 24, + 24, 27, 27, 30, 30, 33, 33, 36, + 36, 39, 39, 42, 42, 45, 45, 48, + 48, 51, 51, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 34. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 6, 9, 12, 15, 18, + 21, 24, 27, 30, 33, 36, 39, 42, + 45, 48, 51, 54, 57, 60, 63, 66, + 69, 72, 75, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, + }, + { /* Fourth byte table 35. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 
160, 160, 160, 160, 160, 160, + 160, + }, + }, +}; + +static const uchar_t u8_tolower_final_tbl[2][2299] = { + { + 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, + 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, + 0xC3, 0xA8, 0xC3, 0xA9, 0xC3, 0xAA, 0xC3, 0xAB, + 0xC3, 0xAC, 0xC3, 0xAD, 0xC3, 0xAE, 0xC3, 0xAF, + 0xC3, 0xB0, 0xC3, 0xB1, 0xC3, 0xB2, 0xC3, 0xB3, + 0xC3, 0xB4, 0xC3, 0xB5, 0xC3, 0xB6, 0xC3, 0xB8, + 0xC3, 0xB9, 0xC3, 0xBA, 0xC3, 0xBB, 0xC3, 0xBC, + 0xC3, 0xBD, 0xC3, 0xBE, 0xC4, 0x81, 0xC4, 0x83, + 0xC4, 0x85, 0xC4, 0x87, 0xC4, 0x89, 0xC4, 0x8B, + 0xC4, 0x8D, 0xC4, 0x8F, 0xC4, 0x91, 0xC4, 0x93, + 0xC4, 0x95, 0xC4, 0x97, 0xC4, 0x99, 0xC4, 0x9B, + 0xC4, 0x9D, 0xC4, 0x9F, 0xC4, 0xA1, 0xC4, 0xA3, + 0xC4, 0xA5, 0xC4, 0xA7, 0xC4, 0xA9, 0xC4, 0xAB, + 0xC4, 0xAD, 0xC4, 0xAF, 0x69, 0xC4, 0xB3, 0xC4, + 0xB5, 0xC4, 0xB7, 0xC4, 0xBA, 0xC4, 0xBC, 0xC4, + 0xBE, 0xC5, 0x80, 0xC5, 0x82, 0xC5, 0x84, 0xC5, + 0x86, 0xC5, 0x88, 0xC5, 0x8B, 0xC5, 0x8D, 0xC5, + 0x8F, 0xC5, 0x91, 0xC5, 0x93, 0xC5, 0x95, 0xC5, + 0x97, 0xC5, 0x99, 0xC5, 0x9B, 0xC5, 0x9D, 0xC5, + 0x9F, 0xC5, 0xA1, 0xC5, 0xA3, 0xC5, 0xA5, 0xC5, + 0xA7, 0xC5, 0xA9, 0xC5, 0xAB, 0xC5, 0xAD, 0xC5, + 0xAF, 0xC5, 0xB1, 0xC5, 0xB3, 0xC5, 0xB5, 0xC5, + 0xB7, 0xC3, 0xBF, 0xC5, 0xBA, 0xC5, 0xBC, 0xC5, + 0xBE, 0xC9, 0x93, 0xC6, 0x83, 0xC6, 0x85, 0xC9, + 0x94, 0xC6, 0x88, 0xC9, 0x96, 0xC9, 0x97, 0xC6, + 0x8C, 0xC7, 0x9D, 0xC9, 0x99, 0xC9, 0x9B, 0xC6, + 0x92, 0xC9, 0xA0, 0xC9, 0xA3, 0xC9, 0xA9, 0xC9, + 0xA8, 0xC6, 0x99, 0xC9, 0xAF, 0xC9, 0xB2, 0xC9, + 0xB5, 0xC6, 0xA1, 0xC6, 0xA3, 0xC6, 0xA5, 0xCA, + 0x80, 0xC6, 0xA8, 0xCA, 0x83, 0xC6, 0xAD, 0xCA, + 0x88, 0xC6, 0xB0, 0xCA, 0x8A, 0xCA, 0x8B, 0xC6, + 0xB4, 0xC6, 0xB6, 0xCA, 0x92, 0xC6, 0xB9, 0xC6, + 0xBD, 0xC7, 0x86, 0xC7, 0x86, 0xC7, 0x89, 0xC7, + 0x89, 0xC7, 0x8C, 0xC7, 0x8C, 0xC7, 0x8E, 0xC7, + 0x90, 0xC7, 0x92, 0xC7, 0x94, 0xC7, 0x96, 0xC7, + 0x98, 0xC7, 0x9A, 0xC7, 0x9C, 0xC7, 0x9F, 0xC7, + 0xA1, 0xC7, 0xA3, 0xC7, 0xA5, 0xC7, 0xA7, 0xC7, + 0xA9, 0xC7, 0xAB, 0xC7, 0xAD, 
0xC7, 0xAF, 0xC7, + 0xB3, 0xC7, 0xB3, 0xC7, 0xB5, 0xC6, 0x95, 0xC6, + 0xBF, 0xC7, 0xB9, 0xC7, 0xBB, 0xC7, 0xBD, 0xC7, + 0xBF, 0xC8, 0x81, 0xC8, 0x83, 0xC8, 0x85, 0xC8, + 0x87, 0xC8, 0x89, 0xC8, 0x8B, 0xC8, 0x8D, 0xC8, + 0x8F, 0xC8, 0x91, 0xC8, 0x93, 0xC8, 0x95, 0xC8, + 0x97, 0xC8, 0x99, 0xC8, 0x9B, 0xC8, 0x9D, 0xC8, + 0x9F, 0xC6, 0x9E, 0xC8, 0xA3, 0xC8, 0xA5, 0xC8, + 0xA7, 0xC8, 0xA9, 0xC8, 0xAB, 0xC8, 0xAD, 0xC8, + 0xAF, 0xC8, 0xB1, 0xC8, 0xB3, 0xCE, 0xAC, 0xCE, + 0xAD, 0xCE, 0xAE, 0xCE, 0xAF, 0xCF, 0x8C, 0xCF, + 0x8D, 0xCF, 0x8E, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, + 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, + 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, + 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, + 0xBF, 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x83, 0xCF, + 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, 0xCF, + 0x88, 0xCF, 0x89, 0xCF, 0x8A, 0xCF, 0x8B, 0xCF, + 0x99, 0xCF, 0x9B, 0xCF, 0x9D, 0xCF, 0x9F, 0xCF, + 0xA1, 0xCF, 0xA3, 0xCF, 0xA5, 0xCF, 0xA7, 0xCF, + 0xA9, 0xCF, 0xAB, 0xCF, 0xAD, 0xCF, 0xAF, 0xCE, + 0xB8, 0xD1, 0x90, 0xD1, 0x91, 0xD1, 0x92, 0xD1, + 0x93, 0xD1, 0x94, 0xD1, 0x95, 0xD1, 0x96, 0xD1, + 0x97, 0xD1, 0x98, 0xD1, 0x99, 0xD1, 0x9A, 0xD1, + 0x9B, 0xD1, 0x9C, 0xD1, 0x9D, 0xD1, 0x9E, 0xD1, + 0x9F, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, + 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD0, 0xB6, 0xD0, + 0xB7, 0xD0, 0xB8, 0xD0, 0xB9, 0xD0, 0xBA, 0xD0, + 0xBB, 0xD0, 0xBC, 0xD0, 0xBD, 0xD0, 0xBE, 0xD0, + 0xBF, 0xD1, 0x80, 0xD1, 0x81, 0xD1, 0x82, 0xD1, + 0x83, 0xD1, 0x84, 0xD1, 0x85, 0xD1, 0x86, 0xD1, + 0x87, 0xD1, 0x88, 0xD1, 0x89, 0xD1, 0x8A, 0xD1, + 0x8B, 0xD1, 0x8C, 0xD1, 0x8D, 0xD1, 0x8E, 0xD1, + 0x8F, 0xD1, 0xA1, 0xD1, 0xA3, 0xD1, 0xA5, 0xD1, + 0xA7, 0xD1, 0xA9, 0xD1, 0xAB, 0xD1, 0xAD, 0xD1, + 0xAF, 0xD1, 0xB1, 0xD1, 0xB3, 0xD1, 0xB5, 0xD1, + 0xB7, 0xD1, 0xB9, 0xD1, 0xBB, 0xD1, 0xBD, 0xD1, + 0xBF, 0xD2, 0x81, 0xD2, 0x8B, 0xD2, 0x8D, 0xD2, + 0x8F, 0xD2, 0x91, 0xD2, 0x93, 0xD2, 0x95, 0xD2, + 0x97, 0xD2, 0x99, 0xD2, 0x9B, 0xD2, 0x9D, 0xD2, + 0x9F, 0xD2, 0xA1, 0xD2, 0xA3, 
0xD2, 0xA5, 0xD2, + 0xA7, 0xD2, 0xA9, 0xD2, 0xAB, 0xD2, 0xAD, 0xD2, + 0xAF, 0xD2, 0xB1, 0xD2, 0xB3, 0xD2, 0xB5, 0xD2, + 0xB7, 0xD2, 0xB9, 0xD2, 0xBB, 0xD2, 0xBD, 0xD2, + 0xBF, 0xD3, 0x82, 0xD3, 0x84, 0xD3, 0x86, 0xD3, + 0x88, 0xD3, 0x8A, 0xD3, 0x8C, 0xD3, 0x8E, 0xD3, + 0x91, 0xD3, 0x93, 0xD3, 0x95, 0xD3, 0x97, 0xD3, + 0x99, 0xD3, 0x9B, 0xD3, 0x9D, 0xD3, 0x9F, 0xD3, + 0xA1, 0xD3, 0xA3, 0xD3, 0xA5, 0xD3, 0xA7, 0xD3, + 0xA9, 0xD3, 0xAB, 0xD3, 0xAD, 0xD3, 0xAF, 0xD3, + 0xB1, 0xD3, 0xB3, 0xD3, 0xB5, 0xD3, 0xB9, 0xD4, + 0x81, 0xD4, 0x83, 0xD4, 0x85, 0xD4, 0x87, 0xD4, + 0x89, 0xD4, 0x8B, 0xD4, 0x8D, 0xD4, 0x8F, 0xD5, + 0xA1, 0xD5, 0xA2, 0xD5, 0xA3, 0xD5, 0xA4, 0xD5, + 0xA5, 0xD5, 0xA6, 0xD5, 0xA7, 0xD5, 0xA8, 0xD5, + 0xA9, 0xD5, 0xAA, 0xD5, 0xAB, 0xD5, 0xAC, 0xD5, + 0xAD, 0xD5, 0xAE, 0xD5, 0xAF, 0xD5, 0xB0, 0xD5, + 0xB1, 0xD5, 0xB2, 0xD5, 0xB3, 0xD5, 0xB4, 0xD5, + 0xB5, 0xD5, 0xB6, 0xD5, 0xB7, 0xD5, 0xB8, 0xD5, + 0xB9, 0xD5, 0xBA, 0xD5, 0xBB, 0xD5, 0xBC, 0xD5, + 0xBD, 0xD5, 0xBE, 0xD5, 0xBF, 0xD6, 0x80, 0xD6, + 0x81, 0xD6, 0x82, 0xD6, 0x83, 0xD6, 0x84, 0xD6, + 0x85, 0xD6, 0x86, 0xE1, 0xB8, 0x81, 0xE1, 0xB8, + 0x83, 0xE1, 0xB8, 0x85, 0xE1, 0xB8, 0x87, 0xE1, + 0xB8, 0x89, 0xE1, 0xB8, 0x8B, 0xE1, 0xB8, 0x8D, + 0xE1, 0xB8, 0x8F, 0xE1, 0xB8, 0x91, 0xE1, 0xB8, + 0x93, 0xE1, 0xB8, 0x95, 0xE1, 0xB8, 0x97, 0xE1, + 0xB8, 0x99, 0xE1, 0xB8, 0x9B, 0xE1, 0xB8, 0x9D, + 0xE1, 0xB8, 0x9F, 0xE1, 0xB8, 0xA1, 0xE1, 0xB8, + 0xA3, 0xE1, 0xB8, 0xA5, 0xE1, 0xB8, 0xA7, 0xE1, + 0xB8, 0xA9, 0xE1, 0xB8, 0xAB, 0xE1, 0xB8, 0xAD, + 0xE1, 0xB8, 0xAF, 0xE1, 0xB8, 0xB1, 0xE1, 0xB8, + 0xB3, 0xE1, 0xB8, 0xB5, 0xE1, 0xB8, 0xB7, 0xE1, + 0xB8, 0xB9, 0xE1, 0xB8, 0xBB, 0xE1, 0xB8, 0xBD, + 0xE1, 0xB8, 0xBF, 0xE1, 0xB9, 0x81, 0xE1, 0xB9, + 0x83, 0xE1, 0xB9, 0x85, 0xE1, 0xB9, 0x87, 0xE1, + 0xB9, 0x89, 0xE1, 0xB9, 0x8B, 0xE1, 0xB9, 0x8D, + 0xE1, 0xB9, 0x8F, 0xE1, 0xB9, 0x91, 0xE1, 0xB9, + 0x93, 0xE1, 0xB9, 0x95, 0xE1, 0xB9, 0x97, 0xE1, + 0xB9, 0x99, 0xE1, 0xB9, 0x9B, 0xE1, 0xB9, 0x9D, + 0xE1, 0xB9, 0x9F, 0xE1, 0xB9, 
0xA1, 0xE1, 0xB9, + 0xA3, 0xE1, 0xB9, 0xA5, 0xE1, 0xB9, 0xA7, 0xE1, + 0xB9, 0xA9, 0xE1, 0xB9, 0xAB, 0xE1, 0xB9, 0xAD, + 0xE1, 0xB9, 0xAF, 0xE1, 0xB9, 0xB1, 0xE1, 0xB9, + 0xB3, 0xE1, 0xB9, 0xB5, 0xE1, 0xB9, 0xB7, 0xE1, + 0xB9, 0xB9, 0xE1, 0xB9, 0xBB, 0xE1, 0xB9, 0xBD, + 0xE1, 0xB9, 0xBF, 0xE1, 0xBA, 0x81, 0xE1, 0xBA, + 0x83, 0xE1, 0xBA, 0x85, 0xE1, 0xBA, 0x87, 0xE1, + 0xBA, 0x89, 0xE1, 0xBA, 0x8B, 0xE1, 0xBA, 0x8D, + 0xE1, 0xBA, 0x8F, 0xE1, 0xBA, 0x91, 0xE1, 0xBA, + 0x93, 0xE1, 0xBA, 0x95, 0xE1, 0xBA, 0xA1, 0xE1, + 0xBA, 0xA3, 0xE1, 0xBA, 0xA5, 0xE1, 0xBA, 0xA7, + 0xE1, 0xBA, 0xA9, 0xE1, 0xBA, 0xAB, 0xE1, 0xBA, + 0xAD, 0xE1, 0xBA, 0xAF, 0xE1, 0xBA, 0xB1, 0xE1, + 0xBA, 0xB3, 0xE1, 0xBA, 0xB5, 0xE1, 0xBA, 0xB7, + 0xE1, 0xBA, 0xB9, 0xE1, 0xBA, 0xBB, 0xE1, 0xBA, + 0xBD, 0xE1, 0xBA, 0xBF, 0xE1, 0xBB, 0x81, 0xE1, + 0xBB, 0x83, 0xE1, 0xBB, 0x85, 0xE1, 0xBB, 0x87, + 0xE1, 0xBB, 0x89, 0xE1, 0xBB, 0x8B, 0xE1, 0xBB, + 0x8D, 0xE1, 0xBB, 0x8F, 0xE1, 0xBB, 0x91, 0xE1, + 0xBB, 0x93, 0xE1, 0xBB, 0x95, 0xE1, 0xBB, 0x97, + 0xE1, 0xBB, 0x99, 0xE1, 0xBB, 0x9B, 0xE1, 0xBB, + 0x9D, 0xE1, 0xBB, 0x9F, 0xE1, 0xBB, 0xA1, 0xE1, + 0xBB, 0xA3, 0xE1, 0xBB, 0xA5, 0xE1, 0xBB, 0xA7, + 0xE1, 0xBB, 0xA9, 0xE1, 0xBB, 0xAB, 0xE1, 0xBB, + 0xAD, 0xE1, 0xBB, 0xAF, 0xE1, 0xBB, 0xB1, 0xE1, + 0xBB, 0xB3, 0xE1, 0xBB, 0xB5, 0xE1, 0xBB, 0xB7, + 0xE1, 0xBB, 0xB9, 0xE1, 0xBC, 0x80, 0xE1, 0xBC, + 0x81, 0xE1, 0xBC, 0x82, 0xE1, 0xBC, 0x83, 0xE1, + 0xBC, 0x84, 0xE1, 0xBC, 0x85, 0xE1, 0xBC, 0x86, + 0xE1, 0xBC, 0x87, 0xE1, 0xBC, 0x90, 0xE1, 0xBC, + 0x91, 0xE1, 0xBC, 0x92, 0xE1, 0xBC, 0x93, 0xE1, + 0xBC, 0x94, 0xE1, 0xBC, 0x95, 0xE1, 0xBC, 0xA0, + 0xE1, 0xBC, 0xA1, 0xE1, 0xBC, 0xA2, 0xE1, 0xBC, + 0xA3, 0xE1, 0xBC, 0xA4, 0xE1, 0xBC, 0xA5, 0xE1, + 0xBC, 0xA6, 0xE1, 0xBC, 0xA7, 0xE1, 0xBC, 0xB0, + 0xE1, 0xBC, 0xB1, 0xE1, 0xBC, 0xB2, 0xE1, 0xBC, + 0xB3, 0xE1, 0xBC, 0xB4, 0xE1, 0xBC, 0xB5, 0xE1, + 0xBC, 0xB6, 0xE1, 0xBC, 0xB7, 0xE1, 0xBD, 0x80, + 0xE1, 0xBD, 0x81, 0xE1, 0xBD, 0x82, 0xE1, 0xBD, + 0x83, 0xE1, 0xBD, 0x84, 0xE1, 
0xBD, 0x85, 0xE1, + 0xBD, 0x91, 0xE1, 0xBD, 0x93, 0xE1, 0xBD, 0x95, + 0xE1, 0xBD, 0x97, 0xE1, 0xBD, 0xA0, 0xE1, 0xBD, + 0xA1, 0xE1, 0xBD, 0xA2, 0xE1, 0xBD, 0xA3, 0xE1, + 0xBD, 0xA4, 0xE1, 0xBD, 0xA5, 0xE1, 0xBD, 0xA6, + 0xE1, 0xBD, 0xA7, 0xE1, 0xBE, 0x80, 0xE1, 0xBE, + 0x81, 0xE1, 0xBE, 0x82, 0xE1, 0xBE, 0x83, 0xE1, + 0xBE, 0x84, 0xE1, 0xBE, 0x85, 0xE1, 0xBE, 0x86, + 0xE1, 0xBE, 0x87, 0xE1, 0xBE, 0x90, 0xE1, 0xBE, + 0x91, 0xE1, 0xBE, 0x92, 0xE1, 0xBE, 0x93, 0xE1, + 0xBE, 0x94, 0xE1, 0xBE, 0x95, 0xE1, 0xBE, 0x96, + 0xE1, 0xBE, 0x97, 0xE1, 0xBE, 0xA0, 0xE1, 0xBE, + 0xA1, 0xE1, 0xBE, 0xA2, 0xE1, 0xBE, 0xA3, 0xE1, + 0xBE, 0xA4, 0xE1, 0xBE, 0xA5, 0xE1, 0xBE, 0xA6, + 0xE1, 0xBE, 0xA7, 0xE1, 0xBE, 0xB0, 0xE1, 0xBE, + 0xB1, 0xE1, 0xBD, 0xB0, 0xE1, 0xBD, 0xB1, 0xE1, + 0xBE, 0xB3, 0xE1, 0xBD, 0xB2, 0xE1, 0xBD, 0xB3, + 0xE1, 0xBD, 0xB4, 0xE1, 0xBD, 0xB5, 0xE1, 0xBF, + 0x83, 0xE1, 0xBF, 0x90, 0xE1, 0xBF, 0x91, 0xE1, + 0xBD, 0xB6, 0xE1, 0xBD, 0xB7, 0xE1, 0xBF, 0xA0, + 0xE1, 0xBF, 0xA1, 0xE1, 0xBD, 0xBA, 0xE1, 0xBD, + 0xBB, 0xE1, 0xBF, 0xA5, 0xE1, 0xBD, 0xB8, 0xE1, + 0xBD, 0xB9, 0xE1, 0xBD, 0xBC, 0xE1, 0xBD, 0xBD, + 0xE1, 0xBF, 0xB3, 0xCF, 0x89, 0x6B, 0xC3, 0xA5, + 0xE2, 0x85, 0xB0, 0xE2, 0x85, 0xB1, 0xE2, 0x85, + 0xB2, 0xE2, 0x85, 0xB3, 0xE2, 0x85, 0xB4, 0xE2, + 0x85, 0xB5, 0xE2, 0x85, 0xB6, 0xE2, 0x85, 0xB7, + 0xE2, 0x85, 0xB8, 0xE2, 0x85, 0xB9, 0xE2, 0x85, + 0xBA, 0xE2, 0x85, 0xBB, 0xE2, 0x85, 0xBC, 0xE2, + 0x85, 0xBD, 0xE2, 0x85, 0xBE, 0xE2, 0x85, 0xBF, + 0xE2, 0x93, 0x90, 0xE2, 0x93, 0x91, 0xE2, 0x93, + 0x92, 0xE2, 0x93, 0x93, 0xE2, 0x93, 0x94, 0xE2, + 0x93, 0x95, 0xE2, 0x93, 0x96, 0xE2, 0x93, 0x97, + 0xE2, 0x93, 0x98, 0xE2, 0x93, 0x99, 0xE2, 0x93, + 0x9A, 0xE2, 0x93, 0x9B, 0xE2, 0x93, 0x9C, 0xE2, + 0x93, 0x9D, 0xE2, 0x93, 0x9E, 0xE2, 0x93, 0x9F, + 0xE2, 0x93, 0xA0, 0xE2, 0x93, 0xA1, 0xE2, 0x93, + 0xA2, 0xE2, 0x93, 0xA3, 0xE2, 0x93, 0xA4, 0xE2, + 0x93, 0xA5, 0xE2, 0x93, 0xA6, 0xE2, 0x93, 0xA7, + 0xE2, 0x93, 0xA8, 0xE2, 0x93, 0xA9, 0xEF, 0xBD, + 0x81, 0xEF, 0xBD, 0x82, 0xEF, 
0xBD, 0x83, 0xEF, + 0xBD, 0x84, 0xEF, 0xBD, 0x85, 0xEF, 0xBD, 0x86, + 0xEF, 0xBD, 0x87, 0xEF, 0xBD, 0x88, 0xEF, 0xBD, + 0x89, 0xEF, 0xBD, 0x8A, 0xEF, 0xBD, 0x8B, 0xEF, + 0xBD, 0x8C, 0xEF, 0xBD, 0x8D, 0xEF, 0xBD, 0x8E, + 0xEF, 0xBD, 0x8F, 0xEF, 0xBD, 0x90, 0xEF, 0xBD, + 0x91, 0xEF, 0xBD, 0x92, 0xEF, 0xBD, 0x93, 0xEF, + 0xBD, 0x94, 0xEF, 0xBD, 0x95, 0xEF, 0xBD, 0x96, + 0xEF, 0xBD, 0x97, 0xEF, 0xBD, 0x98, 0xEF, 0xBD, + 0x99, 0xEF, 0xBD, 0x9A, 0xF0, 0x90, 0x90, 0xA8, + 0xF0, 0x90, 0x90, 0xA9, 0xF0, 0x90, 0x90, 0xAA, + 0xF0, 0x90, 0x90, 0xAB, 0xF0, 0x90, 0x90, 0xAC, + 0xF0, 0x90, 0x90, 0xAD, 0xF0, 0x90, 0x90, 0xAE, + 0xF0, 0x90, 0x90, 0xAF, 0xF0, 0x90, 0x90, 0xB0, + 0xF0, 0x90, 0x90, 0xB1, 0xF0, 0x90, 0x90, 0xB2, + 0xF0, 0x90, 0x90, 0xB3, 0xF0, 0x90, 0x90, 0xB4, + 0xF0, 0x90, 0x90, 0xB5, 0xF0, 0x90, 0x90, 0xB6, + 0xF0, 0x90, 0x90, 0xB7, 0xF0, 0x90, 0x90, 0xB8, + 0xF0, 0x90, 0x90, 0xB9, 0xF0, 0x90, 0x90, 0xBA, + 0xF0, 0x90, 0x90, 0xBB, 0xF0, 0x90, 0x90, 0xBC, + 0xF0, 0x90, 0x90, 0xBD, 0xF0, 0x90, 0x90, 0xBE, + 0xF0, 0x90, 0x90, 0xBF, 0xF0, 0x90, 0x91, 0x80, + 0xF0, 0x90, 0x91, 0x81, 0xF0, 0x90, 0x91, 0x82, + 0xF0, 0x90, 0x91, 0x83, 0xF0, 0x90, 0x91, 0x84, + 0xF0, 0x90, 0x91, 0x85, 0xF0, 0x90, 0x91, 0x86, + 0xF0, 0x90, 0x91, 0x87, 0xF0, 0x90, 0x91, 0x88, + 0xF0, 0x90, 0x91, 0x89, 0xF0, 0x90, 0x91, 0x8A, + 0xF0, 0x90, 0x91, 0x8B, 0xF0, 0x90, 0x91, 0x8C, + 0xF0, 0x90, 0x91, 0x8D, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, + }, + { + 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, + 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, + 0xC3, 0xA8, 0xC3, 0xA9, 0xC3, 0xAA, 0xC3, 0xAB, + 0xC3, 0xAC, 0xC3, 0xAD, 0xC3, 0xAE, 0xC3, 0xAF, + 0xC3, 0xB0, 0xC3, 0xB1, 0xC3, 0xB2, 0xC3, 0xB3, + 0xC3, 0xB4, 0xC3, 0xB5, 0xC3, 0xB6, 0xC3, 0xB8, + 0xC3, 0xB9, 0xC3, 0xBA, 0xC3, 0xBB, 0xC3, 0xBC, + 0xC3, 0xBD, 0xC3, 0xBE, 0xC4, 0x81, 0xC4, 0x83, + 0xC4, 0x85, 0xC4, 0x87, 0xC4, 0x89, 0xC4, 0x8B, + 0xC4, 0x8D, 0xC4, 0x8F, 0xC4, 0x91, 0xC4, 0x93, + 0xC4, 0x95, 0xC4, 0x97, 0xC4, 0x99, 0xC4, 0x9B, + 0xC4, 0x9D, 0xC4, 0x9F, 0xC4, 0xA1, 0xC4, 0xA3, + 0xC4, 0xA5, 0xC4, 0xA7, 0xC4, 0xA9, 0xC4, 0xAB, + 0xC4, 0xAD, 0xC4, 0xAF, 0x69, 0xC4, 0xB3, 0xC4, + 0xB5, 0xC4, 0xB7, 0xC4, 0xBA, 0xC4, 0xBC, 0xC4, + 0xBE, 0xC5, 0x80, 0xC5, 0x82, 0xC5, 0x84, 0xC5, + 0x86, 0xC5, 0x88, 0xC5, 0x8B, 0xC5, 0x8D, 0xC5, + 0x8F, 0xC5, 0x91, 0xC5, 0x93, 0xC5, 0x95, 0xC5, + 0x97, 0xC5, 0x99, 0xC5, 0x9B, 0xC5, 0x9D, 0xC5, + 0x9F, 0xC5, 0xA1, 0xC5, 0xA3, 
0xC5, 0xA5, 0xC5, + 0xA7, 0xC5, 0xA9, 0xC5, 0xAB, 0xC5, 0xAD, 0xC5, + 0xAF, 0xC5, 0xB1, 0xC5, 0xB3, 0xC5, 0xB5, 0xC5, + 0xB7, 0xC3, 0xBF, 0xC5, 0xBA, 0xC5, 0xBC, 0xC5, + 0xBE, 0xC9, 0x93, 0xC6, 0x83, 0xC6, 0x85, 0xC9, + 0x94, 0xC6, 0x88, 0xC9, 0x96, 0xC9, 0x97, 0xC6, + 0x8C, 0xC7, 0x9D, 0xC9, 0x99, 0xC9, 0x9B, 0xC6, + 0x92, 0xC9, 0xA0, 0xC9, 0xA3, 0xC9, 0xA9, 0xC9, + 0xA8, 0xC6, 0x99, 0xC9, 0xAF, 0xC9, 0xB2, 0xC9, + 0xB5, 0xC6, 0xA1, 0xC6, 0xA3, 0xC6, 0xA5, 0xCA, + 0x80, 0xC6, 0xA8, 0xCA, 0x83, 0xC6, 0xAD, 0xCA, + 0x88, 0xC6, 0xB0, 0xCA, 0x8A, 0xCA, 0x8B, 0xC6, + 0xB4, 0xC6, 0xB6, 0xCA, 0x92, 0xC6, 0xB9, 0xC6, + 0xBD, 0xC7, 0x86, 0xC7, 0x86, 0xC7, 0x89, 0xC7, + 0x89, 0xC7, 0x8C, 0xC7, 0x8C, 0xC7, 0x8E, 0xC7, + 0x90, 0xC7, 0x92, 0xC7, 0x94, 0xC7, 0x96, 0xC7, + 0x98, 0xC7, 0x9A, 0xC7, 0x9C, 0xC7, 0x9F, 0xC7, + 0xA1, 0xC7, 0xA3, 0xC7, 0xA5, 0xC7, 0xA7, 0xC7, + 0xA9, 0xC7, 0xAB, 0xC7, 0xAD, 0xC7, 0xAF, 0xC7, + 0xB3, 0xC7, 0xB3, 0xC7, 0xB5, 0xC6, 0x95, 0xC6, + 0xBF, 0xC7, 0xB9, 0xC7, 0xBB, 0xC7, 0xBD, 0xC7, + 0xBF, 0xC8, 0x81, 0xC8, 0x83, 0xC8, 0x85, 0xC8, + 0x87, 0xC8, 0x89, 0xC8, 0x8B, 0xC8, 0x8D, 0xC8, + 0x8F, 0xC8, 0x91, 0xC8, 0x93, 0xC8, 0x95, 0xC8, + 0x97, 0xC8, 0x99, 0xC8, 0x9B, 0xC8, 0x9D, 0xC8, + 0x9F, 0xC6, 0x9E, 0xC8, 0xA3, 0xC8, 0xA5, 0xC8, + 0xA7, 0xC8, 0xA9, 0xC8, 0xAB, 0xC8, 0xAD, 0xC8, + 0xAF, 0xC8, 0xB1, 0xC8, 0xB3, 0xE2, 0xB1, 0xA5, + 0xC8, 0xBC, 0xC6, 0x9A, 0xE2, 0xB1, 0xA6, 0xC9, + 0x82, 0xC6, 0x80, 0xCA, 0x89, 0xCA, 0x8C, 0xC9, + 0x87, 0xC9, 0x89, 0xC9, 0x8B, 0xC9, 0x8D, 0xC9, + 0x8F, 0xCE, 0xAC, 0xCE, 0xAD, 0xCE, 0xAE, 0xCE, + 0xAF, 0xCF, 0x8C, 0xCF, 0x8D, 0xCF, 0x8E, 0xCE, + 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, + 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, + 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, + 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, + 0x81, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, 0xCF, + 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, 0xCF, + 0x8A, 0xCF, 0x8B, 0xCF, 0x99, 0xCF, 0x9B, 0xCF, + 0x9D, 0xCF, 0x9F, 0xCF, 0xA1, 
0xCF, 0xA3, 0xCF, + 0xA5, 0xCF, 0xA7, 0xCF, 0xA9, 0xCF, 0xAB, 0xCF, + 0xAD, 0xCF, 0xAF, 0xCE, 0xB8, 0xCF, 0xB8, 0xCF, + 0xB2, 0xCF, 0xBB, 0xCD, 0xBB, 0xCD, 0xBC, 0xCD, + 0xBD, 0xD1, 0x90, 0xD1, 0x91, 0xD1, 0x92, 0xD1, + 0x93, 0xD1, 0x94, 0xD1, 0x95, 0xD1, 0x96, 0xD1, + 0x97, 0xD1, 0x98, 0xD1, 0x99, 0xD1, 0x9A, 0xD1, + 0x9B, 0xD1, 0x9C, 0xD1, 0x9D, 0xD1, 0x9E, 0xD1, + 0x9F, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, + 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD0, 0xB6, 0xD0, + 0xB7, 0xD0, 0xB8, 0xD0, 0xB9, 0xD0, 0xBA, 0xD0, + 0xBB, 0xD0, 0xBC, 0xD0, 0xBD, 0xD0, 0xBE, 0xD0, + 0xBF, 0xD1, 0x80, 0xD1, 0x81, 0xD1, 0x82, 0xD1, + 0x83, 0xD1, 0x84, 0xD1, 0x85, 0xD1, 0x86, 0xD1, + 0x87, 0xD1, 0x88, 0xD1, 0x89, 0xD1, 0x8A, 0xD1, + 0x8B, 0xD1, 0x8C, 0xD1, 0x8D, 0xD1, 0x8E, 0xD1, + 0x8F, 0xD1, 0xA1, 0xD1, 0xA3, 0xD1, 0xA5, 0xD1, + 0xA7, 0xD1, 0xA9, 0xD1, 0xAB, 0xD1, 0xAD, 0xD1, + 0xAF, 0xD1, 0xB1, 0xD1, 0xB3, 0xD1, 0xB5, 0xD1, + 0xB7, 0xD1, 0xB9, 0xD1, 0xBB, 0xD1, 0xBD, 0xD1, + 0xBF, 0xD2, 0x81, 0xD2, 0x8B, 0xD2, 0x8D, 0xD2, + 0x8F, 0xD2, 0x91, 0xD2, 0x93, 0xD2, 0x95, 0xD2, + 0x97, 0xD2, 0x99, 0xD2, 0x9B, 0xD2, 0x9D, 0xD2, + 0x9F, 0xD2, 0xA1, 0xD2, 0xA3, 0xD2, 0xA5, 0xD2, + 0xA7, 0xD2, 0xA9, 0xD2, 0xAB, 0xD2, 0xAD, 0xD2, + 0xAF, 0xD2, 0xB1, 0xD2, 0xB3, 0xD2, 0xB5, 0xD2, + 0xB7, 0xD2, 0xB9, 0xD2, 0xBB, 0xD2, 0xBD, 0xD2, + 0xBF, 0xD3, 0x8F, 0xD3, 0x82, 0xD3, 0x84, 0xD3, + 0x86, 0xD3, 0x88, 0xD3, 0x8A, 0xD3, 0x8C, 0xD3, + 0x8E, 0xD3, 0x91, 0xD3, 0x93, 0xD3, 0x95, 0xD3, + 0x97, 0xD3, 0x99, 0xD3, 0x9B, 0xD3, 0x9D, 0xD3, + 0x9F, 0xD3, 0xA1, 0xD3, 0xA3, 0xD3, 0xA5, 0xD3, + 0xA7, 0xD3, 0xA9, 0xD3, 0xAB, 0xD3, 0xAD, 0xD3, + 0xAF, 0xD3, 0xB1, 0xD3, 0xB3, 0xD3, 0xB5, 0xD3, + 0xB7, 0xD3, 0xB9, 0xD3, 0xBB, 0xD3, 0xBD, 0xD3, + 0xBF, 0xD4, 0x81, 0xD4, 0x83, 0xD4, 0x85, 0xD4, + 0x87, 0xD4, 0x89, 0xD4, 0x8B, 0xD4, 0x8D, 0xD4, + 0x8F, 0xD4, 0x91, 0xD4, 0x93, 0xD5, 0xA1, 0xD5, + 0xA2, 0xD5, 0xA3, 0xD5, 0xA4, 0xD5, 0xA5, 0xD5, + 0xA6, 0xD5, 0xA7, 0xD5, 0xA8, 0xD5, 0xA9, 0xD5, + 0xAA, 0xD5, 0xAB, 0xD5, 0xAC, 
0xD5, 0xAD, 0xD5, + 0xAE, 0xD5, 0xAF, 0xD5, 0xB0, 0xD5, 0xB1, 0xD5, + 0xB2, 0xD5, 0xB3, 0xD5, 0xB4, 0xD5, 0xB5, 0xD5, + 0xB6, 0xD5, 0xB7, 0xD5, 0xB8, 0xD5, 0xB9, 0xD5, + 0xBA, 0xD5, 0xBB, 0xD5, 0xBC, 0xD5, 0xBD, 0xD5, + 0xBE, 0xD5, 0xBF, 0xD6, 0x80, 0xD6, 0x81, 0xD6, + 0x82, 0xD6, 0x83, 0xD6, 0x84, 0xD6, 0x85, 0xD6, + 0x86, 0xE2, 0xB4, 0x80, 0xE2, 0xB4, 0x81, 0xE2, + 0xB4, 0x82, 0xE2, 0xB4, 0x83, 0xE2, 0xB4, 0x84, + 0xE2, 0xB4, 0x85, 0xE2, 0xB4, 0x86, 0xE2, 0xB4, + 0x87, 0xE2, 0xB4, 0x88, 0xE2, 0xB4, 0x89, 0xE2, + 0xB4, 0x8A, 0xE2, 0xB4, 0x8B, 0xE2, 0xB4, 0x8C, + 0xE2, 0xB4, 0x8D, 0xE2, 0xB4, 0x8E, 0xE2, 0xB4, + 0x8F, 0xE2, 0xB4, 0x90, 0xE2, 0xB4, 0x91, 0xE2, + 0xB4, 0x92, 0xE2, 0xB4, 0x93, 0xE2, 0xB4, 0x94, + 0xE2, 0xB4, 0x95, 0xE2, 0xB4, 0x96, 0xE2, 0xB4, + 0x97, 0xE2, 0xB4, 0x98, 0xE2, 0xB4, 0x99, 0xE2, + 0xB4, 0x9A, 0xE2, 0xB4, 0x9B, 0xE2, 0xB4, 0x9C, + 0xE2, 0xB4, 0x9D, 0xE2, 0xB4, 0x9E, 0xE2, 0xB4, + 0x9F, 0xE2, 0xB4, 0xA0, 0xE2, 0xB4, 0xA1, 0xE2, + 0xB4, 0xA2, 0xE2, 0xB4, 0xA3, 0xE2, 0xB4, 0xA4, + 0xE2, 0xB4, 0xA5, 0xE1, 0xB8, 0x81, 0xE1, 0xB8, + 0x83, 0xE1, 0xB8, 0x85, 0xE1, 0xB8, 0x87, 0xE1, + 0xB8, 0x89, 0xE1, 0xB8, 0x8B, 0xE1, 0xB8, 0x8D, + 0xE1, 0xB8, 0x8F, 0xE1, 0xB8, 0x91, 0xE1, 0xB8, + 0x93, 0xE1, 0xB8, 0x95, 0xE1, 0xB8, 0x97, 0xE1, + 0xB8, 0x99, 0xE1, 0xB8, 0x9B, 0xE1, 0xB8, 0x9D, + 0xE1, 0xB8, 0x9F, 0xE1, 0xB8, 0xA1, 0xE1, 0xB8, + 0xA3, 0xE1, 0xB8, 0xA5, 0xE1, 0xB8, 0xA7, 0xE1, + 0xB8, 0xA9, 0xE1, 0xB8, 0xAB, 0xE1, 0xB8, 0xAD, + 0xE1, 0xB8, 0xAF, 0xE1, 0xB8, 0xB1, 0xE1, 0xB8, + 0xB3, 0xE1, 0xB8, 0xB5, 0xE1, 0xB8, 0xB7, 0xE1, + 0xB8, 0xB9, 0xE1, 0xB8, 0xBB, 0xE1, 0xB8, 0xBD, + 0xE1, 0xB8, 0xBF, 0xE1, 0xB9, 0x81, 0xE1, 0xB9, + 0x83, 0xE1, 0xB9, 0x85, 0xE1, 0xB9, 0x87, 0xE1, + 0xB9, 0x89, 0xE1, 0xB9, 0x8B, 0xE1, 0xB9, 0x8D, + 0xE1, 0xB9, 0x8F, 0xE1, 0xB9, 0x91, 0xE1, 0xB9, + 0x93, 0xE1, 0xB9, 0x95, 0xE1, 0xB9, 0x97, 0xE1, + 0xB9, 0x99, 0xE1, 0xB9, 0x9B, 0xE1, 0xB9, 0x9D, + 0xE1, 0xB9, 0x9F, 0xE1, 0xB9, 0xA1, 0xE1, 0xB9, + 0xA3, 0xE1, 0xB9, 0xA5, 0xE1, 
0xB9, 0xA7, 0xE1, + 0xB9, 0xA9, 0xE1, 0xB9, 0xAB, 0xE1, 0xB9, 0xAD, + 0xE1, 0xB9, 0xAF, 0xE1, 0xB9, 0xB1, 0xE1, 0xB9, + 0xB3, 0xE1, 0xB9, 0xB5, 0xE1, 0xB9, 0xB7, 0xE1, + 0xB9, 0xB9, 0xE1, 0xB9, 0xBB, 0xE1, 0xB9, 0xBD, + 0xE1, 0xB9, 0xBF, 0xE1, 0xBA, 0x81, 0xE1, 0xBA, + 0x83, 0xE1, 0xBA, 0x85, 0xE1, 0xBA, 0x87, 0xE1, + 0xBA, 0x89, 0xE1, 0xBA, 0x8B, 0xE1, 0xBA, 0x8D, + 0xE1, 0xBA, 0x8F, 0xE1, 0xBA, 0x91, 0xE1, 0xBA, + 0x93, 0xE1, 0xBA, 0x95, 0xE1, 0xBA, 0xA1, 0xE1, + 0xBA, 0xA3, 0xE1, 0xBA, 0xA5, 0xE1, 0xBA, 0xA7, + 0xE1, 0xBA, 0xA9, 0xE1, 0xBA, 0xAB, 0xE1, 0xBA, + 0xAD, 0xE1, 0xBA, 0xAF, 0xE1, 0xBA, 0xB1, 0xE1, + 0xBA, 0xB3, 0xE1, 0xBA, 0xB5, 0xE1, 0xBA, 0xB7, + 0xE1, 0xBA, 0xB9, 0xE1, 0xBA, 0xBB, 0xE1, 0xBA, + 0xBD, 0xE1, 0xBA, 0xBF, 0xE1, 0xBB, 0x81, 0xE1, + 0xBB, 0x83, 0xE1, 0xBB, 0x85, 0xE1, 0xBB, 0x87, + 0xE1, 0xBB, 0x89, 0xE1, 0xBB, 0x8B, 0xE1, 0xBB, + 0x8D, 0xE1, 0xBB, 0x8F, 0xE1, 0xBB, 0x91, 0xE1, + 0xBB, 0x93, 0xE1, 0xBB, 0x95, 0xE1, 0xBB, 0x97, + 0xE1, 0xBB, 0x99, 0xE1, 0xBB, 0x9B, 0xE1, 0xBB, + 0x9D, 0xE1, 0xBB, 0x9F, 0xE1, 0xBB, 0xA1, 0xE1, + 0xBB, 0xA3, 0xE1, 0xBB, 0xA5, 0xE1, 0xBB, 0xA7, + 0xE1, 0xBB, 0xA9, 0xE1, 0xBB, 0xAB, 0xE1, 0xBB, + 0xAD, 0xE1, 0xBB, 0xAF, 0xE1, 0xBB, 0xB1, 0xE1, + 0xBB, 0xB3, 0xE1, 0xBB, 0xB5, 0xE1, 0xBB, 0xB7, + 0xE1, 0xBB, 0xB9, 0xE1, 0xBC, 0x80, 0xE1, 0xBC, + 0x81, 0xE1, 0xBC, 0x82, 0xE1, 0xBC, 0x83, 0xE1, + 0xBC, 0x84, 0xE1, 0xBC, 0x85, 0xE1, 0xBC, 0x86, + 0xE1, 0xBC, 0x87, 0xE1, 0xBC, 0x90, 0xE1, 0xBC, + 0x91, 0xE1, 0xBC, 0x92, 0xE1, 0xBC, 0x93, 0xE1, + 0xBC, 0x94, 0xE1, 0xBC, 0x95, 0xE1, 0xBC, 0xA0, + 0xE1, 0xBC, 0xA1, 0xE1, 0xBC, 0xA2, 0xE1, 0xBC, + 0xA3, 0xE1, 0xBC, 0xA4, 0xE1, 0xBC, 0xA5, 0xE1, + 0xBC, 0xA6, 0xE1, 0xBC, 0xA7, 0xE1, 0xBC, 0xB0, + 0xE1, 0xBC, 0xB1, 0xE1, 0xBC, 0xB2, 0xE1, 0xBC, + 0xB3, 0xE1, 0xBC, 0xB4, 0xE1, 0xBC, 0xB5, 0xE1, + 0xBC, 0xB6, 0xE1, 0xBC, 0xB7, 0xE1, 0xBD, 0x80, + 0xE1, 0xBD, 0x81, 0xE1, 0xBD, 0x82, 0xE1, 0xBD, + 0x83, 0xE1, 0xBD, 0x84, 0xE1, 0xBD, 0x85, 0xE1, + 0xBD, 0x91, 0xE1, 0xBD, 0x93, 
0xE1, 0xBD, 0x95, + 0xE1, 0xBD, 0x97, 0xE1, 0xBD, 0xA0, 0xE1, 0xBD, + 0xA1, 0xE1, 0xBD, 0xA2, 0xE1, 0xBD, 0xA3, 0xE1, + 0xBD, 0xA4, 0xE1, 0xBD, 0xA5, 0xE1, 0xBD, 0xA6, + 0xE1, 0xBD, 0xA7, 0xE1, 0xBE, 0x80, 0xE1, 0xBE, + 0x81, 0xE1, 0xBE, 0x82, 0xE1, 0xBE, 0x83, 0xE1, + 0xBE, 0x84, 0xE1, 0xBE, 0x85, 0xE1, 0xBE, 0x86, + 0xE1, 0xBE, 0x87, 0xE1, 0xBE, 0x90, 0xE1, 0xBE, + 0x91, 0xE1, 0xBE, 0x92, 0xE1, 0xBE, 0x93, 0xE1, + 0xBE, 0x94, 0xE1, 0xBE, 0x95, 0xE1, 0xBE, 0x96, + 0xE1, 0xBE, 0x97, 0xE1, 0xBE, 0xA0, 0xE1, 0xBE, + 0xA1, 0xE1, 0xBE, 0xA2, 0xE1, 0xBE, 0xA3, 0xE1, + 0xBE, 0xA4, 0xE1, 0xBE, 0xA5, 0xE1, 0xBE, 0xA6, + 0xE1, 0xBE, 0xA7, 0xE1, 0xBE, 0xB0, 0xE1, 0xBE, + 0xB1, 0xE1, 0xBD, 0xB0, 0xE1, 0xBD, 0xB1, 0xE1, + 0xBE, 0xB3, 0xE1, 0xBD, 0xB2, 0xE1, 0xBD, 0xB3, + 0xE1, 0xBD, 0xB4, 0xE1, 0xBD, 0xB5, 0xE1, 0xBF, + 0x83, 0xE1, 0xBF, 0x90, 0xE1, 0xBF, 0x91, 0xE1, + 0xBD, 0xB6, 0xE1, 0xBD, 0xB7, 0xE1, 0xBF, 0xA0, + 0xE1, 0xBF, 0xA1, 0xE1, 0xBD, 0xBA, 0xE1, 0xBD, + 0xBB, 0xE1, 0xBF, 0xA5, 0xE1, 0xBD, 0xB8, 0xE1, + 0xBD, 0xB9, 0xE1, 0xBD, 0xBC, 0xE1, 0xBD, 0xBD, + 0xE1, 0xBF, 0xB3, 0xCF, 0x89, 0x6B, 0xC3, 0xA5, + 0xE2, 0x85, 0x8E, 0xE2, 0x85, 0xB0, 0xE2, 0x85, + 0xB1, 0xE2, 0x85, 0xB2, 0xE2, 0x85, 0xB3, 0xE2, + 0x85, 0xB4, 0xE2, 0x85, 0xB5, 0xE2, 0x85, 0xB6, + 0xE2, 0x85, 0xB7, 0xE2, 0x85, 0xB8, 0xE2, 0x85, + 0xB9, 0xE2, 0x85, 0xBA, 0xE2, 0x85, 0xBB, 0xE2, + 0x85, 0xBC, 0xE2, 0x85, 0xBD, 0xE2, 0x85, 0xBE, + 0xE2, 0x85, 0xBF, 0xE2, 0x86, 0x84, 0xE2, 0x93, + 0x90, 0xE2, 0x93, 0x91, 0xE2, 0x93, 0x92, 0xE2, + 0x93, 0x93, 0xE2, 0x93, 0x94, 0xE2, 0x93, 0x95, + 0xE2, 0x93, 0x96, 0xE2, 0x93, 0x97, 0xE2, 0x93, + 0x98, 0xE2, 0x93, 0x99, 0xE2, 0x93, 0x9A, 0xE2, + 0x93, 0x9B, 0xE2, 0x93, 0x9C, 0xE2, 0x93, 0x9D, + 0xE2, 0x93, 0x9E, 0xE2, 0x93, 0x9F, 0xE2, 0x93, + 0xA0, 0xE2, 0x93, 0xA1, 0xE2, 0x93, 0xA2, 0xE2, + 0x93, 0xA3, 0xE2, 0x93, 0xA4, 0xE2, 0x93, 0xA5, + 0xE2, 0x93, 0xA6, 0xE2, 0x93, 0xA7, 0xE2, 0x93, + 0xA8, 0xE2, 0x93, 0xA9, 0xE2, 0xB0, 0xB0, 0xE2, + 0xB0, 0xB1, 0xE2, 0xB0, 0xB2, 
0xE2, 0xB0, 0xB3, + 0xE2, 0xB0, 0xB4, 0xE2, 0xB0, 0xB5, 0xE2, 0xB0, + 0xB6, 0xE2, 0xB0, 0xB7, 0xE2, 0xB0, 0xB8, 0xE2, + 0xB0, 0xB9, 0xE2, 0xB0, 0xBA, 0xE2, 0xB0, 0xBB, + 0xE2, 0xB0, 0xBC, 0xE2, 0xB0, 0xBD, 0xE2, 0xB0, + 0xBE, 0xE2, 0xB0, 0xBF, 0xE2, 0xB1, 0x80, 0xE2, + 0xB1, 0x81, 0xE2, 0xB1, 0x82, 0xE2, 0xB1, 0x83, + 0xE2, 0xB1, 0x84, 0xE2, 0xB1, 0x85, 0xE2, 0xB1, + 0x86, 0xE2, 0xB1, 0x87, 0xE2, 0xB1, 0x88, 0xE2, + 0xB1, 0x89, 0xE2, 0xB1, 0x8A, 0xE2, 0xB1, 0x8B, + 0xE2, 0xB1, 0x8C, 0xE2, 0xB1, 0x8D, 0xE2, 0xB1, + 0x8E, 0xE2, 0xB1, 0x8F, 0xE2, 0xB1, 0x90, 0xE2, + 0xB1, 0x91, 0xE2, 0xB1, 0x92, 0xE2, 0xB1, 0x93, + 0xE2, 0xB1, 0x94, 0xE2, 0xB1, 0x95, 0xE2, 0xB1, + 0x96, 0xE2, 0xB1, 0x97, 0xE2, 0xB1, 0x98, 0xE2, + 0xB1, 0x99, 0xE2, 0xB1, 0x9A, 0xE2, 0xB1, 0x9B, + 0xE2, 0xB1, 0x9C, 0xE2, 0xB1, 0x9D, 0xE2, 0xB1, + 0x9E, 0xE2, 0xB1, 0xA1, 0xC9, 0xAB, 0xE1, 0xB5, + 0xBD, 0xC9, 0xBD, 0xE2, 0xB1, 0xA8, 0xE2, 0xB1, + 0xAA, 0xE2, 0xB1, 0xAC, 0xE2, 0xB1, 0xB6, 0xE2, + 0xB2, 0x81, 0xE2, 0xB2, 0x83, 0xE2, 0xB2, 0x85, + 0xE2, 0xB2, 0x87, 0xE2, 0xB2, 0x89, 0xE2, 0xB2, + 0x8B, 0xE2, 0xB2, 0x8D, 0xE2, 0xB2, 0x8F, 0xE2, + 0xB2, 0x91, 0xE2, 0xB2, 0x93, 0xE2, 0xB2, 0x95, + 0xE2, 0xB2, 0x97, 0xE2, 0xB2, 0x99, 0xE2, 0xB2, + 0x9B, 0xE2, 0xB2, 0x9D, 0xE2, 0xB2, 0x9F, 0xE2, + 0xB2, 0xA1, 0xE2, 0xB2, 0xA3, 0xE2, 0xB2, 0xA5, + 0xE2, 0xB2, 0xA7, 0xE2, 0xB2, 0xA9, 0xE2, 0xB2, + 0xAB, 0xE2, 0xB2, 0xAD, 0xE2, 0xB2, 0xAF, 0xE2, + 0xB2, 0xB1, 0xE2, 0xB2, 0xB3, 0xE2, 0xB2, 0xB5, + 0xE2, 0xB2, 0xB7, 0xE2, 0xB2, 0xB9, 0xE2, 0xB2, + 0xBB, 0xE2, 0xB2, 0xBD, 0xE2, 0xB2, 0xBF, 0xE2, + 0xB3, 0x81, 0xE2, 0xB3, 0x83, 0xE2, 0xB3, 0x85, + 0xE2, 0xB3, 0x87, 0xE2, 0xB3, 0x89, 0xE2, 0xB3, + 0x8B, 0xE2, 0xB3, 0x8D, 0xE2, 0xB3, 0x8F, 0xE2, + 0xB3, 0x91, 0xE2, 0xB3, 0x93, 0xE2, 0xB3, 0x95, + 0xE2, 0xB3, 0x97, 0xE2, 0xB3, 0x99, 0xE2, 0xB3, + 0x9B, 0xE2, 0xB3, 0x9D, 0xE2, 0xB3, 0x9F, 0xE2, + 0xB3, 0xA1, 0xE2, 0xB3, 0xA3, 0xEF, 0xBD, 0x81, + 0xEF, 0xBD, 0x82, 0xEF, 0xBD, 0x83, 0xEF, 0xBD, + 0x84, 0xEF, 0xBD, 0x85, 0xEF, 
0xBD, 0x86, 0xEF, + 0xBD, 0x87, 0xEF, 0xBD, 0x88, 0xEF, 0xBD, 0x89, + 0xEF, 0xBD, 0x8A, 0xEF, 0xBD, 0x8B, 0xEF, 0xBD, + 0x8C, 0xEF, 0xBD, 0x8D, 0xEF, 0xBD, 0x8E, 0xEF, + 0xBD, 0x8F, 0xEF, 0xBD, 0x90, 0xEF, 0xBD, 0x91, + 0xEF, 0xBD, 0x92, 0xEF, 0xBD, 0x93, 0xEF, 0xBD, + 0x94, 0xEF, 0xBD, 0x95, 0xEF, 0xBD, 0x96, 0xEF, + 0xBD, 0x97, 0xEF, 0xBD, 0x98, 0xEF, 0xBD, 0x99, + 0xEF, 0xBD, 0x9A, 0xF0, 0x90, 0x90, 0xA8, 0xF0, + 0x90, 0x90, 0xA9, 0xF0, 0x90, 0x90, 0xAA, 0xF0, + 0x90, 0x90, 0xAB, 0xF0, 0x90, 0x90, 0xAC, 0xF0, + 0x90, 0x90, 0xAD, 0xF0, 0x90, 0x90, 0xAE, 0xF0, + 0x90, 0x90, 0xAF, 0xF0, 0x90, 0x90, 0xB0, 0xF0, + 0x90, 0x90, 0xB1, 0xF0, 0x90, 0x90, 0xB2, 0xF0, + 0x90, 0x90, 0xB3, 0xF0, 0x90, 0x90, 0xB4, 0xF0, + 0x90, 0x90, 0xB5, 0xF0, 0x90, 0x90, 0xB6, 0xF0, + 0x90, 0x90, 0xB7, 0xF0, 0x90, 0x90, 0xB8, 0xF0, + 0x90, 0x90, 0xB9, 0xF0, 0x90, 0x90, 0xBA, 0xF0, + 0x90, 0x90, 0xBB, 0xF0, 0x90, 0x90, 0xBC, 0xF0, + 0x90, 0x90, 0xBD, 0xF0, 0x90, 0x90, 0xBE, 0xF0, + 0x90, 0x90, 0xBF, 0xF0, 0x90, 0x91, 0x80, 0xF0, + 0x90, 0x91, 0x81, 0xF0, 0x90, 0x91, 0x82, 0xF0, + 0x90, 0x91, 0x83, 0xF0, 0x90, 0x91, 0x84, 0xF0, + 0x90, 0x91, 0x85, 0xF0, 0x90, 0x91, 0x86, 0xF0, + 0x90, 0x91, 0x87, 0xF0, 0x90, 0x91, 0x88, 0xF0, + 0x90, 0x91, 0x89, 0xF0, 0x90, 0x91, 0x8A, 0xF0, + 0x90, 0x91, 0x8B, 0xF0, 0x90, 0x91, 0x8C, 0xF0, + 0x90, 0x91, 0x8D, 0xF0, 0x90, 0x91, 0x8E, 0xF0, + 0x90, 0x91, 0x8F, + }, +}; + +static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { + { + { /* Third byte table 0. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0, 0 }, { 1, 2 }, + { 2, 64 }, { 3, 125 }, { 4, 188 }, { 5, 226 }, + { 6, 288 }, { 7, 338 }, { 8, 364 }, { N_, 0 }, + { N_, 0 }, { 9, 376 }, { 10, 378 }, { 11, 416 }, + { 12, 486 }, { 13, 518 }, { 14, 614 }, { 15, 670 }, + { 16, 724 }, { 17, 740 }, { 18, 802 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 19, 816 }, { 20, 912 }, { 21, 1008 }, { 22, 1092 }, + { 23, 1179 }, { 24, 1269 }, { 25, 1365 }, { 26, 1448 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 2. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 
0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 27, 1469 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 28, 1517 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 29, 1595 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 4. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + 
{ N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 30, 1673 }, { 31, 1769 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + }, + { + { /* Third byte table 0. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { 0, 0 }, { 1, 2 }, + { 2, 64 }, { 3, 125 }, { 4, 188 }, { 5, 230 }, + { 6, 292 }, { 7, 344 }, { 8, 388 }, { N_, 0 }, + { N_, 0 }, { 9, 404 }, { 10, 412 }, { 11, 450 }, + { 12, 524 }, { 13, 556 }, { 14, 652 }, { 15, 708 }, + { 16, 772 }, { 17, 792 }, { 18, 854 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 1. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 19, 868 }, { N_, 0 }, { N_, 0 }, + { 20, 871 }, { 21, 967 }, { 22, 1063 }, { 23, 1147 }, + { 24, 1234 }, { 25, 1324 }, { 26, 1420 }, { 27, 1503 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 2. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 28, 1524 }, { 29, 1575 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { 30, 1578 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 
}, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 31, 1656 }, { 32, 1704 }, { 33, 1816 }, { 34, 1912 }, + { 35, 1966 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 3. 
*/ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 
0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { 36, 2080 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + { /* Third byte table 4. */ + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + 
{ N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { 37, 2158 }, { 38, 2254 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, + }, + }, +}; + +static const uchar_t 
u8_toupper_b4_tbl[2][39][257] = { + { + { /* Fourth byte table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, + }, + { /* Fourth byte table 1. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 46, 48, 50, 52, 54, 56, 58, 60, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 2. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 49, 49, 51, 51, 53, 53, + 55, 55, 55, 57, 57, 59, 59, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, + }, + { /* Fourth byte table 3. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 50, 50, 52, 52, 54, 54, + 56, 56, 56, 58, 58, 60, 60, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 4. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 2, 4, 4, + 4, 6, 6, 6, 6, 8, 8, 8, + 8, 8, 8, 10, 10, 10, 12, 12, + 12, 12, 14, 14, 14, 14, 14, 16, + 16, 16, 18, 18, 20, 20, 22, 22, + 22, 24, 24, 24, 24, 24, 26, 26, + 26, 28, 28, 28, 28, 30, 30, 32, + 32, 32, 34, 34, 34, 34, 36, 36, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 5. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 4, + 4, 6, 8, 8, 10, 12, 12, 14, + 14, 16, 16, 18, 18, 20, 20, 22, + 22, 24, 24, 26, 26, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 48, 50, 52, 52, 54, 54, + 54, 54, 56, 56, 58, 58, 60, 60, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 6. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 32, 32, 34, 34, 36, 36, + 38, 38, 40, 40, 42, 42, 44, 44, + 46, 46, 48, 48, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, + 50, + }, + { /* Fourth byte table 7. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 4, 4, 6, + 8, 8, 10, 10, 12, 12, 12, 12, + 12, 14, 14, 14, 16, 16, 16, 16, + 16, 18, 20, 20, 20, 20, 20, 20, + 22, 22, 22, 24, 24, 24, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, + }, + { /* Fourth byte table 8. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 4, 4, 4, 4, + 4, 6, 6, 8, 10, 10, 10, 10, + 10, 10, 10, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, + }, + { /* Fourth byte table 9. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, + }, + { /* Fourth byte table 10. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 4, 6, + 8, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30, 32, 34, 36, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 11. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 30, 32, 34, 34, 34, 34, 36, 38, + 38, 38, 40, 40, 42, 42, 44, 44, + 46, 46, 48, 48, 50, 50, 52, 52, + 54, 54, 56, 56, 58, 58, 60, 60, + 62, 64, 66, 68, 68, 68, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, + 70, + }, + { /* Fourth byte table 12. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, + }, + { /* Fourth byte table 13. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 64, 66, 66, 68, 68, 70, 70, + 72, 72, 74, 74, 76, 76, 78, 78, + 80, 80, 82, 82, 84, 84, 86, 86, + 88, 88, 90, 90, 92, 92, 94, 94, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 14. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 50, 50, 52, 52, 54, 54, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 15. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 2, 4, 4, 6, + 6, 8, 8, 10, 10, 12, 12, 14, + 14, 14, 16, 16, 18, 18, 20, 20, + 22, 22, 24, 24, 26, 26, 28, 28, + 30, 30, 32, 32, 34, 34, 36, 36, + 38, 38, 40, 40, 42, 42, 44, 44, + 46, 46, 48, 48, 50, 50, 52, 52, + 52, 52, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 16. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 17. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 4, 6, 8, 10, 12, + 14, 16, 18, 20, 22, 24, 26, 28, + 30, 32, 34, 36, 38, 40, 42, 44, + 46, 48, 50, 52, 54, 56, 58, 60, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 18. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 19. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 90, 90, 93, 93, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 20. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 90, 90, 93, 93, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 21. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 33, 33, 33, 33, 36, 36, 36, 36, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, + }, + { /* Fourth byte table 22. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 23. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 45, 48, 51, 54, 57, 60, 63, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 69, 72, 75, 78, 81, 84, 87, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, + }, + { /* Fourth byte table 24. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 21, 21, 24, 24, 27, 27, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 33, 36, 39, 42, 45, 48, 51, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 57, 60, 63, 66, 69, 72, 75, + 78, 81, 84, 87, 90, 93, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 25. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 75, 78, 78, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, + }, + { /* Fourth byte table 26. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 6, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 12, 15, 15, 15, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 27. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 28. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, + }, + { /* Fourth byte table 29. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 6, 9, 12, 15, 18, + 21, 24, 27, 30, 33, 36, 39, 42, + 45, 48, 51, 54, 57, 60, 63, 66, + 69, 72, 75, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, + }, + { /* Fourth byte table 30. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 31. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 32. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 33. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 34. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 35. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 36. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 37. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + { /* Fourth byte table 38. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, + }, + }, + { + { /* Fourth byte table 0. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, + }, + { /* Fourth byte table 1. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 46, 48, 50, 52, 54, 56, 58, 60, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 2. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 49, 49, 51, 51, 53, 53, + 55, 55, 55, 57, 57, 59, 59, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, + }, + { /* Fourth byte table 3. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 4, 4, 6, 6, 8, + 8, 10, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 50, 50, 52, 52, 54, 54, + 56, 56, 56, 58, 58, 60, 60, 62, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + 63, + }, + { /* Fourth byte table 4. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 4, 4, 6, 6, + 6, 8, 8, 8, 8, 10, 10, 10, + 10, 10, 10, 12, 12, 12, 14, 14, + 14, 14, 16, 18, 18, 18, 18, 20, + 20, 20, 22, 22, 24, 24, 26, 26, + 26, 28, 28, 28, 28, 28, 30, 30, + 30, 32, 32, 32, 32, 34, 34, 36, + 36, 36, 38, 38, 38, 38, 40, 40, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, + }, + { /* Fourth byte table 5. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 4, + 4, 6, 8, 8, 10, 12, 12, 14, + 14, 16, 16, 18, 18, 20, 20, 22, + 22, 24, 24, 26, 26, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 48, 50, 52, 52, 54, 54, + 54, 54, 56, 56, 58, 58, 60, 60, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 6. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 32, 32, 34, 34, 36, 36, + 38, 38, 40, 40, 42, 42, 44, 44, + 46, 46, 48, 48, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, + 52, + }, + { /* Fourth byte table 7. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 2, 2, 2, 2, + 4, 4, 6, 6, 8, 8, 10, 10, + 12, 12, 12, 12, 14, 16, 16, 18, + 20, 20, 22, 22, 24, 24, 24, 24, + 24, 26, 26, 26, 28, 28, 28, 28, + 28, 30, 32, 32, 35, 35, 35, 35, + 37, 37, 37, 39, 39, 39, 41, 41, + 41, 41, 41, 41, 41, 41, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 44, + }, + { /* Fourth byte table 8. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 4, 4, 4, 4, + 4, 6, 8, 10, 12, 14, 14, 14, + 14, 14, 14, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, + }, + { /* Fourth byte table 9. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 4, 6, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, + }, + { /* Fourth byte table 10. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 4, 6, + 8, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30, 32, 34, 36, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, + }, + { /* Fourth byte table 11. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 30, 32, 34, 34, 34, 34, 36, 38, + 38, 38, 40, 40, 42, 42, 44, 44, + 46, 46, 48, 48, 50, 50, 52, 52, + 54, 54, 56, 56, 58, 58, 60, 60, + 62, 64, 66, 68, 68, 68, 70, 70, + 70, 72, 72, 72, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, + }, + { /* Fourth byte table 12. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, + }, + { /* Fourth byte table 13. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 64, 66, 66, 68, 68, 70, 70, + 72, 72, 74, 74, 76, 76, 78, 78, + 80, 80, 82, 82, 84, 84, 86, 86, + 88, 88, 90, 90, 92, 92, 94, 94, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 14. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 50, 50, 52, 52, 54, 54, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, + 56, + }, + { /* Fourth byte table 15. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 2, 4, 4, 6, + 6, 8, 8, 10, 10, 12, 12, 14, + 16, 16, 18, 18, 20, 20, 22, 22, + 24, 24, 26, 26, 28, 28, 30, 30, + 32, 32, 34, 34, 36, 36, 38, 38, + 40, 40, 42, 42, 44, 44, 46, 46, + 48, 48, 50, 50, 52, 52, 54, 54, + 56, 56, 58, 58, 60, 60, 62, 62, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + { /* Fourth byte table 16. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 4, 4, 6, 6, + 8, 8, 10, 10, 12, 12, 14, 14, + 16, 16, 18, 18, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, + 20, + }, + { /* Fourth byte table 17. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 4, 6, 8, 10, 12, + 14, 16, 18, 20, 22, 24, 26, 28, + 30, 32, 34, 36, 38, 40, 42, 44, + 46, 48, 50, 52, 54, 56, 58, 60, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, + 62, + }, + { /* Fourth byte table 18. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 4, 6, 8, 10, 12, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, + }, + { /* Fourth byte table 19. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 20. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 90, 90, 93, 93, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 21. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 90, 90, 93, 93, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 22. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 33, 33, 33, 33, 36, 36, 36, 36, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, + }, + { /* Fourth byte table 23. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, + 87, + }, + { /* Fourth byte table 24. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, + 42, 45, 48, 51, 54, 57, 60, 63, + 66, 66, 66, 66, 66, 66, 66, 66, + 66, 69, 72, 75, 78, 81, 84, 87, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, + 90, + }, + { /* Fourth byte table 25. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 21, 21, 24, 24, 27, 27, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 33, 36, 39, 42, 45, 48, 51, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 57, 60, 63, 66, 69, 72, 75, + 78, 81, 84, 87, 90, 93, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 26. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 72, 72, 72, 72, 72, 72, 72, + 72, 75, 78, 78, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, + 83, + }, + { /* Fourth byte table 27. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 6, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 12, 15, 15, 15, 15, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, + }, + { /* Fourth byte table 28. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 6, 9, 12, 15, 18, 21, 24, + 27, 30, 33, 36, 39, 42, 45, 48, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, + 51, + }, + { /* Fourth byte table 29. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, + }, + { /* Fourth byte table 30. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, + }, + { /* Fourth byte table 31. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, + }, + { /* Fourth byte table 32. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 93, 93, 96, 96, 96, 96, 98, 100, + 100, 103, 103, 106, 106, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, 
+ 112, + }, + { /* Fourth byte table 33. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 57, 57, + 60, 60, 63, 63, 66, 66, 69, 69, + 72, 72, 75, 75, 78, 78, 81, 81, + 84, 84, 87, 87, 90, 90, 93, 93, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 34. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 6, 6, 9, 9, + 12, 12, 15, 15, 18, 18, 21, 21, + 24, 24, 27, 27, 30, 30, 33, 33, + 36, 36, 39, 39, 42, 42, 45, 45, + 48, 48, 51, 51, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, + 54, + }, + { /* Fourth byte table 35. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 6, 9, 12, 15, 18, 21, + 24, 27, 30, 33, 36, 39, 42, 45, + 48, 51, 54, 57, 60, 63, 66, 69, + 72, 75, 78, 81, 84, 87, 90, 93, + 96, 99, 102, 105, 108, 111, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, 114, 114, 114, 114, 114, 
114, 114, + 114, 114, 114, 114, 114, 114, 114, 114, + 114, + }, + { /* Fourth byte table 36. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 6, 9, 12, 15, 18, + 21, 24, 27, 30, 33, 36, 39, 42, + 45, 48, 51, 54, 57, 60, 63, 66, + 69, 72, 75, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, + }, + { /* Fourth byte table 37. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, + }, + { /* Fourth byte table 38. */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, + }, + }, +}; + +static const uchar_t u8_toupper_final_tbl[2][2318] = { + 
{ + 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, + 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, + 0xC3, 0x87, 0xC3, 0x88, 0xC3, 0x89, 0xC3, 0x8A, + 0xC3, 0x8B, 0xC3, 0x8C, 0xC3, 0x8D, 0xC3, 0x8E, + 0xC3, 0x8F, 0xC3, 0x90, 0xC3, 0x91, 0xC3, 0x92, + 0xC3, 0x93, 0xC3, 0x94, 0xC3, 0x95, 0xC3, 0x96, + 0xC3, 0x98, 0xC3, 0x99, 0xC3, 0x9A, 0xC3, 0x9B, + 0xC3, 0x9C, 0xC3, 0x9D, 0xC3, 0x9E, 0xC5, 0xB8, + 0xC4, 0x80, 0xC4, 0x82, 0xC4, 0x84, 0xC4, 0x86, + 0xC4, 0x88, 0xC4, 0x8A, 0xC4, 0x8C, 0xC4, 0x8E, + 0xC4, 0x90, 0xC4, 0x92, 0xC4, 0x94, 0xC4, 0x96, + 0xC4, 0x98, 0xC4, 0x9A, 0xC4, 0x9C, 0xC4, 0x9E, + 0xC4, 0xA0, 0xC4, 0xA2, 0xC4, 0xA4, 0xC4, 0xA6, + 0xC4, 0xA8, 0xC4, 0xAA, 0xC4, 0xAC, 0xC4, 0xAE, + 0x49, 0xC4, 0xB2, 0xC4, 0xB4, 0xC4, 0xB6, 0xC4, + 0xB9, 0xC4, 0xBB, 0xC4, 0xBD, 0xC4, 0xBF, 0xC5, + 0x81, 0xC5, 0x83, 0xC5, 0x85, 0xC5, 0x87, 0xC5, + 0x8A, 0xC5, 0x8C, 0xC5, 0x8E, 0xC5, 0x90, 0xC5, + 0x92, 0xC5, 0x94, 0xC5, 0x96, 0xC5, 0x98, 0xC5, + 0x9A, 0xC5, 0x9C, 0xC5, 0x9E, 0xC5, 0xA0, 0xC5, + 0xA2, 0xC5, 0xA4, 0xC5, 0xA6, 0xC5, 0xA8, 0xC5, + 0xAA, 0xC5, 0xAC, 0xC5, 0xAE, 0xC5, 0xB0, 0xC5, + 0xB2, 0xC5, 0xB4, 0xC5, 0xB6, 0xC5, 0xB9, 0xC5, + 0xBB, 0xC5, 0xBD, 0x53, 0xC6, 0x82, 0xC6, 0x84, + 0xC6, 0x87, 0xC6, 0x8B, 0xC6, 0x91, 0xC7, 0xB6, + 0xC6, 0x98, 0xC8, 0xA0, 0xC6, 0xA0, 0xC6, 0xA2, + 0xC6, 0xA4, 0xC6, 0xA7, 0xC6, 0xAC, 0xC6, 0xAF, + 0xC6, 0xB3, 0xC6, 0xB5, 0xC6, 0xB8, 0xC6, 0xBC, + 0xC7, 0xB7, 0xC7, 0x84, 0xC7, 0x84, 0xC7, 0x87, + 0xC7, 0x87, 0xC7, 0x8A, 0xC7, 0x8A, 0xC7, 0x8D, + 0xC7, 0x8F, 0xC7, 0x91, 0xC7, 0x93, 0xC7, 0x95, + 0xC7, 0x97, 0xC7, 0x99, 0xC7, 0x9B, 0xC6, 0x8E, + 0xC7, 0x9E, 0xC7, 0xA0, 0xC7, 0xA2, 0xC7, 0xA4, + 0xC7, 0xA6, 0xC7, 0xA8, 0xC7, 0xAA, 0xC7, 0xAC, + 0xC7, 0xAE, 0xC7, 0xB1, 0xC7, 0xB1, 0xC7, 0xB4, + 0xC7, 0xB8, 0xC7, 0xBA, 0xC7, 0xBC, 0xC7, 0xBE, + 0xC8, 0x80, 0xC8, 0x82, 0xC8, 0x84, 0xC8, 0x86, + 0xC8, 0x88, 0xC8, 0x8A, 0xC8, 0x8C, 0xC8, 0x8E, + 0xC8, 0x90, 0xC8, 0x92, 0xC8, 0x94, 0xC8, 0x96, + 0xC8, 0x98, 0xC8, 0x9A, 0xC8, 0x9C, 0xC8, 
0x9E, + 0xC8, 0xA2, 0xC8, 0xA4, 0xC8, 0xA6, 0xC8, 0xA8, + 0xC8, 0xAA, 0xC8, 0xAC, 0xC8, 0xAE, 0xC8, 0xB0, + 0xC8, 0xB2, 0xC6, 0x81, 0xC6, 0x86, 0xC6, 0x89, + 0xC6, 0x8A, 0xC6, 0x8F, 0xC6, 0x90, 0xC6, 0x93, + 0xC6, 0x94, 0xC6, 0x97, 0xC6, 0x96, 0xC6, 0x9C, + 0xC6, 0x9D, 0xC6, 0x9F, 0xC6, 0xA6, 0xC6, 0xA9, + 0xC6, 0xAE, 0xC6, 0xB1, 0xC6, 0xB2, 0xC6, 0xB7, + 0xCE, 0x99, 0xCE, 0x86, 0xCE, 0x88, 0xCE, 0x89, + 0xCE, 0x8A, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, + 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, + 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, + 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, + 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0xA3, 0xCE, 0xA3, + 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, + 0xCE, 0xA8, 0xCE, 0xA9, 0xCE, 0xAA, 0xCE, 0xAB, + 0xCE, 0x8C, 0xCE, 0x8E, 0xCE, 0x8F, 0xCE, 0x92, + 0xCE, 0x98, 0xCE, 0xA6, 0xCE, 0xA0, 0xCF, 0x98, + 0xCF, 0x9A, 0xCF, 0x9C, 0xCF, 0x9E, 0xCF, 0xA0, + 0xCF, 0xA2, 0xCF, 0xA4, 0xCF, 0xA6, 0xCF, 0xA8, + 0xCF, 0xAA, 0xCF, 0xAC, 0xCF, 0xAE, 0xCE, 0x9A, + 0xCE, 0xA1, 0xCE, 0xA3, 0xCE, 0x95, 0xD0, 0x90, + 0xD0, 0x91, 0xD0, 0x92, 0xD0, 0x93, 0xD0, 0x94, + 0xD0, 0x95, 0xD0, 0x96, 0xD0, 0x97, 0xD0, 0x98, + 0xD0, 0x99, 0xD0, 0x9A, 0xD0, 0x9B, 0xD0, 0x9C, + 0xD0, 0x9D, 0xD0, 0x9E, 0xD0, 0x9F, 0xD0, 0xA0, + 0xD0, 0xA1, 0xD0, 0xA2, 0xD0, 0xA3, 0xD0, 0xA4, + 0xD0, 0xA5, 0xD0, 0xA6, 0xD0, 0xA7, 0xD0, 0xA8, + 0xD0, 0xA9, 0xD0, 0xAA, 0xD0, 0xAB, 0xD0, 0xAC, + 0xD0, 0xAD, 0xD0, 0xAE, 0xD0, 0xAF, 0xD0, 0x80, + 0xD0, 0x81, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0x84, + 0xD0, 0x85, 0xD0, 0x86, 0xD0, 0x87, 0xD0, 0x88, + 0xD0, 0x89, 0xD0, 0x8A, 0xD0, 0x8B, 0xD0, 0x8C, + 0xD0, 0x8D, 0xD0, 0x8E, 0xD0, 0x8F, 0xD1, 0xA0, + 0xD1, 0xA2, 0xD1, 0xA4, 0xD1, 0xA6, 0xD1, 0xA8, + 0xD1, 0xAA, 0xD1, 0xAC, 0xD1, 0xAE, 0xD1, 0xB0, + 0xD1, 0xB2, 0xD1, 0xB4, 0xD1, 0xB6, 0xD1, 0xB8, + 0xD1, 0xBA, 0xD1, 0xBC, 0xD1, 0xBE, 0xD2, 0x80, + 0xD2, 0x8A, 0xD2, 0x8C, 0xD2, 0x8E, 0xD2, 0x90, + 0xD2, 0x92, 0xD2, 0x94, 0xD2, 0x96, 0xD2, 0x98, + 0xD2, 0x9A, 0xD2, 0x9C, 0xD2, 0x9E, 0xD2, 
0xA0, + 0xD2, 0xA2, 0xD2, 0xA4, 0xD2, 0xA6, 0xD2, 0xA8, + 0xD2, 0xAA, 0xD2, 0xAC, 0xD2, 0xAE, 0xD2, 0xB0, + 0xD2, 0xB2, 0xD2, 0xB4, 0xD2, 0xB6, 0xD2, 0xB8, + 0xD2, 0xBA, 0xD2, 0xBC, 0xD2, 0xBE, 0xD3, 0x81, + 0xD3, 0x83, 0xD3, 0x85, 0xD3, 0x87, 0xD3, 0x89, + 0xD3, 0x8B, 0xD3, 0x8D, 0xD3, 0x90, 0xD3, 0x92, + 0xD3, 0x94, 0xD3, 0x96, 0xD3, 0x98, 0xD3, 0x9A, + 0xD3, 0x9C, 0xD3, 0x9E, 0xD3, 0xA0, 0xD3, 0xA2, + 0xD3, 0xA4, 0xD3, 0xA6, 0xD3, 0xA8, 0xD3, 0xAA, + 0xD3, 0xAC, 0xD3, 0xAE, 0xD3, 0xB0, 0xD3, 0xB2, + 0xD3, 0xB4, 0xD3, 0xB8, 0xD4, 0x80, 0xD4, 0x82, + 0xD4, 0x84, 0xD4, 0x86, 0xD4, 0x88, 0xD4, 0x8A, + 0xD4, 0x8C, 0xD4, 0x8E, 0xD4, 0xB1, 0xD4, 0xB2, + 0xD4, 0xB3, 0xD4, 0xB4, 0xD4, 0xB5, 0xD4, 0xB6, + 0xD4, 0xB7, 0xD4, 0xB8, 0xD4, 0xB9, 0xD4, 0xBA, + 0xD4, 0xBB, 0xD4, 0xBC, 0xD4, 0xBD, 0xD4, 0xBE, + 0xD4, 0xBF, 0xD5, 0x80, 0xD5, 0x81, 0xD5, 0x82, + 0xD5, 0x83, 0xD5, 0x84, 0xD5, 0x85, 0xD5, 0x86, + 0xD5, 0x87, 0xD5, 0x88, 0xD5, 0x89, 0xD5, 0x8A, + 0xD5, 0x8B, 0xD5, 0x8C, 0xD5, 0x8D, 0xD5, 0x8E, + 0xD5, 0x8F, 0xD5, 0x90, 0xD5, 0x91, 0xD5, 0x92, + 0xD5, 0x93, 0xD5, 0x94, 0xD5, 0x95, 0xD5, 0x96, + 0xE1, 0xB8, 0x80, 0xE1, 0xB8, 0x82, 0xE1, 0xB8, + 0x84, 0xE1, 0xB8, 0x86, 0xE1, 0xB8, 0x88, 0xE1, + 0xB8, 0x8A, 0xE1, 0xB8, 0x8C, 0xE1, 0xB8, 0x8E, + 0xE1, 0xB8, 0x90, 0xE1, 0xB8, 0x92, 0xE1, 0xB8, + 0x94, 0xE1, 0xB8, 0x96, 0xE1, 0xB8, 0x98, 0xE1, + 0xB8, 0x9A, 0xE1, 0xB8, 0x9C, 0xE1, 0xB8, 0x9E, + 0xE1, 0xB8, 0xA0, 0xE1, 0xB8, 0xA2, 0xE1, 0xB8, + 0xA4, 0xE1, 0xB8, 0xA6, 0xE1, 0xB8, 0xA8, 0xE1, + 0xB8, 0xAA, 0xE1, 0xB8, 0xAC, 0xE1, 0xB8, 0xAE, + 0xE1, 0xB8, 0xB0, 0xE1, 0xB8, 0xB2, 0xE1, 0xB8, + 0xB4, 0xE1, 0xB8, 0xB6, 0xE1, 0xB8, 0xB8, 0xE1, + 0xB8, 0xBA, 0xE1, 0xB8, 0xBC, 0xE1, 0xB8, 0xBE, + 0xE1, 0xB9, 0x80, 0xE1, 0xB9, 0x82, 0xE1, 0xB9, + 0x84, 0xE1, 0xB9, 0x86, 0xE1, 0xB9, 0x88, 0xE1, + 0xB9, 0x8A, 0xE1, 0xB9, 0x8C, 0xE1, 0xB9, 0x8E, + 0xE1, 0xB9, 0x90, 0xE1, 0xB9, 0x92, 0xE1, 0xB9, + 0x94, 0xE1, 0xB9, 0x96, 0xE1, 0xB9, 0x98, 0xE1, + 0xB9, 0x9A, 0xE1, 0xB9, 0x9C, 0xE1, 0xB9, 
0x9E, + 0xE1, 0xB9, 0xA0, 0xE1, 0xB9, 0xA2, 0xE1, 0xB9, + 0xA4, 0xE1, 0xB9, 0xA6, 0xE1, 0xB9, 0xA8, 0xE1, + 0xB9, 0xAA, 0xE1, 0xB9, 0xAC, 0xE1, 0xB9, 0xAE, + 0xE1, 0xB9, 0xB0, 0xE1, 0xB9, 0xB2, 0xE1, 0xB9, + 0xB4, 0xE1, 0xB9, 0xB6, 0xE1, 0xB9, 0xB8, 0xE1, + 0xB9, 0xBA, 0xE1, 0xB9, 0xBC, 0xE1, 0xB9, 0xBE, + 0xE1, 0xBA, 0x80, 0xE1, 0xBA, 0x82, 0xE1, 0xBA, + 0x84, 0xE1, 0xBA, 0x86, 0xE1, 0xBA, 0x88, 0xE1, + 0xBA, 0x8A, 0xE1, 0xBA, 0x8C, 0xE1, 0xBA, 0x8E, + 0xE1, 0xBA, 0x90, 0xE1, 0xBA, 0x92, 0xE1, 0xBA, + 0x94, 0xE1, 0xB9, 0xA0, 0xE1, 0xBA, 0xA0, 0xE1, + 0xBA, 0xA2, 0xE1, 0xBA, 0xA4, 0xE1, 0xBA, 0xA6, + 0xE1, 0xBA, 0xA8, 0xE1, 0xBA, 0xAA, 0xE1, 0xBA, + 0xAC, 0xE1, 0xBA, 0xAE, 0xE1, 0xBA, 0xB0, 0xE1, + 0xBA, 0xB2, 0xE1, 0xBA, 0xB4, 0xE1, 0xBA, 0xB6, + 0xE1, 0xBA, 0xB8, 0xE1, 0xBA, 0xBA, 0xE1, 0xBA, + 0xBC, 0xE1, 0xBA, 0xBE, 0xE1, 0xBB, 0x80, 0xE1, + 0xBB, 0x82, 0xE1, 0xBB, 0x84, 0xE1, 0xBB, 0x86, + 0xE1, 0xBB, 0x88, 0xE1, 0xBB, 0x8A, 0xE1, 0xBB, + 0x8C, 0xE1, 0xBB, 0x8E, 0xE1, 0xBB, 0x90, 0xE1, + 0xBB, 0x92, 0xE1, 0xBB, 0x94, 0xE1, 0xBB, 0x96, + 0xE1, 0xBB, 0x98, 0xE1, 0xBB, 0x9A, 0xE1, 0xBB, + 0x9C, 0xE1, 0xBB, 0x9E, 0xE1, 0xBB, 0xA0, 0xE1, + 0xBB, 0xA2, 0xE1, 0xBB, 0xA4, 0xE1, 0xBB, 0xA6, + 0xE1, 0xBB, 0xA8, 0xE1, 0xBB, 0xAA, 0xE1, 0xBB, + 0xAC, 0xE1, 0xBB, 0xAE, 0xE1, 0xBB, 0xB0, 0xE1, + 0xBB, 0xB2, 0xE1, 0xBB, 0xB4, 0xE1, 0xBB, 0xB6, + 0xE1, 0xBB, 0xB8, 0xE1, 0xBC, 0x88, 0xE1, 0xBC, + 0x89, 0xE1, 0xBC, 0x8A, 0xE1, 0xBC, 0x8B, 0xE1, + 0xBC, 0x8C, 0xE1, 0xBC, 0x8D, 0xE1, 0xBC, 0x8E, + 0xE1, 0xBC, 0x8F, 0xE1, 0xBC, 0x98, 0xE1, 0xBC, + 0x99, 0xE1, 0xBC, 0x9A, 0xE1, 0xBC, 0x9B, 0xE1, + 0xBC, 0x9C, 0xE1, 0xBC, 0x9D, 0xE1, 0xBC, 0xA8, + 0xE1, 0xBC, 0xA9, 0xE1, 0xBC, 0xAA, 0xE1, 0xBC, + 0xAB, 0xE1, 0xBC, 0xAC, 0xE1, 0xBC, 0xAD, 0xE1, + 0xBC, 0xAE, 0xE1, 0xBC, 0xAF, 0xE1, 0xBC, 0xB8, + 0xE1, 0xBC, 0xB9, 0xE1, 0xBC, 0xBA, 0xE1, 0xBC, + 0xBB, 0xE1, 0xBC, 0xBC, 0xE1, 0xBC, 0xBD, 0xE1, + 0xBC, 0xBE, 0xE1, 0xBC, 0xBF, 0xE1, 0xBD, 0x88, + 0xE1, 0xBD, 0x89, 0xE1, 0xBD, 0x8A, 0xE1, 
0xBD, + 0x8B, 0xE1, 0xBD, 0x8C, 0xE1, 0xBD, 0x8D, 0xE1, + 0xBD, 0x99, 0xE1, 0xBD, 0x9B, 0xE1, 0xBD, 0x9D, + 0xE1, 0xBD, 0x9F, 0xE1, 0xBD, 0xA8, 0xE1, 0xBD, + 0xA9, 0xE1, 0xBD, 0xAA, 0xE1, 0xBD, 0xAB, 0xE1, + 0xBD, 0xAC, 0xE1, 0xBD, 0xAD, 0xE1, 0xBD, 0xAE, + 0xE1, 0xBD, 0xAF, 0xE1, 0xBE, 0xBA, 0xE1, 0xBE, + 0xBB, 0xE1, 0xBF, 0x88, 0xE1, 0xBF, 0x89, 0xE1, + 0xBF, 0x8A, 0xE1, 0xBF, 0x8B, 0xE1, 0xBF, 0x9A, + 0xE1, 0xBF, 0x9B, 0xE1, 0xBF, 0xB8, 0xE1, 0xBF, + 0xB9, 0xE1, 0xBF, 0xAA, 0xE1, 0xBF, 0xAB, 0xE1, + 0xBF, 0xBA, 0xE1, 0xBF, 0xBB, 0xE1, 0xBE, 0x88, + 0xE1, 0xBE, 0x89, 0xE1, 0xBE, 0x8A, 0xE1, 0xBE, + 0x8B, 0xE1, 0xBE, 0x8C, 0xE1, 0xBE, 0x8D, 0xE1, + 0xBE, 0x8E, 0xE1, 0xBE, 0x8F, 0xE1, 0xBE, 0x98, + 0xE1, 0xBE, 0x99, 0xE1, 0xBE, 0x9A, 0xE1, 0xBE, + 0x9B, 0xE1, 0xBE, 0x9C, 0xE1, 0xBE, 0x9D, 0xE1, + 0xBE, 0x9E, 0xE1, 0xBE, 0x9F, 0xE1, 0xBE, 0xA8, + 0xE1, 0xBE, 0xA9, 0xE1, 0xBE, 0xAA, 0xE1, 0xBE, + 0xAB, 0xE1, 0xBE, 0xAC, 0xE1, 0xBE, 0xAD, 0xE1, + 0xBE, 0xAE, 0xE1, 0xBE, 0xAF, 0xE1, 0xBE, 0xB8, + 0xE1, 0xBE, 0xB9, 0xE1, 0xBE, 0xBC, 0xCE, 0x99, + 0xE1, 0xBF, 0x8C, 0xE1, 0xBF, 0x98, 0xE1, 0xBF, + 0x99, 0xE1, 0xBF, 0xA8, 0xE1, 0xBF, 0xA9, 0xE1, + 0xBF, 0xAC, 0xE1, 0xBF, 0xBC, 0xE2, 0x85, 0xA0, + 0xE2, 0x85, 0xA1, 0xE2, 0x85, 0xA2, 0xE2, 0x85, + 0xA3, 0xE2, 0x85, 0xA4, 0xE2, 0x85, 0xA5, 0xE2, + 0x85, 0xA6, 0xE2, 0x85, 0xA7, 0xE2, 0x85, 0xA8, + 0xE2, 0x85, 0xA9, 0xE2, 0x85, 0xAA, 0xE2, 0x85, + 0xAB, 0xE2, 0x85, 0xAC, 0xE2, 0x85, 0xAD, 0xE2, + 0x85, 0xAE, 0xE2, 0x85, 0xAF, 0xE2, 0x92, 0xB6, + 0xE2, 0x92, 0xB7, 0xE2, 0x92, 0xB8, 0xE2, 0x92, + 0xB9, 0xE2, 0x92, 0xBA, 0xE2, 0x92, 0xBB, 0xE2, + 0x92, 0xBC, 0xE2, 0x92, 0xBD, 0xE2, 0x92, 0xBE, + 0xE2, 0x92, 0xBF, 0xE2, 0x93, 0x80, 0xE2, 0x93, + 0x81, 0xE2, 0x93, 0x82, 0xE2, 0x93, 0x83, 0xE2, + 0x93, 0x84, 0xE2, 0x93, 0x85, 0xE2, 0x93, 0x86, + 0xE2, 0x93, 0x87, 0xE2, 0x93, 0x88, 0xE2, 0x93, + 0x89, 0xE2, 0x93, 0x8A, 0xE2, 0x93, 0x8B, 0xE2, + 0x93, 0x8C, 0xE2, 0x93, 0x8D, 0xE2, 0x93, 0x8E, + 0xE2, 0x93, 0x8F, 0xEF, 0xBC, 0xA1, 0xEF, 
0xBC, + 0xA2, 0xEF, 0xBC, 0xA3, 0xEF, 0xBC, 0xA4, 0xEF, + 0xBC, 0xA5, 0xEF, 0xBC, 0xA6, 0xEF, 0xBC, 0xA7, + 0xEF, 0xBC, 0xA8, 0xEF, 0xBC, 0xA9, 0xEF, 0xBC, + 0xAA, 0xEF, 0xBC, 0xAB, 0xEF, 0xBC, 0xAC, 0xEF, + 0xBC, 0xAD, 0xEF, 0xBC, 0xAE, 0xEF, 0xBC, 0xAF, + 0xEF, 0xBC, 0xB0, 0xEF, 0xBC, 0xB1, 0xEF, 0xBC, + 0xB2, 0xEF, 0xBC, 0xB3, 0xEF, 0xBC, 0xB4, 0xEF, + 0xBC, 0xB5, 0xEF, 0xBC, 0xB6, 0xEF, 0xBC, 0xB7, + 0xEF, 0xBC, 0xB8, 0xEF, 0xBC, 0xB9, 0xEF, 0xBC, + 0xBA, 0xF0, 0x90, 0x90, 0x80, 0xF0, 0x90, 0x90, + 0x81, 0xF0, 0x90, 0x90, 0x82, 0xF0, 0x90, 0x90, + 0x83, 0xF0, 0x90, 0x90, 0x84, 0xF0, 0x90, 0x90, + 0x85, 0xF0, 0x90, 0x90, 0x86, 0xF0, 0x90, 0x90, + 0x87, 0xF0, 0x90, 0x90, 0x88, 0xF0, 0x90, 0x90, + 0x89, 0xF0, 0x90, 0x90, 0x8A, 0xF0, 0x90, 0x90, + 0x8B, 0xF0, 0x90, 0x90, 0x8C, 0xF0, 0x90, 0x90, + 0x8D, 0xF0, 0x90, 0x90, 0x8E, 0xF0, 0x90, 0x90, + 0x8F, 0xF0, 0x90, 0x90, 0x90, 0xF0, 0x90, 0x90, + 0x91, 0xF0, 0x90, 0x90, 0x92, 0xF0, 0x90, 0x90, + 0x93, 0xF0, 0x90, 0x90, 0x94, 0xF0, 0x90, 0x90, + 0x95, 0xF0, 0x90, 0x90, 0x96, 0xF0, 0x90, 0x90, + 0x97, 0xF0, 0x90, 0x90, 0x98, 0xF0, 0x90, 0x90, + 0x99, 0xF0, 0x90, 0x90, 0x9A, 0xF0, 0x90, 0x90, + 0x9B, 0xF0, 0x90, 0x90, 0x9C, 0xF0, 0x90, 0x90, + 0x9D, 0xF0, 0x90, 0x90, 0x9E, 0xF0, 0x90, 0x90, + 0x9F, 0xF0, 0x90, 0x90, 0xA0, 0xF0, 0x90, 0x90, + 0xA1, 0xF0, 0x90, 0x90, 0xA2, 0xF0, 0x90, 0x90, + 0xA3, 0xF0, 0x90, 0x90, 0xA4, 0xF0, 0x90, 0x90, + 0xA5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + }, + { + 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, + 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, + 0xC3, 0x87, 0xC3, 0x88, 0xC3, 0x89, 0xC3, 0x8A, + 0xC3, 0x8B, 0xC3, 0x8C, 0xC3, 0x8D, 0xC3, 0x8E, + 0xC3, 0x8F, 0xC3, 0x90, 0xC3, 0x91, 0xC3, 0x92, + 0xC3, 0x93, 0xC3, 0x94, 0xC3, 0x95, 0xC3, 0x96, + 0xC3, 0x98, 0xC3, 0x99, 0xC3, 0x9A, 0xC3, 0x9B, + 0xC3, 0x9C, 0xC3, 0x9D, 0xC3, 0x9E, 0xC5, 0xB8, + 0xC4, 0x80, 0xC4, 0x82, 0xC4, 0x84, 0xC4, 0x86, + 0xC4, 0x88, 0xC4, 0x8A, 0xC4, 0x8C, 0xC4, 0x8E, + 0xC4, 0x90, 0xC4, 0x92, 0xC4, 0x94, 0xC4, 0x96, + 0xC4, 0x98, 0xC4, 0x9A, 0xC4, 0x9C, 0xC4, 0x9E, + 0xC4, 0xA0, 0xC4, 0xA2, 0xC4, 0xA4, 0xC4, 0xA6, + 0xC4, 0xA8, 0xC4, 0xAA, 0xC4, 0xAC, 0xC4, 0xAE, + 0x49, 0xC4, 0xB2, 0xC4, 0xB4, 0xC4, 0xB6, 0xC4, + 0xB9, 0xC4, 0xBB, 0xC4, 0xBD, 0xC4, 0xBF, 0xC5, + 0x81, 0xC5, 0x83, 0xC5, 0x85, 0xC5, 0x87, 0xC5, + 0x8A, 0xC5, 0x8C, 0xC5, 0x8E, 0xC5, 0x90, 0xC5, + 0x92, 0xC5, 0x94, 0xC5, 0x96, 0xC5, 0x98, 0xC5, + 0x9A, 0xC5, 0x9C, 
0xC5, 0x9E, 0xC5, 0xA0, 0xC5, + 0xA2, 0xC5, 0xA4, 0xC5, 0xA6, 0xC5, 0xA8, 0xC5, + 0xAA, 0xC5, 0xAC, 0xC5, 0xAE, 0xC5, 0xB0, 0xC5, + 0xB2, 0xC5, 0xB4, 0xC5, 0xB6, 0xC5, 0xB9, 0xC5, + 0xBB, 0xC5, 0xBD, 0x53, 0xC9, 0x83, 0xC6, 0x82, + 0xC6, 0x84, 0xC6, 0x87, 0xC6, 0x8B, 0xC6, 0x91, + 0xC7, 0xB6, 0xC6, 0x98, 0xC8, 0xBD, 0xC8, 0xA0, + 0xC6, 0xA0, 0xC6, 0xA2, 0xC6, 0xA4, 0xC6, 0xA7, + 0xC6, 0xAC, 0xC6, 0xAF, 0xC6, 0xB3, 0xC6, 0xB5, + 0xC6, 0xB8, 0xC6, 0xBC, 0xC7, 0xB7, 0xC7, 0x84, + 0xC7, 0x84, 0xC7, 0x87, 0xC7, 0x87, 0xC7, 0x8A, + 0xC7, 0x8A, 0xC7, 0x8D, 0xC7, 0x8F, 0xC7, 0x91, + 0xC7, 0x93, 0xC7, 0x95, 0xC7, 0x97, 0xC7, 0x99, + 0xC7, 0x9B, 0xC6, 0x8E, 0xC7, 0x9E, 0xC7, 0xA0, + 0xC7, 0xA2, 0xC7, 0xA4, 0xC7, 0xA6, 0xC7, 0xA8, + 0xC7, 0xAA, 0xC7, 0xAC, 0xC7, 0xAE, 0xC7, 0xB1, + 0xC7, 0xB1, 0xC7, 0xB4, 0xC7, 0xB8, 0xC7, 0xBA, + 0xC7, 0xBC, 0xC7, 0xBE, 0xC8, 0x80, 0xC8, 0x82, + 0xC8, 0x84, 0xC8, 0x86, 0xC8, 0x88, 0xC8, 0x8A, + 0xC8, 0x8C, 0xC8, 0x8E, 0xC8, 0x90, 0xC8, 0x92, + 0xC8, 0x94, 0xC8, 0x96, 0xC8, 0x98, 0xC8, 0x9A, + 0xC8, 0x9C, 0xC8, 0x9E, 0xC8, 0xA2, 0xC8, 0xA4, + 0xC8, 0xA6, 0xC8, 0xA8, 0xC8, 0xAA, 0xC8, 0xAC, + 0xC8, 0xAE, 0xC8, 0xB0, 0xC8, 0xB2, 0xC8, 0xBB, + 0xC9, 0x81, 0xC9, 0x86, 0xC9, 0x88, 0xC9, 0x8A, + 0xC9, 0x8C, 0xC9, 0x8E, 0xC6, 0x81, 0xC6, 0x86, + 0xC6, 0x89, 0xC6, 0x8A, 0xC6, 0x8F, 0xC6, 0x90, + 0xC6, 0x93, 0xC6, 0x94, 0xC6, 0x97, 0xC6, 0x96, + 0xE2, 0xB1, 0xA2, 0xC6, 0x9C, 0xC6, 0x9D, 0xC6, + 0x9F, 0xE2, 0xB1, 0xA4, 0xC6, 0xA6, 0xC6, 0xA9, + 0xC6, 0xAE, 0xC9, 0x84, 0xC6, 0xB1, 0xC6, 0xB2, + 0xC9, 0x85, 0xC6, 0xB7, 0xCE, 0x99, 0xCF, 0xBD, + 0xCF, 0xBE, 0xCF, 0xBF, 0xCE, 0x86, 0xCE, 0x88, + 0xCE, 0x89, 0xCE, 0x8A, 0xCE, 0x91, 0xCE, 0x92, + 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, + 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, + 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, + 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0xA3, + 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, + 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xCE, 0xAA, + 0xCE, 0xAB, 0xCE, 
0x8C, 0xCE, 0x8E, 0xCE, 0x8F, + 0xCE, 0x92, 0xCE, 0x98, 0xCE, 0xA6, 0xCE, 0xA0, + 0xCF, 0x98, 0xCF, 0x9A, 0xCF, 0x9C, 0xCF, 0x9E, + 0xCF, 0xA0, 0xCF, 0xA2, 0xCF, 0xA4, 0xCF, 0xA6, + 0xCF, 0xA8, 0xCF, 0xAA, 0xCF, 0xAC, 0xCF, 0xAE, + 0xCE, 0x9A, 0xCE, 0xA1, 0xCF, 0xB9, 0xCE, 0x95, + 0xCF, 0xB7, 0xCF, 0xBA, 0xD0, 0x90, 0xD0, 0x91, + 0xD0, 0x92, 0xD0, 0x93, 0xD0, 0x94, 0xD0, 0x95, + 0xD0, 0x96, 0xD0, 0x97, 0xD0, 0x98, 0xD0, 0x99, + 0xD0, 0x9A, 0xD0, 0x9B, 0xD0, 0x9C, 0xD0, 0x9D, + 0xD0, 0x9E, 0xD0, 0x9F, 0xD0, 0xA0, 0xD0, 0xA1, + 0xD0, 0xA2, 0xD0, 0xA3, 0xD0, 0xA4, 0xD0, 0xA5, + 0xD0, 0xA6, 0xD0, 0xA7, 0xD0, 0xA8, 0xD0, 0xA9, + 0xD0, 0xAA, 0xD0, 0xAB, 0xD0, 0xAC, 0xD0, 0xAD, + 0xD0, 0xAE, 0xD0, 0xAF, 0xD0, 0x80, 0xD0, 0x81, + 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0x84, 0xD0, 0x85, + 0xD0, 0x86, 0xD0, 0x87, 0xD0, 0x88, 0xD0, 0x89, + 0xD0, 0x8A, 0xD0, 0x8B, 0xD0, 0x8C, 0xD0, 0x8D, + 0xD0, 0x8E, 0xD0, 0x8F, 0xD1, 0xA0, 0xD1, 0xA2, + 0xD1, 0xA4, 0xD1, 0xA6, 0xD1, 0xA8, 0xD1, 0xAA, + 0xD1, 0xAC, 0xD1, 0xAE, 0xD1, 0xB0, 0xD1, 0xB2, + 0xD1, 0xB4, 0xD1, 0xB6, 0xD1, 0xB8, 0xD1, 0xBA, + 0xD1, 0xBC, 0xD1, 0xBE, 0xD2, 0x80, 0xD2, 0x8A, + 0xD2, 0x8C, 0xD2, 0x8E, 0xD2, 0x90, 0xD2, 0x92, + 0xD2, 0x94, 0xD2, 0x96, 0xD2, 0x98, 0xD2, 0x9A, + 0xD2, 0x9C, 0xD2, 0x9E, 0xD2, 0xA0, 0xD2, 0xA2, + 0xD2, 0xA4, 0xD2, 0xA6, 0xD2, 0xA8, 0xD2, 0xAA, + 0xD2, 0xAC, 0xD2, 0xAE, 0xD2, 0xB0, 0xD2, 0xB2, + 0xD2, 0xB4, 0xD2, 0xB6, 0xD2, 0xB8, 0xD2, 0xBA, + 0xD2, 0xBC, 0xD2, 0xBE, 0xD3, 0x81, 0xD3, 0x83, + 0xD3, 0x85, 0xD3, 0x87, 0xD3, 0x89, 0xD3, 0x8B, + 0xD3, 0x8D, 0xD3, 0x80, 0xD3, 0x90, 0xD3, 0x92, + 0xD3, 0x94, 0xD3, 0x96, 0xD3, 0x98, 0xD3, 0x9A, + 0xD3, 0x9C, 0xD3, 0x9E, 0xD3, 0xA0, 0xD3, 0xA2, + 0xD3, 0xA4, 0xD3, 0xA6, 0xD3, 0xA8, 0xD3, 0xAA, + 0xD3, 0xAC, 0xD3, 0xAE, 0xD3, 0xB0, 0xD3, 0xB2, + 0xD3, 0xB4, 0xD3, 0xB6, 0xD3, 0xB8, 0xD3, 0xBA, + 0xD3, 0xBC, 0xD3, 0xBE, 0xD4, 0x80, 0xD4, 0x82, + 0xD4, 0x84, 0xD4, 0x86, 0xD4, 0x88, 0xD4, 0x8A, + 0xD4, 0x8C, 0xD4, 0x8E, 0xD4, 0x90, 0xD4, 0x92, + 0xD4, 0xB1, 0xD4, 
0xB2, 0xD4, 0xB3, 0xD4, 0xB4, + 0xD4, 0xB5, 0xD4, 0xB6, 0xD4, 0xB7, 0xD4, 0xB8, + 0xD4, 0xB9, 0xD4, 0xBA, 0xD4, 0xBB, 0xD4, 0xBC, + 0xD4, 0xBD, 0xD4, 0xBE, 0xD4, 0xBF, 0xD5, 0x80, + 0xD5, 0x81, 0xD5, 0x82, 0xD5, 0x83, 0xD5, 0x84, + 0xD5, 0x85, 0xD5, 0x86, 0xD5, 0x87, 0xD5, 0x88, + 0xD5, 0x89, 0xD5, 0x8A, 0xD5, 0x8B, 0xD5, 0x8C, + 0xD5, 0x8D, 0xD5, 0x8E, 0xD5, 0x8F, 0xD5, 0x90, + 0xD5, 0x91, 0xD5, 0x92, 0xD5, 0x93, 0xD5, 0x94, + 0xD5, 0x95, 0xD5, 0x96, 0xE2, 0xB1, 0xA3, 0xE1, + 0xB8, 0x80, 0xE1, 0xB8, 0x82, 0xE1, 0xB8, 0x84, + 0xE1, 0xB8, 0x86, 0xE1, 0xB8, 0x88, 0xE1, 0xB8, + 0x8A, 0xE1, 0xB8, 0x8C, 0xE1, 0xB8, 0x8E, 0xE1, + 0xB8, 0x90, 0xE1, 0xB8, 0x92, 0xE1, 0xB8, 0x94, + 0xE1, 0xB8, 0x96, 0xE1, 0xB8, 0x98, 0xE1, 0xB8, + 0x9A, 0xE1, 0xB8, 0x9C, 0xE1, 0xB8, 0x9E, 0xE1, + 0xB8, 0xA0, 0xE1, 0xB8, 0xA2, 0xE1, 0xB8, 0xA4, + 0xE1, 0xB8, 0xA6, 0xE1, 0xB8, 0xA8, 0xE1, 0xB8, + 0xAA, 0xE1, 0xB8, 0xAC, 0xE1, 0xB8, 0xAE, 0xE1, + 0xB8, 0xB0, 0xE1, 0xB8, 0xB2, 0xE1, 0xB8, 0xB4, + 0xE1, 0xB8, 0xB6, 0xE1, 0xB8, 0xB8, 0xE1, 0xB8, + 0xBA, 0xE1, 0xB8, 0xBC, 0xE1, 0xB8, 0xBE, 0xE1, + 0xB9, 0x80, 0xE1, 0xB9, 0x82, 0xE1, 0xB9, 0x84, + 0xE1, 0xB9, 0x86, 0xE1, 0xB9, 0x88, 0xE1, 0xB9, + 0x8A, 0xE1, 0xB9, 0x8C, 0xE1, 0xB9, 0x8E, 0xE1, + 0xB9, 0x90, 0xE1, 0xB9, 0x92, 0xE1, 0xB9, 0x94, + 0xE1, 0xB9, 0x96, 0xE1, 0xB9, 0x98, 0xE1, 0xB9, + 0x9A, 0xE1, 0xB9, 0x9C, 0xE1, 0xB9, 0x9E, 0xE1, + 0xB9, 0xA0, 0xE1, 0xB9, 0xA2, 0xE1, 0xB9, 0xA4, + 0xE1, 0xB9, 0xA6, 0xE1, 0xB9, 0xA8, 0xE1, 0xB9, + 0xAA, 0xE1, 0xB9, 0xAC, 0xE1, 0xB9, 0xAE, 0xE1, + 0xB9, 0xB0, 0xE1, 0xB9, 0xB2, 0xE1, 0xB9, 0xB4, + 0xE1, 0xB9, 0xB6, 0xE1, 0xB9, 0xB8, 0xE1, 0xB9, + 0xBA, 0xE1, 0xB9, 0xBC, 0xE1, 0xB9, 0xBE, 0xE1, + 0xBA, 0x80, 0xE1, 0xBA, 0x82, 0xE1, 0xBA, 0x84, + 0xE1, 0xBA, 0x86, 0xE1, 0xBA, 0x88, 0xE1, 0xBA, + 0x8A, 0xE1, 0xBA, 0x8C, 0xE1, 0xBA, 0x8E, 0xE1, + 0xBA, 0x90, 0xE1, 0xBA, 0x92, 0xE1, 0xBA, 0x94, + 0xE1, 0xB9, 0xA0, 0xE1, 0xBA, 0xA0, 0xE1, 0xBA, + 0xA2, 0xE1, 0xBA, 0xA4, 0xE1, 0xBA, 0xA6, 0xE1, + 0xBA, 0xA8, 0xE1, 
0xBA, 0xAA, 0xE1, 0xBA, 0xAC, + 0xE1, 0xBA, 0xAE, 0xE1, 0xBA, 0xB0, 0xE1, 0xBA, + 0xB2, 0xE1, 0xBA, 0xB4, 0xE1, 0xBA, 0xB6, 0xE1, + 0xBA, 0xB8, 0xE1, 0xBA, 0xBA, 0xE1, 0xBA, 0xBC, + 0xE1, 0xBA, 0xBE, 0xE1, 0xBB, 0x80, 0xE1, 0xBB, + 0x82, 0xE1, 0xBB, 0x84, 0xE1, 0xBB, 0x86, 0xE1, + 0xBB, 0x88, 0xE1, 0xBB, 0x8A, 0xE1, 0xBB, 0x8C, + 0xE1, 0xBB, 0x8E, 0xE1, 0xBB, 0x90, 0xE1, 0xBB, + 0x92, 0xE1, 0xBB, 0x94, 0xE1, 0xBB, 0x96, 0xE1, + 0xBB, 0x98, 0xE1, 0xBB, 0x9A, 0xE1, 0xBB, 0x9C, + 0xE1, 0xBB, 0x9E, 0xE1, 0xBB, 0xA0, 0xE1, 0xBB, + 0xA2, 0xE1, 0xBB, 0xA4, 0xE1, 0xBB, 0xA6, 0xE1, + 0xBB, 0xA8, 0xE1, 0xBB, 0xAA, 0xE1, 0xBB, 0xAC, + 0xE1, 0xBB, 0xAE, 0xE1, 0xBB, 0xB0, 0xE1, 0xBB, + 0xB2, 0xE1, 0xBB, 0xB4, 0xE1, 0xBB, 0xB6, 0xE1, + 0xBB, 0xB8, 0xE1, 0xBC, 0x88, 0xE1, 0xBC, 0x89, + 0xE1, 0xBC, 0x8A, 0xE1, 0xBC, 0x8B, 0xE1, 0xBC, + 0x8C, 0xE1, 0xBC, 0x8D, 0xE1, 0xBC, 0x8E, 0xE1, + 0xBC, 0x8F, 0xE1, 0xBC, 0x98, 0xE1, 0xBC, 0x99, + 0xE1, 0xBC, 0x9A, 0xE1, 0xBC, 0x9B, 0xE1, 0xBC, + 0x9C, 0xE1, 0xBC, 0x9D, 0xE1, 0xBC, 0xA8, 0xE1, + 0xBC, 0xA9, 0xE1, 0xBC, 0xAA, 0xE1, 0xBC, 0xAB, + 0xE1, 0xBC, 0xAC, 0xE1, 0xBC, 0xAD, 0xE1, 0xBC, + 0xAE, 0xE1, 0xBC, 0xAF, 0xE1, 0xBC, 0xB8, 0xE1, + 0xBC, 0xB9, 0xE1, 0xBC, 0xBA, 0xE1, 0xBC, 0xBB, + 0xE1, 0xBC, 0xBC, 0xE1, 0xBC, 0xBD, 0xE1, 0xBC, + 0xBE, 0xE1, 0xBC, 0xBF, 0xE1, 0xBD, 0x88, 0xE1, + 0xBD, 0x89, 0xE1, 0xBD, 0x8A, 0xE1, 0xBD, 0x8B, + 0xE1, 0xBD, 0x8C, 0xE1, 0xBD, 0x8D, 0xE1, 0xBD, + 0x99, 0xE1, 0xBD, 0x9B, 0xE1, 0xBD, 0x9D, 0xE1, + 0xBD, 0x9F, 0xE1, 0xBD, 0xA8, 0xE1, 0xBD, 0xA9, + 0xE1, 0xBD, 0xAA, 0xE1, 0xBD, 0xAB, 0xE1, 0xBD, + 0xAC, 0xE1, 0xBD, 0xAD, 0xE1, 0xBD, 0xAE, 0xE1, + 0xBD, 0xAF, 0xE1, 0xBE, 0xBA, 0xE1, 0xBE, 0xBB, + 0xE1, 0xBF, 0x88, 0xE1, 0xBF, 0x89, 0xE1, 0xBF, + 0x8A, 0xE1, 0xBF, 0x8B, 0xE1, 0xBF, 0x9A, 0xE1, + 0xBF, 0x9B, 0xE1, 0xBF, 0xB8, 0xE1, 0xBF, 0xB9, + 0xE1, 0xBF, 0xAA, 0xE1, 0xBF, 0xAB, 0xE1, 0xBF, + 0xBA, 0xE1, 0xBF, 0xBB, 0xE1, 0xBE, 0x88, 0xE1, + 0xBE, 0x89, 0xE1, 0xBE, 0x8A, 0xE1, 0xBE, 0x8B, + 0xE1, 0xBE, 0x8C, 
0xE1, 0xBE, 0x8D, 0xE1, 0xBE, + 0x8E, 0xE1, 0xBE, 0x8F, 0xE1, 0xBE, 0x98, 0xE1, + 0xBE, 0x99, 0xE1, 0xBE, 0x9A, 0xE1, 0xBE, 0x9B, + 0xE1, 0xBE, 0x9C, 0xE1, 0xBE, 0x9D, 0xE1, 0xBE, + 0x9E, 0xE1, 0xBE, 0x9F, 0xE1, 0xBE, 0xA8, 0xE1, + 0xBE, 0xA9, 0xE1, 0xBE, 0xAA, 0xE1, 0xBE, 0xAB, + 0xE1, 0xBE, 0xAC, 0xE1, 0xBE, 0xAD, 0xE1, 0xBE, + 0xAE, 0xE1, 0xBE, 0xAF, 0xE1, 0xBE, 0xB8, 0xE1, + 0xBE, 0xB9, 0xE1, 0xBE, 0xBC, 0xCE, 0x99, 0xE1, + 0xBF, 0x8C, 0xE1, 0xBF, 0x98, 0xE1, 0xBF, 0x99, + 0xE1, 0xBF, 0xA8, 0xE1, 0xBF, 0xA9, 0xE1, 0xBF, + 0xAC, 0xE1, 0xBF, 0xBC, 0xE2, 0x84, 0xB2, 0xE2, + 0x85, 0xA0, 0xE2, 0x85, 0xA1, 0xE2, 0x85, 0xA2, + 0xE2, 0x85, 0xA3, 0xE2, 0x85, 0xA4, 0xE2, 0x85, + 0xA5, 0xE2, 0x85, 0xA6, 0xE2, 0x85, 0xA7, 0xE2, + 0x85, 0xA8, 0xE2, 0x85, 0xA9, 0xE2, 0x85, 0xAA, + 0xE2, 0x85, 0xAB, 0xE2, 0x85, 0xAC, 0xE2, 0x85, + 0xAD, 0xE2, 0x85, 0xAE, 0xE2, 0x85, 0xAF, 0xE2, + 0x86, 0x83, 0xE2, 0x92, 0xB6, 0xE2, 0x92, 0xB7, + 0xE2, 0x92, 0xB8, 0xE2, 0x92, 0xB9, 0xE2, 0x92, + 0xBA, 0xE2, 0x92, 0xBB, 0xE2, 0x92, 0xBC, 0xE2, + 0x92, 0xBD, 0xE2, 0x92, 0xBE, 0xE2, 0x92, 0xBF, + 0xE2, 0x93, 0x80, 0xE2, 0x93, 0x81, 0xE2, 0x93, + 0x82, 0xE2, 0x93, 0x83, 0xE2, 0x93, 0x84, 0xE2, + 0x93, 0x85, 0xE2, 0x93, 0x86, 0xE2, 0x93, 0x87, + 0xE2, 0x93, 0x88, 0xE2, 0x93, 0x89, 0xE2, 0x93, + 0x8A, 0xE2, 0x93, 0x8B, 0xE2, 0x93, 0x8C, 0xE2, + 0x93, 0x8D, 0xE2, 0x93, 0x8E, 0xE2, 0x93, 0x8F, + 0xE2, 0xB0, 0x80, 0xE2, 0xB0, 0x81, 0xE2, 0xB0, + 0x82, 0xE2, 0xB0, 0x83, 0xE2, 0xB0, 0x84, 0xE2, + 0xB0, 0x85, 0xE2, 0xB0, 0x86, 0xE2, 0xB0, 0x87, + 0xE2, 0xB0, 0x88, 0xE2, 0xB0, 0x89, 0xE2, 0xB0, + 0x8A, 0xE2, 0xB0, 0x8B, 0xE2, 0xB0, 0x8C, 0xE2, + 0xB0, 0x8D, 0xE2, 0xB0, 0x8E, 0xE2, 0xB0, 0x8F, + 0xE2, 0xB0, 0x90, 0xE2, 0xB0, 0x91, 0xE2, 0xB0, + 0x92, 0xE2, 0xB0, 0x93, 0xE2, 0xB0, 0x94, 0xE2, + 0xB0, 0x95, 0xE2, 0xB0, 0x96, 0xE2, 0xB0, 0x97, + 0xE2, 0xB0, 0x98, 0xE2, 0xB0, 0x99, 0xE2, 0xB0, + 0x9A, 0xE2, 0xB0, 0x9B, 0xE2, 0xB0, 0x9C, 0xE2, + 0xB0, 0x9D, 0xE2, 0xB0, 0x9E, 0xE2, 0xB0, 0x9F, + 0xE2, 0xB0, 0xA0, 
0xE2, 0xB0, 0xA1, 0xE2, 0xB0, + 0xA2, 0xE2, 0xB0, 0xA3, 0xE2, 0xB0, 0xA4, 0xE2, + 0xB0, 0xA5, 0xE2, 0xB0, 0xA6, 0xE2, 0xB0, 0xA7, + 0xE2, 0xB0, 0xA8, 0xE2, 0xB0, 0xA9, 0xE2, 0xB0, + 0xAA, 0xE2, 0xB0, 0xAB, 0xE2, 0xB0, 0xAC, 0xE2, + 0xB0, 0xAD, 0xE2, 0xB0, 0xAE, 0xE2, 0xB1, 0xA0, + 0xC8, 0xBA, 0xC8, 0xBE, 0xE2, 0xB1, 0xA7, 0xE2, + 0xB1, 0xA9, 0xE2, 0xB1, 0xAB, 0xE2, 0xB1, 0xB5, + 0xE2, 0xB2, 0x80, 0xE2, 0xB2, 0x82, 0xE2, 0xB2, + 0x84, 0xE2, 0xB2, 0x86, 0xE2, 0xB2, 0x88, 0xE2, + 0xB2, 0x8A, 0xE2, 0xB2, 0x8C, 0xE2, 0xB2, 0x8E, + 0xE2, 0xB2, 0x90, 0xE2, 0xB2, 0x92, 0xE2, 0xB2, + 0x94, 0xE2, 0xB2, 0x96, 0xE2, 0xB2, 0x98, 0xE2, + 0xB2, 0x9A, 0xE2, 0xB2, 0x9C, 0xE2, 0xB2, 0x9E, + 0xE2, 0xB2, 0xA0, 0xE2, 0xB2, 0xA2, 0xE2, 0xB2, + 0xA4, 0xE2, 0xB2, 0xA6, 0xE2, 0xB2, 0xA8, 0xE2, + 0xB2, 0xAA, 0xE2, 0xB2, 0xAC, 0xE2, 0xB2, 0xAE, + 0xE2, 0xB2, 0xB0, 0xE2, 0xB2, 0xB2, 0xE2, 0xB2, + 0xB4, 0xE2, 0xB2, 0xB6, 0xE2, 0xB2, 0xB8, 0xE2, + 0xB2, 0xBA, 0xE2, 0xB2, 0xBC, 0xE2, 0xB2, 0xBE, + 0xE2, 0xB3, 0x80, 0xE2, 0xB3, 0x82, 0xE2, 0xB3, + 0x84, 0xE2, 0xB3, 0x86, 0xE2, 0xB3, 0x88, 0xE2, + 0xB3, 0x8A, 0xE2, 0xB3, 0x8C, 0xE2, 0xB3, 0x8E, + 0xE2, 0xB3, 0x90, 0xE2, 0xB3, 0x92, 0xE2, 0xB3, + 0x94, 0xE2, 0xB3, 0x96, 0xE2, 0xB3, 0x98, 0xE2, + 0xB3, 0x9A, 0xE2, 0xB3, 0x9C, 0xE2, 0xB3, 0x9E, + 0xE2, 0xB3, 0xA0, 0xE2, 0xB3, 0xA2, 0xE1, 0x82, + 0xA0, 0xE1, 0x82, 0xA1, 0xE1, 0x82, 0xA2, 0xE1, + 0x82, 0xA3, 0xE1, 0x82, 0xA4, 0xE1, 0x82, 0xA5, + 0xE1, 0x82, 0xA6, 0xE1, 0x82, 0xA7, 0xE1, 0x82, + 0xA8, 0xE1, 0x82, 0xA9, 0xE1, 0x82, 0xAA, 0xE1, + 0x82, 0xAB, 0xE1, 0x82, 0xAC, 0xE1, 0x82, 0xAD, + 0xE1, 0x82, 0xAE, 0xE1, 0x82, 0xAF, 0xE1, 0x82, + 0xB0, 0xE1, 0x82, 0xB1, 0xE1, 0x82, 0xB2, 0xE1, + 0x82, 0xB3, 0xE1, 0x82, 0xB4, 0xE1, 0x82, 0xB5, + 0xE1, 0x82, 0xB6, 0xE1, 0x82, 0xB7, 0xE1, 0x82, + 0xB8, 0xE1, 0x82, 0xB9, 0xE1, 0x82, 0xBA, 0xE1, + 0x82, 0xBB, 0xE1, 0x82, 0xBC, 0xE1, 0x82, 0xBD, + 0xE1, 0x82, 0xBE, 0xE1, 0x82, 0xBF, 0xE1, 0x83, + 0x80, 0xE1, 0x83, 0x81, 0xE1, 0x83, 0x82, 0xE1, + 0x83, 0x83, 0xE1, 
0x83, 0x84, 0xE1, 0x83, 0x85, + 0xEF, 0xBC, 0xA1, 0xEF, 0xBC, 0xA2, 0xEF, 0xBC, + 0xA3, 0xEF, 0xBC, 0xA4, 0xEF, 0xBC, 0xA5, 0xEF, + 0xBC, 0xA6, 0xEF, 0xBC, 0xA7, 0xEF, 0xBC, 0xA8, + 0xEF, 0xBC, 0xA9, 0xEF, 0xBC, 0xAA, 0xEF, 0xBC, + 0xAB, 0xEF, 0xBC, 0xAC, 0xEF, 0xBC, 0xAD, 0xEF, + 0xBC, 0xAE, 0xEF, 0xBC, 0xAF, 0xEF, 0xBC, 0xB0, + 0xEF, 0xBC, 0xB1, 0xEF, 0xBC, 0xB2, 0xEF, 0xBC, + 0xB3, 0xEF, 0xBC, 0xB4, 0xEF, 0xBC, 0xB5, 0xEF, + 0xBC, 0xB6, 0xEF, 0xBC, 0xB7, 0xEF, 0xBC, 0xB8, + 0xEF, 0xBC, 0xB9, 0xEF, 0xBC, 0xBA, 0xF0, 0x90, + 0x90, 0x80, 0xF0, 0x90, 0x90, 0x81, 0xF0, 0x90, + 0x90, 0x82, 0xF0, 0x90, 0x90, 0x83, 0xF0, 0x90, + 0x90, 0x84, 0xF0, 0x90, 0x90, 0x85, 0xF0, 0x90, + 0x90, 0x86, 0xF0, 0x90, 0x90, 0x87, 0xF0, 0x90, + 0x90, 0x88, 0xF0, 0x90, 0x90, 0x89, 0xF0, 0x90, + 0x90, 0x8A, 0xF0, 0x90, 0x90, 0x8B, 0xF0, 0x90, + 0x90, 0x8C, 0xF0, 0x90, 0x90, 0x8D, 0xF0, 0x90, + 0x90, 0x8E, 0xF0, 0x90, 0x90, 0x8F, 0xF0, 0x90, + 0x90, 0x90, 0xF0, 0x90, 0x90, 0x91, 0xF0, 0x90, + 0x90, 0x92, 0xF0, 0x90, 0x90, 0x93, 0xF0, 0x90, + 0x90, 0x94, 0xF0, 0x90, 0x90, 0x95, 0xF0, 0x90, + 0x90, 0x96, 0xF0, 0x90, 0x90, 0x97, 0xF0, 0x90, + 0x90, 0x98, 0xF0, 0x90, 0x90, 0x99, 0xF0, 0x90, + 0x90, 0x9A, 0xF0, 0x90, 0x90, 0x9B, 0xF0, 0x90, + 0x90, 0x9C, 0xF0, 0x90, 0x90, 0x9D, 0xF0, 0x90, + 0x90, 0x9E, 0xF0, 0x90, 0x90, 0x9F, 0xF0, 0x90, + 0x90, 0xA0, 0xF0, 0x90, 0x90, 0xA1, 0xF0, 0x90, + 0x90, 0xA2, 0xF0, 0x90, 0x90, 0xA3, 0xF0, 0x90, + 0x90, 0xA4, 0xF0, 0x90, 0x90, 0xA5, 0xF0, 0x90, + 0x90, 0xA6, 0xF0, 0x90, 0x90, 0xA7, + }, +}; + +#undef N_ +#undef FIL_ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_U8_TEXTPREP_DATA_H */ diff --git a/uts/common/sys/vnode.h b/uts/common/sys/vnode.h new file mode 100644 index 000000000000..d29152346e2c --- /dev/null +++ b/uts/common/sys/vnode.h @@ -0,0 +1,1431 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +#ifndef _SYS_VNODE_H +#define _SYS_VNODE_H + +#include <sys/types.h> +#include <sys/t_lock.h> +#include <sys/rwstlock.h> +#include <sys/time_impl.h> +#include <sys/cred.h> +#include <sys/uio.h> +#include <sys/resource.h> +#include <vm/seg_enum.h> +#include <sys/kstat.h> +#include <sys/kmem.h> +#include <sys/list.h> +#ifdef _KERNEL +#include <sys/buf.h> +#endif /* _KERNEL */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Statistics for all vnode operations. + * All operations record number of ops (since boot/mount/zero'ed). + * Certain I/O operations (read, write, readdir) also record number + * of bytes transferred. + * This appears in two places in the system: one is embedded in each + * vfs_t. There is also an array of vopstats_t structures allocated + * on a per-fstype basis. 
+ */ + +#define VOPSTATS_STR "vopstats_" /* Initial string for vopstat kstats */ + +typedef struct vopstats { + kstat_named_t nopen; /* VOP_OPEN */ + kstat_named_t nclose; /* VOP_CLOSE */ + kstat_named_t nread; /* VOP_READ */ + kstat_named_t read_bytes; + kstat_named_t nwrite; /* VOP_WRITE */ + kstat_named_t write_bytes; + kstat_named_t nioctl; /* VOP_IOCTL */ + kstat_named_t nsetfl; /* VOP_SETFL */ + kstat_named_t ngetattr; /* VOP_GETATTR */ + kstat_named_t nsetattr; /* VOP_SETATTR */ + kstat_named_t naccess; /* VOP_ACCESS */ + kstat_named_t nlookup; /* VOP_LOOKUP */ + kstat_named_t ncreate; /* VOP_CREATE */ + kstat_named_t nremove; /* VOP_REMOVE */ + kstat_named_t nlink; /* VOP_LINK */ + kstat_named_t nrename; /* VOP_RENAME */ + kstat_named_t nmkdir; /* VOP_MKDIR */ + kstat_named_t nrmdir; /* VOP_RMDIR */ + kstat_named_t nreaddir; /* VOP_READDIR */ + kstat_named_t readdir_bytes; + kstat_named_t nsymlink; /* VOP_SYMLINK */ + kstat_named_t nreadlink; /* VOP_READLINK */ + kstat_named_t nfsync; /* VOP_FSYNC */ + kstat_named_t ninactive; /* VOP_INACTIVE */ + kstat_named_t nfid; /* VOP_FID */ + kstat_named_t nrwlock; /* VOP_RWLOCK */ + kstat_named_t nrwunlock; /* VOP_RWUNLOCK */ + kstat_named_t nseek; /* VOP_SEEK */ + kstat_named_t ncmp; /* VOP_CMP */ + kstat_named_t nfrlock; /* VOP_FRLOCK */ + kstat_named_t nspace; /* VOP_SPACE */ + kstat_named_t nrealvp; /* VOP_REALVP */ + kstat_named_t ngetpage; /* VOP_GETPAGE */ + kstat_named_t nputpage; /* VOP_PUTPAGE */ + kstat_named_t nmap; /* VOP_MAP */ + kstat_named_t naddmap; /* VOP_ADDMAP */ + kstat_named_t ndelmap; /* VOP_DELMAP */ + kstat_named_t npoll; /* VOP_POLL */ + kstat_named_t ndump; /* VOP_DUMP */ + kstat_named_t npathconf; /* VOP_PATHCONF */ + kstat_named_t npageio; /* VOP_PAGEIO */ + kstat_named_t ndumpctl; /* VOP_DUMPCTL */ + kstat_named_t ndispose; /* VOP_DISPOSE */ + kstat_named_t nsetsecattr; /* VOP_SETSECATTR */ + kstat_named_t ngetsecattr; /* VOP_GETSECATTR */ + kstat_named_t nshrlock; /* VOP_SHRLOCK */ + 
kstat_named_t nvnevent; /* VOP_VNEVENT */ + kstat_named_t nreqzcbuf; /* VOP_REQZCBUF */ + kstat_named_t nretzcbuf; /* VOP_RETZCBUF */ +} vopstats_t; + +/* + * The vnode is the focus of all file activity in UNIX. + * A vnode is allocated for each active file, each current + * directory, each mounted-on file, and the root. + * + * Each vnode is usually associated with a file-system-specific node (for + * UFS, this is the in-memory inode). Generally, a vnode and an fs-node + * should be created and destroyed together as a pair. + * + * If a vnode is reused for a new file, it should be reinitialized by calling + * either vn_reinit() or vn_recycle(). + * + * vn_reinit() resets the entire vnode as if it was returned by vn_alloc(). + * The caller is responsible for setting up the entire vnode after calling + * vn_reinit(). This is important when using kmem caching where the vnode is + * allocated by a constructor, for instance. + * + * vn_recycle() is used when the file system keeps some state around in both + * the vnode and the associated FS-node. In UFS, for example, the inode of + * a deleted file can be reused immediately. The v_data, v_vfsp, v_op, etc. + * remains the same but certain fields related to the previous instance need + * to be reset. In particular: + * v_femhead + * v_path + * v_rdcnt, v_wrcnt + * v_mmap_read, v_mmap_write + */ + +/* + * vnode types. VNON means no type. These values are unrelated to + * values in on-disk inodes. + */ +typedef enum vtype { + VNON = 0, + VREG = 1, + VDIR = 2, + VBLK = 3, + VCHR = 4, + VLNK = 5, + VFIFO = 6, + VDOOR = 7, + VPROC = 8, + VSOCK = 9, + VPORT = 10, + VBAD = 11 +} vtype_t; + +/* + * VSD - Vnode Specific Data + * Used to associate additional private data with a vnode. 
+ */ +struct vsd_node { + list_node_t vs_nodes; /* list of all VSD nodes */ + uint_t vs_nkeys; /* entries in value array */ + void **vs_value; /* array of value/key */ +}; + +/* + * Many of the fields in the vnode are read-only once they are initialized + * at vnode creation time. Other fields are protected by locks. + * + * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc(). They + * may not be embedded into the file-system specific node (inode). The + * size of vnodes may change. + * + * The v_lock protects: + * v_flag + * v_stream + * v_count + * v_shrlocks + * v_path + * v_vsd + * v_xattrdir + * + * A special lock (implemented by vn_vfswlock in vnode.c) protects: + * v_vfsmountedhere + * + * The global flock_lock mutex (in flock.c) protects: + * v_filocks + * + * IMPORTANT NOTE: + * + * The following vnode fields are considered public and may safely be + * accessed by file systems or other consumers: + * + * v_lock + * v_flag + * v_count + * v_data + * v_vfsp + * v_stream + * v_type + * v_rdev + * + * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD. + * In particular, file systems should not access other fields; they may + * change or even be removed. The functionality which was once provided + * by these fields is available through vn_* functions. 
+ */ + +struct fem_head; /* from fem.h */ + +typedef struct vnode { + kmutex_t v_lock; /* protects vnode fields */ + uint_t v_flag; /* vnode flags (see below) */ + uint_t v_count; /* reference count */ + void *v_data; /* private data for fs */ + struct vfs *v_vfsp; /* ptr to containing VFS */ + struct stdata *v_stream; /* associated stream */ + enum vtype v_type; /* vnode type */ + dev_t v_rdev; /* device (VCHR, VBLK) */ + + /* PRIVATE FIELDS BELOW - DO NOT USE */ + + struct vfs *v_vfsmountedhere; /* ptr to vfs mounted here */ + struct vnodeops *v_op; /* vnode operations */ + struct page *v_pages; /* vnode pages list */ + struct filock *v_filocks; /* ptr to filock list */ + struct shrlocklist *v_shrlocks; /* ptr to shrlock list */ + krwlock_t v_nbllock; /* sync for NBMAND locks */ + kcondvar_t v_cv; /* synchronize locking */ + void *v_locality; /* hook for locality info */ + struct fem_head *v_femhead; /* fs monitoring */ + char *v_path; /* cached path */ + uint_t v_rdcnt; /* open for read count (VREG only) */ + uint_t v_wrcnt; /* open for write count (VREG only) */ + u_longlong_t v_mmap_read; /* mmap read count */ + u_longlong_t v_mmap_write; /* mmap write count */ + void *v_mpssdata; /* info for large page mappings */ + void *v_fopdata; /* list of file ops event watches */ + kmutex_t v_vsd_lock; /* protects v_vsd field */ + struct vsd_node *v_vsd; /* vnode specific data */ + struct vnode *v_xattrdir; /* unnamed extended attr dir (GFS) */ + uint_t v_count_dnlc; /* dnlc reference count */ +} vnode_t; + +#define IS_DEVVP(vp) \ + ((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO) + +#define VNODE_ALIGN 64 +/* Count of low-order 0 bits in a vnode *, based on size and alignment. */ +#if defined(_LP64) +#define VNODE_ALIGN_LOG2 8 +#else +#define VNODE_ALIGN_LOG2 7 +#endif + +/* + * vnode flags. 
+ */ +#define VROOT 0x01 /* root of its file system */ +#define VNOCACHE 0x02 /* don't keep cache pages on vnode */ +#define VNOMAP 0x04 /* file cannot be mapped/faulted */ +#define VDUP 0x08 /* file should be dup'ed rather than opened */ +#define VNOSWAP 0x10 /* file cannot be used as virtual swap device */ +#define VNOMOUNT 0x20 /* file cannot be covered by mount */ +#define VISSWAP 0x40 /* vnode is being used for swap */ +#define VSWAPLIKE 0x80 /* vnode acts like swap (but may not be) */ + +#define IS_SWAPVP(vp) (((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0) + +typedef struct vn_vfslocks_entry { + rwstlock_t ve_lock; + void *ve_vpvfs; + struct vn_vfslocks_entry *ve_next; + uint32_t ve_refcnt; + char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) - \ + sizeof (uint32_t)]; +} vn_vfslocks_entry_t; + +/* + * The following two flags are used to lock the v_vfsmountedhere field + */ +#define VVFSLOCK 0x100 +#define VVFSWAIT 0x200 + +/* + * Used to serialize VM operations on a vnode + */ +#define VVMLOCK 0x400 + +/* + * Tell vn_open() not to fail a directory open for writing but + * to go ahead and call VOP_OPEN() to let the filesystem check. + */ +#define VDIROPEN 0x800 + +/* + * Flag to let the VM system know that this file is most likely a binary + * or shared library since it has been mmap()ed EXEC at some time. + */ +#define VVMEXEC 0x1000 + +#define VPXFS 0x2000 /* clustering: global fs proxy vnode */ + +#define IS_PXFSVP(vp) ((vp)->v_flag & VPXFS) + +#define V_XATTRDIR 0x4000 /* attribute unnamed directory */ + +#define IS_XATTRDIR(vp) ((vp)->v_flag & V_XATTRDIR) + +#define V_LOCALITY 0x8000 /* whether locality aware */ + +/* + * Flag that indicates the VM should maintain the v_pages list with all modified + * pages on one end and unmodified pages at the other. This makes finding dirty + * pages to write back to disk much faster at the expense of taking a minor + * fault on the first store instruction which touches a writable page.
+ */ +#define VMODSORT (0x10000) +#define IS_VMODSORT(vp) \ + (pvn_vmodsort_supported != 0 && ((vp)->v_flag & VMODSORT) != 0) + +#define VISSWAPFS 0x20000 /* vnode is being used for swapfs */ + +/* + * The mdb memstat command assumes that IS_SWAPFSVP only uses the + * vnode's v_flag field. If this changes, cache the additional + * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c + */ +#define IS_SWAPFSVP(vp) (((vp)->v_flag & VISSWAPFS) != 0) + +#define V_SYSATTR 0x40000 /* vnode is a GFS system attribute */ + +/* + * Vnode attributes. A bit-mask is supplied as part of the + * structure to indicate the attributes the caller wants to + * set (setattr) or extract (getattr). + */ + +/* + * Note that va_nodeid and va_nblocks are 64bit data type. + * We support large files over NFSV3. With Solaris client and + * Server that generates 64bit ino's and sizes these fields + * will overflow if they are 32 bit sizes. + */ + +typedef struct vattr { + uint_t va_mask; /* bit-mask of attributes */ + vtype_t va_type; /* vnode type (for create) */ + mode_t va_mode; /* file access mode */ + uid_t va_uid; /* owner user id */ + gid_t va_gid; /* owner group id */ + dev_t va_fsid; /* file system id (dev for now) */ + u_longlong_t va_nodeid; /* node id */ + nlink_t va_nlink; /* number of references to file */ + u_offset_t va_size; /* file size in bytes */ + timestruc_t va_atime; /* time of last access */ + timestruc_t va_mtime; /* time of last modification */ + timestruc_t va_ctime; /* time of last status change */ + dev_t va_rdev; /* device the file represents */ + uint_t va_blksize; /* fundamental block size */ + u_longlong_t va_nblocks; /* # of blocks allocated */ + uint_t va_seq; /* sequence number */ +} vattr_t; + +#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ + +/* + * Structure of all optional attributes. 
+ */ +typedef struct xoptattr { + timestruc_t xoa_createtime; /* Create time of file */ + uint8_t xoa_archive; + uint8_t xoa_system; + uint8_t xoa_readonly; + uint8_t xoa_hidden; + uint8_t xoa_nounlink; + uint8_t xoa_immutable; + uint8_t xoa_appendonly; + uint8_t xoa_nodump; + uint8_t xoa_opaque; + uint8_t xoa_av_quarantined; + uint8_t xoa_av_modified; + uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; + uint8_t xoa_reparse; + uint64_t xoa_generation; + uint8_t xoa_offline; + uint8_t xoa_sparse; +} xoptattr_t; + +/* + * The xvattr structure is really a variable length structure that + * is made up of: + * - The classic vattr_t (xva_vattr) + * - a 32 bit quantity (xva_mapsize) that specifies the size of the + * attribute bitmaps in 32 bit words. + * - A pointer to the returned attribute bitmap (needed because the + * previous element, the requested attribute bitmap, is variable length). + * - The requested attribute bitmap, which is an array of 32 bit words. + * Callers use the XVA_SET_REQ() macro to set the bits corresponding to + * the attributes that are being requested. + * - The returned attribute bitmap, which is an array of 32 bit words. + * File systems that support optional attributes use the XVA_SET_RTN() + * macro to set the bits corresponding to the attributes that are being + * returned. + * - The xoptattr_t structure which contains the attribute values + * + * xva_mapsize determines how many words are in the attribute bitmaps. + * Immediately following the attribute bitmaps is the xoptattr_t. + * xva_getxoptattr() is used to get the pointer to the xoptattr_t + * section. + */ + +#define XVA_MAPSIZE 3 /* Size of attr bitmaps */ +#define XVA_MAGIC 0x78766174 /* Magic # for verification */ + +/* + * The xvattr structure is an extensible structure which permits optional + * attributes to be requested/returned. File systems may or may not support + * optional attributes.
They do so at their own discretion but if they do + * support optional attributes, they must register the VFSFT_XVATTR feature + * so that the optional attributes can be set/retrieved. + * + * The fields of the xvattr structure are: + * + * xva_vattr - The first element of an xvattr is a legacy vattr structure + * which includes the common attributes. If AT_XVATTR is set in the va_mask + * then the entire structure is treated as an xvattr. If AT_XVATTR is not + * set, then only the xva_vattr structure can be used. + * + * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification. + * + * xva_mapsize - Size of requested and returned attribute bitmaps. + * + * xva_rtnattrmapp - Pointer to xva_rtnattrmap[]. We need this since the + * size of the array before it, xva_reqattrmap[], could change which means + * the location of xva_rtnattrmap[] could change. This will allow unbundled + * file systems to find the location of xva_rtnattrmap[] when the sizes change. + * + * xva_reqattrmap[] - Array of requested attributes. Attributes are + * represented by a specific bit in a specific element of the attribute + * map array. Callers set the bits corresponding to the attributes + * that the caller wants to get/set. + * + * xva_rtnattrmap[] - Array of attributes that the file system was able to + * process. Not all file systems support all optional attributes. This map + * informs the caller which attributes the underlying file system was able + * to set/get. (Same structure as the requested attributes array in terms + * of each attribute corresponding to specific bits and array elements.) + * + * xva_xoptattrs - Structure containing values of optional attributes. + * These values are only valid if the corresponding bits in xva_reqattrmap + * are set and the underlying file system supports those attributes.
+ */ +typedef struct xvattr { + vattr_t xva_vattr; /* Embedded vattr structure */ + uint32_t xva_magic; /* Magic Number */ + uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ + uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ + uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ + uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ + xoptattr_t xva_xoptattrs; /* Optional attributes */ +} xvattr_t; + +#ifdef _SYSCALL32 +/* + * For bigtypes time_t changed to 64 bit on the 64-bit kernel. + * Define an old version for user/kernel interface + */ + +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack(4) +#endif + +typedef struct vattr32 { + uint32_t va_mask; /* bit-mask of attributes */ + vtype_t va_type; /* vnode type (for create) */ + mode32_t va_mode; /* file access mode */ + uid32_t va_uid; /* owner user id */ + gid32_t va_gid; /* owner group id */ + dev32_t va_fsid; /* file system id (dev for now) */ + u_longlong_t va_nodeid; /* node id */ + nlink_t va_nlink; /* number of references to file */ + u_offset_t va_size; /* file size in bytes */ + timestruc32_t va_atime; /* time of last access */ + timestruc32_t va_mtime; /* time of last modification */ + timestruc32_t va_ctime; /* time of last status change */ + dev32_t va_rdev; /* device the file represents */ + uint32_t va_blksize; /* fundamental block size */ + u_longlong_t va_nblocks; /* # of blocks allocated */ + uint32_t va_seq; /* sequence number */ +} vattr32_t; + +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack() +#endif + +#else /* not _SYSCALL32 */ +#define vattr32 vattr +typedef vattr_t vattr32_t; +#endif /* _SYSCALL32 */ + +/* + * Attributes of interest to the caller of setattr or getattr. 
+ */ +#define AT_TYPE 0x00001 +#define AT_MODE 0x00002 +#define AT_UID 0x00004 +#define AT_GID 0x00008 +#define AT_FSID 0x00010 +#define AT_NODEID 0x00020 +#define AT_NLINK 0x00040 +#define AT_SIZE 0x00080 +#define AT_ATIME 0x00100 +#define AT_MTIME 0x00200 +#define AT_CTIME 0x00400 +#define AT_RDEV 0x00800 +#define AT_BLKSIZE 0x01000 +#define AT_NBLOCKS 0x02000 +/* 0x04000 */ /* unused */ +#define AT_SEQ 0x08000 +/* + * If AT_XVATTR is set then there are additional bits to process in + * the xvattr_t's attribute bitmap. If this is not set then the bitmap + * MUST be ignored. Note that this bit must be set/cleared explicitly. + * That is, setting AT_ALL will NOT set AT_XVATTR. + */ +#define AT_XVATTR 0x10000 + +#define AT_ALL (AT_TYPE|AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|\ + AT_NLINK|AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|\ + AT_RDEV|AT_BLKSIZE|AT_NBLOCKS|AT_SEQ) + +#define AT_STAT (AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|AT_NLINK|\ + AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|AT_RDEV|AT_TYPE) + +#define AT_TIMES (AT_ATIME|AT_MTIME|AT_CTIME) + +#define AT_NOSET (AT_NLINK|AT_RDEV|AT_FSID|AT_NODEID|AT_TYPE|\ + AT_BLKSIZE|AT_NBLOCKS|AT_SEQ) + +/* + * Attribute bits used in the extensible attribute's (xva's) attribute + * bitmaps. Note that the bitmaps are made up of a variable length number + * of 32-bit words. The convention is to use XAT{n}_{attrname} where "n" + * is the element in the bitmap (starting at 0). This convention is for + * the convenience of the maintainer to keep track of which element each + * attribute belongs to. + * + * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY. CONSUMERS + * MUST USE THE XAT_* DEFINES.
+ */ +#define XAT0_INDEX 0LL /* Index into bitmap for XAT0 attrs */ +#define XAT0_CREATETIME 0x00000001 /* Create time of file */ +#define XAT0_ARCHIVE 0x00000002 /* Archive */ +#define XAT0_SYSTEM 0x00000004 /* System */ +#define XAT0_READONLY 0x00000008 /* Readonly */ +#define XAT0_HIDDEN 0x00000010 /* Hidden */ +#define XAT0_NOUNLINK 0x00000020 /* Nounlink */ +#define XAT0_IMMUTABLE 0x00000040 /* immutable */ +#define XAT0_APPENDONLY 0x00000080 /* appendonly */ +#define XAT0_NODUMP 0x00000100 /* nodump */ +#define XAT0_OPAQUE 0x00000200 /* opaque */ +#define XAT0_AV_QUARANTINED 0x00000400 /* anti-virus quarantine */ +#define XAT0_AV_MODIFIED 0x00000800 /* anti-virus modified */ +#define XAT0_AV_SCANSTAMP 0x00001000 /* anti-virus scanstamp */ +#define XAT0_REPARSE 0x00002000 /* FS reparse point */ +#define XAT0_GEN 0x00004000 /* object generation number */ +#define XAT0_OFFLINE 0x00008000 /* offline */ +#define XAT0_SPARSE 0x00010000 /* sparse */ + +/* Mask of every XAT0_* attribute bit defined above. */ +#define XAT0_ALL_ATTRS (XAT0_CREATETIME|XAT0_ARCHIVE|XAT0_SYSTEM| \ + XAT0_READONLY|XAT0_HIDDEN|XAT0_NOUNLINK|XAT0_IMMUTABLE|XAT0_APPENDONLY| \ + XAT0_NODUMP|XAT0_OPAQUE|XAT0_AV_QUARANTINED|XAT0_AV_MODIFIED| \ + XAT0_AV_SCANSTAMP|XAT0_REPARSE|XAT0_GEN|XAT0_OFFLINE|XAT0_SPARSE) + +/* Support for XAT_* optional attributes */ +#define XVA_MASK 0xffffffff /* Used to mask off 32 bits */ +#define XVA_SHFT 32 /* Used to shift index */ + +/* + * Used to pry out the index and attribute bits from the XAT_* attributes + * defined below. Note that we're masking things down to 32 bits then + * casting to uint32_t. + */ +#define XVA_INDEX(attr) ((uint32_t)(((attr) >> XVA_SHFT) & XVA_MASK)) +#define XVA_ATTRBIT(attr) ((uint32_t)((attr) & XVA_MASK)) + +/* + * The following defines present a "flat namespace" so that consumers don't + * need to keep track of which element belongs to which bitmap entry.
+ * + * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER + */ +#define XAT_CREATETIME ((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME) +#define XAT_ARCHIVE ((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE) +#define XAT_SYSTEM ((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM) +#define XAT_READONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY) +#define XAT_HIDDEN ((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN) +#define XAT_NOUNLINK ((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK) +#define XAT_IMMUTABLE ((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE) +#define XAT_APPENDONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY) +#define XAT_NODUMP ((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP) +#define XAT_OPAQUE ((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE) +#define XAT_AV_QUARANTINED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED) +#define XAT_AV_MODIFIED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED) +#define XAT_AV_SCANSTAMP ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP) +#define XAT_REPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE) +#define XAT_GEN ((XAT0_INDEX << XVA_SHFT) | XAT0_GEN) +#define XAT_OFFLINE ((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE) +#define XAT_SPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE) + +/* + * The returned attribute map array (xva_rtnattrmap[]) is located past the + * requested attribute map array (xva_reqattrmap[]). Its location changes + * when the array sizes change. We use a separate pointer in a known location + * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[]. This is + * set in xva_init() + */ +#define XVA_RTNATTRMAP(xvap) ((xvap)->xva_rtnattrmapp) + +/* + * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap + * of requested attributes (xva_reqattrmap[]). 
+ */ +#define XVA_SET_REQ(xvap, attr) \ + ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR); \ + ASSERT((xvap)->xva_magic == XVA_MAGIC); \ + (xvap)->xva_reqattrmap[XVA_INDEX(attr)] |= XVA_ATTRBIT(attr) +/* + * XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap + * of requested attributes (xva_reqattrmap[]). + * + * Like XVA_SET_REQ() and XVA_SET_RTN(), this expands to several statements, + * so it must not be used as the unbraced body of an if/else or loop. + */ +#define XVA_CLR_REQ(xvap, attr) \ + ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR); \ + ASSERT((xvap)->xva_magic == XVA_MAGIC); \ + (xvap)->xva_reqattrmap[XVA_INDEX(attr)] &= ~XVA_ATTRBIT(attr) + +/* + * XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap + * of returned attributes (xva_rtnattrmap[]). + */ +#define XVA_SET_RTN(xvap, attr) \ + ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR); \ + ASSERT((xvap)->xva_magic == XVA_MAGIC); \ + (XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |= XVA_ATTRBIT(attr) + +/* + * XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[]) + * to see if the corresponding attribute bit is set. If so, returns non-zero. + * Evaluates to 0 when AT_XVATTR is not set in va_mask, the magic number does + * not match, or the attribute's index is outside xva_mapsize. + */ +#define XVA_ISSET_REQ(xvap, attr) \ + ((((xvap)->xva_vattr.va_mask & AT_XVATTR) && \ + ((xvap)->xva_magic == XVA_MAGIC) && \ + ((xvap)->xva_mapsize > XVA_INDEX(attr))) ? \ + ((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0) + +/* + * XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[]) + * to see if the corresponding attribute bit is set. If so, returns non-zero. + * Evaluates to 0 when AT_XVATTR is not set in va_mask, the magic number does + * not match, or the attribute's index is outside xva_mapsize. + */ +#define XVA_ISSET_RTN(xvap, attr) \ + ((((xvap)->xva_vattr.va_mask & AT_XVATTR) && \ + ((xvap)->xva_magic == XVA_MAGIC) && \ + ((xvap)->xva_mapsize > XVA_INDEX(attr))) ? \ + ((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0) + +/* + * Modes. Some values same as S_xxx entries from stat.h for convenience. + */ +#define VSUID 04000 /* set user id on execution */ +#define VSGID 02000 /* set group id on execution */ +#define VSVTX 01000 /* save swapped text even after use */ + +/* + * Permissions.
+ */ +#define VREAD 00400 +#define VWRITE 00200 +#define VEXEC 00100 + +#define MODEMASK 07777 /* mode bits plus permission bits */ +#define PERMMASK 00777 /* permission bits */ + +/* + * VOP_ACCESS flags + */ +#define V_ACE_MASK 0x1 /* mask represents NFSv4 ACE permissions */ +#define V_APPEND 0x2 /* want to do append only check */ + +/* + * Check whether mandatory file locking is enabled. + */ + +#define MANDMODE(mode) (((mode) & (VSGID|(VEXEC>>3))) == VSGID) +#define MANDLOCK(vp, mode) ((vp)->v_type == VREG && MANDMODE(mode)) + +/* + * Flags for vnode operations. + */ +enum rm { RMFILE, RMDIRECTORY }; /* rm or rmdir (remove) */ +enum symfollow { NO_FOLLOW, FOLLOW }; /* follow symlinks (or not) */ +enum vcexcl { NONEXCL, EXCL }; /* (non)excl create */ +enum create { CRCREAT, CRMKNOD, CRMKDIR }; /* reason for create */ + +typedef enum rm rm_t; +typedef enum symfollow symfollow_t; +typedef enum vcexcl vcexcl_t; +typedef enum create create_t; + +/* Vnode Events - Used by VOP_VNEVENT */ +typedef enum vnevent { + VE_SUPPORT = 0, /* Query */ + VE_RENAME_SRC = 1, /* Rename, with vnode as source */ + VE_RENAME_DEST = 2, /* Rename, with vnode as target/destination */ + VE_REMOVE = 3, /* Remove of vnode's name */ + VE_RMDIR = 4, /* Remove of directory vnode's name */ + VE_CREATE = 5, /* Create with vnode's name which exists */ + VE_LINK = 6, /* Link with vnode's name as source */ + VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ + VE_MOUNTEDOVER = 8 /* File or Filesystem got mounted over vnode */ +} vnevent_t; + +/* + * Values for checking vnode open and map counts + */ +enum v_mode { V_READ, V_WRITE, V_RDORWR, V_RDANDWR }; + +typedef enum v_mode v_mode_t; + +#define V_TRUE 1 +#define V_FALSE 0 + +/* + * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations + */ + +typedef struct vsecattr { + uint_t vsa_mask; /* See below */ + int vsa_aclcnt; /* ACL entry count */ + void *vsa_aclentp; /* pointer to ACL entries */ + int vsa_dfaclcnt; /* default ACL 
entry count */ + void *vsa_dfaclentp; /* pointer to default ACL entries */ + size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ + uint_t vsa_aclflags; /* ACE ACL flags */ +} vsecattr_t; + +/* vsa_mask values */ +#define VSA_ACL 0x0001 +#define VSA_ACLCNT 0x0002 +#define VSA_DFACL 0x0004 +#define VSA_DFACLCNT 0x0008 +#define VSA_ACE 0x0010 +#define VSA_ACECNT 0x0020 +#define VSA_ACE_ALLTYPES 0x0040 +#define VSA_ACE_ACLFLAGS 0x0080 /* get/set ACE ACL flags */ + +/* + * Structure used by various vnode operations to determine + * the context (pid, host, identity) of a caller. + * + * The cc_caller_id is used to identify one or more callers who invoke + * operations, possibly on behalf of others. For example, the NFS + * server could have its own cc_caller_id which can be detected by + * vnode/vfs operations or (FEM) monitors on those operations. New + * caller IDs are generated by fs_new_caller_id(). + */ +typedef struct caller_context { + pid_t cc_pid; /* Process ID of the caller */ + int cc_sysid; /* System ID, used for remote calls */ + u_longlong_t cc_caller_id; /* Identifier for (set of) caller(s) */ + ulong_t cc_flags; /* CC_WOULDBLOCK/CC_DONTBLOCK flags (see below) */ +} caller_context_t; + +/* + * Flags for caller context. The caller sets CC_DONTBLOCK if it does not + * want to block inside of a FEM monitor. The monitor will set CC_WOULDBLOCK + * and return EAGAIN if the operation would have blocked. + */ +#define CC_WOULDBLOCK 0x01 +#define CC_DONTBLOCK 0x02 + +/* + * Structure tags for function prototypes, defined elsewhere. + */ +struct pathname; +struct fid; +struct flock64; +struct flk_callback; +struct shrlock; +struct page; +struct seg; +struct as; +struct pollhead; +struct taskq; + +#ifdef _KERNEL + +/* + * VNODE_OPS defines all the vnode operations. It is used to define + * the vnodeops structure (below) and the fs_func_p union (vfs_opreg.h).
+ */ +#define VNODE_OPS \ + int (*vop_open)(vnode_t **, int, cred_t *, \ + caller_context_t *); \ + int (*vop_close)(vnode_t *, int, int, offset_t, cred_t *, \ + caller_context_t *); \ + int (*vop_read)(vnode_t *, uio_t *, int, cred_t *, \ + caller_context_t *); \ + int (*vop_write)(vnode_t *, uio_t *, int, cred_t *, \ + caller_context_t *); \ + int (*vop_ioctl)(vnode_t *, int, intptr_t, int, cred_t *, \ + int *, caller_context_t *); \ + int (*vop_setfl)(vnode_t *, int, int, cred_t *, \ + caller_context_t *); \ + int (*vop_getattr)(vnode_t *, vattr_t *, int, cred_t *, \ + caller_context_t *); \ + int (*vop_setattr)(vnode_t *, vattr_t *, int, cred_t *, \ + caller_context_t *); \ + int (*vop_access)(vnode_t *, int, int, cred_t *, \ + caller_context_t *); \ + int (*vop_lookup)(vnode_t *, char *, vnode_t **, \ + struct pathname *, \ + int, vnode_t *, cred_t *, \ + caller_context_t *, int *, \ + struct pathname *); \ + int (*vop_create)(vnode_t *, char *, vattr_t *, vcexcl_t, \ + int, vnode_t **, cred_t *, int, \ + caller_context_t *, vsecattr_t *); \ + int (*vop_remove)(vnode_t *, char *, cred_t *, \ + caller_context_t *, int); \ + int (*vop_link)(vnode_t *, vnode_t *, char *, cred_t *, \ + caller_context_t *, int); \ + int (*vop_rename)(vnode_t *, char *, vnode_t *, char *, \ + cred_t *, caller_context_t *, int); \ + int (*vop_mkdir)(vnode_t *, char *, vattr_t *, vnode_t **, \ + cred_t *, caller_context_t *, int, \ + vsecattr_t *); \ + int (*vop_rmdir)(vnode_t *, char *, vnode_t *, cred_t *, \ + caller_context_t *, int); \ + int (*vop_readdir)(vnode_t *, uio_t *, cred_t *, int *, \ + caller_context_t *, int); \ + int (*vop_symlink)(vnode_t *, char *, vattr_t *, char *, \ + cred_t *, caller_context_t *, int); \ + int (*vop_readlink)(vnode_t *, uio_t *, cred_t *, \ + caller_context_t *); \ + int (*vop_fsync)(vnode_t *, int, cred_t *, \ + caller_context_t *); \ + void (*vop_inactive)(vnode_t *, cred_t *, \ + caller_context_t *); \ + int (*vop_fid)(vnode_t *, struct fid 
*, \ + caller_context_t *); \ + int (*vop_rwlock)(vnode_t *, int, caller_context_t *); \ + void (*vop_rwunlock)(vnode_t *, int, caller_context_t *); \ + int (*vop_seek)(vnode_t *, offset_t, offset_t *, \ + caller_context_t *); \ + int (*vop_cmp)(vnode_t *, vnode_t *, caller_context_t *); \ + int (*vop_frlock)(vnode_t *, int, struct flock64 *, \ + int, offset_t, \ + struct flk_callback *, cred_t *, \ + caller_context_t *); \ + int (*vop_space)(vnode_t *, int, struct flock64 *, \ + int, offset_t, \ + cred_t *, caller_context_t *); \ + int (*vop_realvp)(vnode_t *, vnode_t **, \ + caller_context_t *); \ + int (*vop_getpage)(vnode_t *, offset_t, size_t, uint_t *, \ + struct page **, size_t, struct seg *, \ + caddr_t, enum seg_rw, cred_t *, \ + caller_context_t *); \ + int (*vop_putpage)(vnode_t *, offset_t, size_t, \ + int, cred_t *, caller_context_t *); \ + int (*vop_map)(vnode_t *, offset_t, struct as *, \ + caddr_t *, size_t, \ + uchar_t, uchar_t, uint_t, cred_t *, \ + caller_context_t *); \ + int (*vop_addmap)(vnode_t *, offset_t, struct as *, \ + caddr_t, size_t, \ + uchar_t, uchar_t, uint_t, cred_t *, \ + caller_context_t *); \ + int (*vop_delmap)(vnode_t *, offset_t, struct as *, \ + caddr_t, size_t, \ + uint_t, uint_t, uint_t, cred_t *, \ + caller_context_t *); \ + int (*vop_poll)(vnode_t *, short, int, short *, \ + struct pollhead **, \ + caller_context_t *); \ + int (*vop_dump)(vnode_t *, caddr_t, offset_t, offset_t, \ + caller_context_t *); \ + int (*vop_pathconf)(vnode_t *, int, ulong_t *, cred_t *, \ + caller_context_t *); \ + int (*vop_pageio)(vnode_t *, struct page *, \ + u_offset_t, size_t, int, cred_t *, \ + caller_context_t *); \ + int (*vop_dumpctl)(vnode_t *, int, offset_t *, \ + caller_context_t *); \ + void (*vop_dispose)(vnode_t *, struct page *, \ + int, int, cred_t *, \ + caller_context_t *); \ + int (*vop_setsecattr)(vnode_t *, vsecattr_t *, \ + int, cred_t *, caller_context_t *); \ + int (*vop_getsecattr)(vnode_t *, vsecattr_t *, \ + int, 
cred_t *, caller_context_t *); \ + int (*vop_shrlock)(vnode_t *, int, struct shrlock *, \ + int, cred_t *, caller_context_t *); \ + int (*vop_vnevent)(vnode_t *, vnevent_t, vnode_t *, \ + char *, caller_context_t *); \ + int (*vop_reqzcbuf)(vnode_t *, enum uio_rw, xuio_t *, \ + cred_t *, caller_context_t *); \ + int (*vop_retzcbuf)(vnode_t *, xuio_t *, cred_t *, \ + caller_context_t *) + /* NB: No ";" */ + +/* + * Operations on vnodes. Note: File systems must never operate directly + * on a 'vnodeops' structure -- it WILL change in future releases! They + * must use vn_make_ops() to create the structure. + */ +typedef struct vnodeops { + const char *vnop_name; + VNODE_OPS; /* Signatures of all vnode operations (vops) */ +} vnodeops_t; + +typedef int (*fs_generic_func_p) (); /* Generic vop/vfsop/femop/fsemop ptr */ + +extern int fop_open(vnode_t **, int, cred_t *, caller_context_t *); +extern int fop_close(vnode_t *, int, int, offset_t, cred_t *, + caller_context_t *); +extern int fop_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); +extern int fop_write(vnode_t *, uio_t *, int, cred_t *, + caller_context_t *); +extern int fop_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *, + caller_context_t *); +extern int fop_setfl(vnode_t *, int, int, cred_t *, caller_context_t *); +extern int fop_getattr(vnode_t *, vattr_t *, int, cred_t *, + caller_context_t *); +extern int fop_setattr(vnode_t *, vattr_t *, int, cred_t *, + caller_context_t *); +extern int fop_access(vnode_t *, int, int, cred_t *, caller_context_t *); +extern int fop_lookup(vnode_t *, char *, vnode_t **, struct pathname *, + int, vnode_t *, cred_t *, caller_context_t *, + int *, struct pathname *); +extern int fop_create(vnode_t *, char *, vattr_t *, vcexcl_t, int, + vnode_t **, cred_t *, int, caller_context_t *, + vsecattr_t *); +extern int fop_remove(vnode_t *vp, char *, cred_t *, caller_context_t *, + int); +extern int fop_link(vnode_t *, vnode_t *, char *, cred_t *, + caller_context_t *, 
int); +extern int fop_rename(vnode_t *, char *, vnode_t *, char *, cred_t *, + caller_context_t *, int); +extern int fop_mkdir(vnode_t *, char *, vattr_t *, vnode_t **, cred_t *, + caller_context_t *, int, vsecattr_t *); +extern int fop_rmdir(vnode_t *, char *, vnode_t *, cred_t *, + caller_context_t *, int); +extern int fop_readdir(vnode_t *, uio_t *, cred_t *, int *, + caller_context_t *, int); +extern int fop_symlink(vnode_t *, char *, vattr_t *, char *, cred_t *, + caller_context_t *, int); +extern int fop_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); +extern int fop_fsync(vnode_t *, int, cred_t *, caller_context_t *); +extern void fop_inactive(vnode_t *, cred_t *, caller_context_t *); +extern int fop_fid(vnode_t *, struct fid *, caller_context_t *); +extern int fop_rwlock(vnode_t *, int, caller_context_t *); +extern void fop_rwunlock(vnode_t *, int, caller_context_t *); +extern int fop_seek(vnode_t *, offset_t, offset_t *, caller_context_t *); +extern int fop_cmp(vnode_t *, vnode_t *, caller_context_t *); +extern int fop_frlock(vnode_t *, int, struct flock64 *, int, offset_t, + struct flk_callback *, cred_t *, + caller_context_t *); +extern int fop_space(vnode_t *, int, struct flock64 *, int, offset_t, + cred_t *, caller_context_t *); +extern int fop_realvp(vnode_t *, vnode_t **, caller_context_t *); +extern int fop_getpage(vnode_t *, offset_t, size_t, uint_t *, + struct page **, size_t, struct seg *, + caddr_t, enum seg_rw, cred_t *, + caller_context_t *); +extern int fop_putpage(vnode_t *, offset_t, size_t, int, cred_t *, + caller_context_t *); +extern int fop_map(vnode_t *, offset_t, struct as *, caddr_t *, size_t, + uchar_t, uchar_t, uint_t, cred_t *cr, + caller_context_t *); +extern int fop_addmap(vnode_t *, offset_t, struct as *, caddr_t, size_t, + uchar_t, uchar_t, uint_t, cred_t *, + caller_context_t *); +extern int fop_delmap(vnode_t *, offset_t, struct as *, caddr_t, size_t, + uint_t, uint_t, uint_t, cred_t *, + caller_context_t *); 
+extern int fop_poll(vnode_t *, short, int, short *, struct pollhead **, + caller_context_t *); +extern int fop_dump(vnode_t *, caddr_t, offset_t, offset_t, + caller_context_t *); +extern int fop_pathconf(vnode_t *, int, ulong_t *, cred_t *, + caller_context_t *); +extern int fop_pageio(vnode_t *, struct page *, u_offset_t, size_t, int, + cred_t *, caller_context_t *); +extern int fop_dumpctl(vnode_t *, int, offset_t *, caller_context_t *); +extern void fop_dispose(vnode_t *, struct page *, int, int, cred_t *, + caller_context_t *); +extern int fop_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *, + caller_context_t *); +extern int fop_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *, + caller_context_t *); +extern int fop_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *, + caller_context_t *); +extern int fop_vnevent(vnode_t *, vnevent_t, vnode_t *, char *, + caller_context_t *); +extern int fop_reqzcbuf(vnode_t *, enum uio_rw, xuio_t *, cred_t *, + caller_context_t *); +extern int fop_retzcbuf(vnode_t *, xuio_t *, cred_t *, caller_context_t *); + +#endif /* _KERNEL */ + +#define VOP_OPEN(vpp, mode, cr, ct) \ + fop_open(vpp, mode, cr, ct) +#define VOP_CLOSE(vp, f, c, o, cr, ct) \ + fop_close(vp, f, c, o, cr, ct) +#define VOP_READ(vp, uiop, iof, cr, ct) \ + fop_read(vp, uiop, iof, cr, ct) +#define VOP_WRITE(vp, uiop, iof, cr, ct) \ + fop_write(vp, uiop, iof, cr, ct) +#define VOP_IOCTL(vp, cmd, a, f, cr, rvp, ct) \ + fop_ioctl(vp, cmd, a, f, cr, rvp, ct) +#define VOP_SETFL(vp, f, a, cr, ct) \ + fop_setfl(vp, f, a, cr, ct) +#define VOP_GETATTR(vp, vap, f, cr, ct) \ + fop_getattr(vp, vap, f, cr, ct) +#define VOP_SETATTR(vp, vap, f, cr, ct) \ + fop_setattr(vp, vap, f, cr, ct) +#define VOP_ACCESS(vp, mode, f, cr, ct) \ + fop_access(vp, mode, f, cr, ct) +#define VOP_LOOKUP(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp) \ + fop_lookup(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp) +#define VOP_CREATE(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap) \ + 
fop_create(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap) +#define VOP_REMOVE(dvp, p, cr, ct, f) \ + fop_remove(dvp, p, cr, ct, f) +#define VOP_LINK(tdvp, fvp, p, cr, ct, f) \ + fop_link(tdvp, fvp, p, cr, ct, f) +#define VOP_RENAME(fvp, fnm, tdvp, tnm, cr, ct, f) \ + fop_rename(fvp, fnm, tdvp, tnm, cr, ct, f) +#define VOP_MKDIR(dp, p, vap, vpp, cr, ct, f, vsap) \ + fop_mkdir(dp, p, vap, vpp, cr, ct, f, vsap) +#define VOP_RMDIR(dp, p, cdir, cr, ct, f) \ + fop_rmdir(dp, p, cdir, cr, ct, f) +#define VOP_READDIR(vp, uiop, cr, eofp, ct, f) \ + fop_readdir(vp, uiop, cr, eofp, ct, f) +#define VOP_SYMLINK(dvp, lnm, vap, tnm, cr, ct, f) \ + fop_symlink(dvp, lnm, vap, tnm, cr, ct, f) +#define VOP_READLINK(vp, uiop, cr, ct) \ + fop_readlink(vp, uiop, cr, ct) +#define VOP_FSYNC(vp, syncflag, cr, ct) \ + fop_fsync(vp, syncflag, cr, ct) +#define VOP_INACTIVE(vp, cr, ct) \ + fop_inactive(vp, cr, ct) +#define VOP_FID(vp, fidp, ct) \ + fop_fid(vp, fidp, ct) +#define VOP_RWLOCK(vp, w, ct) \ + fop_rwlock(vp, w, ct) +#define VOP_RWUNLOCK(vp, w, ct) \ + fop_rwunlock(vp, w, ct) +#define VOP_SEEK(vp, ooff, noffp, ct) \ + fop_seek(vp, ooff, noffp, ct) +#define VOP_CMP(vp1, vp2, ct) \ + fop_cmp(vp1, vp2, ct) +#define VOP_FRLOCK(vp, cmd, a, f, o, cb, cr, ct) \ + fop_frlock(vp, cmd, a, f, o, cb, cr, ct) +#define VOP_SPACE(vp, cmd, a, f, o, cr, ct) \ + fop_space(vp, cmd, a, f, o, cr, ct) +#define VOP_REALVP(vp1, vp2, ct) \ + fop_realvp(vp1, vp2, ct) +#define VOP_GETPAGE(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct) \ + fop_getpage(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct) +#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) \ + fop_putpage(vp, of, sz, fl, cr, ct) +#define VOP_MAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \ + fop_map(vp, of, as, a, sz, p, mp, fl, cr, ct) +#define VOP_ADDMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \ + fop_addmap(vp, of, as, a, sz, p, mp, fl, cr, ct) +#define VOP_DELMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \ + fop_delmap(vp, of, as, a, sz, p, mp, fl, cr, ct) +#define 
VOP_POLL(vp, events, anyyet, reventsp, phpp, ct) \ + fop_poll(vp, events, anyyet, reventsp, phpp, ct) +#define VOP_DUMP(vp, addr, bn, count, ct) \ + fop_dump(vp, addr, bn, count, ct) +#define VOP_PATHCONF(vp, cmd, valp, cr, ct) \ + fop_pathconf(vp, cmd, valp, cr, ct) +#define VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct) \ + fop_pageio(vp, pp, io_off, io_len, flags, cr, ct) +#define VOP_DUMPCTL(vp, action, blkp, ct) \ + fop_dumpctl(vp, action, blkp, ct) +#define VOP_DISPOSE(vp, pp, flag, dn, cr, ct) \ + fop_dispose(vp, pp, flag, dn, cr, ct) +#define VOP_GETSECATTR(vp, vsap, f, cr, ct) \ + fop_getsecattr(vp, vsap, f, cr, ct) +#define VOP_SETSECATTR(vp, vsap, f, cr, ct) \ + fop_setsecattr(vp, vsap, f, cr, ct) +#define VOP_SHRLOCK(vp, cmd, shr, f, cr, ct) \ + fop_shrlock(vp, cmd, shr, f, cr, ct) +#define VOP_VNEVENT(vp, vnevent, dvp, fnm, ct) \ + fop_vnevent(vp, vnevent, dvp, fnm, ct) +#define VOP_REQZCBUF(vp, rwflag, xuiop, cr, ct) \ + fop_reqzcbuf(vp, rwflag, xuiop, cr, ct) +#define VOP_RETZCBUF(vp, xuiop, cr, ct) \ + fop_retzcbuf(vp, xuiop, cr, ct) + +#define VOPNAME_OPEN "open" +#define VOPNAME_CLOSE "close" +#define VOPNAME_READ "read" +#define VOPNAME_WRITE "write" +#define VOPNAME_IOCTL "ioctl" +#define VOPNAME_SETFL "setfl" +#define VOPNAME_GETATTR "getattr" +#define VOPNAME_SETATTR "setattr" +#define VOPNAME_ACCESS "access" +#define VOPNAME_LOOKUP "lookup" +#define VOPNAME_CREATE "create" +#define VOPNAME_REMOVE "remove" +#define VOPNAME_LINK "link" +#define VOPNAME_RENAME "rename" +#define VOPNAME_MKDIR "mkdir" +#define VOPNAME_RMDIR "rmdir" +#define VOPNAME_READDIR "readdir" +#define VOPNAME_SYMLINK "symlink" +#define VOPNAME_READLINK "readlink" +#define VOPNAME_FSYNC "fsync" +#define VOPNAME_INACTIVE "inactive" +#define VOPNAME_FID "fid" +#define VOPNAME_RWLOCK "rwlock" +#define VOPNAME_RWUNLOCK "rwunlock" +#define VOPNAME_SEEK "seek" +#define VOPNAME_CMP "cmp" +#define VOPNAME_FRLOCK "frlock" +#define VOPNAME_SPACE "space" +#define VOPNAME_REALVP 
"realvp" +#define VOPNAME_GETPAGE "getpage" +#define VOPNAME_PUTPAGE "putpage" +#define VOPNAME_MAP "map" +#define VOPNAME_ADDMAP "addmap" +#define VOPNAME_DELMAP "delmap" +#define VOPNAME_POLL "poll" +#define VOPNAME_DUMP "dump" +#define VOPNAME_PATHCONF "pathconf" +#define VOPNAME_PAGEIO "pageio" +#define VOPNAME_DUMPCTL "dumpctl" +#define VOPNAME_DISPOSE "dispose" +#define VOPNAME_GETSECATTR "getsecattr" +#define VOPNAME_SETSECATTR "setsecattr" +#define VOPNAME_SHRLOCK "shrlock" +#define VOPNAME_VNEVENT "vnevent" +#define VOPNAME_REQZCBUF "reqzcbuf" +#define VOPNAME_RETZCBUF "retzcbuf" + +/* + * Flags for VOP_LOOKUP + * + * Defined in file.h, but also possible, FIGNORECASE and FSEARCH + * + */ +#define LOOKUP_DIR 0x01 /* want parent dir vp */ +#define LOOKUP_XATTR 0x02 /* lookup up extended attr dir */ +#define CREATE_XATTR_DIR 0x04 /* Create extended attr dir */ +#define LOOKUP_HAVE_SYSATTR_DIR 0x08 /* Already created virtual GFS dir */ + +/* + * Flags for VOP_READDIR + */ +#define V_RDDIR_ENTFLAGS 0x01 /* request dirent flags */ +#define V_RDDIR_ACCFILTER 0x02 /* filter out inaccessible dirents */ + +/* + * Flags for VOP_RWLOCK/VOP_RWUNLOCK + * VOP_RWLOCK will return the flag that was actually set, or -1 if none. + */ +#define V_WRITELOCK_TRUE (1) /* Request write-lock on the vnode */ +#define V_WRITELOCK_FALSE (0) /* Request read-lock on the vnode */ + +/* + * Flags for VOP_DUMPCTL + */ +#define DUMP_ALLOC 0 +#define DUMP_FREE 1 +#define DUMP_SCAN 2 + +/* + * Public vnode manipulation functions. 
+ */ +#ifdef _KERNEL + +vnode_t *vn_alloc(int); +void vn_reinit(vnode_t *); +void vn_recycle(vnode_t *); +void vn_free(vnode_t *); + +int vn_is_readonly(vnode_t *); +int vn_is_opened(vnode_t *, v_mode_t); +int vn_is_mapped(vnode_t *, v_mode_t); +int vn_has_other_opens(vnode_t *, v_mode_t); +void vn_open_upgrade(vnode_t *, int); +void vn_open_downgrade(vnode_t *, int); + +int vn_can_change_zones(vnode_t *vp); + +int vn_has_flocks(vnode_t *); +int vn_has_mandatory_locks(vnode_t *, int); +int vn_has_cached_data(vnode_t *); + +void vn_setops(vnode_t *, vnodeops_t *); +vnodeops_t *vn_getops(vnode_t *); +int vn_matchops(vnode_t *, vnodeops_t *); +int vn_matchopval(vnode_t *, char *, fs_generic_func_p); +int vn_ismntpt(vnode_t *); + +struct vfs *vn_mountedvfs(vnode_t *); + +int vn_in_dnlc(vnode_t *); + +void vn_create_cache(void); +void vn_destroy_cache(void); + +void vn_freevnodeops(vnodeops_t *); + +int vn_open(char *pnamep, enum uio_seg seg, int filemode, int createmode, + struct vnode **vpp, enum create crwhy, mode_t umask); +int vn_openat(char *pnamep, enum uio_seg seg, int filemode, int createmode, + struct vnode **vpp, enum create crwhy, + mode_t umask, struct vnode *startvp, int fd); +int vn_create(char *pnamep, enum uio_seg seg, struct vattr *vap, + enum vcexcl excl, int mode, struct vnode **vpp, + enum create why, int flag, mode_t umask); +int vn_createat(char *pnamep, enum uio_seg seg, struct vattr *vap, + enum vcexcl excl, int mode, struct vnode **vpp, + enum create why, int flag, mode_t umask, struct vnode *startvp); +int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len, + offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit, + cred_t *cr, ssize_t *residp); +void vn_rele(struct vnode *vp); +void vn_rele_async(struct vnode *vp, struct taskq *taskq); +void vn_rele_dnlc(struct vnode *vp); +void vn_rele_stream(struct vnode *vp); +int vn_link(char *from, char *to, enum uio_seg seg); +int vn_linkat(vnode_t *fstartvp, char *from, enum 
symfollow follow, + vnode_t *tstartvp, char *to, enum uio_seg seg); +int vn_rename(char *from, char *to, enum uio_seg seg); +int vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp, char *tname, + enum uio_seg seg); +int vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag); +int vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, + enum rm dirflag); +int vn_compare(vnode_t *vp1, vnode_t *vp2); +int vn_vfswlock(struct vnode *vp); +int vn_vfswlock_wait(struct vnode *vp); +int vn_vfsrlock(struct vnode *vp); +int vn_vfsrlock_wait(struct vnode *vp); +void vn_vfsunlock(struct vnode *vp); +int vn_vfswlock_held(struct vnode *vp); +vnode_t *specvp(struct vnode *vp, dev_t dev, vtype_t type, struct cred *cr); +vnode_t *makespecvp(dev_t dev, vtype_t type); +vn_vfslocks_entry_t *vn_vfslocks_getlock(void *); +void vn_vfslocks_rele(vn_vfslocks_entry_t *); +boolean_t vn_is_reparse(vnode_t *, cred_t *, caller_context_t *); + +void vn_copypath(struct vnode *src, struct vnode *dst); +void vn_setpath_str(struct vnode *vp, const char *str, size_t len); +void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp, + const char *path, size_t plen); +void vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len); + +/* Vnode event notification */ +void vnevent_rename_src(vnode_t *, vnode_t *, char *, caller_context_t *); +void vnevent_rename_dest(vnode_t *, vnode_t *, char *, caller_context_t *); +void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *); +void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *); +void vnevent_create(vnode_t *, caller_context_t *); +void vnevent_link(vnode_t *, caller_context_t *); +void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct); +void vnevent_mountedover(vnode_t *, caller_context_t *); +int vnevent_support(vnode_t *, caller_context_t *); + +/* Vnode specific data */ +void vsd_create(uint_t *, void (*)(void *)); +void vsd_destroy(uint_t *); +void *vsd_get(vnode_t *, uint_t); 
+int vsd_set(vnode_t *, uint_t, void *); +void vsd_free(vnode_t *); + +/* + * Extensible vnode attribute (xva) routines: + * xva_init() initializes an xvattr_t (zero struct, init mapsize, set AT_XATTR) + * xva_getxoptattr() returns a pointer to the xoptattr_t section of xvattr_t + */ +void xva_init(xvattr_t *); +xoptattr_t *xva_getxoptattr(xvattr_t *); /* Get ptr to xoptattr_t */ + +void xattr_init(void); /* Initialize vnodeops for xattrs */ + +/* GFS tunnel for xattrs */ +int xattr_dir_lookup(vnode_t *, vnode_t **, int, cred_t *); + +/* Reparse Point */ +void reparse_point_init(void); + +/* Context identification */ +u_longlong_t fs_new_caller_id(); + +int vn_vmpss_usepageio(vnode_t *); + +/* + * Needed for use of IS_VMODSORT() in kernel. + */ +extern uint_t pvn_vmodsort_supported; + +#define VN_HOLD(vp) { \ + mutex_enter(&(vp)->v_lock); \ + (vp)->v_count++; \ + mutex_exit(&(vp)->v_lock); \ +} + +#define VN_RELE(vp) { \ + vn_rele(vp); \ +} + +#define VN_RELE_ASYNC(vp, taskq) { \ + vn_rele_async(vp, taskq); \ +} + +#define VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev) { \ + (vp)->v_vfsp = (vfsp); \ + (vp)->v_type = (type); \ + (vp)->v_rdev = (dev); \ +} + +/* + * Compare two vnodes for equality. In general this macro should be used + * in preference to calling VOP_CMP directly. + */ +#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \ + ((VP1) && (VP2) && (vn_getops(VP1) == vn_getops(VP2)) ? \ + VOP_CMP(VP1, VP2, NULL) : 0)) + +/* + * Some well-known global vnodes used by the VM system to name pages. + */ +extern struct vnode kvps[]; + +typedef enum { + KV_KVP, /* vnode for all segkmem pages */ + KV_ZVP, /* vnode for all ZFS pages */ +#if defined(__sparc) + KV_MPVP, /* vnode for all page_t meta-pages */ + KV_PROMVP, /* vnode for all PROM pages */ +#endif /* __sparc */ + KV_MAX /* total number of vnodes in kvps[] */ +} kvps_index_t; + +#define VN_ISKAS(vp) ((vp) >= &kvps[0] && (vp) < &kvps[KV_MAX]) + +#endif /* _KERNEL */ + +/* + * Flags to VOP_SETATTR/VOP_GETATTR.
+ */ +#define ATTR_UTIME 0x01 /* non-default utime(2) request */ +#define ATTR_EXEC 0x02 /* invocation from exec(2) */ +#define ATTR_COMM 0x04 /* yield common vp attributes */ +#define ATTR_HINT 0x08 /* information returned will be `hint' */ +#define ATTR_REAL 0x10 /* yield attributes of the real vp */ +#define ATTR_NOACLCHECK 0x20 /* Don't check ACL when checking permissions */ +#define ATTR_TRIGGER 0x40 /* Mount first if vnode is a trigger mount */ +/* + * Generally useful macros. + */ +#define VBSIZE(vp) ((vp)->v_vfsp->vfs_bsize) + +#define VTOZONE(vp) ((vp)->v_vfsp->vfs_zone) + +#define NULLVP ((struct vnode *)0) +#define NULLVPP ((struct vnode **)0) + +#ifdef _KERNEL + +/* + * Structure used while handling asynchronous VOP_PUTPAGE operations. + */ +struct async_reqs { + struct async_reqs *a_next; /* pointer to next arg struct */ + struct vnode *a_vp; /* vnode pointer */ + u_offset_t a_off; /* offset in file */ + uint_t a_len; /* size of i/o request */ + int a_flags; /* flags to indicate operation type */ + struct cred *a_cred; /* cred pointer */ + ushort_t a_prealloced; /* set if struct is pre-allocated */ +}; + +/* + * VN_DISPOSE() -- given a page pointer, safely invoke VOP_DISPOSE(). + * Note that there is no guarantee that the page passed in will be + * freed. If that is required, then a check after calling VN_DISPOSE would + * be necessary to ensure the page was freed. 
+ */ +#define VN_DISPOSE(pp, flag, dn, cr) { \ + if ((pp)->p_vnode != NULL && !VN_ISKAS((pp)->p_vnode)) \ + VOP_DISPOSE((pp)->p_vnode, (pp), (flag), (dn), (cr), NULL); \ + else if ((flag) == B_FREE) \ + page_free((pp), (dn)); \ + else \ + page_destroy((pp), (dn)); \ + } + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VNODE_H */ diff --git a/uts/common/sys/zmod.h b/uts/common/sys/zmod.h new file mode 100644 index 000000000000..ba0267203ce3 --- /dev/null +++ b/uts/common/sys/zmod.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZMOD_H +#define _ZMOD_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * zmod - RFC-1950-compatible decompression routines + * + * This file provides the public interfaces to zmod, an in-kernel RFC 1950 + * decompression library. More information about the implementation of these + * interfaces can be found in the usr/src/uts/common/zmod/ directory. 
+ */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) + +extern int z_uncompress(void *, size_t *, const void *, size_t); +extern int z_compress(void *, size_t *, const void *, size_t); +extern int z_compress_level(void *, size_t *, const void *, size_t, int); +extern const char *z_strerror(int); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZMOD_H */ diff --git a/uts/common/zmod/adler32.c b/uts/common/zmod/adler32.c new file mode 100644 index 000000000000..59d84632ed5d --- /dev/null +++ b/uts/common/zmod/adler32.c @@ -0,0 +1,149 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#define ZLIB_INTERNAL +#include "zlib.h" + +#define BASE 65521UL /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware */ +#ifdef NO_DIVIDE +# define MOD(a) \ + do { \ + if (a >= (BASE << 16)) a -= (BASE << 16); \ + if (a >= (BASE << 15)) a -= (BASE << 15); \ + if (a >= (BASE << 14)) a -= (BASE << 14); \ + if (a >= (BASE << 13)) a -= (BASE << 13); \ + if (a >= (BASE << 12)) a -= (BASE << 12); \ + if (a >= (BASE << 11)) a -= (BASE << 11); \ + if (a >= (BASE << 10)) a -= (BASE << 10); \ + if (a >= (BASE << 9)) a -= (BASE << 9); \ + if (a >= 
(BASE << 8)) a -= (BASE << 8); \ + if (a >= (BASE << 7)) a -= (BASE << 7); \ + if (a >= (BASE << 6)) a -= (BASE << 6); \ + if (a >= (BASE << 5)) a -= (BASE << 5); \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD4(a) \ + do { \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD4(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD4(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len
>= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* the derivation of this formula is left as an exercise for the reader */ + rem = (unsigned)(len2 % BASE); + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; /* >=, not >: a sum equal to BASE must reduce to 0 */ + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); /* same >= rule for the high half */ + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} diff --git a/uts/common/zmod/crc32.c b/uts/common/zmod/crc32.c new file mode 100644 index 000000000000..61ad581ef562 --- /dev/null +++ b/uts/common/zmod/crc32.c @@ -0,0 +1,428 @@ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables.
Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + */ + +#ifdef MAKECRCH +# include <stdio.h> +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +#define local static + +/* Find a four-byte integer type for crc32_little() and crc32_big(). */ +#ifndef NOBYFOUR +# ifdef STDC /* need ANSI C limits.h to determine sizes */ +# include <limits.h> +# define BYFOUR +# if (UINT_MAX == 0xffffffffUL) + typedef unsigned int u4; +# else +# if (ULONG_MAX == 0xffffffffUL) + typedef unsigned long u4; +# else +# if (USHRT_MAX == 0xffffffffUL) + typedef unsigned short u4; +# else +# undef BYFOUR /* can't find a four-byte integer type! */ +# endif +# endif +# endif +# endif /* STDC */ +#endif /* !NOBYFOUR */ + +/* Definitions for doing the crc four data bytes at a time. */ +#ifdef BYFOUR +# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ + (((w)&0xff00)<<8)+(((w)&0xff)<<24)) + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, unsigned)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, unsigned)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local unsigned long FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const unsigned long FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 
+ + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. 
+*/ +local void make_crc_table() +{ + unsigned long c; + int n, k; + unsigned long poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0UL; + for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++) + poly |= 1UL << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (unsigned long)n; + for (k = 0; k < 8; k++) + c = c & 1 ? poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = REV(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = REV(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const unsigned long FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + 
write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const unsigned long FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n], + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const unsigned long FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const unsigned long FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + u4 endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +#ifdef BYFOUR + +/* 
========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = (u4)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *++buf4; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = REV((u4)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + buf4--; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf4++; + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } 
while (--len); + c = ~c; + return (unsigned long)(REV(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; + + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case */ + if (len2 == 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, 
then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} diff --git a/uts/common/zmod/crc32.h b/uts/common/zmod/crc32.h new file mode 100644 index 000000000000..495c83e03179 --- /dev/null +++ b/uts/common/zmod/crc32.h @@ -0,0 +1,443 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +local const unsigned long FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 
0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, 
+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 
0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 
0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 
0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 
0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 
0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 
0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 
0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 
0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 
0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 
0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 
0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 
0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 
0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff --git a/uts/common/zmod/deflate.c b/uts/common/zmod/deflate.c new file mode 100644 index 000000000000..7847e40ba327 --- /dev/null +++ b/uts/common/zmod/deflate.c @@ -0,0 +1,1742 @@ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. 
+ * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://www.ietf.org/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505 + * + */ + +#include "deflate.h" + +static const char deflate_copyright[] = + " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. 
*/ + +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifndef FASTEST +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif +#endif +local uInt longest_match_fast OF((deflate_state *s, IPos cur_match)); + +#ifdef DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
+ */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +#ifndef NO_DUMMY_DECL +struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ +#endif + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. 
+ */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. 
+ */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + (void) zmemzero((Bytef *)s->head, \ + (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. 
+ */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg 
= (char*)ERR_MSG(Z_MEM_ERROR); + (void) deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt length = dictLength; + uInt n; + IPos hash_head = 0; + + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || + strm->state->wrap == 2 || + (strm->state->wrap == 1 && strm->state->status != INIT_STATE)) + return Z_STREAM_ERROR; + + s = strm->state; + if (s->wrap) + strm->adler = adler32(strm->adler, dictionary, dictLength); + + if (length < MIN_MATCH) return Z_OK; + if (length > MAX_DIST(s)) { + length = MAX_DIST(s); + dictionary += dictLength - length; /* use the tail of the dictionary */ + } + (void) zmemcpy(s->window, dictionary, length); + s->strstart = length; + s->block_start = (long)length; + + /* Insert all strings in the hash table (except for the last two bytes). + * s->lookahead stays null, so s->ins_h will be recomputed at the next + * call of fill_window. 
+ */ + s->ins_h = s->window[0]; + UPDATE_HASH(s, s->ins_h, s->window[1]); + for (n = 0; n <= length - MIN_MATCH; n++) { + INSERT_STRING(s, n, hash_head); + } + if (hash_head) hash_head = 0; /* to make compiler happy */ + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (strm->state->wrap != 2) return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + strm->state->bi_valid = bits; + strm->state->bi_buf = (ush)(value & ((1 << bits) - 1)); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state 
*s; + compress_func func; + int err = Z_OK; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if (func != configuration_table[level].func && strm->total_in != 0) { + /* Flush the last buffer: */ + err = deflate(strm, Z_PARTIAL_FLUSH); + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return err; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = good_length; + s->max_lazy_match = max_lazy; + s->nice_match = nice_length; + s->max_chain_length = max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. 
+ * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds + * for every combination of windowBits and memLevel, as well as wrap. + * But even the conservative upper bound of about 14% expansion does not + * seem onerous for output buffer allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong destLen; + + /* conservative upper bound */ + destLen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11; + + /* if can't get parameters, return conservative bound */ + if (strm == Z_NULL || strm->state == Z_NULL) + return destLen; + + /* if not default parameters, return conservative bound */ + s = strm->state; + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return destLen; + + /* default settings: return tight bound for that case */ + return compressBound(sourceLen); +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). 
+ */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len = strm->state->pending; + + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, strm->state->pending_out, len); + strm->next_out += len; + strm->state->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + strm->state->pending -= len; + if (strm->state->pending == 0) { + strm->state->pending_out = strm->state->pending_buf; + } +} + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + flush > Z_FINISH || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + s->strm = strm; /* just in case */ + old_flush = s->last_flush; + s->last_flush = flush; + + /* Write the header */ + if (s->status == INIT_STATE) { +#ifdef GZIP + if (s->wrap == 2) { + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 
0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + else +#endif + { + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + s->status = BUSY_STATE; + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + } + } +#ifdef GZIP + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + + while (s->gzindex < (s->gzhead->extra_len & 0xffff)) { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) + break; + } + put_byte(s, s->gzhead->extra[s->gzindex]); + s->gzindex++; + } + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (s->gzindex == s->gzhead->extra_len) { + 
s->gzindex = 0; + s->status = NAME_STATE; + } + } + else + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + int val; + + do { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) { + val = 1; + break; + } + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (val == 0) { + s->gzindex = 0; + s->status = COMMENT_STATE; + } + } + else + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + int val; + + do { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) { + val = 1; + break; + } + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (val == 0) + s->status = HCRC_STATE; + } + else + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) + flush_pending(strm); + if (s->pending + 2 <= s->pending_buf_size) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + } + } + else + s->status = BUSY_STATE; + } +#endif + + /* Flush as much pending output as possible */ + if (s->pending != 0) 
{ + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && flush <= old_flush && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). 
+ */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + Assert(strm->avail_out > 0, "bug2"); + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + + status = strm->state->status; + if (status != INIT_STATE && + status != EXTRA_STATE && + status != NAME_STATE && + status != COMMENT_STATE && + status != HCRC_STATE && + status != BUSY_STATE && + status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? 
Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). + */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy(dest, source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy(ds, ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* 
MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local int read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, strm->next_in, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, strm->next_in, len); + } +#endif + zmemcpy(buf, strm->next_in, len); + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. 
Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. 
+ */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. 
+ */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. 
+ */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ +#endif /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for level == 1 or strategy == Z_RLE only + */ +local uInt longest_match_fast(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) 
+ * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#ifdef DEBUG +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* DEBUG */ + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. 
+ * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + register unsigned n, m; + register Posf *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + /* %%% avoid this when Z_RLE */ + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. 
+ */ + } while (--n); +#endif + more += wsize; + } + if (s->strm->avail_in == 0) return; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead >= MIN_MATCH) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, eof) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (eof)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, eof) { \ + FLUSH_BLOCK_ONLY(s, eof); \ + if (s->strm->avail_out == 0) return (eof) ? 
finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. + */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? 
finish_done : block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ +#ifdef FASTEST + if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) || + (s->strategy == Z_RLE && s->strstart - hash_head == 1)) { + s->match_length = longest_match_fast (s, hash_head); + } +#else + if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { + s->match_length = longest_match (s, hash_head); + } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { + s->match_length = longest_match_fast (s, hash_head); + } +#endif + /* longest_match() or longest_match_fast() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? 
finish_done : block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ + if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { + s->match_length = longest_match (s, hash_head); + } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { + s->match_length = longest_match_fast (s, hash_head); + } + /* longest_match() or longest_match_fast() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. 
+ */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +#endif /* FASTEST */ + +#if 0 +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt run; /* length of run */ + uInt max; /* maximum length of run */ + uInt prev; /* byte at distance one to match */ + Bytef *scan; /* scan for end of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest encodable run. + */ + if (s->lookahead < MAX_MATCH) { + fill_window(s); + if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + run = 0; + if (s->strstart > 0) { /* if there is a previous byte, that is */ + max = s->lookahead < MAX_MATCH ? 
s->lookahead : MAX_MATCH; + scan = s->window + s->strstart - 1; + prev = *scan++; + do { + if (*scan++ != prev) + break; + } while (++run < max); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (run >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, run); + _tr_tally_dist(s, 1, run - MIN_MATCH, bflush); + s->lookahead -= run; + s->strstart += run; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +#endif diff --git a/uts/common/zmod/deflate.h b/uts/common/zmod/deflate.h new file mode 100644 index 000000000000..d01a3c10e449 --- /dev/null +++ b/uts/common/zmod/deflate.h @@ -0,0 +1,331 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2004 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +#ifndef _DEFLATE_H +#define _DEFLATE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. 
+ */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define INIT_STATE 42 +#define EXTRA_STATE 69 +#define NAME_STATE 73 +#define COMMENT_STATE 91 +#define HCRC_STATE 103 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. 
+ */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + uInt pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + uInt gzindex; /* where in extra, name, or comment */ + Byte method; /* STORED (for zip only) or DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. 
*/ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. 
+ */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to supress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). 
+ * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + int last_eob_len; /* bit length of EOB code for last block */ + +#ifdef DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. 
+ */ + + /* in trees.c */ +void _tr_init OF((deflate_state *s)); +int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); +void _tr_align OF((deflate_state *s)); +void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch _length_code[]; + extern uch _dist_code[]; +#else + extern const uch _length_code[]; + extern const uch _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* _DEFLATE_H */ diff --git a/uts/common/zmod/inffast.c b/uts/common/zmod/inffast.c new file mode 100644 index 000000000000..a6dcf3f5c8b8 --- /dev/null +++ b/uts/common/zmod/inffast.c @@ -0,0 +1,320 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "zutil.h" 
+#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifndef ASMINF + +/* Allow machine dependent optimization for post-increment or pre-increment. + Based on testing to date, + Pre-increment preferred for: + - PowerPC G3 (Adler) + - MIPS R5000 (Randers-Pehrson) + Post-increment preferred for: + - none + No measurable difference: + - Pentium III (Anderson) + - M68060 (Nikl) + */ +#ifdef POSTINC +# define OFF 0 +# define PUP(a) *(a)++ +#else +# define OFF 1 +# define PUP(a) *++(a) +#endif + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. 
+ */
+void inflate_fast(strm, start)  /* hot decode loop; entry and exit contract is in the comment above */
+z_streamp strm;
+unsigned start;         /* inflate()'s starting value for strm->avail_out */
+{
+    struct inflate_state FAR *state;
+    unsigned char FAR *in;      /* local strm->next_in */
+    unsigned char FAR *last;    /* while in < last, enough input available */
+    unsigned char FAR *out;     /* local strm->next_out */
+    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
+    unsigned char FAR *end;     /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+    unsigned dmax;              /* maximum distance from zlib header */
+#endif
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned write;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
+    unsigned long hold;         /* local state->hold (bit accumulator) */
+    unsigned bits;              /* local state->bits (number of bits in hold) */
+    code const FAR *lcode;      /* local state->lencode */
+    code const FAR *dcode;      /* local state->distcode */
+    unsigned lmask;             /* mask for first level of length codes */
+    unsigned dmask;             /* mask for first level of distance codes */
+    code this;                  /* retrieved table entry */
+    unsigned op;                /* code bits, operation, extra bits, or */
+                                /*  window position, window bytes to copy */
+    unsigned len;               /* match length, unused bytes */
+    unsigned dist;              /* match distance */
+    unsigned char FAR *from;    /* where to copy match from */
+
+    /* copy state to local variables */
+    state = (struct inflate_state FAR *)strm->state;
+    in = strm->next_in - OFF;   /* biased by OFF so PUP() fetches the right byte in either increment mode */
+    last = in + (strm->avail_in - 5);   /* loop stops once fewer than 6 input bytes remain */
+    out = strm->next_out - OFF;
+    beg = out - (start - strm->avail_out);  /* start - avail_out = output produced since inflate()'s starting value */
+    end = out + (strm->avail_out - 257);    /* loop stops once fewer than 258 output bytes remain */
+#ifdef INFLATE_STRICT
+    dmax = state->dmax;
+#endif
+    wsize = state->wsize;
+    whave = state->whave;
+    write = state->write;
+    window = state->window;
+    hold = state->hold;
+    bits = state->bits;
+    lcode = state->lencode;
+    dcode = state->distcode;
+    lmask = (1U << state->lenbits) - 1;
+    dmask = (1U << state->distbits) - 1;
+
+    /* decode literals and length/distances until end-of-block or not enough
+       input data or output space */
+    do {
+        if (bits < 15) {        /* top up by two bytes; a code is at most 15 bits (see notes above) */
+            hold += (unsigned long)(PUP(in)) << bits;
+            bits += 8;
+            hold += (unsigned long)(PUP(in)) << bits;
+            bits += 8;
+        }
+        this = lcode[hold & lmask];
+      dolen:
+        op = (unsigned)(this.bits);     /* bits consumed by this table entry */
+        hold >>= op;
+        bits -= op;
+        op = (unsigned)(this.op);       /* op is reused: now the entry's operation field */
+        if (op == 0) {                          /* literal */
+            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+                    "inflate: literal '%c'\n" :
+                    "inflate: literal 0x%02x\n", this.val));
+            PUP(out) = (unsigned char)(this.val);
+        }
+        else if (op & 16) {                     /* length base */
+            len = (unsigned)(this.val);
+            op &= 15;                           /* number of extra bits */
+            if (op) {
+                if (bits < op) {
+                    hold += (unsigned long)(PUP(in)) << bits;
+                    bits += 8;
+                }
+                len += (unsigned)hold & ((1U << op) - 1);
+                hold >>= op;
+                bits -= op;
+            }
+            Tracevv((stderr, "inflate: length %u\n", len));
+            if (bits < 15) {    /* same two-byte top-up before the distance lookup */
+                hold += (unsigned long)(PUP(in)) << bits;
+                bits += 8;
+                hold += (unsigned long)(PUP(in)) << bits;
+                bits += 8;
+            }
+            this = dcode[hold & dmask];
+          dodist:
+            op = (unsigned)(this.bits);
+            hold >>= op;
+            bits -= op;
+            op = (unsigned)(this.op);
+            if (op & 16) {                      /* distance base */
+                dist = (unsigned)(this.val);
+                op &= 15;                       /* number of extra bits */
+                if (bits < op) {
+                    hold += (unsigned long)(PUP(in)) << bits;
+                    bits += 8;
+                    if (bits < op) {    /* one byte may not be enough for the extra bits; pull a second */
+                        hold += (unsigned long)(PUP(in)) << bits;
+                        bits += 8;
+                    }
+                }
+                dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+                if (dist > dmax) {
+                    strm->msg = (char *)"invalid distance too far back";
+                    state->mode = BAD;
+                    break;
+                }
+#endif
+                hold >>= op;
+                bits -= op;
+                Tracevv((stderr, "inflate: distance %u\n", dist));
+                op = (unsigned)(out - beg);     /* max distance in output */
+                if (dist > op) {                /* see if copy from window */
+                    op = dist - op;             /* distance back in window */
+                    if (op > whave) {           /* reaches back past the valid bytes in the window */
+                        strm->msg = (char *)"invalid distance too far back";
+                        state->mode = BAD;
+                        break;
+                    }
+                    from = window - OFF;
+                    if (write == 0) {           /* very common case */
+                        from += wsize - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                PUP(out) = PUP(from);
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    else if (write < op) {      /* wrap around window */
+                        from += wsize + write - op;
+                        op -= write;
+                        if (op < len) {         /* some from end of window */
+                            len -= op;
+                            do {
+                                PUP(out) = PUP(from);
+                            } while (--op);
+                            from = window - OFF;
+                            if (write < len) {  /* some from start of window */
+                                op = write;
+                                len -= op;
+                                do {
+                                    PUP(out) = PUP(from);
+                                } while (--op);
+                                from = out - dist;      /* rest from output */
+                            }
+                        }
+                    }
+                    else {                      /* contiguous in window */
+                        from += write - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                PUP(out) = PUP(from);
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    while (len > 2) {           /* copy what is left of the match, three bytes per pass */
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        len -= 3;
+                    }
+                    if (len) {                  /* one or two trailing bytes */
+                        PUP(out) = PUP(from);
+                        if (len > 1)
+                            PUP(out) = PUP(from);
+                    }
+                }
+                else {
+                    from = out - dist;          /* copy direct from output */
+                    do {                        /* minimum length is three */
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        len -= 3;
+                    } while (len > 2);
+                    if (len) {
+                        PUP(out) = PUP(from);
+                        if (len > 1)
+                            PUP(out) = PUP(from);
+                    }
+                }
+            }
+            else if ((op & 64) == 0) {          /* 2nd level distance code */
+                this = dcode[this.val + (hold & ((1U << op) - 1))];
+                goto dodist;
+            }
+            else {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+        }
+        else if ((op & 64) == 0) {              /* 2nd level length code */
+            this = lcode[this.val + (hold & ((1U << op) - 1))];
+            goto dolen;
+        }
+        else if (op & 32) {                     /* end-of-block */
+            Tracevv((stderr, "inflate: end of block\n"));
+            state->mode = TYPE;
+            break;
+        }
+        else {
+            strm->msg = (char *)"invalid literal/length code";
+            state->mode = BAD;
+            break;
+        }
+    } while (in < last && out < end);
+
+    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+    len = bits >> 3;            /* whole bytes still sitting unused in the bit buffer */
+    in -= len;
+    bits -= len << 3;
+    hold &= (1U << bits) - 1;   /* keep only the remaining low bits */
+
+    /* update state and return */
+    strm->next_in = in + OFF;
+    strm->next_out = out + OFF;
+    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));    /* inverts last = in + (avail_in - 5) */
+    strm->avail_out = (unsigned)(out < end ?
+                                 257 + (end - out) : 257 - (out - end));    /* inverts end = out + (avail_out - 257) */
+    state->hold = hold;
+    state->bits = bits;
+    return;
+}
+
+/*
+   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+   - Using bit fields for code structure
+   - Different op definition to avoid & for extra bits (do & for table bits)
+   - Three separate decoding do-loops for direct, window, and write == 0
+   - Special case for distance > 1 copies to do overlapped load and store copy
+   - Explicit branch predictions (based on measured branch probabilities)
+   - Deferring match copy and interspersing it with decoding subsequent codes
+   - Swapping literal/length else
+   - Swapping window/direct else
+   - Larger unrolled copy loops (three is about right)
+   - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
diff --git a/uts/common/zmod/inffast.h b/uts/common/zmod/inffast.h
new file mode 100644
index 000000000000..2d214efa299a
--- /dev/null
+++ b/uts/common/zmod/inffast.h
@@ -0,0 +1,13 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */ + +void inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/uts/common/zmod/inffixed.h b/uts/common/zmod/inffixed.h new file mode 100644 index 000000000000..ed55df899109 --- /dev/null +++ b/uts/common/zmod/inffixed.h @@ -0,0 +1,96 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + /* WARNING: this file should *not* be used by applications. It + is part of the implementation of the compression library and + is subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + 
{0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + 
{0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + 
{18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/uts/common/zmod/inflate.c b/uts/common/zmod/inflate.c new file mode 100644 index 000000000000..023e7a121ce7 --- /dev/null +++ b/uts/common/zmod/inflate.c @@ -0,0 +1,1395 @@ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - 
Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common write == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. 
+ * - Fixed stored blocks bug in inflateBack()
+ *
+ * 1.2.beta6    4 Jan 2003
+ * - Added comments in inffast.c on effectiveness of POSTINC
+ * - Typecasting all around to reduce compiler warnings
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
+ *   make compilers happy
+ * - Changed type of window in inflateBackInit() to unsigned char *
+ *
+ * 1.2.beta7    27 Jan 2003
+ * - Changed many types to unsigned or unsigned short to avoid warnings
+ * - Added inflateCopy() function
+ *
+ * 1.2.0        9 Mar 2003
+ * - Changed inflateBack() interface to provide separate opaque descriptors
+ *   for the in() and out() functions
+ * - Changed inflateBack() argument and in_func typedef to swap the length
+ *   and buffer address return values for the input function
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
+ *
+ * The history for versions after 1.2.0 is in the ChangeLog in the zlib
+ * distribution.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef MAKEFIXED
+#  ifndef BUILDFIXED
+#    define BUILDFIXED
+#  endif
+#endif
+
+/* function prototypes */
+local void fixedtables OF((struct inflate_state FAR *state));
+local int updatewindow OF((z_streamp strm, unsigned out));
+#ifdef BUILDFIXED
+   void makefixed OF((void));
+#endif
+local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf,
+                              unsigned len));
+
+/*
+   Put an already-allocated inflate stream back into its initial decoding
+   state: the byte totals are zeroed, strm->msg is cleared, strm->adler is set
+   to 1, the mode returns to HEAD, the bit buffer is emptied and the window
+   bookkeeping (wsize, whave, write) is zeroed.  state->window itself is not
+   touched here.
+
+   Returns Z_STREAM_ERROR if strm or strm->state is Z_NULL, Z_OK otherwise.
+ */
+int ZEXPORT inflateReset(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    strm->total_in = strm->total_out = state->total = 0;
+    strm->msg = Z_NULL;
+    strm->adler = 1;        /* to support ill-conceived Java test suite */
+    state->mode = HEAD;
+    state->last = 0;
+    state->havedict = 0;
+    state->dmax = 32768U;
+    state->head = Z_NULL;
+    state->wsize = 0;
+    state->whave = 0;
+    state->write = 0;
+    state->hold = 0;
+    state->bits = 0;
+    state->lencode = state->distcode = state->next = state->codes;
+    Tracev((stderr, "inflate: reset\n"));
+    return Z_OK;
+}
+
+/*
+   Insert the low `bits` bits of `value` into the inflate bit buffer, above
+   the bits it already holds.
+
+   Returns Z_STREAM_ERROR if strm or strm->state is Z_NULL, if bits is
+   negative or greater than 16, or if the insertion would leave more than 32
+   bits in the buffer.  Returns Z_OK otherwise; bits == 0 is accepted and
+   leaves the buffer unchanged.
+ */
+int ZEXPORT inflatePrime(strm, bits, value)
+z_streamp strm;
+int bits;
+int value;
+{
+    struct inflate_state FAR *state;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    /* a negative count would make both shifts below undefined, so reject it
+       along with the existing upper limits */
+    if (bits < 0 || bits > 16 || state->bits + (unsigned)bits > 32)
+        return Z_STREAM_ERROR;
+    /* nothing to insert; also keeps the shift count below strictly under 32
+       when the buffer is already full */
+    if (bits == 0) return Z_OK;
+    value &= (1L << bits) - 1;
+    /* shift in the accumulator's unsigned type: shifting the int directly
+       overflows for a 16-bit value landing on bit positions 16..31 */
+    state->hold += (unsigned long)value << state->bits;
+    state->bits += (unsigned)bits;
+    return Z_OK;
+}
+
+/*
+   Allocate the inflate state for strm and reset it.
+
+   windowBits < 0 selects wrap = 0 and uses -windowBits as the window size
+   exponent.  Otherwise wrap is (windowBits >> 4) + 1 and, when GUNZIP is
+   defined, values below 48 are reduced to their low four bits.  The resulting
+   exponent must lie in 8..15.
+
+   Returns Z_VERSION_ERROR if version is Z_NULL, its first character differs
+   from ZLIB_VERSION's, or stream_size is not sizeof(z_stream);
+   Z_STREAM_ERROR if strm is Z_NULL or the window exponent is out of range (the
+   state is freed again in that case); Z_MEM_ERROR if the state cannot be
+   allocated; otherwise the result of inflateReset().
+ */
+int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
+z_streamp strm;
+int windowBits;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *state;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    state = (struct inflate_state FAR *)
+            ZALLOC(strm, 1, sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    if (windowBits < 0) {
+        state->wrap = 0;
+        windowBits = -windowBits;
+    }
+    else {
+        state->wrap = (windowBits >> 4) + 1;
+#ifdef GUNZIP
+        if (windowBits < 48) windowBits &= 15;
+#endif
+    }
+    if (windowBits < 8 || windowBits > 15) {
+        ZFREE(strm, state);
+        strm->state = Z_NULL;
+        return Z_STREAM_ERROR;
+    }
+    state->wbits = (unsigned)windowBits;
+    state->window = Z_NULL;
+    return inflateReset(strm);
+}
+
+/*
+   Convenience form of inflateInit2_() that passes DEF_WBITS as windowBits;
+   the return values are those of inflateInit2_().
+ */
+int ZEXPORT inflateInit_(strm, version, stream_size)
+z_streamp strm;
+const char *version;
+int stream_size;
+{
+    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;
+    static code *lenfix, *distfix;
+    static code fixed[544];
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+# include "inffixed.h"
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;
+    state->lenbits = 9;
+    state->distcode = distfix;
+    state->distbits = 5;
+}
+
+#ifdef MAKEFIXED
+#include <stdio.h>
+
+/*
+   Write out the inffixed.h that is #include'd above.  Defining MAKEFIXED also
+   defines BUILDFIXED, so the tables are built on the fly.  makefixed() writes
+   those tables to stdout, which would be piped to inffixed.h.
A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits, + state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. 
+ + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, out) +z_streamp strm; +unsigned out; +{ + struct inflate_state FAR *state; + unsigned copy, dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->write = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + copy = out - strm->avail_out; + if (copy >= state->wsize) { + zmemcpy(state->window, strm->next_out - state->wsize, state->wsize); + state->write = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->write; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->write, strm->next_out - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, strm->next_out - copy, copy); + state->write = copy; + state->whave = state->wsize; + } + else { + state->write += dist; + if (state->write == state->wsize) state->write = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? 
crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). 
*/ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Reverse the bytes in a 32-bit value */ +#define REVERSE(q) \ + ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. 
After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. 
Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream is available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + 
break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + /*FALLTHRU*/ + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if (state->flags & 0x0200) CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + /*FALLTHRU*/ + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + /*FALLTHRU*/ + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + 
state->head->extra = Z_NULL; + state->mode = EXTRA; + /*FALLTHRU*/ + case EXTRA: + /* copy the gzip extra field, possibly over several inflate() calls; + state->length counts the extra bytes still to be consumed */ + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + /* len is the number of extra bytes already consumed. Only + store while len is still inside the caller's buffer: + without the len < extra_max test, extra_max - len + underflows once the field outgrows the buffer and + zmemcpy() would write far past head->extra */ + if (state->head != Z_NULL && + state->head->extra != Z_NULL && + (len = state->head->extra_len - state->length) < + state->head->extra_max) { + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + /*FALLTHRU*/ + case NAME: + /* consume the zero-terminated file name, storing at most name_max + bytes of it when the caller supplied a buffer */ + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + /*FALLTHRU*/ + case COMMENT: + /* consume the zero-terminated comment, storing at most comm_max + bytes of it when the caller supplied a buffer */ + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + /*FALLTHRU*/ + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if (hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + 
} + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = REVERSE(hold); + INITBITS(); + state->mode = DICT; + /*FALLTHRU*/ + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + /*FALLTHRU*/ + case TYPE: + if (flush == Z_BLOCK) goto inf_leave; + /*FALLTHRU*/ + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY; + /*FALLTHRU*/ + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + /*FALLTHRU*/ + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + /*FALLTHRU*/ + case CODELENS: + while (state->have 
< state->nlen + state->ndist) { + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + /*FALLTHRU*/ + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + break; + } + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + 
PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + state->length = (unsigned)this.val; + if ((int)(this.op) == 0) { + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + state->mode = LIT; + break; + } + if (this.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(this.op) & 15; + state->mode = LENEXT; + /*FALLTHRU*/ + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->mode = DIST; + /*FALLTHRU*/ + case DIST: + for (;;) { + this = state->distcode[BITS(state->distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)this.val; + state->extra = (unsigned)(this.op) & 15; + state->mode = DISTEXT; + /*FALLTHRU*/ + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + if (state->offset > state->whave + 
out - left) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + /*FALLTHRU*/ + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->write) { + copy -= state->write; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->write - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if (out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if (( +#ifdef GUNZIP + state->flags ? 
hold : +#endif + REVERSE(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + /*FALLTHRU*/ + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + /*FALLTHRU*/ + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) + if (updatewindow(strm, out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if (state->wrap && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 
128 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long id; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary id */ + if (state->mode == DICT) { + id = adler32(0L, Z_NULL, 0); + id = adler32(id, dictionary, dictLength); + if (id != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window */ + if (updatewindow(strm, strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + if (dictLength > state->wsize) { + zmemcpy(state->window, dictionary + dictLength - state->wsize, + state->wsize); + state->whave = state->wsize; + } + else { + zmemcpy(state->window + state->wsize - dictLength, dictionary, + dictLength); + state->whave = dictLength; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; 
+ return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + (void) syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + 
strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + (void) inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. + */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL || + source->zalloc == (alloc_func)0 || source->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy(dest, source, sizeof(z_stream)); + zmemcpy(copy, state, sizeof(struct inflate_state)); + if (state->lencode >= state->codes && + 
state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} diff --git a/uts/common/zmod/inflate.h b/uts/common/zmod/inflate.h new file mode 100644 index 000000000000..4d28b221770b --- /dev/null +++ b/uts/common/zmod/inflate.h @@ -0,0 +1,117 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. 
*/
+#ifndef NO_GZIP
+#  define GUNZIP
+#endif
+
+/* Possible inflate modes between inflate() calls.  The "i:", "o:" and
+   "i/o:" prefixes in the per-mode notes presumably mark whether the mode is
+   waiting on input, on output space, or on either -- TODO confirm against
+   inflate(). */
+typedef enum {
+    HEAD,       /* i: waiting for magic header */
+    FLAGS,      /* i: waiting for method and flags (gzip) */
+    TIME,       /* i: waiting for modification time (gzip) */
+    OS,         /* i: waiting for extra flags and operating system (gzip) */
+    EXLEN,      /* i: waiting for extra length (gzip) */
+    EXTRA,      /* i: waiting for extra bytes (gzip) */
+    NAME,       /* i: waiting for end of file name (gzip) */
+    COMMENT,    /* i: waiting for end of comment (gzip) */
+    HCRC,       /* i: waiting for header crc (gzip) */
+    DICTID,     /* i: waiting for dictionary check value */
+    DICT,       /* waiting for inflateSetDictionary() call */
+        TYPE,       /* i: waiting for type bits, including last-flag bit */
+        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
+        STORED,     /* i: waiting for stored size (length and complement) */
+        COPY,       /* i/o: waiting for input or output to copy stored block */
+        TABLE,      /* i: waiting for dynamic block table lengths */
+        LENLENS,    /* i: waiting for code length code lengths */
+        CODELENS,   /* i: waiting for length/lit and distance code lengths */
+            LEN,        /* i: waiting for length/lit code */
+            LENEXT,     /* i: waiting for length extra bits */
+            DIST,       /* i: waiting for distance code */
+            DISTEXT,    /* i: waiting for distance extra bits */
+            MATCH,      /* o: waiting for output space to copy string */
+            LIT,        /* o: waiting for output space to write literal */
+    CHECK,      /* i: waiting for 32-bit check value */
+    LENGTH,     /* i: waiting for 32-bit length (gzip) */
+    DONE,       /* finished check, done -- remain here until reset */
+    BAD,        /* got a data error -- remain here until reset */
+    MEM,        /* got an inflate() memory error -- remain here until reset */
+    SYNC        /* looking for synchronization bytes to restart inflate() */
+} inflate_mode;
+
+/*
+    State transitions between above modes -
+
+    (most modes can go to the BAD or MEM mode -- not shown for clarity)
+
+    Process header:
+        HEAD -> (gzip) or (zlib)
+        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
+        NAME -> COMMENT -> HCRC -> TYPE
+        (zlib) -> DICTID or TYPE
+        DICTID -> DICT -> TYPE
+    Read deflate blocks:
+            TYPE -> STORED or TABLE or LEN or CHECK
+            STORED -> COPY -> TYPE
+            TABLE -> LENLENS -> CODELENS -> LEN
+    Read deflate codes:
+                LEN -> LENEXT or LIT or TYPE
+                LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
+                LIT -> LEN
+    Process trailer:
+        CHECK -> LENGTH -> DONE
+ */
+
+/* state maintained between inflate() calls.  The original note here reads
+   "Approximately 7K bytes", which no longer matches the declarations: with
+   ENOUGH defined as 2048 in inftrees.h and the four-byte code entries
+   described there, codes[] alone takes 8K, and lens[] plus work[] add a
+   further 1216 bytes (assuming two-byte shorts). */
+struct inflate_state {
+    inflate_mode mode;          /* current inflate mode */
+    int last;                   /* true if processing last block */
+    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip */
+    int havedict;               /* true if dictionary provided */
+    int flags;                  /* gzip header method and flags (0 if zlib) */
+    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
+    unsigned long check;        /* protected copy of check value */
+    unsigned long total;        /* protected copy of output count */
+    gz_headerp head;            /* where to save gzip header information */
+        /* sliding window */
+    unsigned wbits;             /* log base 2 of requested window size */
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned write;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if needed */
+        /* bit accumulator */
+    unsigned long hold;         /* input bit accumulator */
+    unsigned bits;              /* number of bits in "in" (presumably the
+                                   accumulator, hold -- TODO confirm) */
+        /* for string and stored block copying */
+    unsigned length;            /* literal or length of data to copy */
+    unsigned offset;            /* distance back to copy string from */
+        /* for table and code decoding */
+    unsigned extra;             /* extra bits needed */
+        /* fixed and dynamic code tables */
+    code const FAR *lencode;    /* starting table for length/literal codes */
+    code const FAR *distcode;   /* starting table for distance codes */
+    unsigned lenbits;           /* index bits for lencode */
+    unsigned distbits;          /* index bits for distcode */
+        /* dynamic table building */
+    unsigned ncode;             /* number of code length code lengths */
+    unsigned nlen;              /* number of length code lengths */
+    unsigned ndist;             /* number of distance code lengths */
+    unsigned have;              /* number of code lengths in lens[] */
+    code FAR *next;             /* next available space in codes[] */
+    unsigned short lens[320];   /* temporary storage for code lengths */
+    unsigned short work[288];   /* work area for code table building */
+    code codes[ENOUGH];         /* space for code tables */
+};
diff --git a/uts/common/zmod/inftrees.c b/uts/common/zmod/inftrees.c
new file mode 100644
index 000000000000..2d371675301c
--- /dev/null
+++ b/uts/common/zmod/inftrees.c
@@ -0,0 +1,331 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "zutil.h"
+#include "inftrees.h"
+
+#define MAXBITS 15
+
+static const char inflate_copyright[] =
+   " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/*
+   Build a set of tables to decode the provided canonical Huffman code.
+   The code lengths are lens[0..codes-1].  The result starts at *table,
+   whose indices are 0..2^bits-1.  work is a writable array of at least
+   lens shorts, which is used as a work area.  type is the type of code
+   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
+   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
+   on return points to the next available entry's address.  bits is the
+   requested root table index bits, and on return it is the actual root
+   table index bits.
It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code this; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 
6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. 
+ */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)1; + this.val = (unsigned short)0; + *(*table)++ = this; /* make a table to force an error */ + *(*table)++ = this; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min <= MAXBITS; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. 
When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked when a LENS table is being made + against the space in *table, ENOUGH, minus the maximum space needed by + the worst case distance code, MAXD. This should never happen, but the + sufficiency of ENOUGH has not been proven exhaustively, hence the check. + This assumes that when type == LENS, bits == 9. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + this.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + this.op = (unsigned char)0; + this.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + this.op = (unsigned char)(extra[work[sym]]); + this.val = base[work[sym]]; + } + else { + this.op = (unsigned char)(32 + 64); /* end of block */ + this.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = this; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to 
sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* + Fill in rest of table for incomplete codes. This loop is similar to the + loop above in incrementing huff for table indices. It is assumed that + len is equal to curr + drop, so there is no loop needed to increment + through high index bits. When the current sub-table is filled, the loop + drops back to the root table to fill in any remaining entries there. 
+ */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)(len - drop); + this.val = (unsigned short)0; + while (huff != 0) { + /* when done with sub-table, drop back to root table */ + if (drop != 0 && (huff & mask) != low) { + drop = 0; + len = root; + next = *table; + this.bits = (unsigned char)len; + } + + /* put invalid code marker in table */ + next[huff >> drop] = this; + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/uts/common/zmod/inftrees.h b/uts/common/zmod/inftrees.h new file mode 100644 index 000000000000..546e8c082fdb --- /dev/null +++ b/uts/common/zmod/inftrees.h @@ -0,0 +1,57 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. 
val is the actual byte to output in the case
+   of a literal, the base length or distance, or the offset from
+   the current table to the next table.  Each entry is four bytes. */
+typedef struct {
+    unsigned char op;           /* operation, extra bits, table bits */
+    unsigned char bits;         /* bits in this part of the code */
+    unsigned short val;         /* offset in table or code value */
+} code;
+
+/* op values as set by inflate_table():
+    00000000 - literal
+    0000tttt - table link, tttt != 0 is the number of table index bits
+    0001eeee - length or distance, eeee is the number of extra bits
+    01100000 - end of block
+    01000000 - invalid code
+ */
+
+/* Maximum size of dynamic tree.  The maximum found in a long but non-
+   exhaustive search was 1444 code structures (852 for length/literals
+   and 592 for distances, the latter actually the result of an
+   exhaustive search).  The true maximum is not known, but the value
+   below is more than safe. */
+#define ENOUGH 2048     /* entries in inflate_state.codes[] (see inflate.h) */
+#define MAXD 592        /* the distance-table figure quoted above */
+
+/* Type of code to build for inflate_table() */
+typedef enum {
+    CODES,
+    LENS,
+    DISTS
+} codetype;
+
+extern int inflate_table OF((codetype type, unsigned short FAR *lens,
+                             unsigned codes, code FAR * FAR *table,
+                             unsigned FAR *bits, unsigned short FAR *work));
diff --git a/uts/common/zmod/trees.c b/uts/common/zmod/trees.c
new file mode 100644
index 000000000000..ce0cebc0304c
--- /dev/null
+++ b/uts/common/zmod/trees.c
@@ -0,0 +1,1219 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-2005 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process uses several Huffman trees. The more
+ *      common source values are represented by shorter bit sequences.
+ *
+ *      Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values).
The actual code strings are
+ *      reconstructed from the lengths in the inflate process, as described
+ *      in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression:  Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef DEBUG
+#  include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6      16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10    17
+/* repeat a zero length 3-10 times  (3 bits of repeat count) */
+
+#define REPZ_11_138  18
+/* repeat a zero length 11-138 times  (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; /* the 2, 3 and 7 at indices
+      16..18 are the repeat-count widths of REP_3_6, REPZ_3_10 and
+      REPZ_11_138 defined above */
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char)) /* 16, because sizeof(char) is 1 */
+/* Number of bits used within bi_buf. (bi_buf might be implemented on
+ * more than 16 bits on some systems.)
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN  512 /* see definition of array _dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However,
+ * the codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+#  include "trees.h"
+#endif /* GEN_TREES_H */
+
+struct static_tree_desc_s { /* fixed description shared by one tree kind;
+                               the three instances are defined just below */
+    const ct_data *static_tree;  /* static tree or NULL */
+    const intf *extra_bits;      /* extra bits for each code or NULL */
+    int     extra_base;          /* base index for extra_bits */
+    int     elems;               /* max number of elements in the tree */
+    int     max_length;          /* max bit length for the codes */
+};
+
+local static_tree_desc  static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local static_tree_desc  static_d_desc =
+{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
+
+local static_tree_desc  static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init OF((void));
+local void init_block     OF((deflate_state *s));
+local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
+local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree     OF((deflate_state *s, tree_desc *desc));
+local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local int  build_bl_tree  OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+                              int blcodes));
+local void compress_block OF((deflate_state *s, ct_data *ltree,
+                              ct_data *dtree));
+local void set_data_type  OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup      OF((deflate_state *s));
+local void bi_flush       OF((deflate_state *s));
+local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
+                              int header));
+
+#ifdef GEN_TREES_H
+local void gen_trees_header OF((void));
+#endif
+
+#ifndef DEBUG
+#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+   /* Send a code of the given tree. c and tree must not have side effects
+    * (both appear twice in the expansion) */
+
+#else /* DEBUG */
+#  define send_code(s, c, tree) \
+     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+       send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream: the low byte of w is written
+ * first, then the next eight bits.
+ * IN assertion: there is enough room in pendingBuf.
+ * Both s and w appear twice in the expansion, so neither argument may have
+ * side effects.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef DEBUG
+local void send_bits      OF((deflate_state *s, int value, int length));
+
+local void send_bits(s, value, length)
+    deflate_state *s;
+    int value;  /* value to send */
+    int length; /* number of bits */
+{
+    Tracevv((stderr," l %2d v %4x ", length, value));
+    Assert(length > 0 && length <= 15, "invalid length"); /* NOTE(review):
+        this check allows at most 15 bits, while the assertion stated in the
+        comment ahead of this function says length <= 16 -- confirm which
+        bound is intended */
+    s->bits_sent += (ulg)length;
+
+    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
+     * unused bits in value.
+     */
+    if (s->bi_valid > (int)Buf_size - length) {
+        s->bi_buf |= (value << s->bi_valid);
+        put_short(s, s->bi_buf);
+        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+        s->bi_valid += length - Buf_size;
+    } else {
+        s->bi_buf |= value << s->bi_valid;
+        s->bi_valid += length;
+    }
+}
+#else /* !DEBUG */
+
+#define send_bits(s, value, length) \
+{ int len = length;\
+  if (s->bi_valid > (int)Buf_size - len) {\
+    int val = value;\
+    s->bi_buf |= (val << s->bi_valid);\
+    put_short(s, s->bi_buf);\
+    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+    s->bi_valid += len - Buf_size;\
+  } else {\
+    s->bi_buf |= (value) << s->bi_valid;\
+    s->bi_valid += len;\
+  }\
+}
+#endif /* DEBUG */
+
+
+/* the arguments must not have side effects: in the macro form above, s is
+ * expanded many times and value appears in both branches; the macro also
+ * declares locals named len and val, so arguments that use those names
+ * would be captured */
+
+/* ===========================================================================
+ * Initialize the various 'constant' tables.
+ */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1<<extra_lbits[code]); n++) { + _length_code[length++] = (uch)code; + } + } + Assert (length == 256, "tr_static_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + _length_code[length-1] = (uch)code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<<extra_dbits[code]); n++) { + _dist_code[dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) 
static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef DEBUG +# include <stdio.h> +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? 
",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. 
+ */ +void _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; + s->last_eob_len = 8; /* enough lookahead for inflate */ +#ifdef DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares two subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. 
+ */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. 
+ */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in 
increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. 
+ */ + Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = bi_reverse(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); + } +} + +/* =========================================================================== + * Construct one Huffman tree and assign the code bit strings and lengths. + * Update the total bit length for the current block. + * IN assertion: the field freq is set for all tree elements. + * OUT assertions: the fields len and code are set to the optimal bit length + * and corresponding code. The length opt_len is updated; static_len is + * also updated if stree is not null. The field max_code is set. + */ +local void build_tree(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. 
+ */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. 
+ */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. 
+ */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. 
+ */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
+ */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void _tr_stored_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ +#ifdef DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; +#endif + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + * The current inflate code requires 9 bits of lookahead. 
If the + * last two codes for the previous block (real code plus EOB) were coded + * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + * the last real code. In this case we send two empty static blocks instead + * of one. (There are no problems if the previous block is stored or fixed.) + * To simplify the code, we assume the worst case of last real code encoded + * on one bit only. + */ +void _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); + /* Of the 10 bits for the empty block, we have already sent + * (10 - bi_valid) bits. The lookahead for the last real code (before + * the EOB of the previous block) was thus at least one plus the length + * of the EOB plus what we have just sent of the empty static block. + */ + if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; +#endif + bi_flush(s); + } + s->last_eob_len = 7; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. 
+ */ +void _tr_flush_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN) + set_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 
+ * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, eof); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+eof, 3); + compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+eof, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (eof) { + bi_windup(s); +#ifdef DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*eof)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. 
+ */ +int _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + ct_data *ltree; /* literal tree */ + ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to BINARY or TEXT, using a crude approximation: + * set it to Z_TEXT if all symbols are either printable characters (33 to 255) + * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise. 
+ * IN assertion: the fields Freq of dyn_ltree are set. + */ +local void set_data_type(s) + deflate_state *s; +{ + int n; + + for (n = 0; n < 9; n++) + if (s->dyn_ltree[n].Freq != 0) + break; + if (n == 9) + for (n = 14; n < 32; n++) + if (s->dyn_ltree[n].Freq != 0) + break; + s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
+ */ +local void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG + s->bits_sent += (ulg)len<<3; +#endif + while (len--) { + put_byte(s, *buf++); + } +} diff --git a/uts/common/zmod/zconf.h b/uts/common/zmod/zconf.h new file mode 100644 index 000000000000..ccce7b2742da --- /dev/null +++ b/uts/common/zmod/zconf.h @@ -0,0 +1,117 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZCONF_H +#define _ZCONF_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * We don't want to turn on zlib's debugging. 
+ */ +#undef DEBUG + +/* + * We define our own memory allocation and deallocation routines that use kmem. + */ +#define MY_ZCALLOC + +/* + * We don't define HAVE_MEMCPY here, but do in zutil.c, and implement + * our own versions of zmemcpy(), zmemzero(), and zmemcmp(). + */ + +/* + * We have a sufficiently capable compiler as to not need zlib's compiler hack. + */ +#define NO_DUMMY_DECL + +#define compressBound(len) (len + (len >> 12) + (len >> 14) + 11) + +#define z_off_t off_t +#define OF(p) p +#define ZEXTERN extern +#define ZEXPORT +#define ZEXPORTVA +#define FAR + +#define deflateInit_ z_deflateInit_ +#define deflate z_deflate +#define deflateEnd z_deflateEnd +#define inflateInit_ z_inflateInit_ +#define inflate z_inflate +#define inflateEnd z_inflateEnd +#define deflateInit2_ z_deflateInit2_ +#define deflateSetDictionary z_deflateSetDictionary +#define deflateCopy z_deflateCopy +#define deflateReset z_deflateReset +#define deflateParams z_deflateParams +#define deflateBound z_deflateBound +#define deflatePrime z_deflatePrime +#define inflateInit2_ z_inflateInit2_ +#define inflateSetDictionary z_inflateSetDictionary +#define inflateSync z_inflateSync +#define inflateSyncPoint z_inflateSyncPoint +#define inflateCopy z_inflateCopy +#define inflateReset z_inflateReset +#define inflateBack z_inflateBack +#define inflateBackEnd z_inflateBackEnd +#define compress zz_compress +#define compress2 zz_compress2 +#define uncompress zz_uncompress +#define adler32 z_adler32 +#define crc32 z_crc32 +#define get_crc_table z_get_crc_table +#define zError z_zError + +#define MAX_MEM_LEVEL 9 +#define MAX_WBITS 15 + +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef unsigned long uLong; +typedef Byte Bytef; +typedef char charf; +typedef int intf; +typedef uInt uIntf; +typedef uLong uLongf; +typedef void *voidpc; +typedef void *voidpf; +typedef void *voidp; + +#ifdef __cplusplus +} +#endif + +#endif /* _ZCONF_H */ diff --git a/uts/common/zmod/zlib.h 
b/uts/common/zmod/zlib.h new file mode 100644 index 000000000000..9b971a0f5722 --- /dev/null +++ b/uts/common/zmod/zlib.h @@ -0,0 +1,1359 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.3, July 18th, 2005 + + Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt + (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). +*/ + +#ifndef _ZLIB_H +#define _ZLIB_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.3" +#define ZLIB_VERNUM 0x1230 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms will be added later and will have the same + stream interface. 
+ + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never + crash even in case of corrupted input. 
+*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text for deflate; see inflate() for its use when decompressing */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). 
WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative + * values are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field (though see inflate()) */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is + not compatible with the zlib.h header file used by the application. + This check is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. + If zalloc and zfree are set to Z_NULL, deflateInit updates them to + use default allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at + all (the input data is simply copied a block at a time). + Z_DEFAULT_COMPRESSION requests a default compromise between speed and + compression (currently equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if level is not a valid compression level, + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). + msg is set to null if there is no error message. deflateInit does not + perform any compression: this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce some + output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). + Some output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating avail_in or avail_out accordingly; avail_out + should never be zero before the call. The application can consume the + compressed output when it wants, for example when the output buffer is full + (avail_out == 0), or after each call of deflate(). 
If deflate returns Z_OK + and with zero avail_out, it must be called again after making room in the + output buffer because there might be more output pending. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In particular + avail_in is zero after the call if enough output space has been provided + before the call.) Flushing may degrade compression for some compression + algorithms and so it should be used only when necessary. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there + was enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the + stream are deflateReset or deflateEnd. 
+ + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least + the value returned by deflateBound (see below). If deflate does not return + Z_STREAM_END, then it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. This field is only for information purposes and does not affect + the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, + msg may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. 
If next_in is not Z_NULL and avail_in is large enough (the exact + value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller. msg is set to null if there is no error + message. inflateInit does not perform any decompression apart from reading + the zlib header if present: this will be done by inflate(). (So next_in and + avail_in may be modified, but next_out and avail_out are unchanged.) +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there + is no more input data or no more space in the output buffer (see below + about the flush parameter). 
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. + The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). If inflate returns Z_OK and with zero avail_out, it + must be called again after making room in the output buffer because there + might be more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, + Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() stop + if and when it gets to the next deflate block boundary. When decoding the + zlib or gzip format, this will cause inflate() to return immediately after + the header and before the first block. When doing a raw inflate, inflate() + will go ahead and process the first block, and will return when it gets to + the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 + if inflate() is currently decoding the last block in the deflate stream, + plus 128 if inflate() returned immediately after decoding an end-of-block + code or decoding the complete header up to just before the first byte of the + deflate stream. The end-of-block will not be indicated until all of the + uncompressed data from that block has been written to strm->next_out. The + number of unused bits may in general be greater than seven, except when + bit 7 of data_type is set, in which case the number of unused bits will be + less than eight. 
+ + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. In this case all pending input is processed and all pending + output is flushed; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster approach + may be used for the single inflate() call. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the only effect of the flush parameter in this implementation + is on the return value of inflate(), as noted below, or when it returns early + because Z_BLOCK is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the adler32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the adler32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() will decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically. 
Any information + contained in the gzip header is not retained, so applications that need that + information should instead use raw inflate, see inflateInit2() below, or + inflateBack() and perform their own processing of the gzip header and + trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may then + call inflateSync() to look for a good compression block if a partial recovery + of the data is desired. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. 
The + fields next_in, zalloc, zfree and opaque must be initialized before by + the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), + no header crc, and the operating system will be set to 255 (unknown). If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but + is slow and reduces compression ratio; memLevel=9 uses maximum memory + for optimal speed. The default value is 8. See zconf.h for total memory + usage as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. 
In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as + Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy + parameter only affects the compression ratio but not the correctness of the + compressed output even if it is not set appropriately. Z_FIXED prevents the + use of dynamic Huffman codes, allowing for a simpler decoder for special + applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid + method). msg is set to null if there is no error message. deflateInit2 does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. This function must be called + immediately after deflateInit, deflateInit2 or deflateReset, before any + call of deflate. The compressor and decompressor must use exactly the same + dictionary (see inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. 
+ + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be useful should be + put at the end of the dictionary, not at the front. In addition, the + current implementation of deflate will use at most the window size minus + 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the adler32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if the compression method is bsort). deflateSetDictionary does not + perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and + can consume lots of memory. 
+ + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. + The stream will keep the same compression level and any other attributes + that may have been set by deflateInit2. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different + strategy. If the compression level is changed, the input available so far + is compressed with the old level (and may be flushed); the new level will + take effect only at the next call of deflate(). + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to + be compressed and flushed. In particular, strm->avail_out must be non-zero. + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR + if strm->avail_out was zero. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. 
This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() + or deflateInit2(). This would be used to allocate an output buffer + for deflation in a single pass, and so would be called before deflate(). +*/ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the + bits leftover from a previous deflate stream when appending to it. As such, + this function can only be used for raw deflate, and must be used before the + first deflate() call after a deflateInit2() or deflateReset(). bits must be + less than or equal to 16, and that many of the least significant bits of + value will be inserted in the output. + + deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). 
The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflags is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. 
inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is + a crc32 instead of an adler32. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg + is set to null if there is no error message. inflateInit2 does not perform + any decompression apart from reading the zlib header if present: this will + be done by inflate(). (So next_in and avail_in may be modified, but next_out + and avail_out are unchanged.) +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. 
+ The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called + immediately after inflateInit2() or inflateReset() and before any call of + inflate() to set the dictionary. The application must insure that the + dictionary that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (such as NULL dictionary) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a full flush point (see above the + description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR + if no more input was provided, Z_DATA_ERROR if no flush point has been found, + or Z_STREAM_ERROR if the stream structure was inconsistent. In the success + case, the application may save the current value of total_in which + indicates where valid compressed data was found. In the error case, the + application may repeatedly call inflateSync, providing more input each time, + until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream.
+ + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. + The stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. 
Note that Z_BLOCK can be used to + force inflate() to return immediately after header processing is complete + and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When + any of extra, name, or comment are not Z_NULL and the respective field is + not present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. 
The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not + be allocated, or Z_VERSION_ERROR if the version of the library does not + match the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is more efficient than inflate() for + file i/o applications in that it avoids copying between the output and the + sliding window by simply making the window itself the output buffer. This + function trusts the application to not change the output buffer passed by + the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free + the allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer.
+ This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects + only the raw deflate stream to decompress. This is different from the + normal behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero--buf is ignored in that + case--and inflateBack() will return a buffer error. inflateBack() will call + out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out() + should return zero on success, or non-zero on failure. If out() returns + non-zero, inflateBack() will return with an error. Neither in() nor out() + are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. 
If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format + error in the deflate stream (in which case strm->msg is set to indicate the + nature of the error), or Z_STREAM_ERROR if the stream was not properly + initialized. In the case of Z_BUF_ERROR, an input or output error can be + distinguished using strm->next_in which will be Z_NULL only if in() returned + an error. If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to + out() returning non-zero. (in() will always be called before out(), so + strm->next_in is assured to be defined if out() returns non-zero.) Note + that inflateBack() cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options.
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + + + /* utility functions */ + +/* + The following utility functions are implemented on top of the + basic stream-oriented functions. To simplify the interface, some + default options are assumed (compression level and memory usage, + standard memory allocation functions). The source code of these + utility functions can easily be modified if you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least the value returned + by compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before + a compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. 
(The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. +*/ + + +typedef voidp gzFile; + +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); +/* + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb") but can also include a compression level + ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for + Huffman only compression as in "wb1h", or 'R' for run-length encoding + as in "wb1R". (See the description of deflateInit2 for more information + about the strategy parameter.) + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. + + gzopen returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). */ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen() associates a gzFile with the file descriptor fd. File + descriptors are obtained from calls like open, dup, creat, pipe or + fileno (if the file has been previously opened with fopen). + The mode parameter is as in gzopen. + The next call of gzclose on the returned gzFile will also close the + file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file + descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
+ gzdopen returns NULL if there was insufficient memory to allocate + the (de)compression state. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. + If the input file was not in gzip format, gzread copies the given number + of bytes into the buffer. + gzread returns the number of uncompressed bytes actually read (0 for + end of file, -1 for error). */ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes actually written + (0 in case of error). +*/ + +ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). The number of + uncompressed bytes written is limited to 4095. The caller should assure that + this limit is not exceeded. If it is exceeded, then gzprintf() will return + an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character.
+ gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or + a newline character is read and transferred to buf, or an end-of-file + condition is encountered. The string is then terminated with a null + character. + gzgets returns buf, or Z_NULL in case of error. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read again later. + Only one character of push-back is allowed. gzungetc() returns the + character pushed, or -1 on failure. gzungetc() will fail if a + character has been pushed but not read yet, or if c is -1. The pushed + character will be discarded if the stream is repositioned with gzseek() + or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. The return value is the zlib + error number (see function gzerror below). gzflush returns Z_OK if + the flush parameter is Z_FINISH and all output could be flushed. + gzflush should be called only when strictly necessary because it can + degrade compression. +*/ + +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); +/* + Sets the starting position for the next gzread or gzwrite on the + given compressed file. The offset represents a number of bytes in the + uncompressed data stream. 
The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); +/* + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns 1 if file is being read directly without decompression, otherwise + zero. +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. The return value is the zlib + error number (see function gzerror below). +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. 
If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the + compression library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); +/* + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is NULL, this function returns the required initial + value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application.
+ Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + +/* + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) +#define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, sizeof(z_stream)) +#define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, sizeof(z_stream)) + 
+ +#if !defined(_ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; /* hack for buggy compilers */ +#endif + +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); +ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZLIB_H */ diff --git a/uts/common/zmod/zmod.c b/uts/common/zmod/zmod.c new file mode 100644 index 000000000000..d22f5950826c --- /dev/null +++ b/uts/common/zmod/zmod.c @@ -0,0 +1,113 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/modctl.h> +#include <sys/zmod.h> + +#include "zlib.h" +#include "zutil.h" + +/* + * Uncompress the buffer 'src' into the buffer 'dst'. The caller must store + * the expected decompressed data size externally so it can be passed in. + * The resulting decompressed size is then returned through dstlen. This + * function returns Z_OK on success, or another error code on failure.
+ */ +int +z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + z_stream zs; + int err; + + bzero(&zs, sizeof (zs)); + zs.next_in = (uchar_t *)src; + zs.avail_in = srclen; + zs.next_out = dst; + zs.avail_out = *dstlen; + + /* + * Call inflateInit2() specifying a window size of DEF_WBITS + * with the 6th bit set to indicate that the compression format + * type (zlib or gzip) should be automatically detected. + */ + if ((err = inflateInit2(&zs, DEF_WBITS | 0x20)) != Z_OK) + return (err); + + if ((err = inflate(&zs, Z_FINISH)) != Z_STREAM_END) { + (void) inflateEnd(&zs); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + + *dstlen = zs.total_out; + return (inflateEnd(&zs)); +} + +int +z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, + int level) +{ + + z_stream zs; + int err; + + bzero(&zs, sizeof (zs)); + zs.next_in = (uchar_t *)src; + zs.avail_in = srclen; + zs.next_out = dst; + zs.avail_out = *dstlen; + + if ((err = deflateInit(&zs, level)) != Z_OK) + return (err); + + if ((err = deflate(&zs, Z_FINISH)) != Z_STREAM_END) { + (void) deflateEnd(&zs); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + + *dstlen = zs.total_out; + return (deflateEnd(&zs)); +} + +int +z_compress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + return (z_compress_level(dst, dstlen, src, srclen, + Z_DEFAULT_COMPRESSION)); +} + +/* + * Convert a zlib error code into a string error message. + */ +const char * +z_strerror(int err) +{ + int i = Z_NEED_DICT - err; + + if (i < 0 || i > Z_NEED_DICT - Z_VERSION_ERROR) + return ("unknown error"); + + return (zError(err)); +} diff --git a/uts/common/zmod/zmod_subr.c b/uts/common/zmod/zmod_subr.c new file mode 100644 index 000000000000..000925753fd7 --- /dev/null +++ b/uts/common/zmod/zmod_subr.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/kobj.h>
+#include <sys/kobj_impl.h>
+
+/*
+ * Header placed immediately in front of every buffer handed out by
+ * zcalloc().  zcfree() uses it to validate the pointer (zch_magic) and to
+ * recover the allocation size it must pass to kobj_free() (zch_size).
+ */
+struct zchdr {
+	uint_t zch_magic;
+	uint_t zch_size;
+};
+
+#define ZCH_MAGIC 0x3cc13cc1
+
+/*
+ * Allocate room for 'items' objects of 'size' bytes each, preceded by a
+ * struct zchdr, and return a pointer to the first byte after the header.
+ * Returns NULL if the request is too large to describe in the header or if
+ * kobj_zalloc() fails.
+ */
+/*ARGSUSED*/
+void *
+zcalloc(void *opaque, uint_t items, uint_t size)
+{
+	size_t nbytes;
+	struct zchdr *z;
+
+	/*
+	 * The total is recorded in the uint_t zch_size field, so refuse
+	 * any request whose header + items * size does not fit in a
+	 * uint_t.  This also catches wraparound of items * size itself,
+	 * which would otherwise produce an undersized buffer.
+	 */
+	if (size != 0 &&
+	    items > (~(uint_t)0 - sizeof (struct zchdr)) / size)
+		return (NULL);
+
+	nbytes = sizeof (struct zchdr) + (size_t)items * size;
+	z = kobj_zalloc(nbytes, KM_NOWAIT|KM_TMP);
+
+	if (z == NULL)
+		return (NULL);
+
+	z->zch_magic = ZCH_MAGIC;
+	z->zch_size = nbytes;
+
+	return (z + 1);
+}
+
+/*
+ * Release a buffer obtained from zcalloc().  'ptr' must be the pointer
+ * zcalloc() returned; the header is looked up immediately before it and
+ * the system panics if its magic number does not match.
+ */
+/*ARGSUSED*/
+void
+zcfree(void *opaque, void *ptr)
+{
+	struct zchdr *z = ((struct zchdr *)ptr) - 1;
+
+	if (z->zch_magic != ZCH_MAGIC)
+		panic("zcfree region corrupt: hdr=%p ptr=%p", (void *)z, ptr);
+
+	kobj_free(z, z->zch_size);
+}
+
+/*
+ * Copy 'len' bytes from 'source' to 'dest'.  Note that bcopy() takes its
+ * arguments in (source, destination) order.
+ */
+void
+zmemcpy(void *dest, const void *source, uint_t len)
+{
+	bcopy(source, dest, len);
+}
+
+/*
+ * Compare 'len' bytes at 's1' and 's2'; the result is whatever bcmp()
+ * returns for them.
+ */
+int
+zmemcmp(const void *s1, const void *s2, uint_t len)
+{
+	return (bcmp(s1, s2, len));
+}
+
+/*
+ * Zero 'len' bytes starting at 'dest'.
+ */
+void
+zmemzero(void *dest, uint_t len)
+{
+	bzero(dest, len);
+}
diff --git a/uts/common/zmod/zutil.c b/uts/common/zmod/zutil.c
new file mode 100644
index 000000000000..7d46e30b3edf
--- /dev/null
+++ b/uts/common/zmod/zutil.c
@@ -0,0
+1,324 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "zutil.h"
+
+#ifndef NO_DUMMY_DECL
+struct internal_state {int dummy;}; /* for buggy compilers */
+#endif
+
+const char * const z_errmsg[10] = { /* listed from Z_NEED_DICT (2) down to Z_VERSION_ERROR (-6) */
+"need dictionary", /* Z_NEED_DICT 2 */
+"stream end", /* Z_STREAM_END 1 */
+"", /* Z_OK 0 */
+"file error", /* Z_ERRNO (-1) */
+"stream error", /* Z_STREAM_ERROR (-2) */
+"data error", /* Z_DATA_ERROR (-3) */
+"insufficient memory", /* Z_MEM_ERROR (-4) */
+"buffer error", /* Z_BUF_ERROR (-5) */
+"incompatible version",/* Z_VERSION_ERROR (-6) */
+""}; /* tenth entry: empty string, no error code is listed for it */
+
+
+const char * ZEXPORT zlibVersion() /* returns the ZLIB_VERSION string */
+{
+    return ZLIB_VERSION;
+}
+
+uLong ZEXPORT zlibCompileFlags() /* packs type sizes and build options into one bit mask */
+{
+    uLong flags;
+
+    flags = 0;
+    switch (sizeof(uInt)) { /* bits 0-1: 0, 1, 2 or 3 for a 2-, 4-, 8- or other-sized uInt */
+    case 2: break;
+    case 4: flags += 1; break;
+    case 8: flags += 2; break;
+    default: flags += 3;
+    }
+    switch (sizeof(uLong)) { /* bits 2-3: same encoding for uLong */
+    case 2: break;
+    case 4: flags += 1 << 2; break;
+    case 8: flags += 2 << 2; break;
+    default: flags += 3 << 2;
+    }
+    switch (sizeof(voidpf)) { /* bits 4-5: same encoding for voidpf */
+    case 2: break;
+    case 4: flags += 1 << 4; break;
+    case 8: flags += 2 << 4; break;
+    default: flags += 3 << 4;
+    }
+    switch (sizeof(z_off_t)) { /* bits 6-7: same encoding for z_off_t */
+    case 2: break;
+    case 4: flags += 1 << 6; break;
+    case 8: flags += 2 << 6; break;
+    default: flags += 3 << 6;
+    }
+#ifdef DEBUG
+    flags += 1 << 8; /* bit 8: DEBUG build */
+#endif
+#if defined(ASMV) || defined(ASMINF)
+    flags += 1 << 9; /* bit 9: ASMV or ASMINF defined */
+#endif
+#ifdef ZLIB_WINAPI
+    flags += 1 << 10; /* bit 10: ZLIB_WINAPI defined */
+#endif
+#ifdef BUILDFIXED
+    flags += 1 << 12; /* bit 12: BUILDFIXED defined */
+#endif
+#ifdef DYNAMIC_CRC_TABLE
+    flags += 1 << 13; /* bit 13: DYNAMIC_CRC_TABLE defined */
+#endif
+#ifdef NO_GZCOMPRESS
+    flags += 1L << 16; /* bit 16: NO_GZCOMPRESS defined */
+#endif
+#ifdef NO_GZIP
+    flags += 1L << 17; /* bit 17: NO_GZIP defined */
+#endif
+#ifdef PKZIP_BUG_WORKAROUND
+    flags += 1L << 20; /* bit 20: PKZIP_BUG_WORKAROUND defined */
+#endif
+#ifdef FASTEST
+    flags += 1L << 21; /* bit 21: FASTEST defined */
+#endif
+#ifdef STDC
+# ifdef NO_vsnprintf
+    flags += 1L << 25; /* bit 25: no vsnprintf */
+# ifdef HAS_vsprintf_void
+    flags += 1L << 26; /* bit 26: vsprintf returns void */
+# endif
+# else
+# ifdef HAS_vsnprintf_void
+    flags += 1L << 26; /* bit 26: vsnprintf returns void */
+# endif
+# endif
+#else
+    flags += 1L << 24; /* bit 24: STDC not defined */
+# ifdef NO_snprintf
+    flags += 1L << 25; /* bit 25: no snprintf */
+# ifdef HAS_sprintf_void
+    flags += 1L << 26; /* bit 26: sprintf returns void */
+# endif
+# else
+# ifdef HAS_snprintf_void
+    flags += 1L << 26; /* bit 26: snprintf returns void */
+# endif
+# endif
+#endif
+    return flags;
+}
+
+#ifdef DEBUG
+
+# ifndef verbose
+# define verbose 0
+# endif
+int z_verbose = verbose; /* threshold consulted by the Trace* macros in zutil.h */
+
+void z_error (m) /* DEBUG only: print the message to stderr and exit(1) */
+    char *m;
+{
+    fprintf(stderr, "%s\n", m);
+    exit(1);
+}
+#endif
+
+/* exported to allow conversion of error code to string for compress() and
+ * uncompress()
+ *
+ * The lookup goes through the ERR_MSG() macro from zutil.h, which indexes
+ * the z_errmsg table.
+ */
+const char * ZEXPORT zError(err)
+    int err;
+{
+    return ERR_MSG(err);
+}
+
+#if defined(_WIN32_WCE)
+    /* The Microsoft C Run-Time Library for Windows CE doesn't have
+     * errno. We define it as a global variable to simplify porting.
+     * Its value is always 0 and should not be used.
+     */
+    int errno = 0;
+#endif
+
+#define HAVE_MEMCPY /* defined unconditionally, so the #ifndef section below is always compiled out */
+#ifndef HAVE_MEMCPY
+
+void zmemcpy(dest, source, len) /* byte-by-byte copy of len bytes */
+    Bytef* dest;
+    const Bytef* source;
+    uInt len;
+{
+    if (len == 0) return;
+    do {
+        *dest++ = *source++; /* ??? to be unrolled */
+    } while (--len != 0);
+}
+
+int zmemcmp(s1, s2, len) /* 0 if equal, else 1 or -1 from the first differing byte */
+    const Bytef* s1;
+    const Bytef* s2;
+    uInt len;
+{
+    uInt j;
+
+    for (j = 0; j < len; j++) {
+        if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
+    }
+    return 0;
+}
+
+void zmemzero(dest, len) /* byte-by-byte clear of len bytes */
+    Bytef* dest;
+    uInt len;
+{
+    if (len == 0) return;
+    do {
+        *dest++ = 0; /* ??? to be unrolled */
+    } while (--len != 0);
+}
+#endif
+
+
+#ifdef SYS16BIT
+
+#ifdef __TURBOC__
+/* Turbo C in 16-bit mode */
+
+# define MY_ZCALLOC
+
+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
+ * must fix the pointer. Warning: the pointer must be put back to its
+ * original form in order to free it, use zcfree().
+ */
+
+#define MAX_PTR 10
+/* 10*64K = 640K */
+
+local int next_ptr = 0; /* number of table[] entries currently in use */
+
+typedef struct ptr_table_s {
+    voidpf org_ptr; /* pointer as returned by farmalloc() */
+    voidpf new_ptr; /* normalized pointer handed to the caller */
+} ptr_table;
+
+local ptr_table table[MAX_PTR];
+/* This table is used to remember the original form of pointers
+ * to large buffers (64K). Such pointers are normalized with a zero offset.
+ * Since MSDOS is not a preemptive multitasking OS, this table is not
+ * protected from concurrent access. This hack doesn't work anyway on
+ * a protected system like OS/2. Use Microsoft C instead.
+ */
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
+{
+    voidpf buf = opaque; /* just to make some compilers happy */
+    ulg bsize = (ulg)items*size;
+
+    /* If we allocate less than 65520 bytes, we assume that farmalloc
+     * will return a usable pointer which doesn't have to be normalized.
+     */
+    if (bsize < 65520L) {
+        buf = farmalloc(bsize);
+        if (*(ush*)&buf != 0) return buf;
+    } else {
+        buf = farmalloc(bsize + 16L);
+    }
+    if (buf == NULL || next_ptr >= MAX_PTR) return NULL; /* NOTE(review): a non-NULL buf is not farfree()d here when the table is full */
+    table[next_ptr].org_ptr = buf;
+
+    /* Normalize the pointer to seg:0 */
+    *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
+    *(ush*)&buf = 0;
+    table[next_ptr++].new_ptr = buf;
+    return buf;
+}
+
+void zcfree (voidpf opaque, voidpf ptr)
+{
+    int n;
+    if (*(ush*)&ptr != 0) { /* object < 64K */
+        farfree(ptr);
+        return;
+    }
+    /* Find the original pointer */
+    for (n = 0; n < next_ptr; n++) {
+        if (ptr != table[n].new_ptr) continue;
+
+        farfree(table[n].org_ptr);
+        while (++n < next_ptr) { /* close the gap left by the freed entry */
+            table[n-1] = table[n];
+        }
+        next_ptr--;
+        return;
+    }
+    ptr = opaque; /* just to make some compilers happy */
+    Assert(0, "zcfree: ptr not found");
+}
+
+#endif /* __TURBOC__ */
+
+
+#ifdef M_I86
+/* Microsoft C in 16-bit mode */
+
+# define MY_ZCALLOC
+
+#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
+# define _halloc halloc
+# define _hfree hfree
+#endif
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
+{
+    if (opaque) opaque = 0; /* to make
compiler happy */
+    return _halloc((long)items, size);
+}
+
+void zcfree (voidpf opaque, voidpf ptr)
+{
+    if (opaque) opaque = 0; /* to make compiler happy */
+    _hfree(ptr);
+}
+
+#endif /* M_I86 */
+
+#endif /* SYS16BIT */
+
+
+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
+
+#ifndef STDC
+extern voidp malloc OF((uInt size));
+extern voidp calloc OF((uInt items, uInt size));
+extern void free OF((voidpf ptr));
+#endif
+
+voidpf zcalloc (opaque, items, size)
+    voidpf opaque;
+    unsigned items;
+    unsigned size;
+{
+    if (opaque) items += size - size; /* make compiler happy */
+    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
+                              (voidpf)calloc(items, size); /* NOTE(review): on the malloc path items * size is an unchecked unsigned product and the memory is not zero-filled */
+}
+
+void zcfree (opaque, ptr)
+    voidpf opaque;
+    voidpf ptr;
+{
+    free(ptr);
+    if (opaque) return; /* make compiler happy */
+}
+
+#endif /* MY_ZCALLOC */
diff --git a/uts/common/zmod/zutil.h b/uts/common/zmod/zutil.h
new file mode 100644
index 000000000000..1d02c1d09783
--- /dev/null
+++ b/uts/common/zmod/zutil.h
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+#ifndef _ZUTIL_H
+#define _ZUTIL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+#ifdef STDC
+# ifndef _WIN32_WCE
+# include <stddef.h>
+# endif
+# include <string.h>
+# include <stdlib.h>
+#endif
+#ifdef NO_ERRNO_H
+# ifdef _WIN32_WCE
+    /* The Microsoft C Run-Time Library for Windows CE doesn't have
+     * errno. We define it as a global variable to simplify porting.
+     * Its value is always 0 and should not be used.
We rename it to
+     * avoid conflict with other libraries that use the same workaround.
+     */
+# define errno z_errno
+# endif
+    extern int errno;
+#else
+# ifndef _WIN32_WCE
+# include <sys/errno.h>
+# endif
+#endif
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+typedef unsigned char uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long ulg;
+
+extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+/*
+ * Map a zlib return code to its message.  Codes outside the range
+ * Z_VERSION_ERROR..Z_NEED_DICT select the empty tenth entry of z_errmsg
+ * instead of indexing outside the table.  Note that 'err' is evaluated
+ * more than once, so it must not have side effects.
+ */
+#define ERR_MSG(err) \
+    z_errmsg[((err) < Z_VERSION_ERROR || (err) > Z_NEED_DICT) ? 9 : \
+    Z_NEED_DICT-(err)]
+
+#define ERR_RETURN(strm,err) \
+  return (strm->msg = (char*)ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+    /* common constants */
+
+#ifndef DEF_WBITS
+# define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES 2
+/* The three kinds of block type */
+
+#define MIN_MATCH 3
+#define MAX_MATCH 258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+    /* target dependencies */
+
+#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
+# define OS_CODE 0x00
+# if defined(__TURBOC__) || defined(__BORLANDC__)
+# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+    /* Allow compilation with ANSI keywords only enabled */
+    void _Cdecl farfree( void *block );
+    void *_Cdecl farmalloc( unsigned long nbytes );
+# else
+# include <alloc.h>
+# endif
+# else /* MSC or DJGPP */
+# include <malloc.h>
+# endif
+#endif
+
+#ifdef AMIGA
+# define OS_CODE 0x01
+#endif
+
+#if defined(VAXC) || defined(VMS)
+# define OS_CODE 0x02
+# define F_OPEN(name, mode) \
+    fopen((name),
(mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#if defined(ATARI) || defined(atarist)
+# define OS_CODE 0x05
+#endif
+
+#ifdef OS2
+# define OS_CODE 0x06
+# ifdef M_I86
+    #include <malloc.h>
+# endif
+#endif
+
+#if defined(MACOS) || defined(TARGET_OS_MAC)
+# define OS_CODE 0x07
+# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
+# include <unix.h> /* for fdopen */
+# else
+# ifndef fdopen
+# define fdopen(fd,mode) NULL /* No fdopen() */
+# endif
+# endif
+#endif
+
+#ifdef TOPS20
+# define OS_CODE 0x0a
+#endif
+
+#ifdef WIN32
+# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */
+# define OS_CODE 0x0b
+# endif
+#endif
+
+#ifdef __50SERIES /* Prime/PRIMOS */
+# define OS_CODE 0x0f
+#endif
+
+#if defined(_BEOS_) || defined(RISCOS)
+# define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER > 600))
+# if defined(_WIN32_WCE)
+# define fdopen(fd,mode) NULL /* No fdopen() */
+# ifndef _PTRDIFF_T_DEFINED
+    typedef int ptrdiff_t;
+# define _PTRDIFF_T_DEFINED
+# endif
+# else
+# define fdopen(fd,type) _fdopen(fd,type)
+# endif
+#endif
+
+    /* common defaults: used only when no platform section above has already defined them */
+
+#ifndef OS_CODE
+# define OS_CODE 0x03 /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+# define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+    /* functions */
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+# ifndef HAVE_VSNPRINTF
+# define HAVE_VSNPRINTF
+# endif
+#endif
+#if defined(__CYGWIN__)
+# ifndef HAVE_VSNPRINTF
+# define HAVE_VSNPRINTF
+# endif
+#endif
+#ifndef HAVE_VSNPRINTF
+# ifdef MSDOS
+    /* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+       but for now we just assume it doesn't. */
+# define NO_vsnprintf
+# endif
+# ifdef __TURBOC__
+# define NO_vsnprintf
+# endif
+# ifdef WIN32
+    /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf.
*/
+# if !defined(vsnprintf) && !defined(NO_vsnprintf)
+# define vsnprintf _vsnprintf
+# endif
+# endif
+# ifdef __SASC
+# define NO_vsnprintf
+# endif
+#endif
+#ifdef VMS
+# define NO_vsnprintf
+#endif
+
+#if defined(pyr)
+# define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+    /* Use our own functions for small and medium model with MSC <= 5.0.
+     * You may have to use the same strategy for Borland C (untested).
+     * The __SC__ check is for Symantec.
+     */
+# define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+# define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+# ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+# define zmemcpy _fmemcpy
+# define zmemcmp _fmemcmp
+# define zmemzero(dest, len) _fmemset(dest, 0, len)
+# else
+# define zmemcpy memcpy
+# define zmemcmp memcmp
+# define zmemzero(dest, len) memset(dest, 0, len)
+# endif
+#else /* !HAVE_MEMCPY: the three helpers are real functions defined in a .c file */
+    extern void zmemcpy OF((void* dest, const void* source, uInt len));
+    extern int zmemcmp OF((const void* s1, const void* s2, uInt len));
+    extern void zmemzero OF((void* dest, uInt len));
+#endif
+
+/* Diagnostic functions: every macro below expands to nothing unless DEBUG is defined */
+#ifdef DEBUG
+# include <stdio.h>
+    extern int z_verbose;
+    extern void z_error OF((char *m));
+# define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+# define Trace(x) {if (z_verbose>=0) fprintf x ;}
+# define Tracev(x) {if (z_verbose>0) fprintf x ;}
+# define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+# define Assert(cond,msg)
+# define Trace(x)
+# define Tracev(x)
+# define Tracevv(x)
+# define Tracec(c,x)
+# define Tracecv(c,x)
+#endif
+
+
+voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
+void zcfree OF((voidpf opaque, voidpf ptr));
+
+#define ZALLOC(strm, items, size) \
+    (*((strm)->zalloc))((strm)->opaque, (items), (size)) /* calls the stream's zalloc hook with its opaque cookie */
+#define ZFREE(strm, addr)
(*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) /* calls the stream's zfree hook with its opaque cookie */
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} /* ZFREE only when p is non-null */
+
+#endif /* _ZUTIL_H */ |
