From c3a54f5bcff790079cde86e12621112837369752 Mon Sep 17 00:00:00 2001 From: mm Date: Tue, 7 Feb 2012 17:57:33 +0000 Subject: [PATCH 072/175] MFC r230514: Merge illumos revisions 13572, 13573, 13574: Rev. 13572: disk sync write perf regression when slog is used post oi_148 [1] Rev. 13573: crash during reguid causes stale config [2] allow and unallow missing from zpool history since removal of pyzfs [5] Rev. 13574: leaking a vdev when removing an l2cache device [3] memory leak when adding a file-based l2arc device [4] leak in ZFS from metaslab_group_create and zfs_ereport_checksum [6] References: https://www.illumos.org/issues/1909 [1] https://www.illumos.org/issues/1949 [2] https://www.illumos.org/issues/1951 [3] https://www.illumos.org/issues/1952 [4] https://www.illumos.org/issues/1953 [5] https://www.illumos.org/issues/1954 [6] Obtained from: illumos (issues #1909, #1949, #1951, #1952, #1953, #1954) git-svn-id: http://svn.freebsd.org/base/stable/9@231141 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f (cherry picked from commit a2dcc5ced7e9720dd2f7680b6d405c123c3152dd) Signed-off-by: Xin Li --- .../opensolaris/lib/libzfs/common/libzfs_dataset.c | 2 +- .../opensolaris/uts/common/fs/zfs/metaslab.c | 4 ++-- .../contrib/opensolaris/uts/common/fs/zfs/spa.c | 19 ++++++++++--------- .../opensolaris/uts/common/fs/zfs/sys/vdev.h | 3 ++- .../opensolaris/uts/common/fs/zfs/sys/vdev_impl.h | 1 + .../contrib/opensolaris/uts/common/fs/zfs/vdev.c | 20 ++++++++++++++------ .../contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c | 8 ++++++++ .../contrib/opensolaris/uts/common/fs/zfs/zio.c | 19 ++++++++++++++----- 8 files changed, 52 insertions(+), 24 deletions(-) diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c index bff0586..d00d777 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c @@ -4197,7 +4197,7 @@ tryagain: (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); - if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) { + if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c index a0620b8..3a289da 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -36,7 +36,7 @@ * avoid having to load lots of space_maps in a given txg. There are, * however, some cases where we want to avoid "fast" ganging and instead * we want to do an exhaustive search of all metaslabs on this device. - * Currently we don't allow any gang or dump device related allocations + * Currently we don't allow any gang, zil, or dump device related allocations * to "fast" gang. */ #define CAN_FASTGANG(flags) \ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index 8da471b..f467223 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -1073,8 +1073,10 @@ spa_unload(spa_t *spa) } spa->spa_spares.sav_count = 0; - for (i = 0; i < spa->spa_l2cache.sav_count; i++) + for (i = 0; i < spa->spa_l2cache.sav_count; i++) { + vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); vdev_free(spa->spa_l2cache.sav_vdevs[i]); + } if (spa->spa_l2cache.sav_vdevs) { kmem_free(spa->spa_l2cache.sav_vdevs, spa->spa_l2cache.sav_count * sizeof (void *)); @@ -1302,11 +1304,13 @@ spa_load_l2cache(spa_t *spa) vd = oldvdevs[i]; if (vd != NULL) { + ASSERT(vd->vdev_isl2cache); + if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); - (void) vdev_close(vd); - spa_l2cache_remove(vd); + vdev_clear_stats(vd); + vdev_free(vd); } } @@ -1949,7 +1953,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, */ if (type != SPA_IMPORT_ASSEMBLE) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = vdev_validate(rvd); + error = vdev_validate(rvd, mosconfig); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) @@ -2818,6 +2822,7 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { error = ENOTBLK; + vdev_free(vd); goto out; } #endif @@ -2927,10 +2932,6 @@ spa_l2cache_drop(spa_t *spa) if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); - if (vd->vdev_isl2cache) - spa_l2cache_remove(vd); - vdev_clear_stats(vd); - (void) vdev_close(vd); } } @@ -3929,7 +3930,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) pvd = oldvd->vdev_parent; if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, - VDEV_ALLOC_ADD)) != 0) + VDEV_ALLOC_ATTACH)) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); if (newrootvd->vdev_children != 1) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h index 941f234..aa6559c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_VDEV_H @@ -48,7 +49,7 @@ extern boolean_t zfs_nocacheflush; extern int vdev_open(vdev_t *); extern void vdev_open_children(vdev_t *); extern boolean_t vdev_uses_zvols(vdev_t *); -extern int vdev_validate(vdev_t *); +extern int vdev_validate(vdev_t *, boolean_t); extern void vdev_close(vdev_t *); extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace); extern void vdev_reopen(vdev_t *); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h index 7efa3f3..992ce0c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h @@ -261,6 +261,7 @@ typedef struct vdev_label { #define VDEV_ALLOC_L2CACHE 3 #define VDEV_ALLOC_ROOTPOOL 4 #define VDEV_ALLOC_SPLIT 5 +#define VDEV_ALLOC_ATTACH 6 /* * Allocate or free a vdev diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index 04597ec..b5d6fda 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -499,7 +499,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_removing); } - if (parent && !parent->vdev_parent) { + if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) { ASSERT(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_ADD || alloctype == VDEV_ALLOC_SPLIT || @@ -675,6 +675,8 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) svd->vdev_ms_shift = 0; svd->vdev_ms_count = 0; + if (tvd->vdev_mg) + ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg); tvd->vdev_mg = svd->vdev_mg; tvd->vdev_ms = svd->vdev_ms; @@ -1294,13 +1296,18 @@ vdev_open(vdev_t *vd) * contents. This needs to be done before vdev_load() so that we don't * inadvertently do repair I/Os to the wrong device. * + * If 'strict' is false ignore the spa guid check. This is necessary because + * if the machine crashed during a re-guid the new guid might have been written + * to all of the vdev labels, but not the cached config. The strict check + * will be performed when the pool is opened again using the mos config. + * * This function will only return failure if one of the vdevs indicates that it * has since been destroyed or exported. This is only possible if * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state * will be updated but the function will return 0. */ int -vdev_validate(vdev_t *vd) +vdev_validate(vdev_t *vd, boolean_t strict) { spa_t *spa = vd->vdev_spa; nvlist_t *label; @@ -1308,7 +1315,7 @@ vdev_validate(vdev_t *vd) uint64_t state; for (int c = 0; c < vd->vdev_children; c++) - if (vdev_validate(vd->vdev_child[c]) != 0) + if (vdev_validate(vd->vdev_child[c], strict) != 0) return (EBADF); /* @@ -1338,8 +1345,9 @@ vdev_validate(vdev_t *vd) return (0); } - if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, - &guid) != 0 || guid != spa_guid(spa)) { + if (strict && (nvlist_lookup_uint64(label, + ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || + guid != spa_guid(spa))) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); nvlist_free(label); @@ -1501,7 +1509,7 @@ vdev_reopen(vdev_t *vd) !l2arc_vdev_present(vd)) l2arc_add_vdev(spa, vd); } else { - (void) vdev_validate(vd); + (void) vdev_validate(vd, B_TRUE); } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c index 0b48126..fa5903a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #include #include #include @@ -709,6 +713,10 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, if (report->zcr_ereport == NULL) { report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); + if (report->zcr_ckinfo != NULL) { + kmem_free(report->zcr_ckinfo, + sizeof (*report->zcr_ckinfo)); + } kmem_free(report, sizeof (*report)); return; } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index 5f8f00b..3eeeb58 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -2235,13 +2235,22 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, ASSERT(txg > spa_syncing_txg(spa)); - if (use_slog) + /* + * ZIL blocks are always contiguous (i.e. not gang blocks) so we + * set the METASLAB_GANG_AVOID flag so that they don't "fast gang" + * when allocating them. + */ + if (use_slog) { error = metaslab_alloc(spa, spa_log_class(spa), size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + new_bp, 1, txg, old_bp, + METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); + } - if (error) + if (error) { error = metaslab_alloc(spa, spa_normal_class(spa), size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + new_bp, 1, txg, old_bp, + METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); + } if (error == 0) { BP_SET_LSIZE(new_bp, size); -- 1.7.9.4