diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c57876f1838eeca8c03ac0b7d7c9df9e03bc7d13..b377b73d6cd631a935ad9229ce80a2ef1e189e26 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -273,20 +273,6 @@ test_full/cp37-cp37m-linux:
         python:3.7
 
 
-# for universal builds we only need to gpg sign once
-gpgsign/cp37-cp37m-linux:
-    <<: 
-        - *gpgsign_template
-    image:
-        python:3.7
-
-deploy/cp37-cp37m-linux:
-    <<: 
-        - *deploy_template
-    image:
-        python:3.7
-
-
 # ---------------
 # Python 3.6 Jobs
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf5f8eab2bafff8f6400576958f723714dae2300..5f314384168c9579b5bfbc2bf198b4280fe11ff2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,34 @@ This changelog follows the specifications detailed in: [Keep a Changelog](https:
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release.
 
 
-## Version 0.5.6 - Unreleased
+## Version 0.5.7 - Unreleased
+
+### Changed
+* `harn.deploy_fpath` is now populated when the model is deployed.
+* Improved docs on `netharn/data/toydata.py`
+* Renamed the `torch_snapshots` directory to `checkpoints`.
+
+### Added
+* Ported experimental `ChannelSpec` and `DataContainer` from bioharn to netharn.data.
+* Added basic classification example that works on generic coco datasets
+* Threshold curves to ConfusionVector metrics
+* Initial weights are now saved in `initial_state` directory.
+* New `plots` submodule.
+
+### Fixed
+* Fixed bug in XPU auto mode which caused it always to choose GPU 0.
+* Bug in hyperparams where dict-based loader spec was not working.
+* Display intervals were not working correctly with ProgIter, hacked in a temporary fix.
+
+
+## Version 0.5.6 - Released 2020-04-16
+
+### Changed
+* Enhanced VOC ensure data 
+
+
+### Fixed
+* Version issues from last release
 
 
 ## Version 0.5.5
diff --git a/README.rst b/README.rst
index 1172769cfa581794105da960cc35a24901f8656a..bb1ac3b7c6eba7a89860d89f640869ff750aaa50 100644
--- a/README.rst
+++ b/README.rst
@@ -130,7 +130,6 @@ Features (continued)
   ``kwplot``. 
 
 
-
 Installation
 ============
 
@@ -262,50 +261,50 @@ useful to look at.  Its complexity is more than CIFAR but less than YOLO.
     >>> hyper = netharn.HyperParams(**{
     >>>     # ================
     >>>     # Environment Components
+    >>>     'name'        : 'demo',
     >>>     'workdir'     : ub.ensure_app_cache_dir('netharn/demo'),
-    >>>     'nice'        : 'demo',
-    >>>     'xpu'         : netharn.XPU.cast('auto'),
+    >>>     'xpu'         : netharn.XPU.coerce('auto'),
     >>>     # workdir is a directory where intermediate results can be saved
-    >>>     # nice symlinks <workdir>/fit/nice/<nice> -> ../runs/<hashid>
+    >>>     # "nice" symlinks <workdir>/fit/name/<name> -> ../runs/<hashid>
     >>>     # XPU auto select a gpu if idle and VRAM>6GB else a cpu
     >>>     # ================
     >>>     # Data Components
     >>>     'datasets'    : {  # dict of plain ol torch.data.Dataset instances
     >>>         'train': netharn.data.ToyData2d(size=3, border=1, n=256, rng=0),
-    >>>         'vali': netharn.data.ToyData2d(size=3, border=1, n=128, rng=1),
-    >>>         'test': netharn.data.ToyData2d(size=3, border=1, n=128, rng=2),
+    >>>         'vali': netharn.data.ToyData2d(size=3, border=1, n=64, rng=1),
+    >>>         'test': netharn.data.ToyData2d(size=3, border=1, n=64, rng=2),
     >>>     },
-    >>>     'loaders'     : {'batch_size': 64}, # DataLoader instances or kw
+    >>>     'loaders'     : {'batch_size': 4}, # DataLoader instances or kw
     >>>     # ================
     >>>     # Algorithm Components
     >>>     # Note the (cls, kw) tuple formatting
     >>>     'model'       : (netharn.models.ToyNet2d, {}),
     >>>     'optimizer'   : (netharn.optimizers.SGD, {
-    >>>         'lr': 0.0001
+    >>>         'lr': 0.01
     >>>     }),
     >>>     # focal loss is usually better than netharn.criterions.CrossEntropyLoss
     >>>     'criterion'   : (netharn.criterions.FocalLoss, {}),
     >>>     'initializer' : (netharn.initializers.KaimingNormal, {
     >>>         'param': 0,
     >>>     }),
-    >>>     # these may receive an overhaul soon
+    >>>     # The scheduler adjusts learning rate over the training run
     >>>     'scheduler'   : (netharn.schedulers.ListedScheduler, {
-    >>>         'points': {'lr': {0: .0001, 2: .01, 5: .015, 6: .005, 9: .001}},
+    >>>         'points': {'lr': {0: 0.1, 2: 10.0, 4: .15, 6: .05, 9: .01}},
     >>>         'interpolation': 'linear',
     >>>     }),
     >>>     'monitor'     : (netharn.Monitor, {
     >>>         'max_epoch': 10,
+    >>>         'patience': 7,
     >>>     }),
     >>>     # dynamics are a config option that modify the behavior of the main
     >>>     # training loop. These parameters effect the learned model.
-    >>>     'dynamics'   : {'batch_step': 4},
+    >>>     'dynamics'   : {'batch_step': 2},
     >>> })
     >>> harn = netharn.FitHarn(hyper)
-    >>> # non-algorithmic behavior configs (do not change learned models)
-    >>> harn.preferences['prog_backend'] = 'progiter'  # alternative: 'tqdm'
+    >>> # non-algorithmic behavior preferences (do not change learned models)
     >>> harn.preferences['num_keep'] = 10
     >>> # start training.
-    >>> harn.initialize(reset='delete')
+    >>> harn.initialize(reset='delete')  # delete removes an existing run
     >>> harn.run()  # note: run calls initialize it hasn't already been called.
     >>> # xdoc: +IGNORE_WANT
 
diff --git a/analytic/analytic_for.py b/analytic/analytic_for.py
deleted file mode 100644
index 5ef73f1c5554c060bcbbab8121559822b7fa9564..0000000000000000000000000000000000000000
--- a/analytic/analytic_for.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# TODO: new api
-from netharn.analytic.analytic_for import *
diff --git a/analytic/output_shape_for.py b/analytic/output_shape_for.py
deleted file mode 100644
index b792abb86ef2ef5e2e1ccb7383600a108b593717..0000000000000000000000000000000000000000
--- a/analytic/output_shape_for.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# TODO: new api
-from netharn.analytic.output_shape_for import *
diff --git a/analytic/receptive_field_for.py b/analytic/receptive_field_for.py
deleted file mode 100644
index a8f17dd69bdeef3da21f6375817542467bf6a162..0000000000000000000000000000000000000000
--- a/analytic/receptive_field_for.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# TODO: new api
-from netharn.analytic.receptive_field_for import *
diff --git a/dev/ggr_matching.py b/dev/ggr_matching.py
index 7f54c206e20940d467af4d0d1aece3ce02d6c00b..4f149e8309b367418da4d2c66856611335b53f7c 100644
--- a/dev/ggr_matching.py
+++ b/dev/ggr_matching.py
@@ -21,6 +21,8 @@ import torch
 import torchvision  # NOQA
 import ndsampler
 from sklearn import metrics
+import kwimage
+import kwarray
 
 
 class MatchingHarness(nh.FitHarn):
@@ -41,7 +43,7 @@ class MatchingHarness(nh.FitHarn):
         harn.POS_LABEL = 1
         harn.NEG_LABEL = 0
         # BUG: should have one for each tag
-        harn.confusion_vectors = nh.util.DataFrameLight(
+        harn.confusion_vectors = kwarray.DataFrameLight(
             columns=['y_true', 'y_dist']
         )
 
@@ -169,7 +171,7 @@ class MatchingHarness(nh.FitHarn):
             stacked = harn._draw_batch(batch, decoded)
             dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag))
             fpath = join(dpath, 'batch_{}_epoch_{}.jpg'.format(bx, harn.epoch))
-            nh.util.imwrite(fpath, stacked)
+            kwimage.imwrite(fpath, stacked)
 
         # Record metrics for epoch scores
         n = len(outputs['distAP'])
@@ -260,10 +262,10 @@ class MatchingHarness(nh.FitHarn):
             >>> decoded = harn._decode(outputs)
             >>> stacked = harn._draw_batch(batch, decoded, limit=42)
             >>> # xdoctest: +REQUIRES(--show)
-            >>> import netharn as nh
-            >>> nh.util.autompl()
-            >>> nh.util.imshow(stacked, colorspace='rgb', doclf=True)
-            >>> nh.util.show_if_requested()
+            >>> import kwplot
+            >>> kwplot.autompl()
+            >>> kwplot.imshow(stacked, colorspace='rgb', doclf=True)
+            >>> kwplot.show_if_requested()
         """
         tostack = []
         fontkw = {
@@ -275,7 +277,7 @@ class MatchingHarness(nh.FitHarn):
         for i in range(n):
             ims = [g[i].transpose(1, 2, 0) for g in decoded['triple_imgs']]
             ims = [cv2.resize(g, dsize) for g in ims]
-            ims = [nh.util.atleast_3channels(g) for g in ims]
+            ims = [kwimage.atleast_3channels(g) for g in ims]
             triple_nxs = [n[i] for n in decoded['triple_nxs']]
 
             text = 'distAP={:.3g} -- distAN={:.3g} -- {}'.format(
@@ -287,17 +289,17 @@ class MatchingHarness(nh.FitHarn):
                 'dodgerblue' if decoded['distAP'][i] < decoded['distAN'][i]
                 else 'orangered')
 
-            img = nh.util.stack_images(
+            img = kwimage.stack_images(
                 ims, overlap=-2, axis=1,
                 bg_value=(10 / 255, 40 / 255, 30 / 255)
             )
             img = (img * 255).astype(np.uint8)
-            img = nh.util.draw_text_on_image(img, text,
+            img = kwimage.draw_text_on_image(img, text,
                                              org=(2, img.shape[0] - 2),
                                              color=color, **fontkw)
             tostack.append(img)
 
-        stacked = nh.util.stack_images_grid(tostack, overlap=-10,
+        stacked = kwimage.stack_images_grid(tostack, overlap=-10,
                                             bg_value=(30, 10, 40),
                                             axis=1, chunksize=3)
         return stacked
@@ -319,12 +321,12 @@ class AnnotCocoDataset(torch.utils.data.Dataset, ub.NiceRepr):
         >>> index = 0
         >>> item = torch_dset[index]
         >>> import netharn as nh
-        >>> nh.util.autompl()
-        >>> nh.util.imshow(item['chip'])
+        >>> kwplot.autompl()
+        >>> kwplot.imshow(item['chip'])
         >>> torch_loader = torch_dset.make_loader()
         >>> raw_batch = ub.peek(torch_loader)
-        >>> stacked = nh.util.stack_images_grid(raw_batch['chip'].numpy().transpose(0, 2, 3, 1), overlap=-1)
-        >>> nh.util.imshow(stacked)
+        >>> stacked = kwimage.stack_images_grid(raw_batch['chip'].numpy().transpose(0, 2, 3, 1), overlap=-1)
+        >>> kwplot.imshow(stacked)
 
         for batch_idxs in torch_loader.batch_sampler:
             print('batch_idxs = {!r}'.format(batch_idxs))
@@ -360,7 +362,7 @@ class AnnotCocoDataset(torch.utils.data.Dataset, ub.NiceRepr):
         self.window_dim = window_dim
         self.dims = (window_dim, window_dim)
 
-        self.rng = nh.util.ensure_rng(0)
+        self.rng = kwarray.ensure_rng(0)
         if augment:
             import imgaug.augmenters as iaa
             self.independent = iaa.Sequential([
@@ -792,7 +794,8 @@ def main():
         ns['lr'] = 1e-99
 
         if args.interact:
-            nh.util.autompl()
+            import kwplot
+            kwplot.autompl()
             import matplotlib.pyplot as plt
 
         harn = setup_harn(**ns)
diff --git a/dev/manage_snapshots.py b/dev/manage_snapshots.py
index 44bef23898ea706e74e7d1e536a6a8bb953fc741..f878efdc6e6b3557fd0e580830ea7c28496e4f65 100755
--- a/dev/manage_snapshots.py
+++ b/dev/manage_snapshots.py
@@ -139,13 +139,13 @@ def session_info(dpath):
     dpath = realpath(dpath)
 
     if True:
-        # Determine if we are pointed to by a nice directory or not
-        nice = basename(dirname(dpath))
-        info['nice'] = nice
+        # Determine if we are pointed to by a "name" directory or not
+        name = basename(dirname(dpath))
+        info['name'] = name
         fitdir = dirname(dirname(dirname(dpath)))
-        nice_dpath = join(fitdir, 'nice', nice)
+        name_dpath = join(fitdir, 'name', name)
         try:
-            target = realpath(ub.util_links._readlink(nice_dpath))
+            target = realpath(ub.util_links._readlink(name_dpath))
         except Exception:
             target = None
         info['linked'] = (target == dpath)
@@ -206,10 +206,10 @@ def _devcheck_remove_dead_runs(workdir, dry=True, dead_num_snap_thresh=10,
         else:
             session['decision'] = 'good'
 
-    nice_groups = ub.group_items(all_sessions, lambda x: x['nice'])
+    nice_groups = ub.group_items(all_sessions, lambda x: x['name'])
 
-    for nice, group in nice_groups.items():
-        print(' --- {} --- '.format(nice))
+    for name, group in nice_groups.items():
+        print(' --- {} --- '.format(name))
         group = sorted(group, key=lambda x: x['size'])
         group_ = copy.deepcopy(group)
         for item in group_:
@@ -218,12 +218,12 @@ def _devcheck_remove_dead_runs(workdir, dry=True, dead_num_snap_thresh=10,
             item['size'] = byte_str(item['size'])
         print(ub.repr2(group_, nl=1))
 
-    # Partion your "nice" sessions into broken and live symlinks.
+    # Partition your "name" sessions into broken and live symlinks.
     # For each live link remember what the real path is.
     broken_links = []
-    nice_dpath = join(workdir, 'fit', 'nice')
-    for dname in os.listdir(nice_dpath):
-        dpath = join(nice_dpath, dname)
+    name_dpath = join(workdir, 'fit', 'name')
+    for dname in os.listdir(name_dpath):
+        dpath = join(name_dpath, dname)
         if is_symlink_broken(dpath):
             broken_links.append(dpath)
 
diff --git a/dev/mnist_matching.py b/dev/mnist_matching.py
index 196220dbda785afbda10ae81170e9147064ae61f..f7be6aab3551e1b91312c4624bb11f13cad5d0a5 100644
--- a/dev/mnist_matching.py
+++ b/dev/mnist_matching.py
@@ -7,6 +7,8 @@ import torchvision
 import ubelt as ub
 from torch import nn
 from sklearn import metrics
+import kwimage
+import kwarray
 
 
 class MNISTEmbeddingNet(nh.layers.Module):
@@ -107,7 +109,7 @@ class MNIST_MatchingHarness(nh.FitHarn):
         harn._has_preselected = False
         harn.POS_LABEL = 1
         harn.NEG_LABEL = 0
-        harn.confusion_vectors = nh.util.DataFrameLight(
+        harn.confusion_vectors = kwarray.DataFrameLight(
             columns=['y_true', 'y_dist']
         )
 
@@ -158,7 +160,6 @@ class MNIST_MatchingHarness(nh.FitHarn):
         batch['cpu_chips'] = image
         return batch
 
-    @nh.util.profile
     def run_batch(harn, batch):
         """
         Two - run the batch
@@ -241,7 +242,6 @@ class MNIST_MatchingHarness(nh.FitHarn):
         outputs['distAN'] = neg_dists
         return outputs, loss
 
-    @nh.util.profile
     def on_batch(harn, batch, outputs, loss):
         """
         custom netharn callback
@@ -253,9 +253,10 @@ class MNIST_MatchingHarness(nh.FitHarn):
             >>> decoded = harn._decode(outputs)
             >>> stacked = harn._draw_batch(decoded, limit=42)
             >>> # xdoctest: +REQUIRES(--show)
-            >>> nh.util.autompl()
-            >>> nh.util.imshow(stacked)
-            >>> nh.util.show_if_requested()
+            >>> import kwplot
+            >>> kwplot.autompl()
+            >>> kwplot.imshow(stacked)
+            >>> kwplot.show_if_requested()
         """
         batch_metrics = ub.odict()
         for key, value in harn._loss_parts.items():
@@ -270,7 +271,7 @@ class MNIST_MatchingHarness(nh.FitHarn):
             stacked = harn._draw_batch(decoded)
             dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag))
             fpath = join(dpath, 'batch_{}_epoch_{}.jpg'.format(bx, harn.epoch))
-            nh.util.imwrite(fpath, stacked)
+            kwimage.imwrite(fpath, stacked)
 
         # Record metrics for epoch scores
         n = len(outputs['distAP'])
@@ -282,7 +283,6 @@ class MNIST_MatchingHarness(nh.FitHarn):
         harn.confusion_vectors._data['y_dist'].extend(outputs['distAN'].data.cpu().numpy().tolist())
         return batch_metrics
 
-    @nh.util.profile
     def on_epoch(harn):
         """
         custom netharn callback
@@ -345,7 +345,6 @@ class MNIST_MatchingHarness(nh.FitHarn):
         harn.confusion_vectors.clear()
         return epoch_metrics
 
-    @nh.util.profile
     def _decode(harn, outputs):
         """
         Convert raw network outputs to something interpretable
@@ -366,7 +365,6 @@ class MNIST_MatchingHarness(nh.FitHarn):
         decoded['distAN'] = outputs['distAN'].data.cpu().numpy()
         return decoded
 
-    @nh.util.profile
     def _draw_batch(harn, decoded, limit=12):
         """
         Example:
@@ -376,10 +374,10 @@ class MNIST_MatchingHarness(nh.FitHarn):
             >>> decoded = harn._decode(outputs)
             >>> stacked = harn._draw_batch(decoded)
             >>> # xdoctest: +REQUIRES(--show)
-            >>> import netharn as nh
-            >>> nh.util.autompl()
-            >>> nh.util.imshow(stacked, colorspace='rgb', doclf=True)
-            >>> nh.util.show_if_requested()
+            >>> import kwplot
+            >>> kwplot.autompl()
+            >>> kwplot.imshow(stacked, colorspace='rgb', doclf=True)
+            >>> kwplot.show_if_requested()
         """
         tostack = []
         fontkw = {
@@ -391,7 +389,7 @@ class MNIST_MatchingHarness(nh.FitHarn):
         for i in range(n):
             ims = [g[i].transpose(1, 2, 0) for g in decoded['triple_imgs']]
             ims = [cv2.resize(g, dsize) for g in ims]
-            ims = [nh.util.atleast_3channels(g) for g in ims]
+            ims = [kwimage.atleast_3channels(g) for g in ims]
             triple_nxs = [n[i] for n in decoded['triple_nxs']]
 
             text = 'dAP={:.3g} -- dAN={:.3g} -- {}'.format(
@@ -403,16 +401,16 @@ class MNIST_MatchingHarness(nh.FitHarn):
                 'dodgerblue' if decoded['distAP'][i] < decoded['distAN'][i]
                 else 'orangered')
 
-            img = nh.util.stack_images(
+            img = kwimage.stack_images(
                 ims, overlap=-2, axis=1,
                 bg_value=(10 / 255, 40 / 255, 30 / 255)
             )
             img = (img * 255).astype(np.uint8)
-            img = nh.util.draw_text_on_image(img, text,
+            img = kwimage.draw_text_on_image(img, text,
                                              org=(2, img.shape[0] - 2),
                                              color=color, **fontkw)
             tostack.append(img)
-        stacked = nh.util.stack_images_grid(tostack, overlap=-10,
+        stacked = kwimage.stack_images_grid(tostack, overlap=-10,
                                             bg_value=(30, 10, 40),
                                             axis=1, chunksize=3)
         return stacked
@@ -464,7 +462,7 @@ def setup_datasets(workdir=None):
             labels = dset.dataset.train_labels[dset.indices]
         else:
             labels = dset.labels
-        unique_labels, groupxs = nh.util.group_indices(labels.numpy())
+        unique_labels, groupxs = kwarray.group_indices(labels.numpy())
         dset.pccs = [xs.tolist() for xs in groupxs]
 
     # Give the training dataset an input_id
@@ -637,7 +635,8 @@ def main():
         ns['lr'] = 1e-99
 
         if args.interact:
-            nh.util.autompl()
+            import kwplot
+            kwplot.autompl()
             import matplotlib.pyplot as plt
 
         harn = setup_harn(**ns)
diff --git a/netharn/__init__.py b/netharn/__init__.py
index dc31d9ca4f5d3f3162a6231b64a5c8f4ca1de4b1..2ebad78468af8c375db78358f6dab99e9f301404 100644
--- a/netharn/__init__.py
+++ b/netharn/__init__.py
@@ -4,7 +4,7 @@
 mkinit netharn --noattrs --dry
 mkinit netharn --noattrs
 """
-__version__ = '0.5.6'
+__version__ = '0.5.7'
 
 try:
     # PIL 7.0.0 removed PIL_VERSION, which breaks torchvision, monkey patch it
diff --git a/netharn/analytic/output_shape_for.py b/netharn/analytic/output_shape_for.py
index a8398b07525f4d16ac40681346a162d8c6c7feb4..0f4123f121282aece504fec7c40842a74c0ecbfa 100644
--- a/netharn/analytic/output_shape_for.py
+++ b/netharn/analytic/output_shape_for.py
@@ -9,10 +9,7 @@ import torchvision
 from collections import OrderedDict
 from six.moves import builtins
 from netharn.analytic import analytic_for
-# try:
 from netharn.device import DataSerial
-# except ImportError:
-#     DataSerial = None
 
 REGISTERED_TYPES = []
 
diff --git a/netharn/analytic/receptive_field_for.py b/netharn/analytic/receptive_field_for.py
index 471b95c315811597613c75d816a65752fe108536..0d10905732d49252b0b58cb671e9c8502c423290 100644
--- a/netharn/analytic/receptive_field_for.py
+++ b/netharn/analytic/receptive_field_for.py
@@ -10,6 +10,7 @@ import numpy as np
 from collections import OrderedDict
 from netharn.analytic.output_shape_for import OutputShapeFor
 from netharn.analytic import analytic_for
+from distutils.version import LooseVersion
 # try:
 # from netharn.device import MountedModel
 # except ImportError:
@@ -18,6 +19,12 @@ from netharn.analytic import analytic_for
 REGISTERED_TYPES = []
 
 
+if LooseVersion(torch.__version__) >= LooseVersion('1.5.0'):
+    CONV_TRANSPOSE_TYPES = (nn.modules.conv._ConvTransposeNd,)
+else:
+    CONV_TRANSPOSE_TYPES = (nn.modules.conv._ConvTransposeMixin,)
+
+
 def ensure_array_nd(data, n):
     if ub.iterable(data):
         return np.array(data)
@@ -494,7 +501,7 @@ class _TorchMixin(object):
         return field
         # raise NotImplementedError('todo')
 
-    @compute_type(nn.modules.conv._ConvTransposeMixin)
+    @compute_type(*CONV_TRANSPOSE_TYPES)
     def convT(module, input_field=None):
         return ReceptiveFieldFor._kernelized_tranpose(module, input_field)
 
diff --git a/netharn/data/__init__.py b/netharn/data/__init__.py
index 7ed7517a713c650d15dfc18c200a11989f648917..ee84408674dc52dc1b515991be34cc536e06bdb6 100644
--- a/netharn/data/__init__.py
+++ b/netharn/data/__init__.py
@@ -3,33 +3,28 @@ mkinit netharn.data
 """
 # flake8: noqa
 
-__DYNAMIC__ = False
-if __DYNAMIC__:
-    from mkinit import dynamic_init
-    exec(dynamic_init(__name__))
-else:
-    # <AUTOGEN_INIT>
-    from netharn.data import base
-    from netharn.data import batch_samplers
-    from netharn.data import coco_api
-    from netharn.data import collate
-    from netharn.data import mnist
-    from netharn.data import toydata
-    from netharn.data import transforms
-    from netharn.data import voc
+# <AUTOGEN_INIT>
+from netharn.data import base
+from netharn.data import batch_samplers
+from netharn.data import coco_api
+from netharn.data import collate
+from netharn.data import mnist
+from netharn.data import toydata
+from netharn.data import transforms
+from netharn.data import voc
 
-    from netharn.data.base import (DataMixin,)
-    from netharn.data.batch_samplers import (MatchingSamplerPK,)
-    from netharn.data.coco_api import (CocoDataset,)
-    from netharn.data.collate import (CollateException, default_collate,
-                                      list_collate, numpy_type_map,
-                                      padded_collate,)
-    from netharn.data.mnist import (MNIST,)
-    from netharn.data.toydata import (ToyData1d, ToyData2d,)
-    from netharn.data.voc import (VOCDataset,)
+from netharn.data.base import (DataMixin,)
+from netharn.data.batch_samplers import (MatchingSamplerPK,)
+from netharn.data.coco_api import (CocoDataset,)
+from netharn.data.collate import (CollateException, default_collate,
+                                  list_collate, numpy_type_map,
+                                  padded_collate,)
+from netharn.data.mnist import (MNIST,)
+from netharn.data.toydata import (ToyData1d, ToyData2d,)
+from netharn.data.voc import (VOCDataset,)
 
-    __all__ = ['CocoDataset', 'CollateException', 'DataMixin', 'MNIST',
-               'MatchingSamplerPK', 'ToyData1d', 'ToyData2d', 'VOCDataset', 'base',
-               'batch_samplers', 'coco_api', 'collate', 'default_collate',
-               'list_collate', 'mnist', 'numpy_type_map', 'padded_collate',
-               'toydata', 'transforms', 'voc']
+__all__ = ['CocoDataset', 'CollateException', 'DataMixin', 'MNIST',
+           'MatchingSamplerPK', 'ToyData1d', 'ToyData2d', 'VOCDataset', 'base',
+           'batch_samplers', 'coco_api', 'collate', 'default_collate',
+           'list_collate', 'mnist', 'numpy_type_map', 'padded_collate',
+           'toydata', 'transforms', 'voc']
diff --git a/netharn/data/base.py b/netharn/data/base.py
index 57a88ead88b23118d7695040562129d2d6d94ad2..7f4e341fb4b88b7b8fc194d5b48cbd21d356c51d 100644
--- a/netharn/data/base.py
+++ b/netharn/data/base.py
@@ -1,3 +1,6 @@
+"""
+DEPRECATE
+"""
 from torch.utils import data as torch_data
 
 
diff --git a/netharn/data/batch_samplers.py b/netharn/data/batch_samplers.py
index 88758ad974019bfea7f26cd1adbf61e47f572db2..5dc3300386622a3f2d7a25204f232b032d41e0d9 100644
--- a/netharn/data/batch_samplers.py
+++ b/netharn/data/batch_samplers.py
@@ -273,6 +273,8 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam
         num_batches (int | str, default='auto'): number of batches to generate
         shuffle (bool, default=False): if True randomize batch ordering
         drop_last (bool): unused, exists for compatibility
+        label_to_weight (dict, default=None):
+            mapping from labels to user-specified weights
         rng (RandomState, default=None): random seed
 
     References:
@@ -289,17 +291,17 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam
         >>> # Create a rare class
         >>> index_to_labels[0][0] = 42
         >>> self = GroupedBalancedBatchSampler(index_to_labels, batch_size=4)
-        >>> print('self.label_to_freq = {!r}'.format(self.label_to_freq))
+        >>> print('self.label_to_freq = {}'.format(ub.repr2(self.label_to_freq, nl=1)))
         >>> indices = list(self)
         >>> print('indices = {!r}'.format(indices))
         >>> # Print the epoch / item label frequency per epoch
         >>> label_sequence = []
         >>> index_sequence = []
-        >>> for item_indices in self:
+        >>> for item_indices, _ in zip(self, range(1000)):
         >>>     item_indices = np.array(item_indices)
         >>>     item_labels = list(ub.flatten(ub.take(index_to_labels, item_indices)))
         >>>     index_sequence.extend(item_indices)
-        >>>     label_sequence.extend(item_labels)
+        >>>     label_sequence.extend(ub.unique(item_labels))
         >>> label_hist = ub.dict_hist(label_sequence)
         >>> index_hist = ub.dict_hist(index_sequence)
         >>> label_hist = ub.sorted_vals(label_hist, reverse=True)
@@ -310,7 +312,7 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam
     """
 
     def __init__(self, index_to_labels, batch_size=1, num_batches='auto',
-                 shuffle=False, rng=None):
+                 label_to_weight=None, shuffle=False, rng=None):
         import kwarray
 
         rng = kwarray.ensure_rng(rng, api='python')
@@ -322,20 +324,49 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam
             for label in item_labels:
                 label_to_indices[label].add(index)
         flat_labels = np.hstack(index_to_labels)
-        self.label_to_freq = ub.dict_hist(flat_labels)
+        label_to_freq = ub.dict_hist(flat_labels)
 
         # Use tf-idf based scheme to compute sample probabilities
+        label_to_idf = {}
         label_to_tfidf = {}
         labels = sorted(set(flat_labels))
         for label in labels:
+            # tf for each img, is the number of times the label appears
             index_to_tf = np.zeros(len(index_to_labels))
             for index, item_labels in enumerate(index_to_labels):
                 index_to_tf[index] = (label == item_labels).sum()
+            # idf is the #imgs / #imgs-with-label
             idf = len(index_to_tf) / (index_to_tf > 0).sum()
+            if label_to_weight:
+                idf = idf * label_to_weight[label]
+            label_to_idf[label] = idf
             label_to_tfidf[label] = np.maximum(index_to_tf * idf, 1)
         index_to_weight = sum(label_to_tfidf.values())
         index_to_prob = index_to_weight / index_to_weight.sum()
 
+        if 0:
+            index_to_unique_labels = list(map(set, index_to_labels))
+            unique_freq = ub.dict_hist(ub.flatten(index_to_unique_labels))
+            tot = sum(unique_freq.values())
+            unweighted_odds = ub.map_vals(lambda x: x / tot, unique_freq)
+
+            label_to_indices = ub.ddict(set)
+            for index, item_labels in enumerate(index_to_labels):
+                for label in item_labels:
+                    label_to_indices[label].add(index)
+            ub.map_vals(len, label_to_indices)
+
+            label_to_odds = ub.ddict(lambda: 0)
+            for label, indices in label_to_indices.items():
+                for idx in indices:
+                    label_to_odds[label] += index_to_prob[idx]
+
+            coi = {x for x, w in label_to_weight.items() if w > 0}
+            coi_weighted = ub.dict_subset(label_to_odds, coi)
+            coi_unweighted = ub.dict_subset(unweighted_odds, coi)
+            print('coi_weighted = {}'.format(ub.repr2(coi_weighted, nl=1)))
+            print('coi_unweighted = {}'.format(ub.repr2(coi_unweighted, nl=1)))
+
         self.index_to_prob = index_to_prob
         self.indices = np.arange(len(index_to_prob))
 
@@ -344,6 +375,7 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam
         else:
             self.num_batches = num_batches
 
+        self.label_to_freq = label_to_freq
         self.index_to_labels = index_to_labels
         self.batch_size = batch_size
         self.shuffle = shuffle
@@ -356,6 +388,25 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam
             'label_to_freq': self.label_to_freq,
         }, nl=0)
 
+    def _balance_report(self, limit=None):
+        # Print the epoch / item label frequency per epoch
+        label_sequence = []
+        index_sequence = []
+        if limit is None:
+            limit = self.num_batches
+        for item_indices, _ in zip(self, range(limit)):
+            item_indices = np.array(item_indices)
+            item_labels = list(ub.flatten(ub.take(self.index_to_labels, item_indices)))
+            index_sequence.extend(item_indices)
+            label_sequence.extend(ub.unique(item_labels))
+        label_hist = ub.dict_hist(label_sequence)
+        index_hist = ub.dict_hist(index_sequence)
+        label_hist = ub.sorted_vals(label_hist, reverse=True)
+        index_hist = ub.sorted_vals(index_hist, reverse=True)
+        index_hist = ub.dict_subset(index_hist, list(index_hist.keys())[0:5])
+        print('label_hist = {}'.format(ub.repr2(label_hist, nl=1)))
+        print('index_hist = {}'.format(ub.repr2(index_hist, nl=1)))
+
     def _auto_num_batches(self):
         # The right way to calculate num samples would be using a generalized
         # solutions to the coupon collector problem, but in practice that
diff --git a/netharn/data/channel_spec.py b/netharn/data/channel_spec.py
new file mode 100644
index 0000000000000000000000000000000000000000..08e2f21c57731a3d1b47b929e83839049e3c2bea
--- /dev/null
+++ b/netharn/data/channel_spec.py
@@ -0,0 +1,323 @@
+import ubelt as ub
+import six
+
+
+class ChannelSpec(ub.NiceRepr):
+    """
+    Parse and extract information about network input channel specs for
+    early or late fusion networks.
+
+    Notes:
+        The pipe ('|') character represents an early-fused input stream, and
+        order matters (it is non-commutative).
+
+        The comma (',') character separates different input streams/branches
+        for a multi-stream/branch network which will be late fused. Order does
+        not matter.
+
+    TODO:
+        - [ ] : normalize representations? e.g: rgb = r|g|b?
+        - [ ] : rename to BandsSpec or SensorSpec?
+
+    Example:
+        >>> # Integer spec
+        >>> ChannelSpec.coerce(3)
+        <ChannelSpec(u0|u1|u2) ...>
+
+        >>> # single mode spec
+        >>> ChannelSpec.coerce('rgb')
+        <ChannelSpec(rgb) ...>
+
+        >>> # early fused input spec
+        >>> ChannelSpec.coerce('rgb|disprity')
+        <ChannelSpec(rgb|disprity) ...>
+
+        >>> # late fused input spec
+        >>> ChannelSpec.coerce('rgb,disprity')
+        <ChannelSpec(rgb,disprity) ...>
+
+        >>> # early and late fused input spec
+        >>> ChannelSpec.coerce('rgb|ir,disprity')
+        <ChannelSpec(rgb|ir,disprity) ...>
+
+    Example:
+        >>> from netharn.data.channel_spec import *  # NOQA
+        >>> self = ChannelSpec('gray')
+        >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1)))
+        >>> self = ChannelSpec('rgb')
+        >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1)))
+        >>> self = ChannelSpec('rgb|disparity')
+        >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1)))
+        >>> self = ChannelSpec('rgb|disparity,disparity')
+        >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1)))
+        >>> self = ChannelSpec('rgb,disparity,flowx|flowy')
+        >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1)))
+
+    Example:
+        >>> from netharn.data.channel_spec import *  # NOQA
+        >>> specs = [
+        >>>     'rgb',              # and rgb input
+        >>>     'rgb|disprity',     # rgb early fused with disparity
+        >>>     'rgb,disprity',     # rgb late fused with disparity
+        >>>     'rgb|ir,disprity',  # rgb early fused with ir and late fused with disparity
+        >>>     3,                  # 3 unknown channels
+        >>> ]
+        >>> for spec in specs:
+        >>>     print('=======================')
+        >>>     print('spec = {!r}'.format(spec))
+        >>>     #
+        >>>     self = ChannelSpec.coerce(spec)
+        >>>     print('self = {!r}'.format(self))
+        >>>     sizes = self.sizes()
+        >>>     print('sizes = {!r}'.format(sizes))
+        >>>     print('self.info = {}'.format(ub.repr2(self.info, nl=1)))
+        >>>     #
+        >>>     item = self._demo_item((1, 1), rng=0)
+        >>>     inputs = self.encode(item)
+        >>>     components = self.decode(inputs)
+        >>>     input_shapes = ub.map_vals(lambda x: x.shape, inputs)
+        >>>     component_shapes = ub.map_vals(lambda x: x.shape, components)
+        >>>     print('item = {}'.format(ub.repr2(item, precision=1)))
+        >>>     print('inputs = {}'.format(ub.repr2(inputs, precision=1)))
+        >>>     print('input_shapes = {}'.format(ub.repr2(input_shapes)))
+        >>>     print('components = {}'.format(ub.repr2(components, precision=1)))
+        >>>     print('component_shapes = {}'.format(ub.repr2(component_shapes, nl=1)))
+
+    """
+
+    _known = {
+        'rgb': 'r|g|b'
+    }
+
+    _size_lut = {
+        'rgb': 3,
+    }
+
+    def __init__(self, spec):
+        # TODO: allow integer specs
+        self.spec = spec
+        self._info = {}
+
+    def __nice__(self):
+        return self.spec
+
+    def __json__(self):
+        return self.spec
+
+    def __contains__(self, key):
+        """
+        Example:
+            >>> 'disparity' in ChannelSpec('rgb,disparity,flowx|flowy')
+            True
+            >>> 'gray' in ChannelSpec('rgb,disparity,flowx|flowy')
+            False
+        """
+        return key in self.unique()
+
+    @property
+    def info(self):
+        self._info = {
+            'spec': self.spec,
+            'parsed': self.parse(),
+            'unique': self.unique(),
+            'normed': self.normalize(),
+        }
+        return self._info
+
+    @classmethod
+    def coerce(cls, data):
+        if isinstance(data, cls):
+            self = data
+            return self
+        else:
+            if isinstance(data, int):
+                # we know the number of channels, but not their names
+                spec = '|'.join(['u{}'.format(i) for i in range(data)])
+            elif isinstance(data, six.string_types):
+                spec = data
+            else:
+                raise TypeError(type(data))
+
+            self = cls(spec)
+            return self
+
+    def parse(self):
+        """
+        Build internal representation
+        """
+        # commas break inputs into multiple streams
+        stream_specs = self.spec.split(',')
+        parsed = {ss: ss.split('|') for ss in stream_specs}
+        return parsed
+
+    def normalize(self):
+        spec = self.spec
+        stream_specs = spec.split(',')
+        parsed = {ss: ss for ss in stream_specs}
+        for k1 in parsed.keys():
+            for k, v in self._known.items():
+                parsed[k1] = parsed[k1].replace(k, v)
+        parsed = {k: v.split('|') for k, v in parsed.items()}
+        return parsed
+
+    def keys(self):
+        spec = self.spec
+        stream_specs = spec.split(',')
+        for spec in stream_specs:
+            yield spec
+
+    def sizes(self):
+        """
+        Number of dimensions for each fused stream channel
+
+        IE: The EARLY-FUSED channel sizes
+
+        Example:
+            >>> self = ChannelSpec('rgb|disparity,flowx|flowy')
+            >>> self.sizes()
+        """
+        sizes = {
+            key: sum(self._size_lut.get(part, 1) for part in vals)
+            for key, vals in self.parse().items()
+        }
+        return sizes
+
+    def unique(self):
+        """
+        Returns the unique channels that will need to be given or loaded
+        """
+        return set(ub.flatten(self.parse().values()))
+
+    def _item_shapes(self, dims):
+        """
+        Expected shape for an input item
+
+        Args:
+            dims (Tuple[int, int]): the spatial dimension
+
+        Returns:
+            Dict[int, tuple]
+        """
+        item_shapes = {}
+        parsed = self.parse()
+        # normed = self.normalize()
+        fused_keys = list(self.keys())
+        for fused_key in fused_keys:
+            components = parsed[fused_key]
+            for mode_key in components:
+                c = self._size_lut.get(mode_key, 1)
+                shape = (c,) + tuple(dims)
+                item_shapes[mode_key] = shape
+        return item_shapes
+
+    def _demo_item(self, dims=(4, 4), rng=None):
+        """
+        Create an input that satisfies this spec
+
+        Returns:
+            dict: an item like it might appear when its returned from the
+                `__getitem__` method of a :class:`torch...Dataset`.
+
+        Example:
+            >>> dims = (1, 1)
+            >>> ChannelSpec.coerce(3)._demo_item(dims, rng=0)
+            >>> ChannelSpec.coerce('r|g|b|disaprity')._demo_item(dims, rng=0)
+            >>> ChannelSpec.coerce('rgb|disaprity')._demo_item(dims, rng=0)
+            >>> ChannelSpec.coerce('rgb,disaprity')._demo_item(dims, rng=0)
+            >>> ChannelSpec.coerce('rgb')._demo_item(dims, rng=0)
+            >>> ChannelSpec.coerce('gray')._demo_item(dims, rng=0)
+        """
+        import torch
+        import kwarray
+        rng = kwarray.ensure_rng(rng)
+        item_shapes = self._item_shapes(dims)
+        item = {
+            key: torch.from_numpy(rng.rand(*shape))
+            for key, shape in item_shapes.items()
+        }
+        return item
+
+    def encode(self, item, axis=0):
+        """
+        Given a dictionary containing preloaded components of the network
+        inputs, build a concatenated network representations of each input
+        stream.
+
+        Args:
+            item (dict): a batch item
+            axis (int, default=0): concatenation dimension
+
+        Returns:
+            Dict[str, Tensor]: mapping between input stream and its early fused
+                tensor input.
+
+        Example:
+            >>> import torch
+            >>> dims = (4, 4)
+            >>> item = {
+            >>>     'rgb': torch.rand(3, *dims),
+            >>>     'disparity': torch.rand(1, *dims),
+            >>>     'flowx': torch.rand(1, *dims),
+            >>>     'flowy': torch.rand(1, *dims),
+            >>> }
+            >>> # Complex Case
+            >>> self = ChannelSpec('rgb,disparity,rgb|disparity|flowx|flowy,flowx|flowy')
+            >>> inputs = self.encode(item)
+            >>> input_shapes = ub.map_vals(lambda x: x.shape, inputs)
+            >>> print('input_shapes = {}'.format(ub.repr2(input_shapes, nl=1)))
+            >>> # Simpler case
+            >>> self = ChannelSpec('rgb|disparity')
+            >>> inputs = self.encode(item)
+            >>> input_shapes = ub.map_vals(lambda x: x.shape, inputs)
+            >>> print('input_shapes = {}'.format(ub.repr2(input_shapes, nl=1)))
+        """
+        import torch
+        inputs = dict()
+        parsed = self.parse()
+        unique = self.unique()
+        components = {k: item[k] for k in unique}
+        for key, parts in parsed.items():
+            inputs[key] = torch.cat([components[k] for k in parts], dim=axis)
+        return inputs
+
+    def decode(self, inputs, axis=1):
+        """
+        Slice each early fused input along ``axis`` into its named components
+
+        Example:
+            >>> import torch
+            >>> dims = (4, 4)
+            >>> components = {
+            >>>     'rgb': torch.rand(3, *dims),
+            >>>     'ir': torch.rand(1, *dims),
+            >>> }
+            >>> self = ChannelSpec('rgb|ir')
+            >>> inputs = self.encode(components)
+            >>> from netharn.data import data_containers
+            >>> item = {k: data_containers.ItemContainer(v, stack=True)
+            >>>         for k, v in inputs.items()}
+            >>> batch = data_containers.container_collate([item, item])
+            >>> components = self.decode(batch)
+        """
+        parsed = self.parse()
+        components = dict()
+        for key, parts in parsed.items():
+            fused = inputs[key]
+            idx1 = 0
+            for part in parts:
+                idx2 = idx1 + self._size_lut.get(part, 1)
+                # Index with a tuple: a list of slices relies on the legacy
+                # sequence-as-tuple rule, which NumPy no longer accepts.
+                index = tuple([slice(None)] * axis + [slice(idx1, idx2)])
+                components[part] = fused[index]
+                idx1 = idx2
+        return components
+
+
+if __name__ == '__main__':
+    """
+    CommandLine:
+        python ~/code/netharn/netharn/data/channel_spec.py all
+    """
+    import xdoctest
+    xdoctest.doctest_module(__file__)
diff --git a/netharn/data/coco_api.py b/netharn/data/coco_api.py
index 5024973fd6b79cfa95e88f58332df8fdd570b94a..207750b8230ef8925b78a1d87013e76eab687c81 100644
--- a/netharn/data/coco_api.py
+++ b/netharn/data/coco_api.py
@@ -3,7 +3,7 @@
 DEPRECATED
 
 NOTE:
-    THIS IS DEPRECATED IN FAVOR OF COCO_DATASET IN NDSAMPLER
+    THIS IS DEPRECATED IN FAVOR OF COCO_DATASET IN KWCOCO
 
 
 Extended MS-COCO API. Currently only supports keypoints and bounding boxes.
diff --git a/netharn/data/data_containers.py b/netharn/data/data_containers.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8abfdc3fc273e00b7db7342f07dc55cb28f483e
--- /dev/null
+++ b/netharn/data/data_containers.py
@@ -0,0 +1,846 @@
+"""
+Proof-of-concept for porting mmcv DataContainer concept to netharn. Depending
+on how well this works these features might be useful as a standalone module or
+to contribute to torch proper.
+
+References:
+    https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py
+    https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/collate.py
+    https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/scatter_gather.py
+
+FIXME 0 dimension tensors
+"""
+import torch.utils.data as torch_data
+import torch
+import ubelt as ub
+import numpy as np  # NOQA
+import re
+import collections
+import torch.nn.functional as F
+# from torch.nn.parallel import DataParallel
+from itertools import chain
+from netharn.device import DataParallel, DataSerial, XPU
+from torch.nn.parallel._functions import _get_stream
+from torch.nn.parallel._functions import Scatter as OrigScatter
+from torch.nn.parallel._functions import Gather as OrigGather
+from torch._six import container_abcs
+from torch._six import int_classes, string_classes
+default_collate = torch_data.dataloader.default_collate
+
+
+# numpy_type_map = torch_data.dataloader.numpy_type_map  # moved in torch 1.1.0
+numpy_type_map = {
+    'float64': torch.DoubleTensor,
+    'float32': torch.FloatTensor,
+    'float16': torch.HalfTensor,
+    'int64': torch.LongTensor,
+    'int32': torch.IntTensor,
+    'int16': torch.ShortTensor,
+    'int8': torch.CharTensor,
+    'uint8': torch.ByteTensor,
+}
+
+
+class CollateException(Exception):
+    pass
+
+
+_DEBUG = False
+
+
+class BatchContainer(ub.NiceRepr):
+    """
+    A container for a set of items in a batch. Usually this is for network
+    outputs or a set of items that have already been collated.
+
+    Attributes:
+        data (List): Unlike ItemContainer, data is always a list where
+            len(data) is the number of devices this batch will run on.
+    """
+    def __init__(self, data, stack=False, padding_value=-1, cpu_only=False,
+                 pad_dims=2):
+        self.data = data
+        self.meta = {
+            'stack': stack,
+            'padding_value': padding_value,
+            'cpu_only': cpu_only,
+            'pad_dims': pad_dims,
+        }
+
+    def __nice__(self):
+        shape_repr = ub.repr2(nestshape(self.data), nl=-2)
+        # return 'nestshape(data)={}, **{}'.format(shape_repr, ub.repr2(self.meta, nl=0))
+        return 'nestshape(data)={}'.format(shape_repr)
+
+    def __getitem__(self, index):
+        cls = self.__class__
+        return cls([d[index] for d in self.data], **self.meta)
+
+    @property
+    def cpu_only(self):
+        return self.meta['cpu_only']
+
+    @property
+    def stack(self):
+        return self.meta['stack']
+
+    @property
+    def padding_value(self):
+        return self.meta['padding_value']
+
+    @property
+    def pad_dims(self):
+        return self.meta['pad_dims']
+
+    @classmethod
+    def cat(cls, items, dim=0):
+        """
+        Concatenate data in multiple BatchContainers
+
+        Example:
+            d1 = BatchContainer([torch.rand(3, 3, 1, 1), torch.rand(2, 3, 1, 1)])
+            d2 = BatchContainer([torch.rand(3, 1, 1, 1), torch.rand(2, 1, 1, 1)])
+            items = [d1, d2]
+            self = BatchContainer.cat(items, dim=1)
+        """
+        newdata = []
+        num_devices = len(items[0].data)
+        for device_idx in range(num_devices):
+            parts = [item.data[device_idx] for item in items]
+            newpart = torch.cat(parts, dim=dim)
+            newdata.append(newpart)
+        self = cls(newdata, **items[0].meta)
+        return self
+
+
+class ItemContainer(ub.NiceRepr):
+    """
+    A container for uncollated items that defines a specific collation
+    strategy. Based on mmcv's DataContainer.
+    """
+
+    def __init__(
+        self,
+        data,
+        stack=False,
+        padding_value=-1,
+        cpu_only=False,
+        pad_dims=2
+    ):
+        self._data = data
+        assert pad_dims in [None, 1, 2, 3]
+        self.meta = {
+            'stack': stack,
+            'padding_value': padding_value,
+            'cpu_only': cpu_only,
+            'pad_dims': pad_dims,
+        }
+
+    def __nice__(self):
+        shape_repr = ub.repr2(nestshape(self.data), nl=-2)
+        return 'nestshape(data)={}'.format(shape_repr)
+        # return 'nestshape(data)={}, **{}'.format(shape_repr, ub.repr2(self.meta, nl=0))
+
+    @classmethod
+    def demo(cls, key='img', rng=None, **kwargs):
+        """
+        Create data for tests
+        """
+        import kwarray
+        rng = kwarray.ensure_rng(rng)
+        if key == 'img':
+            shape = kwargs.get('shape', (3, 512, 512))
+            data = rng.rand(*shape).astype(np.float32)
+            data = torch.from_numpy(data)
+            self = cls(data, stack=True)
+        elif key == 'labels':
+            n = rng.randint(0, 10)
+            data = rng.randint(0, 10, n)
+            data = torch.from_numpy(data)
+            self = cls(data, stack=False)
+        else:
+            raise KeyError(key)
+        return self
+
+    def __getitem__(self, index):
+        assert self.stack, 'can only index into stackable items'
+        cls = self.__class__
+        return cls(self.data[index], **self.meta)
+
+    @property
+    def data(self):
+        return self._data
+
+    @property
+    def datatype(self):
+        if isinstance(self.data, torch.Tensor):
+            return self.data.type()
+        else:
+            return type(self.data)
+
+    @property
+    def cpu_only(self):
+        return self.meta['cpu_only']
+
+    @property
+    def stack(self):
+        return self.meta['stack']
+
+    @property
+    def padding_value(self):
+        return self.meta['padding_value']
+
+    @property
+    def pad_dims(self):
+        return self.meta['pad_dims']
+
+    def size(self, *args, **kwargs):
+        return self.data.size(*args, **kwargs)
+
+    @property
+    def shape(self):
+        return self.data.shape
+
+    def dim(self):
+        return self.data.dim()
+
+    @classmethod
+    def _collate(cls, inbatch, num_devices=None):
+        """
+        Collates a sequence of ItemContainers
+
+        Args:
+            inbatch (Sequence[ItemContainer]): datacontainers with the same
+                parameters.
+
+            num_devices (int): number of groups, if None, then uses one group.
+
+        Example:
+            >>> print('Collate Image ItemContainer')
+            >>> inbatch = [ItemContainer.demo('img') for _ in range(5)]
+            >>> print('inbatch = {}'.format(ub.repr2(inbatch)))
+            >>> result = ItemContainer._collate(inbatch, 2)
+            >>> print('result1 = {}'.format(ub.repr2(result, nl=1)))
+            >>> result = ItemContainer._collate(inbatch, 1)
+            >>> print('result2 = {}'.format(ub.repr2(result, nl=1)))
+            >>> result = ItemContainer._collate(inbatch, None)
+            >>> print('resultN = {}'.format(ub.repr2(result, nl=1)))
+
+            >>> print('Collate Label ItemContainer')
+            >>> inbatch = [ItemContainer.demo('labels') for _ in range(5)]
+            >>> print('inbatch = {}'.format(ub.repr2(inbatch, nl=1)))
+            >>> result = ItemContainer._collate(inbatch, 1)
+            >>> print('result1 = {}'.format(ub.repr2(result, nl=1)))
+            >>> result = ItemContainer._collate(inbatch, 2)
+            >>> print('result2 = {}'.format(ub.repr2(result, nl=1)))
+            >>> result = ItemContainer._collate(inbatch, None)
+            >>> print('resultN = {}'.format(ub.repr2(result, nl=1)))
+        """
+        item0 = inbatch[0]
+        bsize = len(inbatch)
+        if num_devices is None:
+            num_devices = 1
+
+        samples_per_device = int(np.ceil(bsize / num_devices))
+
+        # assert bsize % samples_per_device == 0
+        stacked = []
+        if item0.cpu_only:
+            # chunking logic
+            stacked = []
+            for i in range(0, bsize, samples_per_device):
+                stacked.append(
+                    [sample.data for sample in inbatch[i:i + samples_per_device]])
+
+        elif item0.stack:
+            for i in range(0, bsize, samples_per_device):
+                item = inbatch[i]
+                pad_dims_ = item.pad_dims
+                assert isinstance(item.data, torch.Tensor)
+
+                if pad_dims_ is not None:
+                    # Note: can probably reimplement this using padded collate
+                    # logic
+                    ndim = item.dim()
+                    assert ndim > pad_dims_
+                    max_shape = [0 for _ in range(pad_dims_)]
+                    for dim in range(1, pad_dims_ + 1):
+                        max_shape[dim - 1] = item.shape[-dim]
+                    for sample in inbatch[i:i + samples_per_device]:
+                        for dim in range(0, ndim - pad_dims_):
+                            assert item.shape[dim] == sample.shape[dim]
+                        for dim in range(1, pad_dims_ + 1):
+                            max_shape[dim - 1] = max(max_shape[dim - 1], sample.shape[-dim])
+                    padded_samples = []
+                    for sample in inbatch[i:i + samples_per_device]:
+                        pad = [0 for _ in range(pad_dims_ * 2)]
+                        for dim in range(1, pad_dims_ + 1):
+                            pad[2 * dim - 1] = max_shape[dim - 1] - sample.shape[-dim]
+                        padded_samples.append(
+                            F.pad(sample.data, pad, value=sample.padding_value))
+                    stacked.append(default_collate(padded_samples))
+
+                elif pad_dims_ is None:
+                    stacked.append(
+                        default_collate([
+                            sample.data
+                            for sample in inbatch[i:i + samples_per_device]
+                        ]))
+                else:
+                    raise ValueError(
+                        'pad_dims should be either None or integers (1-3)')
+
+        else:
+            for i in range(0, bsize, samples_per_device):
+                stacked.append(
+                    [sample.data for sample in inbatch[i:i + samples_per_device]])
+        result = BatchContainer(stacked, **item0.meta)
+        return result
+
+
+def container_collate(inbatch, num_devices=None):
+    """Puts each data field into a tensor/DataContainer with outer dimension
+    batch size.
+
+    Extend default_collate to add support for
+    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.
+
+    1. cpu_only = True, e.g., meta data
+    2. cpu_only = False, stack = True, e.g., images tensors
+    3. cpu_only = False, stack = False, e.g., gt bboxes
+
+    Args:
+        inbatch (Sequence): items to collate; sequences and mappings recurse
+        num_devices (int, default=None): forwarded to ItemContainer._collate
+
+    Ignore:
+        >>> # DISABLE_DOCTSET
+        >>> dataset = DetectFitDataset.demo(key='shapes8', augment='complex', window_dims=(512, 512), gsize=(1920, 1080))
+        >>> inbatch = [dataset[0], dataset[1], dataset[2]]
+        >>> raw_batch = container_collate(inbatch)
+        >>> target_gpus = [0]
+        >>> inputs, kwargs = container_scatter_kwargs(raw_batch, {}, target_gpus)
+        >>> loader = torch.utils.data.DataLoader(dataset, collate_fn=container_collate, num_workers=0)
+
+    Example:
+        >>> item1 = {
+        >>>     'im': torch.rand(3, 512, 512),
+        >>>     'label': torch.rand(3),
+        >>> }
+        >>> item2 = {
+        >>>     'im': torch.rand(3, 512, 512),
+        >>>     'label': torch.rand(3),
+        >>> }
+        >>> item3 = {
+        >>>     'im': torch.rand(3, 512, 512),
+        >>>     'label': torch.rand(3),
+        >>> }
+        >>> batch = batch_items = [item1, item2, item3]
+        >>> raw_batch = container_collate(batch_items)
+        >>> print('batch_items = {}'.format(ub.repr2(batch_items, nl=2)))
+        >>> print('raw_batch = {}'.format(ub.repr2(raw_batch, nl=2)))
+
+        >>> batch = batch_items = [
+        >>>     {'im': ItemContainer.demo('img'), 'label': ItemContainer.demo('labels')},
+        >>>     {'im': ItemContainer.demo('img'), 'label': ItemContainer.demo('labels')},
+        >>>     {'im': ItemContainer.demo('img'), 'label': ItemContainer.demo('labels')},
+        >>> ]
+        >>> raw_batch = container_collate(batch, num_devices=2)
+        >>> print('batch_items = {}'.format(ub.repr2(batch_items, nl=2)))
+        >>> print('raw_batch = {}'.format(ub.repr2(raw_batch, nl=2)))
+
+        >>> raw_batch = container_collate(batch, num_devices=6)
+        >>> raw_batch = container_collate(batch, num_devices=3)
+        >>> raw_batch = container_collate(batch, num_devices=4)
+        >>> raw_batch = container_collate(batch, num_devices=1)
+        >>> print('batch = {}'.format(ub.repr2(batch, nl=1)))
+    """
+
+    if not isinstance(inbatch, container_abcs.Sequence):
+        raise TypeError("{} is not supported.".format(type(inbatch)))
+    item0 = inbatch[0]
+    if isinstance(item0, ItemContainer):
+        return item0.__class__._collate(inbatch, num_devices=num_devices)
+    elif isinstance(item0, string_classes):
+        # str is a Sequence of str, so recursing on it would never terminate
+        return default_collate(inbatch)
+    elif isinstance(item0, container_abcs.Sequence):
+        return [container_collate(samples, num_devices=num_devices)
+                for samples in zip(*inbatch)]
+    elif isinstance(item0, container_abcs.Mapping):
+        return {
+            key: container_collate([d[key] for d in inbatch],
+                                   num_devices=num_devices)
+            for key in item0
+        }
+    else:
+        return default_collate(inbatch)
+
+
+def _collate_else(batch, collate_func):
+    """
+    Handles recursion in the else case for these special collate functions
+
+    This duplicates all non-tensor cases from `torch_data.dataloader.default_collate`
+    This also contains support for collating slices.
+    """
+    error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
+    elem_type = type(batch[0])
+    if elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
+            and elem_type.__name__ != 'string_':
+        elem = batch[0]
+        if elem_type.__name__ == 'ndarray':
+            # array of string classes and object
+            if re.search('[SaUO]', elem.dtype.str) is not None:
+                raise TypeError(error_msg.format(elem.dtype))
+
+            return torch.stack([torch.from_numpy(b) for b in batch], 0)
+        if elem.shape == ():  # scalars
+            py_type = float if elem.dtype.name.startswith('float') else int
+            return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
+    elif isinstance(batch[0], slice):
+        batch = default_collate([{
+            'start': sl.start,
+            'stop': sl.stop,
+            'step': 1 if sl.step is None else sl.step
+        } for sl in batch])
+        return batch
+    elif isinstance(batch[0], int_classes):
+        return torch.LongTensor(batch)
+    elif isinstance(batch[0], float):
+        return torch.DoubleTensor(batch)
+    elif isinstance(batch[0], string_classes):
+        return batch
+    elif isinstance(batch[0], container_abcs.Mapping):
+        # Hack the mapping collation implementation to print error info
+        if _DEBUG:
+            collated = {}
+            try:
+                for key in batch[0]:
+                    collated[key] = collate_func([d[key] for d in batch])
+            except Exception:
+                print('\n!!Error collating key = {!r}\n'.format(key))
+                raise
+            return collated
+        else:
+            return {key: collate_func([d[key] for d in batch]) for key in batch[0]}
+    elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'):  # namedtuple
+        return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
+    elif isinstance(batch[0], container_abcs.Sequence):
+        transposed = zip(*batch)
+        return [collate_func(samples) for samples in transposed]
+    else:
+        raise TypeError((error_msg.format(type(batch[0]))))
+
+
+# ----
+
+
+def _fn_scatter(input, devices, streams=None):
+    """Scatters tensor across multiple GPUs.
+
+    from mmcv.parallel._functions
+    """
+    if streams is None:
+        streams = [None] * len(devices)
+
+    if isinstance(input, list):
+        chunk_size = (len(input) - 1) // len(devices) + 1
+        outputs = [
+            _fn_scatter(input[i], [devices[i // chunk_size]],
+                          [streams[i // chunk_size]]) for i in range(len(input))
+        ]
+        return outputs
+    elif isinstance(input, torch.Tensor):
+        output = input.contiguous()
+        # TODO: copy to a pinned buffer first (if copying from CPU)
+        stream = streams[0] if output.numel() > 0 else None
+        with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
+            output = output.cuda(devices[0], non_blocking=True)
+        return output
+    else:
+        raise Exception('Unknown type {}.'.format(type(input)))
+
+
+def synchronize_stream(output, devices, streams):
+    """Wait on copy streams; chunking must mirror ``_fn_scatter``."""
+    if isinstance(output, list):
+        chunk_size = (len(output) - 1) // len(devices) + 1
+        for i, item in enumerate(output):
+            k = i // chunk_size
+            synchronize_stream(item, [devices[k]], [streams[k]])
+    elif isinstance(output, torch.Tensor):
+        if output.numel() != 0:
+            with torch.cuda.device(devices[0]):
+                main_stream = torch.cuda.current_stream()
+                main_stream.wait_stream(streams[0])
+                output.record_stream(main_stream)
+    else:
+        raise Exception('Unknown type {}.'.format(type(output)))
+
+
+def get_input_device(input):
+    if isinstance(input, list):
+        for item in input:
+            input_device = get_input_device(item)
+            if input_device != -1:
+                return input_device
+        return -1
+    elif isinstance(input, torch.Tensor):
+        return input.get_device() if input.is_cuda else -1
+    else:
+        raise Exception('Unknown type {}.'.format(type(input)))
+
+
+class ContainerScatter(object):
+
+    @staticmethod
+    def forward(target_gpus, input):
+        input_device = get_input_device(input)
+        streams = None
+        if input_device == -1:
+            # Perform CPU to GPU copies in a background stream
+            streams = [_get_stream(device) for device in target_gpus]
+
+        outputs = _fn_scatter(input, target_gpus, streams)
+        # Synchronize with the copy stream
+        if streams is not None:
+            synchronize_stream(outputs, target_gpus, streams)
+
+        return tuple(outputs)
+
+# ----
+
+
+class ContainerDataParallel(DataParallel):
+    """
+
+    Ignore:
+        import torch
+        from torch.nn.parallel import DataParallel
+
+        # First let's create a simple model where the forward function accepts
+        # kwargs. I don't really care what they do for this example, but imagine
+        # they are flags that change the behavior of forward.
+
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.conv = torch.nn.Conv2d(1, 1, 1)
+
+            def forward(self, im, **kwargs):
+                return self.conv(im)
+
+        raw_model = MyModel()
+        raw_model = raw_model.to(0)
+
+        # Next create some dummy input and verify the model works by itself
+        im = torch.zeros(1, 1, 1, 1).to(0)
+        raw_model.forward(im)
+
+        # Now create a DataParallel object to map the input across two devices
+        par_model = DataParallel(raw_model, device_ids=[0, 1], output_device=0)
+
+        # In the case where kwargs are not specified DataParallel correctly
+        # understands that there is only one item in the batch and applies the
+        # operation on only one GPU.
+        par_model.forward(im)
+
+        # Howver, if you pass kwargs, then data parallel breaks
+        par_model.forward(im, flag1=True)
+
+        inputs = (im,)
+        kwargs = dict(flag1=True, flag2=False)
+        s1, k1 = par_model.scatter(inputs, kwargs, [0, 1])
+        replicas = par_model.replicate(par_model.module, par_model.device_ids[:len(s1)])
+        outputs = par_model.parallel_apply(replicas, s1, k1)
+
+        container_scatter(inputs, [0, 1])[0]
+
+        inbatch = [ItemContainer.demo('img', shape=(1, 1, 1)) for _ in range(5)]
+        im = ItemContainer._collate(inbatch, 5)
+
+        im = torch.zeros(1, 1, 1, 1).to(0)
+        inputs = (im,)
+        self = ContainerDataParallel(raw_model, device_ids=[0, 1], output_device=0)
+        self.forward(*inputs, **kwargs)
+    """
+
+    def forward(self, *inputs, **kwargs):
+        """
+        Unchanged version for torch.nn.DataParallel
+        """
+        if not self.device_ids:
+            return self.module(*inputs, **kwargs)
+
+        for t in chain(self.module.parameters(), self.module.buffers()):
+            if t.device != self.src_device_obj:
+                raise RuntimeError("module must have its parameters and buffers "
+                                   "on device {} (device_ids[0]) but found one of "
+                                   "them on device: {}".format(self.src_device_obj, t.device))
+
+        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
+        if len(self.device_ids) == 1:
+            return self.module(*inputs[0], **kwargs[0])
+        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
+        outputs = self.parallel_apply(replicas, inputs, kwargs)
+        return self.gather(outputs, self.output_device)
+
+    def scatter(self, inputs, kwargs, device_ids):
+        return container_scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
+
+    def gather(self, outputs, output_device):
+        # not part of mmcv's original impl
+        return container_gather(outputs, output_device, dim=self.dim)
+
+# ----
+
+
+def container_scatter(inputs, target_gpus, dim=0):
+    """Scatter inputs to target gpus.
+
+    Adapted from mmcv.parallel.scatter_gather
+
+    The only difference from original :func:`scatter` is to add support for
+    :class:`BatchContainer` (cf. :class:`~mmcv.parallel.DataContainer`).
+    """
+
+    def scatter_map(obj):
+        if isinstance(obj, torch.Tensor):
+            return OrigScatter.apply(target_gpus, None, dim, obj)
+        if isinstance(obj, BatchContainer):
+            if obj.cpu_only:
+                return obj.data  # cpu_only data is handed back without a device copy
+            else:
+                return ContainerScatter.forward(target_gpus, obj.data)
+        if isinstance(obj, tuple) and len(obj) > 0:
+            return list(zip(*map(scatter_map, obj)))
+        if isinstance(obj, list) and len(obj) > 0:
+            out = list(map(list, zip(*map(scatter_map, obj))))
+            return out
+        if isinstance(obj, dict) and len(obj) > 0:
+            out = list(map(type(obj), zip(*map(scatter_map, obj.items()))))
+            return out
+        return [obj for targets in target_gpus]  # any other object: one reference per target
+
+    # After scatter_map is called, a scatter_map cell will exist. This cell
+    # has a reference to the actual function scatter_map, which has references
+    # to a closure that has a reference to the scatter_map cell (because the
+    # fn is recursive). To avoid this reference cycle, we set the function to
+    # None, clearing the cell
+    try:
+        return scatter_map(inputs)
+    finally:
+        scatter_map = None
+
+
+def container_scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
+    """
+    Scatter positional inputs and a kwargs dictionary to ``target_gpus``.
+
+    Example:
+        >>> # xdoctest: +REQUIRES(--multi-gpu)
+        >>> inputs = [torch.rand(1, 1, 1, 1)]
+        >>> kwargs = dict(a=1, b=2)
+        >>> target_gpus = [0, 1]
+        >>> a1, k1 = container_scatter_kwargs(inputs, kwargs, target_gpus)
+
+        >>> # xdoctest: +REQUIRES(--multi-gpu)
+        >>> inputs = [torch.rand(1, 1, 1, 1)]
+        >>> kwargs = dict(a=torch.rand(1, 1, 1, 1), b=2)
+        >>> target_gpus = [0, 1]
+        >>> a1, k1 = container_scatter_kwargs(inputs, kwargs, target_gpus)
+    """
+    inputs = container_scatter(inputs, target_gpus, dim) if inputs else []
+    kwargs = container_scatter(kwargs, target_gpus, dim) if kwargs else []
+
+    # Pad the shorter list so that both have one entry per scattered chunk
+    num_missing = len(kwargs) - len(inputs)
+    if num_missing > 0:
+        inputs.extend([() for _ in range(num_missing)])
+    elif num_missing < 0:
+        kwargs.extend([{} for _ in range(-num_missing)])
+
+    # patch for cases where #inputs < len(target_gpus) and len(kwargs) > 0
+    PATCH = 1
+    if PATCH:
+        num_empty = sum(len(p) == 0 for p in inputs)
+        num_full = len(inputs) - num_empty
+        if num_full > 0 and num_empty > 0:
+            # Keep only the leading ``num_full`` entries of both lists
+            inputs = inputs[0:num_full]
+            kwargs = kwargs[0:num_full]
+
+    return tuple(inputs), tuple(kwargs)
+
+
+def container_gather(outputs, target_device, dim=0):
+    r"""
+    Gathers tensors from different GPUs on a specified device
+      (-1 means the CPU).
+
+    The only difference from original :func:`gather` is to add support for
+    :type:`BatchContainer`.
+
+    Ignore:
+        >>> import kwarray
+        >>> rng = kwarray.ensure_rng(0)
+        >>> outputs = [
+        >>>     {
+        >>>         'batch_results': BatchContainer([
+        >>>             torch.rand(rng.randint(0, 10), 5).to(0)
+        >>>             for _ in range(4)
+        >>>         ], stack=False),
+        >>>         'loss_parts': {
+        >>>             'part1': torch.rand(2).sum().to(0),
+        >>>             'part2': torch.rand(3).sum().to(0),
+        >>>         },
+        >>>     },
+        >>>     {
+        >>>         'batch_results': BatchContainer([
+        >>>             torch.rand(rng.randint(0, 10), 5).to(1)
+        >>>             for _ in range(4)
+        >>>         ], stack=False),
+        >>>         'loss_parts': {
+        >>>             'part1': torch.rand(2).sum().to(1),
+        >>>             'part2': torch.rand(3).sum().to(1),
+        >>>         }
+        >>>     }
+        >>> ]
+        >>> _report_data_shape(outputs)
+        >>> target_device = 0
+        >>> dim = 0
+        >>> gathered = container_gather(outputs, target_device, dim)
+        >>> _report_data_shape(gathered)
+    """
+    def gather_map(outputs_):
+        out = outputs_[0]
+        if isinstance(out, torch.Tensor):
+            # NOTE(review): a leftover debugging hook here flagged that when
+            # every tensor is 0-dimensional and dim == 0, "unsqueeze
+            # warnings will trigger" in the original Gather -- confirm
+            # whether scalar outputs need special handling.
+            return OrigGather.apply(target_device, dim, *outputs_)
+        if isinstance(out, BatchContainer):
+            # Flatten the per-device containers into one list of items
+            newdata = [d for dc in outputs_ for d in dc.data]
+            if not out.cpu_only:
+                import netharn as nh
+                target_xpu = nh.XPU(target_device)
+                newdata = target_xpu.move(newdata)
+            return newdata
+            # NOTE(review): the flattened items are returned directly; no
+            # new BatchContainer is built around them.
+        if out is None:
+            return None
+        if isinstance(out, dict):
+            out0_keys = set(out.keys())
+            output_keys = [set(d.keys()) for d in outputs_]
+            if not all(out0_keys == k for k in output_keys):
+                problem_keys = (
+                    set.union(*output_keys) - set.intersection(*output_keys)
+                )
+                raise ValueError(
+                    'All dicts must have the same keys. '
+                    'problem_keys={}'.format(problem_keys))
+            return type(out)(((k, gather_map([d[k] for d in outputs_]))
+                              for k in out))
+        return type(out)(map(gather_map, zip(*outputs_)))
+
+    # gather_map calls itself, so its closure cell references the function,
+    # forming a reference cycle. Setting the name to None clears the cycle.
+    try:
+        res = gather_map(outputs)
+    finally:
+        gather_map = None
+    return res
+
+
+# ---
+
+
+class ContainerXPU(XPU):
+
+    def mount(xpu, model):
+        """
+        Move a model onto this XPU and wrap it in a serial / parallel wrapper.
+        Note that this works inplace for non-Tensor objects.
+
+        Args:
+            model (torch.nn.Module): the model to mount
+
+        Returns:
+            DataSerial | ContainerDataParallel :
+                the model mounted on the XPU (which may be multiple GPUs)
+
+        Example:
+            >>> model = torch.nn.Conv2d(1, 1, 1)
+            >>> xpu = XPU()
+        """
+        # Unwrap the core model if necessary, then move it
+        model = xpu.move(xpu.raw(model))
+        device_ids = xpu._device_ids
+        if not device_ids or len(device_ids) <= 1:
+            # No device ids or a single one: use the serial wrapper
+            return DataSerial(model)
+        return ContainerDataParallel(
+            model,
+            device_ids=device_ids,
+            output_device=xpu._main_device_id)
+
+
+def nestshape(data):
+    """
+    Summarize nested data by replacing tensors and arrays with their shapes.
+
+    Dicts, lists, tuples and objects whose type name contains "Container"
+    are traversed recursively. Strings, bytes, ints, floats and slices are
+    kept as-is.
+
+    Raises:
+        TypeError: if an item of any other type is encountered
+    """
+    import ubelt as ub
+    import torch
+    import numpy as np
+
+    def _summarize(item):
+        if isinstance(item, dict):
+            pairs = [(key, _summarize(val)) for key, val in item.items()]
+            return ub.odict(sorted(pairs))
+        clsname = type(item).__name__
+        if 'Container' in clsname:
+            # The container attributes become part of the summary key
+            attrs = ub.odict(sorted([
+                ('stack', item.stack),
+                ('cpu_only', item.cpu_only),
+            ]))
+            return {clsname + ub.repr2(attrs, nl=0): _summarize(item.data)}
+        if isinstance(item, list):
+            return [_summarize(val) for val in item]
+        if isinstance(item, tuple):
+            return tuple([_summarize(val) for val in item])
+        if isinstance(item, (torch.Tensor, np.ndarray)):
+            return item.shape
+        if isinstance(item, (str, bytes, int, float, slice)):
+            return item
+        raise TypeError(type(item))
+
+    return _summarize(data)
+
+
+def _report_data_shape(data):
+    # Print the nestshape summary of ``data``
+    print('d = {}'.format(ub.repr2(nestshape(data), nl=-2)))
+
+
+def _debug_inbatch_shapes(inbatch):
+    import ubelt as ub
+    print('len(inbatch) = {}'.format(len(inbatch)))
+    extensions = ub.util_format.FormatterExtensions()
+    # Register a formatter that renders tensors / arrays as type and shape
+    @extensions.register((torch.Tensor, np.ndarray))
+    def format_shape(data, **kwargs):
+        return ub.repr2(dict(type=str(type(data)), shape=data.shape), nl=1, sv=1)
+
+    print('inbatch = ' + ub.repr2(inbatch, extensions=extensions, nl=True))
diff --git a/netharn/data/toydata.py b/netharn/data/toydata.py
index b633cd0197d0d34c7e90edfb082e3ff5472ddf25..23b1e37fc646c8b40eb1cd783bcb4c5792f2f67e 100644
--- a/netharn/data/toydata.py
+++ b/netharn/data/toydata.py
@@ -1,49 +1,61 @@
-import torch
+"""
+Simple arbitrary-sized datasets for testing / demo purposes
+"""
 import numpy as np
 import itertools as it
-from torch.utils import data as torch_data
-from netharn.data import base
-from netharn import util
 import ubelt as ub
+import torch
+from torch.utils import data as torch_data
+
+import kwarray
+
+
+class ToyData1d(torch_data.Dataset):
+    """
+    Spiral xy-data points
+
+    Args:
+        n (int, default=2000): dataset size
+        rng (RandomCoercable, default=None): seed or random state
+
+    Note:
+        this is 1d in the sense that each data point has shape with len(1),
+        even though they can be interpreted as 2d vector points.
 
+    CommandLine:
+        python -m netharn.data.toydata ToyData1d --show
+
+    Example:
+        >>> dset = ToyData1d()
+        >>> data, labels = next(iter(dset.make_loader(batch_size=2000)))
+        >>> # xdoctest: +REQUIRES(--show)
+        >>> import kwplot
+        >>> plt = kwplot.autoplt()
+        >>> kwplot.figure(fnum=1, doclf=True)
+        >>> cls1 = data[labels == 0]
+        >>> cls2 = data[labels == 1]
+        >>> a, b = cls1.T.numpy()
+        >>> c, d = cls2.T.numpy()
+        >>> plt.plot(a, b, 'rx')
+        >>> plt.plot(c, d, 'bx')
+        >>> kwplot.show_if_requested()
+    """
 
-class ToyData1d(torch_data.Dataset, base.DataMixin):
-    def __init__(self, rng=None):
-        """
-        Spiral 2d data points
-
-        CommandLine:
-            python ~/code/netharn/netharn/data/toydata.py ToyData1d --show
-
-        Example:
-            >>> dset = ToyData1d()
-            >>> data, labels = next(iter(dset.make_loader(batch_size=2000)))
-            >>> # xdoctest: +REQUIRES(--show)
-            >>> from netharn.util import mplutil
-            >>> mplutil.qtensure()  # xdoc: +SKIP
-            >>> mplutil.figure(fnum=1, doclf=True)
-            >>> cls1 = data[labels == 0]
-            >>> cls2 = data[labels == 1]
-            >>> from matplotlib import pyplot as plt
-            >>> a, b = cls1.T.numpy()
-            >>> c, d = cls2.T.numpy()
-            >>> plt.plot(a, b, 'rx')
-            >>> plt.plot(c, d, 'bx')
-            >>> mplutil.show_if_requested()
-        """
-        rng = util.ensure_rng(rng)
+    def __init__(self, n=2000, rng=None):
+        rng = kwarray.ensure_rng(rng)
 
         # spiral equation in parameteric form:
         # x(t) = r(t) * cos(t)
         # y(t) = r(t) * sin(t)
 
         # class 1
-        n = 1000
-        theta1 = rng.rand(n) * 10
+        n1 = n // 2
+        theta1 = rng.rand(n1) * 10
         x1 = theta1 * np.cos(theta1)
         y1 = theta1 * np.sin(theta1)
 
-        theta2 = rng.rand(n) * 10
+        n2 = n - n1
+        theta2 = rng.rand(n2) * 10
         x2 = -theta2 * np.cos(theta2)
         y2 = -theta2 * np.sin(theta2)
 
@@ -51,10 +63,10 @@ class ToyData1d(torch_data.Dataset, base.DataMixin):
         labels = []
 
         data.extend(list(zip(x1, y1)))
-        labels.extend([0] * n)
+        labels.extend([0] * n1)
 
         data.extend(list(zip(x2, y2)))
-        labels.extend([1] * n)
+        labels.extend([1] * n2)
 
         data = np.array(data)
         labels = np.array(labels)
@@ -62,7 +74,8 @@ class ToyData1d(torch_data.Dataset, base.DataMixin):
         self.data = data
         self.labels = labels
 
-        suffix = ub.hash_data([rng], base='abc', hasher='sha1')[0:16]
+        suffix = ub.hash_data([
+            rng], base='abc', hasher='sha1')[0:16]
         self.input_id = 'TD1D_{}_'.format(n) + suffix
 
     def __len__(self):
@@ -73,26 +86,38 @@ class ToyData1d(torch_data.Dataset, base.DataMixin):
         label = int(self.labels[index])
         return data, label
 
+    def make_loader(self, *args, **kwargs):
+        # Forward every argument to torch's DataLoader built over this dataset
+        return torch_data.DataLoader(self, *args, **kwargs)
+
 
-class ToyData2d(torch_data.Dataset, base.DataMixin):
+class ToyData2d(torch_data.Dataset):
     """
+    Simple black-on-white and white-on-black images.
+
+    Args:
+        n (int, default=100): dataset size
+        size (int, default=4): width / height
+        border (int, default=1): border mode
+        rng (RandomCoercable, default=None): seed or random state
+
     CommandLine:
-        python ~/code/netharn/netharn/data/toydata.py ToyData2d --show
+        python -m netharn.data.toydata ToyData2d --show
 
     Example:
         >>> self = ToyData2d()
         >>> data1, label1 = self[0]
         >>> data2, label2 = self[-1]
         >>> # xdoctest: +REQUIRES(--show)
-        >>> from netharn.util import mplutil
-        >>> mplutil.qtensure()
-        >>> mplutil.figure(fnum=1, doclf=True)
-        >>> mplutil.imshow(data1.numpy().squeeze(), pnum=(1, 2, 1))
-        >>> mplutil.imshow(data2.numpy().squeeze(), pnum=(1, 2, 2))
-        >>> mplutil.show_if_requested()
+        >>> import kwplot
+        >>> plt = kwplot.autoplt()
+        >>> kwplot.figure(fnum=1, doclf=True)
+        >>> kwplot.imshow(data1.numpy().squeeze(), pnum=(1, 2, 1))
+        >>> kwplot.imshow(data2.numpy().squeeze(), pnum=(1, 2, 2))
+        >>> kwplot.show_if_requested()
     """
     def __init__(self, size=4, border=1, n=100, rng=None):
-        rng = util.ensure_rng(rng)
+        rng = kwarray.ensure_rng(rng)
 
         h = w = size
 
@@ -130,6 +155,10 @@ class ToyData2d(torch_data.Dataset, base.DataMixin):
         label = int(self.labels[index])
         return data, label
 
+    def make_loader(self, *args, **kwargs):
+        # Forward every argument to torch's DataLoader built over this dataset
+        return torch_data.DataLoader(self, *args, **kwargs)
+
 
 if __name__ == '__main__':
     """
diff --git a/netharn/device.py b/netharn/device.py
index c302e3b71ff6baa1f1520a94e64bdfda2bf478ed..34fbdad15e71f0cf736c7df1119e3eab5ed4f396 100644
--- a/netharn/device.py
+++ b/netharn/device.py
@@ -408,7 +408,6 @@ class XPU(ub.NiceRepr):
             }
 
         """
-        gpus = gpu_info()
         info = {
             'available': 0,
             'total': 0,
@@ -427,10 +426,11 @@ class XPU(ub.NiceRepr):
             info['used'] += tup.used * MB
             info['available'] += tup.available * MB
         else:
-            for id in self._device_ids:
-                info['total'] += gpus[id]['mem_total']
-                info['used'] += gpus[id]['mem_used']
-                info['available'] += gpus[id]['mem_avail']
+            gpus = gpu_info()
+            for index in self._device_ids:
+                info['total'] += gpus[index]['mem_total']
+                info['used'] += gpus[index]['mem_used']
+                info['available'] += gpus[index]['mem_avail']
         return info
 
     def __str__(xpu):
@@ -673,7 +673,11 @@ def find_unused_gpu(min_memory=0):
         >>>     item = find_unused_gpu()
         >>>     assert item is None or isinstance(item, int)
     """
-    gpus = gpu_info()
+    try:
+        gpus = gpu_info()
+    except NvidiaSMIError:
+        gpus = None
+
     if not gpus:
         return None
 
@@ -725,6 +729,10 @@ def _query_nvidia_smi(mode, fields):
     return rows
 
 
+class NvidiaSMIError(Exception):
+    """Raised by gpu_info when nvidia-smi cannot be run or reports a failure."""
+
+
 def gpu_info(new_mode=True):
     """
     Run nvidia-smi and parse output
@@ -852,7 +860,7 @@ def gpu_info(new_mode=True):
             print(info['err'])
             warnings.warn('Problem running nvidia-smi: ret='.format(
                 info['ret']))
-            return None
+            raise NvidiaSMIError
         xml_string = info['out']
         root = ET.fromstring(xml_string)
 
@@ -900,7 +908,7 @@ def gpu_info(new_mode=True):
             gpu_rows = _query_nvidia_smi(mode, fields)
         except Exception as ex:
             warnings.warn('Problem running nvidia-smi: {!r}'.format(ex))
-            return None
+            raise NvidiaSMIError
 
         fields = ['pid', 'name', 'gpu_uuid', 'used_memory']
         mode = 'query-compute-apps'
@@ -918,7 +926,7 @@ def gpu_info(new_mode=True):
             gpu['procs'] = []
             gpus[num] = gpu
 
-        gpu_uuid_to_num = {g['gpu_uuid']: gpu['num'] for g in gpus.values()}
+        gpu_uuid_to_num = {gpu['gpu_uuid']: gpu['num'] for gpu in gpus.values()}
 
         for row in proc_rows:
             # Give each GPU info on which processes are using it
@@ -974,10 +982,10 @@ def gpu_info(new_mode=True):
             result = ub.cmd('nvidia-smi')
             if result['ret'] != 0:
                 warnings.warn('Problem running nvidia-smi.')
-                return None
+                raise NvidiaSMIError
         except Exception:
             warnings.warn('Could not run nvidia-smi.')
-            return {}
+            raise NvidiaSMIError
 
         lines = result['out'].splitlines()
 
diff --git a/netharn/examples/classification.py b/netharn/examples/classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff6a1c860254b53cf6246dca5e698e134415e8d5
--- /dev/null
+++ b/netharn/examples/classification.py
@@ -0,0 +1,862 @@
+# -*- coding: utf-8 -*-
+"""
+This is a simple generalized harness for training a classifier on a coco dataset.
+
+Given a COCO-style dataset data (you can create a sample coco dataset using the
+kwcoco CLI), this module trains a classifier on chipped regions denoted by the
+coco annotations. These chips are cropped from the image and resized to the
+specified ``input_dims``. The default network architecture is resnet50. Other
+settings like augmentation, learning rate, batch size, etc can all be specified
+via the command line, a config file, or a Python dictionary (see
+:class:`ClfConfig` for all available arguments).
+
+For details see the other docstrings in this file and / or try running
+yourself.
+
+.. code-block:: bash
+
+    # Install netharn
+    # pip3 install netharn   # TODO: uncomment once 0.5.7 is live
+    pip3 install git+https://gitlab.kitware.com/computer-vision/netharn.git@dev/0.5.7
+
+    # Install kwcoco and autogenerate toy image datasets
+    pip3 install kwcoco
+    kwcoco toydata --dst ./toydata_train.json --key shapes1024
+    kwcoco toydata --dst ./toydata_vali.json --key shapes128  # optional
+    kwcoco toydata --dst ./toydata_test.json --key shapes256  # optional
+
+    # Train a classifier on your dataset
+    python3 -m netharn.examples.classification \
+        --name="My Classification Example" \
+        --train_dataset=./toydata_train.json \
+        --vali_dataset=./toydata_vali.json \
+        --test_dataset=./toydata_test.json \
+        --input_dims=224,244 \
+        --batch_size=32 \
+        --max_epoch=100 \
+        --patience=40 \
+        --xpu=gpu0 \
+        --schedule=ReduceLROnPlateau-p10-c10 \
+        --augmenter=medium \
+        --lr=1e-3
+
+# TODO: describe what the output of this should look like.
+
+"""
+from __future__ import absolute_import, division, print_function, unicode_literals
+from os.path import join
+import numpy as np
+import sys
+import torch
+import ubelt as ub
+
+import netharn as nh
+import kwarray
+import scriptconfig as scfg
+from netharn.data.channel_spec import ChannelSpec
+
+
+class ClfConfig(scfg.Config):
+    """
+    This is the default configuration for running the classification example.
+
+    Instances of this class behave like a dictionary. However, they can also be
+    specified on the command line, via kwargs, or by pointing to a YAML/json
+    file. See :mod:`scriptconfig` for details of how to use
+    :class:`scriptconfig.Config` objects.
+    """
+    default = {
+        'name': scfg.Value('clf_example', help='A human readable tag that is "name" for humans'),
+        'workdir': scfg.Path('~/work/netharn', help='Dump all results in your workdir'),
+
+        'workers': scfg.Value(2, help='number of parallel dataloading jobs'),
+        'xpu': scfg.Value('auto', help='See netharn.XPU for details. can be auto/cpu/xpu/cuda0/0,1,2,3)'),
+
+        'datasets': scfg.Value('special:shapes256', help='Either a special key or a coco file'),
+        'train_dataset': scfg.Value(None),
+        'vali_dataset': scfg.Value(None),
+        'test_dataset': scfg.Value(None),
+
+        'sampler_backend': scfg.Value(None, help='ndsampler backend'),
+
+        'channels': scfg.Value('rgb', help='special channel code. See ChannelSpec'),
+
+        'arch': scfg.Value('resnet50', help='Network architecture code'),
+        'optim': scfg.Value('adam', help='Weight optimizer. Can be SGD, ADAM, ADAMW, etc..'),
+
+        'input_dims': scfg.Value((224, 224), help='Window size to input to the network'),
+        'normalize_inputs': scfg.Value(True, help=(
+            'if True, precompute training mean and std for data whitening')),
+
+        'balance': scfg.Value(None, help='balance strategy. Can be category or None'),
+
+        'augmenter': scfg.Value('simple', help='type of training dataset augmentation'),
+
+        'batch_size': scfg.Value(3, help='number of items per batch'),
+        'num_batches': scfg.Value('auto', help='Number of batches per epoch (mainly for balanced batch sampling)'),
+
+        'max_epoch': scfg.Value(140, help='Maximum number of epochs'),
+        'patience': scfg.Value(140, help='Maximum "bad" validation epochs before early stopping'),
+
+        'lr': scfg.Value(1e-4, help='Base learning rate'),
+        'decay':  scfg.Value(1e-5, help='Base weight decay'),
+        'schedule': scfg.Value(
+            'step90-120', help=(
+                'Special coercible netharn code. Eg: onecycle50, step50, gamma, ReduceLROnPlateau-p10-c10')),
+
+        'init': scfg.Value('noop', help='How to initialized weights: e.g. noop, kaiming_normal, path-to-a-pretrained-model)'),
+        'pretrained': scfg.Path(help=('alternative way to specify a path to a pretrained model')),
+    }
+
+    def normalize(self):
+        if self['pretrained'] in ['null', 'None']:  # string spellings of None
+            self['pretrained'] = None
+
+        if self['pretrained'] is not None:
+            self['init'] = 'pretrained'  # replaces whatever 'init' was set to
+
+
+class ClfModel(nh.layers.Module):
+    """
+    A simple pytorch classification model.
+
+    Note what I consider as "reproducibility" conventions present in this
+        model:
+
+        (1) classes can be specified as a list of class names (or
+            technically anything that is :class:`ndsampler.CategoryTree`
+            coercible). This helps anyone with your pretrained model to
+            understand what it's predicting.
+
+        (2) The expected input channels are specified, as a
+            :class:`netharn.data.ChannelSpec` coercible (e.g. a number, a
+            code like "rgb" or "rgb|disparity", or a dict like structure)
+
+            # TODO: properly define the dict structure, for now just use
+            # strings.
+
+        (3) The input statistics are specified as a dict and applied at runtime
+
+            {
+                'mean': <tensor to subtract>,
+                'std': <tensor to divide by>,
+            }
+
+            This means you don't have to remember these values when loading
+            data at test time, the network remembers them instead.
+
+            # TODO: this has to be better rectified with channel specifications
+            # for now assume only one early fused stream like rgb.
+
+        (4) The inputs and outputs to the network are dictionaries with
+            keys hinting at the proper interpretation of the values.
+
+            The inputs provide a mapping from channel spec keys to early-fused
+            tensors, which can be used in specific ways (e.g. to connect input
+            rgb and disparity signals into late fused network components).
+
+            The outputs provide a mapping to whatever type of output you want
+            to provide. DONT JUST RETURN A SOMETIMES TUPLE OF LOSS AND OUTPUTS
+            IN SOME RANDOM FORMAT! Instead if your network sometimes returns
+            loss then sometimes add the value ``outputs['loss'] = <your
+            loss>``.  And maybe you do some decoding of the outputs to
+            probabilities, in that case add the value ``outputs['class_probs']
+            = <class-probs>``. Or maybe you return the logits, so return
+            ``outputs['class_logits']``. This is far easier to use than
+            returning tuples of data. </rant over>
+
+        (5) A coder that performs postprocessing on batch outputs to
+            obtain a useable form for the predictions.
+
+    Example:
+        >>> from netharn.examples.classification import *  # NOQA
+        >>> classes = ['a', 'b', 'c']
+        >>> input_stats = {
+        >>>     'mean': torch.Tensor([[[0.1]], [[0.2]], [[0.2]]]),
+        >>>     'std': torch.Tensor([[[0.3]], [[0.3]], [[0.3]]]),
+        >>> }
+        >>> channels = 'rgb'
+        >>> self = ClfModel(
+        >>>     arch='resnet50', channels=channels,
+        >>>     input_stats=input_stats, classes=classes)
+        >>> inputs = torch.rand(4, 1, 256, 256)
+        >>> outputs = self(inputs)
+        >>> self.coder.decode_batch(outputs)
+    """
+
+    def __init__(self, arch='resnet50', classes=1000, channels='rgb',
+                 input_stats=None):
+        super(ClfModel, self).__init__()
+
+        import ndsampler
+        if input_stats is None:
+            input_stats = {}
+        input_norm = nh.layers.InputNorm(**input_stats)
+
+        self.classes = ndsampler.CategoryTree.coerce(classes)
+
+        self.channels = ChannelSpec.coerce(channels)
+        chann_norm = self.channels.normalize()
+        assert len(chann_norm) == 1
+        in_channels = len(ub.peek(chann_norm.values()))
+        num_classes = len(self.classes)
+
+        if arch == 'resnet50':
+            from torchvision import models
+            model = models.resnet50()
+            new_conv1 = torch.nn.Conv2d(in_channels, 64, kernel_size=7,
+                                        stride=3, padding=3, bias=False)
+            new_fc = torch.nn.Linear(2048, num_classes, bias=True)
+            new_conv1.weight.data[:, 0:in_channels, :, :] = model.conv1.weight.data[0:, 0:in_channels, :, :]
+            new_fc.weight.data[0:num_classes, :] = model.fc.weight.data[0:num_classes, :]
+            new_fc.bias.data[0:num_classes] = model.fc.bias.data[0:num_classes]
+            model.fc = new_fc
+            model.conv1 = new_conv1
+        else:
+            raise KeyError(arch)
+
+        self.input_norm = input_norm
+        self.model = model
+
+        self.coder = ClfCoder(self.classes)
+
+    def forward(self, inputs):
+        """
+        Args:
+            inputs (Tensor | dict): Either the input images  (as a regular
+                pytorch BxCxHxW Tensor) or a dictionary mapping input
+                modalities to the input images.
+
+        Returns:
+             Dict[str, Tensor]: model output wrapped in a dictionary so its
+                 clear what the return type is. In this case "energy" is class
+                 probabilities **before** softmax / normalization is applied.
+        """
+        if isinstance(inputs, dict):
+            # TODO: handle channel modalities later
+            assert len(inputs) == 1, (
+                'only support one fused stream: e.g. rgb for now ')
+            im = ub.peek(inputs.values())
+        else:
+            im = inputs
+
+        im = self.input_norm(im)
+        class_energy = self.model(im)
+        outputs = {
+            'class_energy': class_energy,
+        }
+        return outputs
+
+
+class ClfCoder(object):
+    """
+    Postprocess raw classifier outputs into a dictionary of predictions.
+
+    ``classes`` is stored as-is; decoding relies on its
+    ``hierarchical_softmax`` and ``decision`` methods.
+    """
+    def __init__(self, classes):
+        self.classes = classes
+
+    def decode_batch(self, outputs):
+        """Convert ``outputs['class_energy']`` to probabilities and decisions."""
+        energy = outputs['class_energy']
+        probs = self.classes.hierarchical_softmax(energy, dim=1)
+        # thresh / criterion are forwarded verbatim to ``classes.decision``
+        cxs, conf = self.classes.decision(
+            probs, dim=1, thresh=0.1, criterion='entropy')
+        return {
+            'class_probs': probs,
+            'pred_cxs': cxs,
+            'pred_conf': conf,
+        }
+
+
+class ClfDataset(torch.utils.data.Dataset):
+    """
+    Efficient loader for classification training on coco samplers.
+
+    This is a normal torch dataset that uses :mod:`ndsampler` and
+    :mod:`imgaug` for data loading and augmentation.
+
+    It also contains a ``make_loader`` method for creating a class balanced
+    DataLoader. There is little netharn-specific about this class.
+
+    Example:
+        >>> import ndsampler
+        >>> sampler = ndsampler.CocoSampler.demo()
+        >>> self = ClfDataset(sampler)
+        >>> index = 0
+        >>> self[index]['inputs']['rgb'].shape
+        >>> loader = self.make_loader(batch_size=8, shuffle=True, num_workers=0, num_batches=10)
+        >>> for batch in ub.ProgIter(iter(loader), total=len(loader)):
+        >>>     break
+        >>> print('batch = {}'.format(ub.repr2(batch, nl=1)))
+        >>> # xdoctest: +REQUIRES(--show)
+        >>> import kwplot
+        >>> kwplot.autompl()
+        >>> kwplot.imshow(batch['inputs']['rgb'][0])
+    """
+    def __init__(self, sampler, input_dims=(256, 256), augmenter=None):
+        self.sampler = sampler
+        self.augmenter = None
+        self.conditional_augmentors = None
+        self.input_dims = input_dims
+        self.classes = self.sampler.catgraph
+
+        self.augmenter = self._coerce_augmenter(augmenter)
+
+    def __len__(self):
+        return self.sampler.n_positives
+
+    @ub.memoize_property
+    def input_id(self):
+        def imgaug_json_id(aug):
+            import imgaug
+            if isinstance(aug, tuple):
+                return [imgaug_json_id(item) for item in aug]
+            elif isinstance(aug, imgaug.parameters.StochasticParameter):
+                return str(aug)
+            else:
+                try:
+                    info = ub.odict()
+                    info['__class__'] = aug.__class__.__name__
+                    params = aug.get_parameters()
+                    if params:
+                        info['params'] = [imgaug_json_id(p) for p in params]
+                    if isinstance(aug, list):
+                        children = aug[:]
+                        children = [imgaug_json_id(c) for c in children]
+                        info['children'] = children
+                    return info
+                except Exception:
+                    # imgaug is weird and buggy
+                    return str(aug)
+        depends = [
+            self.sampler._depends(),
+            self.augmenter and imgaug_json_id(self.augmenter),
+        ]
+        _input_id = ub.hash_data(depends, hasher='sha512', base='abc')[0:40]
+        return _input_id
+
+    def __getitem__(self, index):
+        import kwimage
+
+        # Load sample image and category
+        sample = self.sampler.load_positive(index, with_annots=False)
+        image = kwimage.atleast_3channels(sample['im'])[:, :, 0:3]
+        target = sample['tr']
+
+        image = kwimage.ensure_uint255(image)
+        if self.augmenter is not None:
+            det = self.augmenter.to_deterministic()
+            image = det.augment_image(image)
+
+        # Resize to input dimensinos
+        if self.input_dims is not None:
+            dsize = tuple(self.input_dims[::-1])
+            image = kwimage.imresize(image, dsize=dsize, letterbox=True)
+
+        class_id_to_idx = self.sampler.classes.id_to_idx
+        cid = target['category_id']
+        cidx = class_id_to_idx[cid]
+
+        im_chw = image.transpose(2, 0, 1) / 255.0
+        inputs = {
+            'rgb': torch.FloatTensor(im_chw),
+        }
+        labels = {
+            'class_idxs': cidx,
+        }
+        batch = {
+            'inputs': inputs,
+            'labels': labels,
+        }
+        return batch
+
+    def _coerce_augmenter(self, augmenter):
+        import netharn as nh
+        import imgaug.augmenters as iaa
+        if augmenter is True:
+            augmenter = 'simple'
+        if not augmenter:
+            augmenter = None
+        elif augmenter == 'simple':
+            augmenter = iaa.Sequential([
+                iaa.Crop(percent=(0, .2)),
+                iaa.Fliplr(p=.5)
+            ])
+        elif augmenter == 'medium':
+            augmenter = iaa.Sequential([
+                iaa.Sometimes(0.2, nh.data.transforms.HSVShift(hue=0.1, sat=1.5, val=1.5)),
+                iaa.Crop(percent=(0, .2)),
+                iaa.Fliplr(p=.5)
+            ])
+        else:
+            raise KeyError('Unknown augmentation {!r}'.format(self.augment))
+        return augmenter
+
+    def make_loader(self, batch_size=16, num_batches='auto', num_workers=0,
+                    shuffle=False, pin_memory=False, drop_last=False,
+                    balance=None):
+
+        if len(self) == 0:
+            raise Exception('must have some data')
+
+        def worker_init_fn(worker_id):
+            for i in range(worker_id + 1):
+                seed = np.random.randint(0, int(2 ** 32) - 1)
+            seed = seed + worker_id
+            kwarray.seed_global(seed)
+            if self.augmenter:
+                rng = kwarray.ensure_rng(None)
+                self.augmenter.seed_(rng)
+
+        loaderkw = {
+            'num_workers': num_workers,
+            'pin_memory': pin_memory,
+            'worker_init_fn': worker_init_fn,
+        }
+        if balance is None:
+            loaderkw['shuffle'] = shuffle
+            loaderkw['batch_size'] = batch_size
+            loaderkw['drop_last'] = drop_last
+        elif balance == 'classes':
+            from netharn.data.batch_samplers import BalancedBatchSampler
+            index_to_cid = [
+                cid for cid in self.sampler.regions.targets['category_id']
+            ]
+            batch_sampler = BalancedBatchSampler(
+                index_to_cid, batch_size=batch_size,
+                shuffle=shuffle, num_batches=num_batches)
+            loaderkw['batch_sampler'] = batch_sampler
+        else:
+            raise KeyError(balance)
+
+        loader = torch.utils.data.DataLoader(self, **loaderkw)
+        return loader
+
+
+class ClfHarn(nh.FitHarn):
+    """
+    The Classification Harness
+    ==========================
+
+    The concept of a "Harness" is at the core of netharn. This is our custom
+    :class:`netharn.FitHarn` object for a classification problem.
+
+    The Harness provides the important details to the training loop via the
+    `run_batch` method. The rest of the loop boilerplate is taken care of by
+    `nh.FitHarn` internals. In addition to `run_batch`, we also define several
+    callbacks to perform customized monitoring of training progress.
+    """
+
+    def after_initialize(harn, **kw):
+        """Start with empty confusion-vector accumulators."""
+        harn._accum_confusion_vectors = {
+            'y_true': [],
+            'y_pred': [],
+            'probs': [],
+        }
+
+    def prepare_batch(harn, raw_batch):
+        """Return the raw batch unchanged."""
+        return raw_batch
+
+    def run_batch(harn, batch):
+        """
+        Run the model on a batch and compute the focal classification loss.
+
+        Returns:
+            Tuple[dict, dict]: outputs with decoded predictions, loss parts
+
+        Example:
+            >>> # xdoctest: +SKIP
+            >>> harn = setup_harn(datasets='special:shapes256', batch_size=4).initialize()
+            >>> batch = harn._demo_batch(0, tag='train')
+            >>> outputs, loss = harn.run_batch(batch)
+            >>> harn.on_batch(batch, outputs, loss)
+        """
+        classes = harn.raw_model.classes
+        inputs = harn.xpu.move(batch['inputs'])
+        labels = harn.xpu.move(batch['labels'])
+
+        outputs = harn.model(inputs)
+
+        class_energy = outputs['class_energy']
+        class_logprobs = classes.hierarchical_log_softmax(
+            class_energy, dim=1)
+
+        class_idxs = labels['class_idxs']
+        loss = nh.criterions.focal.nll_focal_loss(
+            class_logprobs, class_idxs, focus=2.0, reduction='mean')
+
+        loss_parts = {}
+        loss_parts['clf'] = loss
+
+        decoded = harn.raw_model.coder.decode_batch(outputs)
+
+        outputs['class_probs'] = decoded['class_probs']
+        outputs['pred_cxs'] = decoded['pred_cxs']
+        outputs['true_cxs'] = class_idxs
+        return outputs, loss_parts
+
+    def on_batch(harn, batch, outputs, loss):
+        """
+        Custom code executed at the end of each batch.
+        """
+        bx = harn.bxs[harn.current_tag]
+        if bx < 3:
+            stacked = harn._draw_batch(batch, outputs)
+            dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag))
+            fpath = join(dpath, 'batch_{}_epoch_{}.jpg'.format(bx, harn.epoch))
+            import kwimage
+            kwimage.imwrite(fpath, stacked)
+
+        y_pred = kwarray.ArrayAPI.numpy(outputs['pred_cxs'])
+        y_true = outputs['true_cxs'].data.cpu().numpy()
+        probs = outputs['class_probs'].data.cpu().numpy()
+        harn._accum_confusion_vectors['y_true'].append(y_true)
+        harn._accum_confusion_vectors['y_pred'].append(y_pred)
+        harn._accum_confusion_vectors['probs'].append(probs)
+
+    def _draw_batch(harn, batch, outputs, limit=32):
+        """
+        Draw up to ``limit`` batch items with their classification results.
+
+        Example:
+            >>> # xdoctest: +REQUIRES(--download)
+            >>> harn = setup_harn(batch_size=3).initialize()
+            >>> batch = harn._demo_batch(0, tag='train')
+            >>> outputs, loss = harn.run_batch(batch)
+            >>> stacked = harn._draw_batch(batch, outputs, limit=12)
+            >>> # xdoctest: +REQUIRES(--show)
+            >>> import kwplot
+            >>> kwplot.autompl()
+            >>> kwplot.imshow(stacked, colorspace='rgb', doclf=True)
+            >>> kwplot.show_if_requested()
+        """
+        import kwimage
+        inputs = batch['inputs']['rgb'][0:limit].data.cpu().numpy()
+        true_cxs = batch['labels']['class_idxs'].data.cpu().numpy()
+        class_probs = outputs['class_probs'].data.cpu().numpy()
+        pred_cxs = kwarray.ArrayAPI.numpy(outputs['pred_cxs'])
+
+        dset = harn.datasets[harn.current_tag]
+        classes = dset.classes
+
+        todraw = []
+        for im, pcx, tcx, probs in zip(inputs, pred_cxs, true_cxs, class_probs):
+            im_ = im.transpose(1, 2, 0)
+
+            # Renormalize and resize image for drawing
+            min_, max_ = im_.min(), im_.max()
+            extent = max(max_ - min_, 1e-9)  # avoid 0 / 0 on constant images
+            im_ = ((im_ - min_) / extent * 255).astype(np.uint8)
+            im_ = np.ascontiguousarray(im_)
+            im_ = kwimage.imresize(im_, dsize=(200, 200),
+                                   interpolation='nearest')
+
+            # Draw classification information on the image
+            im_ = kwimage.draw_clf_on_image(im_, classes=classes, tcx=tcx,
+                                            pcx=pcx, probs=probs)
+            todraw.append(im_)
+
+        stacked = kwimage.stack_images_grid(todraw, overlap=-10,
+                                            bg_value=(10, 40, 30),
+                                            chunksize=8)
+        return stacked
+
+    def on_epoch(harn):
+        """
+        Custom code executed at the end of each epoch.
+
+        This function can optionally return a dictionary containing any scalar
+        quality metrics that you wish to log and monitor. (Note these will be
+        plotted to tensorboard if that is installed).
+
+        Notes:
+            It is ok to do some medium lifting in this function because it is
+            run relatively few times.
+
+        Returns:
+            dict: dictionary of scalar metrics for netharn to log
+
+        CommandLine:
+            xdoctest -m netharn.examples.classification ClfHarn.on_epoch
+
+        Example:
+            >>> harn = setup_harn().initialize()
+            >>> harn._demo_epoch('vali', max_iter=10)
+            >>> harn.on_epoch()
+        """
+        from netharn.metrics import clf_report
+        dset = harn.datasets[harn.current_tag]
+
+        probs = np.vstack(harn._accum_confusion_vectors['probs'])
+        y_true = np.hstack(harn._accum_confusion_vectors['y_true'])
+        y_pred = np.hstack(harn._accum_confusion_vectors['y_pred'])
+
+        # Compute one-vs-rest multiclass metrics
+        target_names = dset.classes
+        ovr_report = clf_report.ovr_classification_report(
+            y_true, probs, target_names=target_names, metrics=[
+                'auc', 'ap', 'mcc', 'brier'
+            ])
+
+        # percent error really isn't a great metric, but it's easy and standard.
+        errors = (y_true != y_pred)
+        acc = 1.0 - errors.mean()
+        percent_error = (1.0 - acc) * 100
+
+        metrics_dict = ub.odict()
+        metrics_dict['ave_brier'] = ovr_report['ave']['brier']
+        metrics_dict['ave_mcc'] = ovr_report['ave']['mcc']
+        metrics_dict['ave_auc'] = ovr_report['ave']['auc']
+        metrics_dict['ave_ap'] = ovr_report['ave']['ap']
+        metrics_dict['percent_error'] = percent_error
+        metrics_dict['acc'] = acc
+
+        harn.info(ub.color_text('ACC FOR {!r}: {!r}'.format(harn.current_tag, acc), 'yellow'))
+
+        # Clear confusion vectors accumulator for the next epoch
+        harn._accum_confusion_vectors = {
+            'y_true': [],
+            'y_pred': [],
+            'probs': [],
+        }
+        return metrics_dict
+
+
+def setup_harn(cmdline=True, **kw):
+    """
+    This creates the "The Classification Harness" (i.e. core ClfHarn object).
+    This is where we programmatically connect our program arguments with the
+    netharn HyperParameter standards. We are using :module:`scriptconfig` to
+    capture these, but you could use click / argparse / etc.
+
+    This function has the responsibility of creating our torch datasets,
+    lazy computing input statistics, specifying our model architecture,
+    schedule, initialization, optimizer, dynamics, XPU etc. These can usually
+    be coerced using netharn API helpers and a "standardized" config dict. See
+    the function code for details.
+
+    Args:
+        cmdline (bool, default=True):
+            if True, behavior will be modified based on ``sys.argv``.
+            Note this will activate the scriptconfig ``--help``, ``--dump`` and
+            ``--config`` interactions.
+
+    Kwargs:
+        **kw: overrides for the default config of :class:`ClfConfig`.
+            Note, command line flags have precedence if cmdline=True.
+
+    Returns:
+        ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn`
+            object.
+
+    Example:
+        >>> # xdoctest: +SKIP
+        >>> kw = {'datasets': 'special:shapes256'}
+        >>> cmdline = False
+        >>> harn = setup_harn(cmdline, **kw)
+        >>> harn.initialize()
+    """
+    import ndsampler
+    config = ClfConfig(default=kw)
+    config.load(cmdline=cmdline)
+    print('config = {}'.format(ub.repr2(config.asdict())))
+
+    nh.configure_hacks(config)
+    coco_datasets = nh.api.Datasets.coerce(config)
+
+    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
+    for tag, dset in coco_datasets.items():
+        dset._build_hashid(hash_pixels=False)
+
+    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
+    samplers = {
+        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['sampler_backend'])
+        for tag, dset in coco_datasets.items()
+    }
+
+    for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'):
+        sampler.frames.prepare(workers=config['workers'])
+
+    torch_datasets = {
+        'train': ClfDataset(
+            samplers['train'],
+            input_dims=config['input_dims'],
+            augmenter=config['augmenter'],
+        ),
+        'vali': ClfDataset(
+            samplers['vali'],
+            input_dims=config['input_dims'],
+            augmenter=False),
+    }
+
+    channels = ChannelSpec.coerce(config['channels'])
+
+    if config['normalize_inputs']:
+        # Get stats on the dataset (todo: turn off augmentation for this)
+        _dset = torch_datasets['train']
+        stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))]
+        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
+
+        # v4: invalidates caches written while mean / std were swapped
+        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v4')
+        input_stats = cacher.tryload()
+
+        if input_stats is None:
+            # Use parallel workers to load data faster
+            from netharn.data.data_containers import container_collate
+            from functools import partial
+            collate_fn = partial(container_collate, num_devices=1)
+
+            loader = torch.utils.data.DataLoader(
+                stats_subset,
+                collate_fn=collate_fn,
+                num_workers=config['workers'],
+                shuffle=True,
+                batch_size=config['batch_size'])
+
+            # Track moving average of each fused channel stream
+            channel_stats = {key: nh.util.RunningStats()
+                             for key in channels.keys()}
+            assert len(channel_stats) == 1, (
+                'only support one fused stream for now')
+            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
+                for key, val in batch['inputs'].items():
+                    try:
+                        for part in val.numpy():
+                            channel_stats[key].update(part)
+                    except ValueError:  # final batch broadcast error
+                        pass
+
+            perchan_input_stats = {}
+            for key, running in channel_stats.items():
+                perchan_stats = running.simple(axis=(1, 2))
+                perchan_input_stats[key] = {
+                    'mean': perchan_stats['mean'].round(3),
+                    'std': perchan_stats['std'].round(3),
+                }
+
+            input_stats = ub.peek(perchan_input_stats.values())
+            cacher.save(input_stats)
+    else:
+        input_stats = {}
+
+    torch_loaders = {
+        tag: dset.make_loader(
+            batch_size=config['batch_size'],
+            num_batches=config['num_batches'],
+            num_workers=config['workers'],
+            shuffle=(tag == 'train'),
+            balance=(config['balance'] if tag == 'train' else None),
+            pin_memory=True)
+        for tag, dset in torch_datasets.items()
+    }
+
+    initializer_ = None
+    classes = torch_datasets['train'].classes
+
+    modelkw = {
+        'arch': config['arch'],
+        'input_stats': input_stats,
+        'classes': classes.__json__(),
+        'channels': channels,
+    }
+    model = ClfModel(**modelkw)
+    model._initkw = modelkw
+
+    if initializer_ is None:
+        initializer_ = nh.Initializer.coerce(config)
+
+    hyper = nh.HyperParams(
+        name=config['name'],
+
+        workdir=config['workdir'],
+        xpu=nh.XPU.coerce(config['xpu']),
+
+        datasets=torch_datasets,
+        loaders=torch_loaders,
+
+        model=model,
+        criterion=None,
+
+        optimizer=nh.Optimizer.coerce(config),
+        dynamics=nh.Dynamics.coerce(config),
+        scheduler=nh.Scheduler.coerce(config),
+
+        initializer=initializer_,
+
+        monitor=(nh.Monitor, {
+            'minimize': ['loss'],
+            'patience': config['patience'],
+            'max_epoch': config['max_epoch'],
+            'smoothing': 0.0,
+        }),
+        other={
+            'name': config['name'],
+            'batch_size': config['batch_size'],
+            'balance': config['balance'],
+        },
+        extra={
+            'argv': sys.argv,
+            'config': ub.repr2(config.asdict()),
+        }
+    )
+    harn = ClfHarn(hyper=hyper)
+    harn.preferences.update({
+        'num_keep': 3,
+        'keep_freq': 10,
+        'tensorboard_groups': ['loss'],
+        'eager_dump_tensorboard': True,
+    })
+    harn.intervals.update({})
+    harn.script_config = config
+    return harn
+
+
+def main():
+    """
+    Entry point for the generic classification example. Passing ``--lrtest``
+    on the command line enables a hidden learning-rate range test.
+    """
+    harn = setup_harn()
+    harn.initialize()
+
+    run_lrtest = ub.argflag('--lrtest')
+    if run_lrtest:
+        # Hidden feature: run an LR range test, optionally plot it with
+        # matplotlib, then rebuild the harness around the suggested rate.
+        from netharn.prefit.lr_tests import lr_range_test
+        lr_result = lr_range_test(
+            harn, init_value=1e-4, final_value=0.5, beta=0.3,
+            explode_factor=10, num_iters=200)
+        if ub.argflag('--show'):
+            import kwplot
+            plt = kwplot.autoplt()
+            lr_result.draw()
+            plt.show()
+        # Rebuild the harness using ten times the recommended LR.
+        overrides = harn.script_config.asdict()
+        overrides['lr'] = lr_result.recommended_lr * 10
+        harn = setup_harn(**overrides)
+        harn.initialize()
+    # Start the main loop, which runs until the monitor's termination
+    # criterion is satisfied. If the initialize step loaded a checkpoint that
+    # already met that criterion, this simply returns.
+    deploy_fpath = harn.run()
+
+    # The returned deploy_fpath is the path to an exported netharn model.
+    # This model is the one with the best weights according to the monitor.
+    print('deploy_fpath = {!r}'.format(deploy_fpath))
+    return harn
+
+
+if __name__ == '__main__':
+    # Example invocation:
+    #
+    #     python -m netharn.examples.classification --datasets=shapes5000 --name=shapes_clf5000 --batch_size=32
+    main()
diff --git a/netharn/examples/mnist.py b/netharn/examples/mnist.py
index 0821076d3d9c7c32baab302c51c0f10b6bfcde6c..f67df39cec70e2e4e6fb7a41e5136a690a3de445 100644
--- a/netharn/examples/mnist.py
+++ b/netharn/examples/mnist.py
@@ -246,7 +246,7 @@ def setup_harn(**kw):
     # They nh.HyperParams object keeps track of and helps log all declarative
     # info related to training a model.
     hyper = nh.hyperparams.HyperParams(
-        nice='my-mnist-demo',
+        name='my-mnist-demo',
         xpu=xpu,
         workdir=workdir,
         datasets=datasets,
diff --git a/netharn/examples/object_detection.py b/netharn/examples/object_detection.py
index b8d78b2f8bafa9e21320714ff3591e09a64065e0..741167de5ea6f707487717042b046756daacf26e 100644
--- a/netharn/examples/object_detection.py
+++ b/netharn/examples/object_detection.py
@@ -8,6 +8,7 @@ import os
 import torch
 import ubelt as ub
 import kwarray
+import kwimage
 import scriptconfig as scfg
 from netharn.models.yolo2 import multiscale_batch_sampler  # NOQA
 from netharn.models.yolo2 import yolo2
@@ -391,10 +392,10 @@ class DetectHarn(nh.FitHarn):
             >>> harn.on_batch(batch, outputs, losses)
             >>> # xdoc: +REQUIRES(--show)
             >>> batch_dets = harn.model.module.postprocess(outputs)
-            >>> nh.util.autompl()  # xdoc: +SKIP
+            >>> kwplot.autompl()  # xdoc: +SKIP
             >>> stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.01)
-            >>> nh.util.imshow(stacked)
-            >>> nh.util.show_if_requested()
+            >>> kwplot.imshow(stacked)
+            >>> kwplot.show_if_requested()
         """
         dmet = harn.dmets[harn.current_tag]
         inputs = batch['im']
@@ -406,12 +407,12 @@ class DetectHarn(nh.FitHarn):
             bx = harn.bxs[harn.current_tag]
             if bx < 4:
                 stacked = harn.draw_batch(batch, outputs, detections, thresh=0.1)
-                # img = nh.util.render_figure_to_image(fig)
+                # img = kwplot.render_figure_to_image(fig)
                 dump_dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag, 'batch'))
                 dump_fname = 'pred_bx{:04d}_epoch{:08d}.png'.format(bx, harn.epoch)
                 fpath = os.path.join(dump_dpath, dump_fname)
                 harn.debug('dump viz fpath = {}'.format(fpath))
-                nh.util.imwrite(fpath, stacked)
+                kwimage.imwrite(fpath, stacked)
         except Exception as ex:
             harn.error('\n\n\n')
             harn.error('ERROR: FAILED TO POSTPROCESS OUTPUTS')
@@ -583,9 +584,9 @@ class DetectHarn(nh.FitHarn):
             >>> stacked = harn.draw_batch(batch, outputs, batch_dets)
 
             >>> # xdoc: +REQUIRES(--show)
-            >>> nh.util.autompl()  # xdoc: +SKIP
-            >>> nh.util.imshow(stacked)
-            >>> nh.util.show_if_requested()
+            >>> kwplot.autompl()  # xdoc: +SKIP
+            >>> kwplot.imshow(stacked)
+            >>> kwplot.show_if_requested()
         """
         import cv2
         inputs = batch['im']
@@ -647,8 +648,8 @@ class DetectHarn(nh.FitHarn):
                 pred_dets.boxes, orig_size, target_size)
 
             # shift, scale, embed_size = letterbox._letterbox_transform(orig_size, target_size)
-            # fig = nh.util.figure(doclf=True, fnum=1)
-            # nh.util.imshow(img, colorspace='rgb')
+            # fig = kwplot.figure(doclf=True, fnum=1)
+            # kwplot.imshow(img, colorspace='rgb')
             canvas = (img * 255).astype(np.uint8)
             canvas = true_dets.draw_on(canvas, color='green')
             canvas = pred_dets.draw_on(canvas, color='blue')
@@ -656,7 +657,7 @@ class DetectHarn(nh.FitHarn):
             canvas = cv2.resize(canvas, (300, 300))
             imgs.append(canvas)
 
-        stacked = imgs[0] if len(imgs) == 1 else nh.util.stack_images_grid(imgs)
+        stacked = imgs[0] if len(imgs) == 1 else kwimage.stack_images_grid(imgs)
         return stacked
 
 
diff --git a/netharn/examples/yolo_voc.py b/netharn/examples/yolo_voc.py
index 4bd004466788150f035917441b967f5b30e6cc23..1f89e0650881b4163fffdf8d176511d6bbba6f2d 100644
--- a/netharn/examples/yolo_voc.py
+++ b/netharn/examples/yolo_voc.py
@@ -98,13 +98,13 @@ class YoloVOCDataset(nh.data.voc.VOCDataset):
             >>> norm_boxes = label['targets'].numpy().reshape(-1, 5)[:, 1:5]
             >>> inp_size = hwc01.shape[-2::-1]
             >>> # xdoc: +REQUIRES(--show)
-            >>> import netharn as nh
-            >>> nh.util.figure(doclf=True, fnum=1)
-            >>> nh.util.autompl()  # xdoc: +SKIP
-            >>> nh.util.imshow(hwc01, colorspace='rgb')
+            >>> import kwplot
+            >>> kwplot.figure(doclf=True, fnum=1)
+            >>> kwplot.autompl()  # xdoc: +SKIP
+            >>> kwplot.imshow(hwc01, colorspace='rgb')
             >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size)
             >>> inp_boxes.draw()
-            >>> nh.util.show_if_requested()
+            >>> kwplot.show_if_requested()
 
         Example:
             >>> # DISABLE_DOCTSET
@@ -119,12 +119,13 @@ class YoloVOCDataset(nh.data.voc.VOCDataset):
             >>> norm_boxes = label[0].numpy().reshape(-1, 5)[:, 1:5]
             >>> inp_size = hwc01.shape[-2::-1]
             >>> # xdoc: +REQUIRES(--show)
-            >>> nh.util.figure(doclf=True, fnum=1)
-            >>> nh.util.autompl()  # xdoc: +SKIP
-            >>> nh.util.imshow(hwc01, colorspace='rgb')
+            >>> import kwplot
+            >>> kwplot.autompl()  # xdoc: +SKIP
+            >>> kwplot.figure(doclf=True, fnum=1)
+            >>> kwplot.imshow(hwc01, colorspace='rgb')
             >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size)
             >>> inp_boxes.draw()
-            >>> nh.util.show_if_requested()
+            >>> kwplot.show_if_requested()
         """
         if isinstance(index, tuple):
             # Get size index from the batch loader
@@ -368,11 +369,12 @@ class YoloHarn(nh.FitHarn):
             >>> outputs, loss = harn.run_batch(batch)
             >>> harn.on_batch(batch, outputs, loss)
             >>> # xdoc: +REQUIRES(--show)
+            >>> import kwplot
             >>> batch_dets = harn.model.module.postprocess(outputs)
-            >>> nh.util.autompl()  # xdoc: +SKIP
+            >>> kwplot.autompl()  # xdoc: +SKIP
             >>> stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.01)
-            >>> nh.util.imshow(stacked)
-            >>> nh.util.show_if_requested()
+            >>> kwplot.imshow(stacked)
+            >>> kwplot.show_if_requested()
         """
         dmet = harn.dmets[harn.current_tag]
         inputs, labels = batch
@@ -386,13 +388,14 @@ class YoloHarn(nh.FitHarn):
 
             bx = harn.bxs[harn.current_tag]
             if bx < 4:
+                import kwimage
                 stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.1)
-                # img = nh.util.render_figure_to_image(fig)
+                # img = kwplot.render_figure_to_image(fig)
                 dump_dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag, 'batch'))
                 dump_fname = 'pred_bx{:04d}_epoch{:08d}.png'.format(bx, harn.epoch)
                 fpath = os.path.join(dump_dpath, dump_fname)
                 harn.debug('dump viz fpath = {}'.format(fpath))
-                nh.util.imwrite(fpath, stacked)
+                kwimage.imwrite(fpath, stacked)
         except Exception as ex:
             harn.error('\n\n\n')
             harn.error('ERROR: FAILED TO POSTPROCESS OUTPUTS')
@@ -573,11 +576,12 @@ class YoloHarn(nh.FitHarn):
             >>> outputs, loss = harn.run_batch(batch)
             >>> harn.on_batch(batch, outputs, loss)
             >>> # xdoc: +REQUIRES(--show)
+            >>> import kwplot
             >>> batch_dets = harn.model.module.postprocess(outputs)
-            >>> nh.util.autompl()  # xdoc: +SKIP
+            >>> kwplot.autompl()  # xdoc: +SKIP
             >>> stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.01)
-            >>> nh.util.imshow(stacked)
-            >>> nh.util.show_if_requested()
+            >>> kwplot.imshow(stacked)
+            >>> kwplot.show_if_requested()
         """
         import cv2
         inputs, labels = batch
diff --git a/netharn/export/deployer.py b/netharn/export/deployer.py
index 65d20a8d9e0e0a9a5f3c52218c207ed5fbfeeab2..4d37988d687381d62487ab4cb1d32731a19847bb 100644
--- a/netharn/export/deployer.py
+++ b/netharn/export/deployer.py
@@ -27,7 +27,7 @@ Example:
     >>> # This will train a toy model with toy data using netharn
     >>> hyper = nh.HyperParams(**{
     >>>     'workdir'     : ub.ensure_app_cache_dir('netharn/tests/deploy'),
-    >>>     'nice'        : 'deploy_demo',
+    >>>     'name'        : 'deploy_demo',
     >>>     'xpu'         : nh.XPU.coerce('cpu'),
     >>>     'datasets'    : {
     >>>         'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0),
@@ -61,7 +61,7 @@ Example:
     INFO: Exported model topology to .../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/ToyNet2d_2a3f49.py
     INFO: Initializing model weights with: <netharn.initializers.nninit_core.KaimingNormal object at 0x7fc67efdf8d0>
     INFO:  * harn.train_dpath = '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww'
-    INFO:  * harn.nice_dpath  = '.../.cache/netharn/tests/deploy/fit/nice/deploy_demo'
+    INFO:  * harn.name_dpath  = '.../.cache/netharn/tests/deploy/fit/name/deploy_demo'
     INFO: Snapshots will save to harn.snapshot_dpath = '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/torch_snapshots'
     INFO: ARGV:
         .../.local/conda/envs/py36/bin/python .../.local/conda/envs/py36/bin/ipython
@@ -84,9 +84,9 @@ Example:
     INFO:
     INFO: training completed
     INFO: harn.train_dpath = '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww'
-    INFO: harn.nice_dpath  = '.../.cache/netharn/tests/deploy/fit/nice/deploy_demo'
+    INFO: harn.name_dpath  = '.../.cache/netharn/tests/deploy/fit/name/deploy_demo'
     INFO: view tensorboard results for this run via:
-        tensorboard --logdir ~/.cache/netharn/tests/deploy/fit/nice
+        tensorboard --logdir ~/.cache/netharn/tests/deploy/fit/name
     [DEPLOYER] Deployed zipfpath=.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/deploy_ToyNet2d_onnxqaww_002_TXZBYL.zip
     INFO: wrote single-file deployment to: '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/deploy_ToyNet2d_onnxqaww_002_TXZBYL.zip'
     INFO: exiting fit harness.
@@ -313,7 +313,7 @@ def _package_deploy2(dpath, info, name=None):
 
     Ignore:
         dpath = '/home/joncrall/.cache/netharn/tests/_package_custom'
-        path = '/home/joncrall/work/opir/fit/nice/_Sim3-kw6-99-finetune_ML3D_BEST_2018-9-20_LR1e-4_f2_vel0.0_hn0.25_bs64_nr5.0'
+        path = '/home/joncrall/work/opir/fit/name/_Sim3-kw6-99-finetune_ML3D_BEST_2018-9-20_LR1e-4_f2_vel0.0_hn0.25_bs64_nr5.0'
         info = unpack_model_info(path)
         zipfpath = _package_deploy2(dpath, info)
 
@@ -699,7 +699,7 @@ def _demodata_toy_harn():
     import netharn as nh
     hyper = nh.HyperParams(**{
         'workdir'     : ub.ensure_app_cache_dir('netharn/tests/deploy'),
-        'nice'        : 'deploy_demo_static',
+        'name'        : 'deploy_demo_static',
         'xpu'         : nh.XPU.coerce('cpu'),
         'datasets'    : {'train': nh.data.ToyData2d(size=3, rng=0)},
         'loaders'     : {'batch_size': 64},
diff --git a/netharn/fit_harn.py b/netharn/fit_harn.py
index dfaa6c920dd38b1f85af6827c73d355d51b4fbfa..cfea3d088108c6617ddbe9eb762d2fcb4a69c5c1 100644
--- a/netharn/fit_harn.py
+++ b/netharn/fit_harn.py
@@ -50,16 +50,16 @@ Example:
     >>>     'name'        : 'demo',
     >>>     'xpu'         : nh.XPU.coerce('argv'),
     >>>     # workdir is a directory where intermediate results can be saved
-    >>>     # nice symlinks <workdir>/fit/nice/<name> -> ../runs/<hashid>
+    >>>     # name symlinks <workdir>/fit/name/<name> -> ../runs/<hashid>
     >>>     # XPU auto select a gpu if idle and VRAM>6GB else a cpu
     >>>     # ================
     >>>     # Data Components
     >>>     'datasets'    : {  # dict of plain ol torch.data.Dataset instances
     >>>         'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0),
-    >>>         'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
-    >>>         'test': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
+    >>>         'vali': nh.data.ToyData2d(size=3, border=1, n=64, rng=1),
+    >>>         'test': nh.data.ToyData2d(size=3, border=1, n=64, rng=1),
     >>>     },
-    >>>     'loaders'     : {'batch_size': 64}, # DataLoader instances or kw
+    >>>     'loaders'     : {'batch_size': 8}, # DataLoader instances or kw
     >>>     # ================
     >>>     # Algorithm Components
     >>>     # Note the (cls, kw) tuple formatting
@@ -82,7 +82,7 @@ Example:
     >>>     }),
     >>>     # dynamics are a config option that modify the behavior of the main
     >>>     # training loop. These parameters effect the learned model.
-    >>>     'dynamics'   : {'batch_step': 4},
+    >>>     'dynamics'   : {'batch_step': 2},
     >>> })
     >>> harn = nh.FitHarn(hyper)
     >>> # non-algorithmic behavior configs (do not change learned models)
@@ -93,7 +93,7 @@ Example:
     >>> harn.run()  # note: run calls initialize it hasn't already been called.
     >>> # xdoc: +IGNORE_WANT
     RESET HARNESS BY DELETING EVERYTHING IN TRAINING DIR
-    Symlink: ...tests/demo/fit/runs/demo/keyeewlr -> ...tests/demo/fit/nice/demo
+    Symlink: ...tests/demo/fit/runs/demo/keyeewlr -> ...tests/demo/fit/name/demo
     .... already exists
     .... and points to the right place
     Initializing tensorboard (dont forget to start the tensorboard server)
@@ -101,10 +101,10 @@ Example:
     Mounting ToyNet2d model on CPU
     Initializing model weights
      * harn.train_dpath = '...tests/demo/fit/runs/demo/keyeewlr'
-     * harn.nice_dpath  = '...tests/demo/fit/nice/demo'
+     * harn.name_dpath  = '...tests/demo/fit/name/demo'
     Snapshots will save to harn.snapshot_dpath = '...tests/demo/fit/runs/demo/keyeewlr/torch_snapshots'
     dont forget to start:
-        tensorboard --logdir ...tests/demo/fit/nice
+        tensorboard --logdir ...tests/demo/fit/name
     === begin training ===
     epoch lr:0.001 │ vloss: 0.1409 (n_bad_epochs=00, best=0.1409): 100%|█| 10/10 [00:01<00:00,  9.95it/s]  0:00<?, ?it/s]
     train x64 │ loss:0.147 │: 100%|███████████████████████████████████████████████████████| 8/8 [00:00<00:00, 130.56it/s]
@@ -116,9 +116,9 @@ Example:
     training completed
     current lrs: [0.001]
     harn.train_dpath = '...tests/demo/fit/runs/demo/keyeewlr'
-    harn.nice_dpath  = '...tests/demo/fit/nice/demo'
+    harn.name_dpath  = '...tests/demo/fit/name/demo'
     view tensorboard results for this run via:
-        tensorboard --logdir ...tests/demo/fit/nice
+        tensorboard --logdir ...tests/demo/fit/name
     exiting fit harness.
 
 TODO:
@@ -133,7 +133,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import glob
 import itertools as it
 import logging
-import os
 import parse
 import shutil
 import time
@@ -143,6 +142,8 @@ import warnings
 import functools
 import traceback
 from os.path import join
+from os.path import exists
+from os.path import dirname
 
 import torch
 import numpy as np
@@ -207,6 +208,59 @@ class ExtraMixins(object):
     Miscellaneous methods that will be mixed into FitHarn
     """
 
+    @classmethod
+    def demo(cls):
+        """
+        Creates a dummy FitHarn object for testing and demonstration purposes
+        """
+        import netharn as nh
+        hyper = nh.HyperParams(**{
+            # ================
+            # Environment Components
+            'workdir'     : ub.ensure_app_cache_dir('netharn/tests/demo'),
+            'name'        : 'demo',
+            'xpu'         : nh.XPU.coerce('cpu'),
+            # workdir is a directory where intermediate results can be saved
+            # "name" symlinks <workdir>/fit/name/<name> -> ../runs/<hashid>
+            # XPU auto select a gpu if idle and VRAM>6GB else a cpu
+            # ================
+            # Data Components
+            'datasets'    : {  # dict of plain ol torch.data.Dataset instances
+                'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0),
+                'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
+                'test': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
+            },
+            'loaders'     : {'batch_size': 64},  # DataLoader instances or kw
+            # ================
+            # Algorithm Components
+            # Note the (cls, kw) tuple formatting
+            'model'       : (nh.models.ToyNet2d, {}),
+            'optimizer'   : (nh.optimizers.SGD, {
+                'lr': 0.0001
+            }),
+            # focal loss is usually better than nh.criterions.CrossEntropyLoss
+            'criterion'   : (nh.criterions.FocalLoss, {}),
+            'initializer' : (nh.initializers.KaimingNormal, {
+                'param': 0,
+            }),
+            # these may receive an overhaul soon
+            'scheduler'   : (nh.schedulers.ListedLR, {
+                'points': {0: .0001, 2: .01, 5: .015, 6: .005, 9: .001},
+                'interpolate': True,
+            }),
+            'monitor'     : (nh.Monitor, {
+                'max_epoch': 10,
+            }),
+            # dynamics are a config option that modify the behavior of the main
+            # training loop. These parameters affect the learned model.
+            'dynamics'   : {'batch_step': 4},
+        })
+        harn = cls(hyper)
+        # non-algorithmic behavior configs (do not change learned models)
+        harn.preferences['use_tensorboard'] = False
+        harn.preferences['timeout'] = 0.5
+        return harn
+
     def _demo_epoch(harn, tag='vali', learn=False, max_iter=np.inf,
                     call_on_epoch=False):
         """
@@ -329,7 +383,7 @@ class InitializeMixin(object):
         # train info, keep a backup of the old ones.
         if harn.train_dpath and overwrite:
             train_info_fpath = join(harn.train_dpath, 'train_info.json')
-            if os.path.exists(train_info_fpath):
+            if exists(train_info_fpath):
                 if overwrite:
                     import json
                     try:
@@ -367,14 +421,14 @@ class InitializeMixin(object):
                 raise CannotResume
             harn.resume_from_previous_snapshots()
         except CannotResume:
-            # Abstract logic into a reset_state function?
+            # This step is only run on a fresh start.
             harn.reset_weights()
             for group in harn.optimizer.param_groups:
                 group.setdefault('initial_lr', group['lr'])
 
         if harn.train_dpath:
             harn.info(' * harn.train_dpath = {!r}'.format(harn.train_dpath))
-            harn.info(' * harn.nice_dpath  = {!r}'.format(harn.nice_dpath))
+            harn.info(' * harn.name_dpath  = {!r}'.format(harn.name_dpath))
             harn.info('Snapshots will save to harn.snapshot_dpath = {!r}'.format(
                 harn.snapshot_dpath))
         else:
@@ -393,8 +447,8 @@ class InitializeMixin(object):
             train_info = harn.hyper.train_info(harn.train_dpath)
             ub.ensuredir(train_info['train_dpath'])
 
-            if train_info['nice_dpath']:
-                ub.ensuredir(os.path.dirname(train_info['nice_dpath']))
+            if train_info['name_dpath']:
+                ub.ensuredir(dirname(train_info['name_dpath']))
 
                 # Make a very simple MRU (most recently used) link
                 mru_dpath = join(harn.hyper.workdir, '_mru')
@@ -404,16 +458,26 @@ class InitializeMixin(object):
                 except OSError as ex:
                     harn.warn('Unable to symlink: {!r}'.format(ex))
 
-                # Link the hashed run dir to the human friendly nice dir
+                # Link the hashed run dir to the human friendly "name" dir
                 try:
                     ub.symlink(train_info['train_dpath'],
-                               train_info['nice_dpath'], overwrite=True,
+                               train_info['name_dpath'], overwrite=True,
                                verbose=3)
                 except OSError as ex:
                     harn.warn('Unable to symlink: {!r}'.format(ex))
 
+            if 'nice_dpath' in train_info:
+                # backwards compatibility for "nice" dpaths
+                ub.ensuredir(dirname(train_info['nice_dpath']))
+                try:
+                    ub.symlink(train_info['train_dpath'],
+                               train_info['nice_dpath'], overwrite=True,
+                               verbose=0)
+                except OSError as ex:
+                    harn.warn('Unable to symlink: {!r}'.format(ex))
+
             harn.train_info = train_info
-            harn.nice_dpath = train_info['nice_dpath']
+            harn.name_dpath = train_info['name_dpath']
             harn.train_dpath = train_info['train_dpath']
             return harn.train_dpath
 
@@ -473,7 +537,7 @@ class InitializeMixin(object):
             harn.debug('Initialized logging')
 
         if tensorboard_logger and harn.preferences['use_tensorboard']:
-            # train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath)
+            # train_base = dirname(harn.name_dpath or harn.train_dpath)
             # harn.info('dont forget to start:\n    tensorboard --logdir ' + train_base)
             harn.info('Initializing tensorboard (dont forget to start the tensorboard server)')
             harn._tlog = tensorboard_logger.Logger(harn.train_dpath,
@@ -599,6 +663,9 @@ class InitializeMixin(object):
         else:
             harn.warn('initializer was not specified')
 
+        # Save the original weights for analysis
+        harn.save_snapshot(mode='initial')
+
     @profiler.profile
     def resume_from_previous_snapshots(harn):
         """
@@ -666,7 +733,8 @@ class ProgMixin(object):
             import tqdm  # NOQA
             Prog = tqdm.tqdm
         elif harn.preferences['prog_backend'] == 'progiter':
-            Prog = functools.partial(ub.ProgIter, chunksize=chunksize, verbose=1)
+            Prog = functools.partial(
+                ub.ProgIter, chunksize=chunksize, verbose=1, time_thresh=2.0)
         else:
             raise KeyError(harn.preferences['prog_backend'])
         return Prog(*args, **kw)
@@ -750,14 +818,24 @@ class LogMixin(object):
         except AttributeError:
             pass
 
-    def log(harn, msg):
+    def log(harn, msg, level='info'):
         """
-        Logs an info message. Alias of :func:LogMixin.info
+        Logs a message with a specified verbosity level.
 
         Args:
             msg (str): an info message to log
-        """
-        harn.info(msg)
+            level (str): either info, debug, error, or warn
+        """
+        if level == 'info':
+            harn.info(msg)
+        elif level == 'debug':
+            harn.debug(msg)
+        elif level == 'error':
+            harn.error(msg)
+        elif level == 'warn':
+            harn.warn(msg)
+        else:
+            raise KeyError(level)
 
     def info(harn, msg):
         """
@@ -892,7 +970,8 @@ class SnapshotMixin(object):
         # snapshots or checkpoints for simplicity.
         if harn.train_dpath is None:
             raise ValueError('harn.train_dpath is None')
-        return join(harn.train_dpath, 'torch_snapshots')
+        # return join(harn.train_dpath, 'torch_snapshots')
+        return join(harn.train_dpath, 'checkpoints')
 
     def _epochs_to_remove(harn, existing_epochs, num_keep_recent,
                           num_keep_best, keep_freq):
@@ -1002,44 +1081,104 @@ class SnapshotMixin(object):
         harn.set_snapshot_state(snapshot_state)
         harn.info('Previous snapshot loaded...')
 
-    def save_snapshot(harn, explicit=False):
+    def save_snapshot(harn, explicit=False, mode='checkpoint'):
         """
         Checkpoint the current model state in an epoch-tagged snapshot.
 
         Args:
+            mode (str, default='checkpoint'): the type of snapshot this is
+                (changes the subdirectory where they are stored). Choices
+                are: checkpoint, explicit, and initial.
+
             explicit (bool, default=False): if True, the snapshot is also
                 tagged by a hash and saved to the explit_checkpoints directory.
+                DEPRECATED, use mode.
 
         Returns:
             PathLike: save_fpath: the path to the saved snapshot
+
+        Example:
+            >>> import netharn as nh
+            >>> harn = nh.FitHarn.demo()
+            >>> # The "save_snapshot" method is called in initialize
+            >>> harn.initialize()
         """
         if explicit:
-            _dpath = join(harn.train_dpath, 'explit_checkpoints')
-            ub.ensuredir(_dpath)
-
-            try:
-                stamp = ub.timestamp()
-            except Exception:
-                stamp = ub.timestamp()
+            mode = 'explicit'
 
+        if mode == 'explicit':
+            dpath = ub.ensuredir((harn.train_dpath, 'explit_checkpoints'))
+            stamp = ub.timestamp()
             save_fname = '_epoch_{:08d}_{}.pt'.format(harn.epoch, stamp)
-            save_fpath = join(_dpath, save_fname)
+        elif mode == 'checkpoint':
+            # TODO: make the transition smoother
+            dpath = ub.ensuredir(harn.snapshot_dpath)
+            _old_snapshot_dpath = join(harn.train_dpath, 'torch_snapshots')
+            _new_snapshot_dpath = join(harn.train_dpath, 'checkpoints')
+
+            if dpath == _new_snapshot_dpath:
+                if not exists(_old_snapshot_dpath):
+                    ub.symlink(_new_snapshot_dpath, _old_snapshot_dpath)
 
-            harn.info('Saving EXPLICIT snapshot to {}'.format(save_fpath))
-            snapshot_state = harn.get_snapshot_state()
-            torch.save(snapshot_state, save_fpath)
-        else:
-            ub.ensuredir(harn.snapshot_dpath)
             save_fname = '_epoch_{:08d}.pt'.format(harn.epoch)
-            save_fpath = join(harn.snapshot_dpath, save_fname)
+        elif mode == 'initial':
+            dpath = ub.ensuredir((harn.train_dpath, 'initial_state'))
+            save_fname = 'initial_state.pt'.format(harn.epoch)
+        else:
+            raise KeyError(mode)
 
-            harn.debug('Saving snapshot to {}'.format(save_fpath))
-            snapshot_state = harn.get_snapshot_state()
-            torch.save(snapshot_state, save_fpath)
+        save_fpath = join(dpath, save_fname)
+        level = 'debug' if mode == 'checkpoint' else 'info'
+        harn.log('Saving {} snapshot to {}'.format(mode.upper(), save_fpath), level)
+
+        snapshot_state = harn.get_snapshot_state()
+
+        try:
+            import safer
+            _open = safer.open
+        except ImportError:
+            _open = open
+
+        with _open(save_fpath, 'wb') as save_file:
+            torch.save(snapshot_state, save_file)
 
         harn.debug('Snapshot saved to {}'.format(save_fpath))
         return save_fpath
 
+    def best_snapshot(harn):
+        """
+        Return the path to the current "best" snapshot.
+        """
+        # Netharn should populate best_snapshot.pt if there is a validation set.
+        # Other names are to support older codebases.
+        train_dpath = harn.train_dpath
+        expected_names = [
+            'best_snapshot.pt',
+            'best_snapshot2.pt',
+            'final_snapshot.pt',
+            'deploy_snapshot.pt',
+        ]
+        for fname in expected_names:
+            fpath = join(train_dpath, fname)
+            if exists(fpath):
+                break
+
+        if not exists(fpath):
+            fpath = None
+
+        if not fpath:
+            epoch_to_fpath = {
+                parse.parse('{}_epoch_{num:d}.pt', path).named['num']: path
+                for path in harn.prev_snapshots()
+            }
+            if epoch_to_fpath:
+                fpath = epoch_to_fpath[max(epoch_to_fpath)]
+
+        if fpath is None:
+            raise Exception('cannot find / determine the best snapshot')
+
+        return fpath
+
 
 @register_mixin
 class SnapshotCallbacks(object):
@@ -1182,7 +1321,7 @@ class ScheduleMixin(object):
                             warmup_lr = [_lr * (1 - k) for _lr in regular_lr]
                         else:
                             raise KeyError(warmup)
-                        harn.debug('warmup_lr = {}'.format(warmup_lr))
+                        # harn.debug('warmup_lr = {}'.format(warmup_lr))
                         _set_optimizer_values(harn.optimizer, 'lr', warmup_lr)
 
         # TODO: REFACTOR SO NETHARN HAS A PROPER ITERATION MODE
@@ -1298,7 +1437,7 @@ class CoreMixin(object):
         harn.info('ARGV:\n    ' + sys.executable + ' ' + ' '.join(sys.argv))
 
         if harn._tlog is not None:
-            train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath)
+            train_base = dirname(harn.name_dpath or harn.train_dpath)
             harn.info('dont forget to start:\n'
                       '    tensorboard --logdir ' + ub.shrinkuser(train_base))
 
@@ -1456,17 +1595,17 @@ class CoreMixin(object):
         harn.info('training completed')
 
         if harn._tlog is not None:
-            train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath)
+            train_base = dirname(harn.name_dpath or harn.train_dpath)
             harn.info('harn.train_dpath = {!r}'.format(harn.train_dpath))
-            harn.info('harn.nice_dpath  = {!r}'.format(harn.nice_dpath))
+            harn.info('harn.name_dpath  = {!r}'.format(harn.name_dpath))
             harn.info('view tensorboard results for this run via:\n'
                       '    tensorboard --logdir ' + ub.shrinkuser(train_base))
 
-        deploy_fpath = harn._deploy()
+        harn.deploy_fpath = harn._deploy()
 
         harn.on_complete()
         harn.info('exiting fit harness.')
-        return deploy_fpath
+        return harn.deploy_fpath
 
     def _export(harn):
         """
@@ -1526,6 +1665,7 @@ class CoreMixin(object):
             deploy_fpath = None
             harn.warn('Failed to deploy: {}'.format(repr(ex)))
 
+        harn.deploy_fpath = deploy_fpath
         return deploy_fpath
 
     @profiler.profile
@@ -1807,12 +1947,12 @@ class CoreMixin(object):
                     iter_moving_metrics.update(cur_metrics)
 
                     # display_train training info
-                    if harn.check_interval('display_' + tag, bx):
+                    if harn.check_interval('display_' + tag, bx) or bx == n_batches - 1:
                         ave_metrics = iter_moving_metrics.average()
 
                         msg = harn._batch_msg({'loss': ave_metrics['loss']},
                                               bsize, learn)
-                        prog.set_description(tag + ' ' + msg)
+                        prog.set_description(tag + ' ' + msg, refresh=False)
 
                         # log_iter_train, log_iter_test, log_iter_vali
                         if harn.check_interval('log_iter_' + tag, bx, first=True):
@@ -1828,7 +1968,14 @@ class CoreMixin(object):
                                         harn, 'iter',
                                         special_groupers=harn.preferences['tensorboard_groups'])
 
-                        prog.update(display_interval)
+                        if use_tqdm:
+                            prog.update(display_interval)
+                        else:
+                            # hack to force progiter to reach 100% at the end
+                            # This should be fixed in progiter.
+                            steps_taken = (bx - prog._iter_idx) + 1
+                            prog.update(steps_taken)
+
                         if use_tqdm:
                             harn._update_prog_postfix(prog)
 
@@ -1853,6 +2000,7 @@ class CoreMixin(object):
         #         harn.optimizer.step()
         #         harn.optimizer.zero_grad()
 
+        prog.refresh()
         prog.close()
         harn.epoch_prog = None
 
@@ -2414,7 +2562,7 @@ class FitHarn(ExtraMixins, InitializeMixin, ProgMixin, LogMixin, SnapshotMixin,
             if harn.hyper.name is not None:
                 harn.hyper.name = 'DEMO_' + harn.hyper.name
             else:
-                raise AssertionError('should have a nice name in demo mode')
+                raise AssertionError('should have a nice "name" in demo mode')
 
         harn.datasets = None
         harn.loaders = None
@@ -2441,7 +2589,7 @@ class FitHarn(ExtraMixins, InitializeMixin, ProgMixin, LogMixin, SnapshotMixin,
 
         # Output directories
         harn.train_dpath = train_dpath
-        harn.nice_dpath = None
+        harn.name_dpath = None
         harn.train_info = None
 
         # Progress bars
@@ -2514,6 +2662,13 @@ class FitHarn(ExtraMixins, InitializeMixin, ProgMixin, LogMixin, SnapshotMixin,
                       DeprecationWarning)
         return harn.preferences
 
+    @property
+    def nice_dpath(harn):
+        import warnings
+        warnings.warn('harn.nice_dpath is deprecated, use harn.name_dpath instead',
+                      DeprecationWarning)
+        return harn.name_dpath
+
     def check_interval(harn, tag, idx, first=False):
         """
         check if its time to do something that happens every few iterations
diff --git a/netharn/hyperparams.py b/netharn/hyperparams.py
index b723b724a778e6501c4b5610b2a3f8ef4a9cb565..740eaffbc1726b8c6aaed0672f87d5c88ba23c7a 100644
--- a/netharn/hyperparams.py
+++ b/netharn/hyperparams.py
@@ -20,7 +20,7 @@ Example:
     >>> hyper = nh.HyperParams(**{
     >>>     # --- Data First
     >>>     'datasets'    : datasets,
-    >>>     'nice'        : 'demo',
+    >>>     'name'        : 'demo',
     >>>     'loaders'     : {'batch_size': 64},
     >>>     'xpu'         : nh.XPU.coerce('auto'),
     >>>     # --- Algorithm Second
@@ -372,6 +372,10 @@ def _rectify_loaders(arg, kw):
     """
     Loaders are handled slightly differently than other classes
     We construct them eagerly (if they are not already constructed)
+
+    Example:
+        >>> # test that dict-based spec works
+        >>> _rectify_loaders({'batch_size': 4}, {})
     """
     if arg is None:
         arg = {}
@@ -395,8 +399,7 @@ def _rectify_loaders(arg, kw):
         else:
             # loaders is kwargs for `torch_data.DataLoader`
             arg = (torch_data.DataLoader, arg)
-            # cls, kw2 = _rectify_class(None, arg, kw)
-            rectified = _rectify_class(None, arg, kw)
+            rectified = _rectify_class(arg, kw)
             cls = rectified['cls']
             kw2 = rectified['cls_kw']
     else:
@@ -453,15 +456,20 @@ class HyperParams(object):
                  augment=None,
                  other=None,  # incorporated into the hash
                  extra=None,  # ignored when computing the hash
-                 nice=None,  # alias of name
+                 nice=None,  # deprecated, alias of name
                  ):
         kwargs = {}
 
         hyper.datasets = datasets
         if name is None:
             import warnings
-            warnings.warn('Specify "name" instead of "nice"')
+            warnings.warn(
+                'The "nice" argument is deprecated and will be removed. '
+                'Specify "name" instead.', DeprecationWarning)
             name = nice
+        if name is None:
+            # raise ValueError('you must specify a name for HyperParams')
+            name = 'untitled'
         hyper.name = name
         hyper.workdir = workdir
         hyper.xpu = xpu
@@ -638,7 +646,7 @@ class HyperParams(object):
         _append_part('criterion', hyper.criterion_cls, hyper.criterion_params, initkw)
 
         # TODO: should other be included in initkw? I think it should.
-        # probably should also include monitor, xpu, nice
+        # probably should also include monitor, xpu, name
 
         # Loader is a bit hacked
         _append_part('loader', hyper.loader_cls, hyper.loader_params_nice, initkw)
@@ -758,7 +766,7 @@ class HyperParams(object):
             >>> hyper = nh.hyperparams.HyperParams(**{
             >>>     # --- Data First
             >>>     'datasets'    : datasets,
-            >>>     'nice'        : 'demo',
+            >>>     'name'        : 'demo',
             >>>     'workdir'     : ub.ensure_app_cache_dir('netharn/demo'),
             >>>     'loaders'     : {'batch_size': 64},
             >>>     'xpu'         : nh.XPU.coerce('auto'),
@@ -860,8 +868,8 @@ class HyperParams(object):
             When r = 10000, it becomes had to compute the number because of
             floating point errors, but the probability is likely astronomically
             low. I doubt we will ever run training in the same work directory
-            (and with the same nice name) 10,000 different times, so using an 8
-            character hash seems safe and user friendly for this purpose.
+            (and with the same nice "name") 10,000 different times, so using an
+            8 character hash seems safe and user friendly for this purpose.
             Perhaps we may move to 12, 16, or 32+ in the future, but for the
             pre 1.0 netharn, 8 seems fine.
 
@@ -874,13 +882,19 @@ class HyperParams(object):
         name = hyper.name
 
         nice_dpath = None
+        name_dpath = None
         if not given_explicit_train_dpath:
             # setup a cannonical and a linked symlink dir
             train_dpath = normpath(
                     join(hyper.workdir, 'fit', 'runs', name, train_hashid))
-            # also setup a "nice" custom name, which may conflict, but oh well
+            # also setup a custom "name", which may conflict. This will
+            # overwrite an existing "name" symlink, but the real runs directory
+            # is based on a hash, so with astronomically high probability it
+            # won't be overwritten.
             if name:
                 try:
+                    name_dpath = normpath(
+                            join(hyper.workdir, 'fit', 'name', name))
                     nice_dpath = normpath(
                             join(hyper.workdir, 'fit', 'nice', name))
                 except Exception:
@@ -913,6 +927,7 @@ class HyperParams(object):
             ('init_history', init_history),
             ('init_history_hashid', _hash_data(util.make_idstr(init_history))),
 
+            ('name', hyper.name),
             ('nice', hyper.name),
 
             ('old_train_dpath', normpath(
@@ -920,11 +935,14 @@ class HyperParams(object):
 
             ('train_dpath', train_dpath),
             # ('link_dpath', link_dpath),
+
+            # "nice_dpath" is deprecated in favor of "name_dpath"
             ('nice_dpath', nice_dpath),
+            ('name_dpath', name_dpath),
 
             ('given_explicit_train_dpath', given_explicit_train_dpath),
 
-            # TODO, add in n_classes if applicable
+            # TODO, add in classes if applicable
             # TODO, add in centering if applicable
             # ('centering', hyper.centering),
 
@@ -950,7 +968,7 @@ class HyperParams(object):
             'name'        : 'demo',
             'xpu'         : nh.XPU.coerce('argv'),
             # workdir is a directory where intermediate results can be saved
-            # nice symlinks <workdir>/fit/nice/<nice> -> ../runs/<hashid>
+            # name symlinks <workdir>/fit/name/<name> -> ../runs/<hashid>
             # XPU auto select a gpu if idle and VRAM>6GB else a cpu
             # ================
             # Data Components
diff --git a/netharn/metrics/__init__.py b/netharn/metrics/__init__.py
index a10f09f542dc129ee0e9e47761a5d8de8602a3f2..316fb5b7edc6b0f0c290c7da610329a89be463d3 100644
--- a/netharn/metrics/__init__.py
+++ b/netharn/metrics/__init__.py
@@ -1,5 +1,5 @@
 """
-mkinit netharn.metrics
+mkinit netharn.metrics -w
 """
 # flake8: noqa
 from __future__ import absolute_import, division, print_function, unicode_literals
@@ -17,11 +17,16 @@ from netharn.metrics import voc_metrics
 from netharn.metrics.clf_report import (classification_report,
                                         ovr_classification_report,)
 from netharn.metrics.confusion_vectors import (BinaryConfusionVectors,
-                                               ConfusionVectors,
-                                               OneVsRestConfusionVectors,)
-from netharn.metrics.detect_metrics import (DetectionMetrics,)
+                                               ConfusionVectors, DictProxy,
+                                               OneVsRestConfusionVectors,
+                                               PR_Result, PerClass_PR_Result,
+                                               PerClass_ROC_Result, ROC_Result,
+                                               Threshold_Result,)
+from netharn.metrics.detect_metrics import (DetectionMetrics,
+                                            eval_detections_cli,)
 from netharn.metrics.drawing import (draw_perclass_prcurve, draw_perclass_roc,
-                                     draw_peritem_prcurve, draw_roc,)
+                                     draw_prcurve, draw_roc,
+                                     draw_threshold_curves,)
 from netharn.metrics.functional import (fast_confusion_matrix,)
 from netharn.metrics.sklearn_alts import (class_accuracy_from_confusion,
                                           confusion_matrix,
@@ -29,11 +34,13 @@ from netharn.metrics.sklearn_alts import (class_accuracy_from_confusion,
 from netharn.metrics.voc_metrics import (VOC_Metrics,)
 
 __all__ = ['BinaryConfusionVectors', 'ConfusionVectors', 'DetectionMetrics',
-           'OneVsRestConfusionVectors', 'VOC_Metrics', 'assignment',
+           'DictProxy', 'OneVsRestConfusionVectors', 'PR_Result',
+           'PerClass_PR_Result', 'PerClass_ROC_Result', 'ROC_Result',
+           'Threshold_Result', 'VOC_Metrics', 'assignment',
            'class_accuracy_from_confusion', 'classification_report',
            'clf_report', 'confusion_matrix', 'confusion_vectors',
            'detect_metrics', 'draw_perclass_prcurve', 'draw_perclass_roc',
-           'draw_peritem_prcurve', 'draw_roc', 'drawing',
-           'fast_confusion_matrix', 'functional',
+           'draw_prcurve', 'draw_roc', 'draw_threshold_curves', 'drawing',
+           'eval_detections_cli', 'fast_confusion_matrix', 'functional',
            'global_accuracy_from_confusion', 'ovr_classification_report',
            'sklearn_alts', 'voc_metrics']
diff --git a/netharn/metrics/assignment.py b/netharn/metrics/assignment.py
index 7add77e1f8a325f2d0e8b3c36d713fc1f67e849a..63fa6fd0ffb87325666b036aa74d6ffa6ec2007c 100644
--- a/netharn/metrics/assignment.py
+++ b/netharn/metrics/assignment.py
@@ -24,7 +24,7 @@ import ubelt as ub
 def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0,
                               ovthresh=0.5, bg_cidx=-1, bias=0.0, classes=None,
                               compat='all', prioritize='iou',
-                              ignore_class='ignore'):
+                              ignore_classes='ignore'):
     """
     Create confusion vectors for detections by assigning to ground true boxes
 
@@ -75,8 +75,8 @@ def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0,
             mapping from class indices to class names. Can also contain class
             heirarchy information.
 
-        ignore_class (str):
-            class name indicating ignore regions
+        ignore_classes (str | List[str]):
+            class name(s) indicating ignore regions
 
     TODO:
         - [ ] This is a bottleneck function. An implementation in C / C++ /
@@ -238,13 +238,13 @@ def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0,
     y =  _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup,
                         cx_to_matchable_txs, bg_weight, prioritize, ovthresh,
                         pdist_priority, cx_to_ancestors, bg_cidx,
-                        ignore_class=ignore_class)
+                        ignore_classes=ignore_classes)
     return y
 
 
 def _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup,
                    cx_to_matchable_txs, bg_weight, prioritize, ovthresh,
-                   pdist_priority, cx_to_ancestors, bg_cidx, ignore_class):
+                   pdist_priority, cx_to_ancestors, bg_cidx, ignore_classes):
     # Notes:
     # * Preallocating numpy arrays does not help
     # * It might be useful to code this critical loop up in C / Cython
@@ -264,10 +264,10 @@ def _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup,
     _pred_cxs = pred_dets.class_idxs.take(_pred_sortx, axis=0)
     _pred_scores = _scores.take(_pred_sortx, axis=0)
 
-    if ignore_class is not None:
+    if ignore_classes is not None:
         # Remove certain ignore regions from scoring
         true_ignore_flags, pred_ignore_flags = _filter_ignore_regions(
-            true_dets, pred_dets, ovthresh=ovthresh, ignore_class=ignore_class)
+            true_dets, pred_dets, ovthresh=ovthresh, ignore_classes=ignore_classes)
 
         _pred_keep_flags = ~pred_ignore_flags[_pred_sortx]
         _pred_sortx = _pred_sortx[_pred_keep_flags]
@@ -383,7 +383,7 @@ def _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup,
             # If the prediction is a finer-grained category than the truth
             # change the prediction to match the truth (because it is
             # compatible). This is the key to hierarchical scoring.
-            if true_cx in cx_to_ancestors[pred_cx]:
+            if pred_cx is not None and true_cx in cx_to_ancestors[pred_cx]:
                 pred_cx = true_cx
 
             y_pred_raw.append(raw_pred_cx)
@@ -517,7 +517,7 @@ def _fast_pdist_priority(classes, prioritize, _cache={}):
 
 
 def _filter_ignore_regions(true_dets, pred_dets, ovthresh=0.5,
-                           ignore_class='ignore'):
+                           ignore_classes='ignore'):
     """
     Determine which true and predicted detections should be ignored.
 
@@ -529,28 +529,30 @@ def _filter_ignore_regions(true_dets, pred_dets, ovthresh=0.5,
         >>> from netharn.metrics.assignment import *  # NOQA
         >>> from netharn.metrics.assignment import _filter_ignore_regions
         >>> import kwimage
-        >>> pred_dets = kwimage.Detections.random(classes=['a'])
+        >>> pred_dets = kwimage.Detections.random(classes=['a', 'b', 'c'])
         >>> true_dets = kwimage.Detections.random(
-        >>>     segmentations=True, classes=['a', 'ignore'])
-        >>> ignore_class = 'ignore'
+        >>>     segmentations=True, classes=['a', 'b', 'c', 'ignore'])
+        >>> ignore_classes = {'ignore', 'b'}
         >>> ovthresh = 0.5
         >>> print('true_dets = {!r}'.format(true_dets))
         >>> print('pred_dets = {!r}'.format(pred_dets))
         >>> flags1, flags2 = _filter_ignore_regions(
-        >>>     true_dets, pred_dets, ovthresh=ovthresh, ignore_class=ignore_class)
+        >>>     true_dets, pred_dets, ovthresh=ovthresh, ignore_classes=ignore_classes)
         >>> print('flags1 = {!r}'.format(flags1))
         >>> print('flags2 = {!r}'.format(flags2))
 
-
         >>> flags3, flags4 = _filter_ignore_regions(
         >>>     true_dets, pred_dets, ovthresh=ovthresh,
-        >>>     ignore_class=ignore_class.upper())
+        >>>     ignore_classes={c.upper() for c in ignore_classes})
         >>> assert np.all(flags1 == flags3)
         >>> assert np.all(flags2 == flags4)
     """
     true_ignore_flags = np.zeros(len(true_dets), dtype=np.bool)
     pred_ignore_flags = np.zeros(len(pred_dets), dtype=np.bool)
 
+    if not ub.iterable(ignore_classes):
+        ignore_classes = {ignore_classes}
+
     def _normalize_catname(name, classes):
         if classes is None:
             return name
@@ -560,16 +562,21 @@ def _filter_ignore_regions(true_dets, pred_dets, ovthresh=0.5,
             if cname.lower() == name.lower():
                 return cname
         return name
-        # raise KeyError(name)
 
-    ignore_class = _normalize_catname(ignore_class, true_dets.classes)
+    ignore_classes = {_normalize_catname(c, true_dets.classes)
+                      for c in ignore_classes}
+
+    if true_dets.classes is not None:
+        ignore_classes = ignore_classes & set(true_dets.classes)
 
     # Filter out true detections labeled as "ignore"
-    if true_dets.classes is not None and ignore_class in true_dets.classes:
-        ignore_cidx = true_dets.classes.index(ignore_class)
-        true_ignore_flags = true_dets.class_idxs == ignore_cidx
+    if true_dets.classes is not None and ignore_classes:
+        import kwarray
+        ignore_cidxs = [true_dets.classes.index(c) for c in ignore_classes]
+        true_ignore_flags = kwarray.isect_flags(
+            true_dets.class_idxs, ignore_cidxs)
 
-        if np.any(true_ignore_flags):
+        if np.any(true_ignore_flags) and len(pred_dets):
             ignore_dets = true_dets.compress(true_ignore_flags)
 
             pred_boxes = pred_dets.data['boxes']
diff --git a/netharn/metrics/clf_report.py b/netharn/metrics/clf_report.py
index d8f41e963896a86bbf2c143bdb7959b9c7ef803d..0ae728a16498f864ac9f4b3a61c80e2c8f8c26bd 100644
--- a/netharn/metrics/clf_report.py
+++ b/netharn/metrics/clf_report.py
@@ -417,7 +417,7 @@ def ovr_classification_report(mc_y_true, mc_probs, target_names=None,
             # Index of the true class
             k_true = ohvec_true.T[k]
             # Index of the predicted class
-            k_pred = np.argmax(bin_probs, axis=1)
+            k_pred = np.argmax(bin_probs, axis=1)  # NOTE: ASSUME MUTEX CLASSES
 
             # Probabilities for the true class for each label
             bin_truth = np.eye(2)[k_true]
diff --git a/netharn/metrics/confusion_vectors.py b/netharn/metrics/confusion_vectors.py
index fc3f89d6eca80e050c56c22a14622ee4ed2c83b2..f94b86b313bbec6a7da4b42f282ace97ccf37cac 100644
--- a/netharn/metrics/confusion_vectors.py
+++ b/netharn/metrics/confusion_vectors.py
@@ -21,8 +21,8 @@ class ConfusionVectors(ub.NiceRepr):
         >>> from netharn.metrics import DetectionMetrics
         >>> dmet = DetectionMetrics.demo(
         >>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
-        >>> self = dmet.confusion_vectors()
-        >>> print(self.data._pandas())  # xdoctest: IGNORE_WANT
+        >>> cfsn_vecs = dmet.confusion_vectors()
+        >>> print(cfsn_vecs.data._pandas())  # xdoctest: IGNORE_WANT
             pred_raw  pred  true   score  weight     iou  txs  pxs  gid
         0          2     2     2 10.0000  1.0000  1.0000    0    4    0
         1          2     2     2  7.5025  1.0000  1.0000    1    3    0
@@ -48,33 +48,33 @@ class ConfusionVectors(ub.NiceRepr):
         ...
     """
 
-    def __init__(self, data, classes, probs=None):
-        self.data = data
-        self.classes = classes
-        self.probs = probs
+    def __init__(cfsn_vecs, data, classes, probs=None):
+        cfsn_vecs.data = data
+        cfsn_vecs.classes = classes
+        cfsn_vecs.probs = probs
 
-    def __nice__(self):
-        return self.data.__nice__()
+    def __nice__(cfsn_vecs):
+        return cfsn_vecs.data.__nice__()
 
     @classmethod
-    def demo(self):
+    def demo(cls):
         """
         Example:
             >>> # xdoctest: +REQUIRES(module:ndsampler)
-            >>> self = ConfusionVectors.demo()
-            >>> print('self = {!r}'.format(self))
-            >>> cx_to_binvecs = self.binarize_ovr()
+            >>> cfsn_vecs = ConfusionVectors.demo()
+            >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
+            >>> cx_to_binvecs = cfsn_vecs.binarize_ovr()
             >>> print('cx_to_binvecs = {!r}'.format(cx_to_binvecs))
         """
         from netharn.metrics import DetectionMetrics
         dmet = DetectionMetrics.demo(
             nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
         # print('dmet = {!r}'.format(dmet))
-        self = dmet.confusion_vectors()
-        self.data._data = ub.dict_isect(self.data._data, [
+        cfsn_vecs = dmet.confusion_vectors()
+        cfsn_vecs.data._data = ub.dict_isect(cfsn_vecs.data._data, [
             'true', 'pred', 'score', 'weight',
         ])
-        return self
+        return cfsn_vecs
 
     @classmethod
     def from_arrays(ConfusionVectors, true, pred=None, score=None, weight=None,
@@ -89,8 +89,8 @@ class ConfusionVectors(ub.NiceRepr):
             >>> rng = kwarray.ensure_rng(0)
             >>> true = (rng.rand(10) * len(classes)).astype(np.int)
             >>> probs = rng.rand(len(true), len(classes))
-            >>> self = ConfusionVectors.from_arrays(true=true, probs=probs, classes=classes)
-            >>> self.confusion_matrix()
+            >>> cfsn_vecs = ConfusionVectors.from_arrays(true=true, probs=probs, classes=classes)
+            >>> cfsn_vecs.confusion_matrix()
             pred     person  vehicle  object
             real
             person        0        0       0
@@ -118,10 +118,10 @@ class ConfusionVectors(ub.NiceRepr):
 
         data = {k: v for k, v in data.items() if v is not None}
         cfsn_data = kwarray.DataFrameArray(data)
-        self = ConfusionVectors(cfsn_data, probs=probs, classes=classes)
-        return self
+        cfsn_vecs = ConfusionVectors(cfsn_data, probs=probs, classes=classes)
+        return cfsn_vecs
 
-    def confusion_matrix(self, raw=False, compress=False):
+    def confusion_matrix(cfsn_vecs, raw=False, compress=False):
         """
         Builds a confusion matrix from the confusion vectors.
 
@@ -141,17 +141,18 @@ class ConfusionVectors(ub.NiceRepr):
             >>> from netharn.metrics import DetectionMetrics
             >>> dmet = DetectionMetrics.demo(
             >>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), n_fn=(0, 1), nclasses=3, cls_noise=.2)
-            >>> self = dmet.confusion_vectors()
-            >>> cm = self.confusion_matrix()
+            >>> cfsn_vecs = dmet.confusion_vectors()
+            >>> cm = cfsn_vecs.confusion_matrix()
+            ...
             >>> print(cm.to_string(float_format=lambda x: '%.2f' % x))
             pred        background  cat_1  cat_2  cat_3
             real
-            background           0      1      1      1
-            cat_1                2     12      0      1
-            cat_2                2      0     14      1
-            cat_3                1      0      1     17
+            background        0.00   1.00   1.00   1.00
+            cat_1             2.00  12.00   0.00   1.00
+            cat_2             2.00   0.00  14.00   1.00
+            cat_3             1.00   0.00   1.00  17.00
         """
-        data = self.data
+        data = cfsn_vecs.data
 
         y_true = data['true'].copy()
         if raw:
@@ -159,8 +160,9 @@ class ConfusionVectors(ub.NiceRepr):
         else:
             y_pred = data['pred'].copy()
 
-        if 'background' in self.classes:
-            bg_idx = self.classes.index('background')
+        # FIXME: hard-coded background class
+        if 'background' in cfsn_vecs.classes:
+            bg_idx = cfsn_vecs.classes.index('background')
             y_true[y_true < 0] = bg_idx
             y_pred[y_pred < 0] = bg_idx
         else:
@@ -170,13 +172,13 @@ class ConfusionVectors(ub.NiceRepr):
                 raise IndexError('y_pred contains invalid indices')
 
         matrix = fast_confusion_matrix(
-            y_true, y_pred, n_labels=len(self.classes),
+            y_true, y_pred, n_labels=len(cfsn_vecs.classes),
             sample_weight=data.get('weight', None)
         )
 
         import pandas as pd
-        cm = pd.DataFrame(matrix, index=list(self.classes),
-                          columns=list(self.classes))
+        cm = pd.DataFrame(matrix, index=list(cfsn_vecs.classes),
+                          columns=list(cfsn_vecs.classes))
         if compress:
             iszero = matrix == 0
             unused = (np.all(iszero, axis=0) & np.all(iszero, axis=1))
@@ -185,42 +187,42 @@ class ConfusionVectors(ub.NiceRepr):
         cm.columns.name = 'pred'
         return cm
 
-    def coarsen(self, cxs):
+    def coarsen(cfsn_vecs, cxs):
         """
         Creates a coarsened set of vectors
         """
         import ndsampler
         import kwarray
-        assert self.probs is not None, 'need probs'
-        if not isinstance(self.classes, ndsampler.CategoryTree):
+        assert cfsn_vecs.probs is not None, 'need probs'
+        if not isinstance(cfsn_vecs.classes, ndsampler.CategoryTree):
             raise TypeError('classes must be a ndsampler.CategoryTree')
 
-        descendent_map = self.classes.idx_to_descendants_idxs(include_self=True)
+        descendent_map = cfsn_vecs.classes.idx_to_descendants_idxs(include_self=True)
         valid_descendant_mapping = ub.dict_isect(descendent_map, cxs)
         # mapping from current category indexes to the new coarse ones
         # Anything without an explicit key will be mapped to background
 
-        bg_idx = self.classes.index('background')
+        bg_idx = cfsn_vecs.classes.index('background')
         mapping = {v: k for k, vs in valid_descendant_mapping.items() for v in vs}
-        new_true = np.array([mapping.get(x, bg_idx) for x in self.data['true']])
-        new_pred = np.array([mapping.get(x, bg_idx) for x in self.data['pred']])
+        new_true = np.array([mapping.get(x, bg_idx) for x in cfsn_vecs.data['true']])
+        new_pred = np.array([mapping.get(x, bg_idx) for x in cfsn_vecs.data['pred']])
 
-        new_score = np.array([p[x] for x, p in zip(new_pred, self.probs)])
+        new_score = np.array([p[x] for x, p in zip(new_pred, cfsn_vecs.probs)])
 
         new_y_df = {
             'true': new_true,
             'pred': new_pred,
             'score': new_score,
-            'weight': self.data['weight'],
-            'txs': self.data['txs'],
-            'pxs': self.data['pxs'],
-            'gid': self.data['gid'],
+            'weight': cfsn_vecs.data['weight'],
+            'txs': cfsn_vecs.data['txs'],
+            'pxs': cfsn_vecs.data['pxs'],
+            'gid': cfsn_vecs.data['gid'],
         }
         new_y_df = kwarray.DataFrameArray(new_y_df)
-        coarse_cfsn_vecs = ConfusionVectors(new_y_df, self.classes, self.probs)
+        coarse_cfsn_vecs = ConfusionVectors(new_y_df, cfsn_vecs.classes, cfsn_vecs.probs)
         return coarse_cfsn_vecs
 
-    def binarize_peritem(self, negative_classes=None):
+    def binarize_peritem(cfsn_vecs, negative_classes=None):
         """
         Creates a binary representation useful for measuring the performance of
         detectors. It is assumed that scores of "positive" classes should be
@@ -236,24 +238,39 @@ class ConfusionVectors(ub.NiceRepr):
             >>> from netharn.metrics import DetectionMetrics
             >>> dmet = DetectionMetrics.demo(
             >>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
-            >>> self = dmet.confusion_vectors()
+            >>> cfsn_vecs = dmet.confusion_vectors()
             >>> class_idxs = list(dmet.classes.node_to_idx.values())
-            >>> binvecs = self.binarize_peritem()
+            >>> binvecs = cfsn_vecs.binarize_peritem()
         """
         import kwarray
         # import warnings
         # warnings.warn('binarize_peritem DOES NOT PRODUCE CORRECT RESULTS')
 
-        if negative_classes is None:
-            negative_cidxs = {-1}
-        else:
-            raise NotImplementedError
+        negative_cidxs = {-1}
+        if negative_classes is not None:
+            @ub.memoize
+            def _lower_classes():
+                if cfsn_vecs.classes is None:
+                    raise Exception(
+                        'classes must be known if negative_classes are strings')
+                return [c.lower() for c in cfsn_vecs.classes]
+            for c in negative_classes:
+                import six
+                if isinstance(c, six.string_types):
+                    classes = _lower_classes()
+                    try:
+                        cidx = classes.index(c.lower())  # match case-insensitively
+                    except ValueError:  # name is not a known class: skip it
+                        continue
+                else:
+                    cidx = int(c)
+                negative_cidxs.add(cidx)
 
-        is_false = kwarray.isect_flags(self.data['true'], negative_cidxs)
+        is_false = kwarray.isect_flags(cfsn_vecs.data['true'], negative_cidxs)
 
         _data = {
             'is_true': ~is_false,
-            'pred_score': self.data['score'],
+            'pred_score': cfsn_vecs.data['score'],
         }
         extra = ub.dict_isect(_data, [
             'txs', 'pxs', 'gid', 'weight'])
@@ -262,23 +279,25 @@ class ConfusionVectors(ub.NiceRepr):
         binvecs = BinaryConfusionVectors(bin_data)
         return binvecs
 
-    def binarize_ovr(self, mode=1, keyby='name'):
+    def binarize_ovr(cfsn_vecs, mode=1, keyby='name', ignore_classes={'ignore'}):
         """
-        Transforms self into one-vs-rest BinaryConfusionVectors for each category.
+        Transforms cfsn_vecs into one-vs-rest BinaryConfusionVectors for each category.
 
         Args:
-            mode (int): 0 for heirarchy aware or 1 for voc like
-            keyby : can be cx or name
+            mode (int, default=1): 0 for hierarchy aware or 1 for voc like.
+                MODE 0 IS PROBABLY BROKEN
+            keyby (str): either 'cx' or 'name'
+            ignore_classes (Set[str]): category names to ignore
 
         Returns:
             OneVsRestConfusionVectors: which behaves like
-            Dict[int, BinaryConfusionVectors]: cx_to_binvecs
+                Dict[int, BinaryConfusionVectors]: cx_to_binvecs
 
         Example:
             >>> # xdoctest: +REQUIRES(module:ndsampler)
-            >>> self = ConfusionVectors.demo()
-            >>> print('self = {!r}'.format(self))
-            >>> catname_to_binvecs = self.binarize_ovr(keyby='name')
+            >>> cfsn_vecs = ConfusionVectors.demo()
+            >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
+            >>> catname_to_binvecs = cfsn_vecs.binarize_ovr(keyby='name')
             >>> print('catname_to_binvecs = {!r}'.format(catname_to_binvecs))
 
         Notes:
@@ -315,17 +334,17 @@ class ConfusionVectors(ub.NiceRepr):
         """
         import kwarray
 
-        classes = self.classes
-        data = self.data
+        classes = cfsn_vecs.classes
+        data = cfsn_vecs.data
 
         if mode == 0:
-            if self.probs is None:
+            if cfsn_vecs.probs is None:
                 raise ValueError('cannot binarize in mode=0 without probs')
             pdist = classes.idx_pairwise_distance()
 
         cx_to_binvecs = {}
         for cx in range(len(classes)):
-            if classes[cx] == 'background':
+            if classes[cx] == 'background' or classes[cx] in ignore_classes:
                 continue
 
             if mode == 0:
@@ -335,7 +354,7 @@ class ConfusionVectors(ub.NiceRepr):
                     'IN THIS FILE WERE, AND I HAVENT CHECKED THIS ONE YET')
 
                 # Lookup original probability predictions for the class of interest
-                new_scores = self.probs[:, cx]
+                new_scores = cfsn_vecs.probs[:, cx]
 
                 # Determine which truth items have compatible classes
                 # Note: we ignore any truth-label that is COARSER than the
@@ -356,9 +375,9 @@ class ConfusionVectors(ub.NiceRepr):
                     'is_true': is_finer_eq.astype(np.uint8),
                     'pred_score': new_scores,
                     'weight': data['weight'] * (np.float32(1.0) - is_coarser),
-                    'txs': self.data['txs'],
-                    'pxs': self.data['pxs'],
-                    'gid': self.data['gid'],
+                    'txs': cfsn_vecs.data['txs'],
+                    'pxs': cfsn_vecs.data['pxs'],
+                    'gid': cfsn_vecs.data['gid'],
                 }
                 bin_data = kwarray.DataFrameArray(bin_data)
 
@@ -371,18 +390,18 @@ class ConfusionVectors(ub.NiceRepr):
             elif mode == 1:
                 # More VOC-like, not heirarchy friendly
 
-                if self.probs is not None:
+                if cfsn_vecs.probs is not None:
                     # We know the actual score predicted for this category in
                     # this case.
-                    is_true = self.data['true'] == cx
-                    pred_score = self.probs[:, cx]
+                    is_true = cfsn_vecs.data['true'] == cx
+                    pred_score = cfsn_vecs.probs[:, cx]
                 else:
                     import warnings
                     warnings.warn(
                         'Binarize ovr is only approximate if not all probabilities are known')
                     # If we don't know the probabilities for non-predicted
                     # categories then we have to guess.
-                    is_true = self.data['true'] == cx
+                    is_true = cfsn_vecs.data['true'] == cx
 
                     # do we know the actual predicted score for this category?
                     score_is_unknown = data['pred'] != cx
@@ -391,6 +410,14 @@ class ConfusionVectors(ub.NiceRepr):
                     # These scores were for a different class, so assume
                     # other classes were predicted with a uniform prior
                     approx_score = (1 - pred_score[score_is_unknown]) / (len(classes) - 1)
+
+                    # Except in the case where predicted class is -1. In this
+                    # case no prediction was actually made (above a threshold)
+                    # so the assumed score should be significantly lower, we
+                    # conservatively choose zero.
+                    unknown_preds = data['pred'][score_is_unknown]
+                    approx_score[unknown_preds == -1] = 0
+
                     pred_score[score_is_unknown] = approx_score
 
                 bin_data = {
@@ -411,27 +438,27 @@ class ConfusionVectors(ub.NiceRepr):
         if keyby == 'cx':
             cx_to_binvecs = cx_to_binvecs
         elif keyby == 'name':
-            cx_to_binvecs = ub.map_keys(self.classes, cx_to_binvecs)
+            cx_to_binvecs = ub.map_keys(cfsn_vecs.classes, cx_to_binvecs)
         else:
             raise KeyError(keyby)
 
-        ovr_cfns = OneVsRestConfusionVectors(cx_to_binvecs, self.classes)
+        ovr_cfns = OneVsRestConfusionVectors(cx_to_binvecs, cfsn_vecs.classes)
         return ovr_cfns
 
-    def classification_report(self, verbose=0):
+    def classification_report(cfsn_vecs, verbose=0):
         """
         Build a classification report with various metrics.
 
         Example:
             >>> from netharn.metrics.confusion_vectors import *  # NOQA
-            >>> self = ConfusionVectors.demo()
-            >>> report = self.classification_report(verbose=1)
+            >>> cfsn_vecs = ConfusionVectors.demo()
+            >>> report = cfsn_vecs.classification_report(verbose=1)
         """
         from netharn.metrics import clf_report
-        y_true = self.data['true']
-        y_pred = self.data['pred']
-        sample_weight = self.data.get('weight', None)
-        target_names = list(self.classes)
+        y_true = cfsn_vecs.data['true']
+        y_pred = cfsn_vecs.data['pred']
+        sample_weight = cfsn_vecs.data.get('weight', None)
+        target_names = list(cfsn_vecs.classes)
         report = clf_report.classification_report(
             y_true=y_true,
             y_pred=y_pred,
@@ -449,6 +476,15 @@ class OneVsRestConfusionVectors(ub.NiceRepr):
     Attributes:
         cx_to_binvecs
         classes
+
+    Example:
+        >>> # xdoctest: +REQUIRES(module:ndsampler)
+        >>> from netharn.metrics import DetectionMetrics
+        >>> dmet = DetectionMetrics.demo(
+        >>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
+        >>> cfsn_vecs = dmet.confusion_vectors()
+        >>> self = cfsn_vecs.binarize_ovr(keyby='name')
+        >>> print('self = {!r}'.format(self))
     """
     def __init__(self, cx_to_binvecs, classes):
         self.cx_to_binvecs = cx_to_binvecs
@@ -458,6 +494,12 @@ class OneVsRestConfusionVectors(ub.NiceRepr):
         # return ub.repr2(ub.map_vals(len, self.cx_to_binvecs))
         return ub.repr2(self.cx_to_binvecs, strvals=True)
 
+    @classmethod
+    def demo(cls):
+        """Demo instance: ConfusionVectors.demo() binarized, keyed by name."""
+        ovr_vecs = ConfusionVectors.demo().binarize_ovr(keyby='name')
+        return ovr_vecs
+
     def keys(self):
         return self.cx_to_binvecs.keys()
 
@@ -490,6 +532,21 @@ class OneVsRestConfusionVectors(ub.NiceRepr):
             'perclass': perclass,
         }
 
+    def threshold_curves(self, **kwargs):
+        """
+        Compute threshold curves per class; kwargs go to each binvecs call.
+
+        Example:
+            >>> # xdoctest: +REQUIRES(module:ndsampler)
+            >>> self = OneVsRestConfusionVectors.demo()
+            >>> thresh_result = self.threshold_curves()['perclass']
+        """
+        curves = {}
+        for cx, binvecs in self.cx_to_binvecs.items():
+            curves[cx] = binvecs.threshold_curves(**kwargs)
+        perclass = PerClass_Threshold_Result(curves)
+        return {'perclass': perclass}
+
     def ovr_classification_report(self):
         raise NotImplementedError
 
@@ -677,40 +734,40 @@ class BinaryConfusionVectors(ub.NiceRepr):
             realpos_total = (y_true * weight).sum()
             realneg_total = ((1 - y_true) * weight).sum()
 
+            """
+            Notes:
+                Apparently, consistent scoring is really hard to get right.
+
+                For detection problems scoring via
+                confusion_vectors+sklearn produces noticeably different
+                results than the VOC method. There are a few reasons for
+                this.  The VOC method stops counting true positives after
+                all assigned predicted boxes have been counted. It simply
+                remembers the amount of original true positives to
+                normalize the true positive rate. On the other hand,
+                confusion vectors maintains a list of these unassigned true
+                boxes and gives them a predicted index of -1 and a score of
+                zero. This means that this function sees them as having a
+                y_true of 1 and a y_score of 0, which allows the
+                scikit-learn fps and tps counts to effectively get up to
+                100% recall when the threshold is zero. The VOC method
+                simply ignores these and handles them implicitly. The
+                problem is that if you remove these from the scikit-learn
+                inputs, it won't see the correct number of positives and it
+                will incorrectly normalize the recall.  In summary:
+
+                    VOC:
+                        * remembers realpos_total
+                        * doesn't count unassigned truths as TP when the
+                        threshold is zero.
+
+                    CV+SKL:
+                        * counts unassigned truths as TP with score=0.
+                        * Always ensure tpr=1, ppv=0 and ppv=1, tpr=0 cases
+                        exist.
+            """
             with warnings.catch_warnings():
                 warnings.filterwarnings('ignore', message='invalid .* true_divide')
-                """
-                Notes:
-                    Apparently, consistent scoring is really hard to get right.
-
-                    For detection problems scoring via
-                    confusion_vectors+sklearn produces noticably different
-                    results than the VOC method. There are a few reasons for
-                    this.  The VOC method stops counting true positives after
-                    all assigned predicted boxes have been counted. It simply
-                    remembers the amount of original true positives to
-                    normalize the true positive reate. On the other hand,
-                    confusion vectors maintains a list of these unassigned true
-                    boxes and gives them a predicted index of -1 and a score of
-                    zero. This means that this function sees them as having a
-                    y_true of 1 and a y_score of 0, which allows the
-                    scikit-learn fps and tps counts to effectively get up to
-                    100% recall when the threshold is zero. The VOC method
-                    simply ignores these and handles them implicitly. The
-                    problem is that if you remove these from the scikit-learn
-                    inputs, it wont see the correct number of positives and it
-                    will incorrectly normalize the recall.  In summary:
-
-                        VOC:
-                            * remembers realpos_total
-                            * doesn't count unassigned truths as TP when the
-                            threshold is zero.
-
-                        CV+SKL:
-                            * counts unassigned truths as TP with score=0.
-                            * Always ensure tpr=1, ppv=0 and ppv=1, tpr=0 cases
-                            exist.
-                """
 
                 if method.startswith('voc'):
                     y_score_ = y_score[y_score > 0]
@@ -778,7 +835,6 @@ class BinaryConfusionVectors(ub.NiceRepr):
             >>> print('roc = {}'.format(ub.repr2(self.roc())))
             >>> self = BinaryConfusionVectors.demo(n=3, p_true=0.5, p_error=0.5)
             >>> print('roc = {}'.format(ub.repr2(self.roc())))
-
         """
         import sklearn
         import sklearn.metrics  # NOQA
@@ -896,6 +952,148 @@ class BinaryConfusionVectors(ub.NiceRepr):
             })
         return ROC_Result(roc_info)
 
+    def threshold_curves(self, stabalize_thresh=7, stabalize_pad=7):
+        """
+        Get statistics (F1, G1, MCC) versus thresholds
+
+        Example:
+            >>> self = BinaryConfusionVectors.demo(n=100)
+            >>> self.threshold_curves()
+        """
+        # compute tp, fp, tn, fn at each point
+        # compute mcc, f1, g1, etc
+        # write plot functions
+        info = self._binary_clf_curves(stabalize_thresh, stabalize_pad)
+
+        tp = info['tp_count']
+        fp = info['fp_count']
+        tn = info['tn_count']
+        fn = info['fn_count']
+
+        ppv = tp / (tp + fp)
+        tpr = tp / (tp + fn)
+
+        # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
+        mcc_numer = (tp * tn) - (fp * fn)
+        mcc_denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
+        mcc_denom[np.isnan(mcc_denom) | (mcc_denom == 0)] = 1
+        info['mcc'] = mcc_numer / mcc_denom
+
+        # https://erotemic.wordpress.com/2019/10/23/closed-form-of-the-mcc-when-tn-inf/
+        info['g1'] = np.sqrt(ppv * tpr)
+
+        f1_numer = (2 * ppv * tpr)
+        f1_denom = (ppv + tpr)
+        f1_denom[f1_denom == 0] = 1
+        info['f1'] =  f1_numer / f1_denom
+
+        tnr_denom = (tn + fp)
+        tnr_denom[tnr_denom == 0] = 1
+        tnr = tn / tnr_denom
+
+        pnv_denom = (tn + fn)
+        pnv_denom[pnv_denom == 0] = 1
+        npv = tn / pnv_denom
+
+        info['ppv'] = ppv
+
+        info['tpr'] = tpr
+
+        info['acc'] = (tp + tn) / (tp + tn + fp + fn)
+
+        info['bm'] = tpr + tnr - 1  # informedness
+
+        info['mk'] = ppv + npv - 1  # markedness
+
+        keys = ['mcc', 'g1', 'f1', 'acc']
+        for key in keys:
+            measure = info[key]
+            max_idx = measure.argmax()
+            best_thresh = info['thresholds'][max_idx]
+            best_measure = measure[max_idx]
+            best_label = '{}={:0.2f}@{:0.2f}'.format(key, best_measure, best_thresh)
+            info['max_{}'.format(key)] = best_label
+            info['_max_{}'.format(key)] = (best_measure, best_thresh)
+
+        return Threshold_Result(info)
+
+    def _binary_clf_curves(self, stabalize_thresh=7, stabalize_pad=7):
+        """
+        Code common to ROC, PR, and threshold measures
+
+        TODO: refactor ROC and PR curves to use this code, perhaps even
+        memoizing it.
+        """
+        try:
+            from sklearn.metrics._ranking import _binary_clf_curve
+        except ImportError:
+            from sklearn.metrics.ranking import _binary_clf_curve
+        data = self.data
+        y_true = data['is_true'].astype(np.uint8)
+        y_score = data['pred_score']
+        sample_weight = data._data.get('weight', None)
+
+        npad = 0
+        if len(self) == 0:
+            fps = [np.nan]
+            fns = [np.nan]
+            tps = [np.nan]
+            thresholds = [np.nan]
+
+            realpos_total = 0
+            realneg_total = 0
+            nsupport = 0
+        else:
+            if len(self) <= stabalize_thresh:
+                # add dummy data to stabilize the computation
+                if sample_weight is None:
+                    sample_weight = np.ones(len(self))
+                npad = stabalize_pad
+                y_true, y_score, sample_weight = _stabalilze_data(
+                    y_true, y_score, sample_weight, npad=npad)
+
+            # Get the total weight (typically number of) positive and negative
+            # examples of this class
+            if sample_weight is None:
+                weight = 1
+                nsupport = len(y_true) - bool(npad)
+            else:
+                weight = sample_weight
+                nsupport = sample_weight.sum() - bool(npad)
+
+            realpos_total = (y_true * weight).sum()
+            realneg_total = ((1 - y_true) * weight).sum()
+
+            fps, tps, thresholds = _binary_clf_curve(
+                y_true, y_score, pos_label=1.0,
+                sample_weight=sample_weight)
+
+            # Adjust weighted totals to be robust to floating point errors
+            if np.isclose(realneg_total, fps[-1]):
+                realneg_total = max(realneg_total, fps[-1])
+            if np.isclose(realpos_total, tps[-1]):
+                realpos_total = max(realpos_total, tps[-1])
+
+        tns = realneg_total - fps
+        fns = realpos_total - tps
+
+        info = {
+            'fp_count': fps,
+            'tp_count': tps,
+            'tn_count': tns,
+            'fn_count': fns,
+            'thresholds': thresholds,
+            'realpos_total': realpos_total,
+            'realneg_total': realneg_total,
+            'nsupport': nsupport,
+        }
+        if self.cx is not None:
+            info.update({
+                'cx': self.cx,
+                'node': self.classes[self.cx],
+            })
+        return info
+
 
 class DictProxy(DictLike):
     """
@@ -942,7 +1140,7 @@ class ROC_Result(ub.NiceRepr, DictProxy):
             'catname': self.get('node', None),
         }, nl=0, precision=4, strvals=True)
 
-    def draw(self, **kw):
+    def draw(self, prefix='', **kw):
         """
         Example:
             >>> from netharn.metrics.confusion_vectors import *  # NOQA
@@ -954,7 +1152,7 @@ class ROC_Result(ub.NiceRepr, DictProxy):
             >>> kwplot.show_if_requested()
         """
         from netharn.metrics import drawing
-        return drawing.draw_roc(self, **kw)
+        return drawing.draw_roc(self, prefix=prefix, **kw)
 
 
 class PR_Result(ub.NiceRepr, DictProxy):
@@ -985,9 +1183,45 @@ class PR_Result(ub.NiceRepr, DictProxy):
             'catname': self.get('node', None),
         }, nl=0, precision=4, strvals=True)
 
-    def draw(self, **kw):
+    def draw(self, prefix='', **kw):
+        from netharn.metrics import drawing
+        return drawing.draw_prcurve(self, prefix=prefix, **kw)
+
+
+class Threshold_Result(ub.NiceRepr, DictProxy):
+    """
+    Example:
+        >>> from netharn.metrics.confusion_vectors import *  # NOQA
+        >>> binvecs = BinaryConfusionVectors.demo(n=100, p_error=0.5)
+        >>> self = binvecs.threshold_curves()
+        >>> print('self = {!r}'.format(self))
+        >>> # xdoctest: +REQUIRES(--show)
+        >>> import kwplot
+        >>> kwplot.autompl()
+        >>> self.draw()
+        >>> kwplot.show_if_requested()
+    """
+    def __init__(self, roc_info):
+        self.proxy = roc_info
+
+    @property
+    def catname(self):
+        return self.get('node', None)
+
+    def __nice__(self):
+        return ub.repr2({
+            'max_mcc': self['max_mcc'],
+            'max_g1': self['max_g1'],
+            # 'max_f1': self['max_f1'],
+            'nsupport': self['nsupport'],
+            'realpos_total': self['realpos_total'],
+            'realneg_total': self['realneg_total'],
+            'catname': self.get('node', None),
+        }, nl=0, precision=4, strvals=True)
+
+    def draw(self, prefix='', **kw):
         from netharn.metrics import drawing
-        return drawing.draw_peritem_prcurve(self, **kw)
+        return drawing.draw_threshold_curves(self, prefix=prefix, **kw)
 
 
 class PerClass_ROC_Result(ub.NiceRepr, DictProxy):
@@ -999,9 +1233,9 @@ class PerClass_ROC_Result(ub.NiceRepr, DictProxy):
     def __nice__(self):
         return ub.repr2(self.proxy, nl=2, strvals=True)
 
-    def draw(self, **kw):
+    def draw(self, prefix='', **kw):
         from netharn.metrics import drawing
-        return drawing.draw_perclass_roc(self, **kw)
+        return drawing.draw_perclass_roc(self, prefix=prefix, **kw)
 
 
 class PerClass_PR_Result(ub.NiceRepr, DictProxy):
@@ -1013,13 +1247,38 @@ class PerClass_PR_Result(ub.NiceRepr, DictProxy):
     def __nice__(self):
         return ub.repr2(self.proxy, nl=2, strvals=True)
 
-    def draw(self, **kw):
+    def draw(self, prefix='', **kw):
         from netharn.metrics import drawing
-        return drawing.draw_perclass_prcurve(self, **kw)
+        return drawing.draw_perclass_prcurve(self, prefix=prefix, **kw)
+
+
+class PerClass_Threshold_Result(ub.NiceRepr, DictProxy):
+    """
+    """
+    def __init__(self, cx_to_info):
+        self.proxy = cx_to_info
+
+    def __nice__(self):
+        return ub.repr2(self.proxy, nl=2, strvals=True)
+
+    def draw(self, prefix='', **kw):
+        """
+        Example:
+            >>> # xdoctest: +REQUIRES(module:ndsampler)
+            >>> cfsn_vecs = ConfusionVectors.demo()
+            >>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name')
+            >>> self = ovr_cfsn.threshold_curves()['perclass']
+            >>> self.draw()
+        """
+        from netharn.metrics import drawing
+        return drawing.draw_perclass_thresholds(self, prefix=prefix, **kw)
 
 
 def _stabalilze_data(y_true, y_score, sample_weight, npad=7):
-    npad = 7
+    """
+    Adds ideally calibrated dummy values to curves with few positive examples.
+    This acts somewhat like a Bayesian prior and smooths out the curve.
+    """
     min_score = y_score.min()
     max_score = y_score.max()
 
@@ -1039,3 +1298,11 @@ def _stabalilze_data(y_true, y_score, sample_weight, npad=7):
     y_score = np.hstack([y_score, pad_score])
     sample_weight = np.hstack([sample_weight, pad_weight])
     return y_true, y_score, sample_weight
+
+if __name__ == '__main__':
+    """
+    CommandLine:
+        python ~/code/netharn/netharn/metrics/confusion_vectors.py all
+    """
+    import xdoctest
+    xdoctest.doctest_module(__file__)
diff --git a/netharn/metrics/detect_metrics.py b/netharn/metrics/detect_metrics.py
index cde905beabfc7ef22498f219e3583716be576d3e..70db812f784798618acfc4d481be2d7e5a3d5661 100644
--- a/netharn/metrics/detect_metrics.py
+++ b/netharn/metrics/detect_metrics.py
@@ -133,7 +133,8 @@ class DetectionMetrics(ub.NiceRepr):
         return dmet.gid_to_pred_dets[gid]
 
     def confusion_vectors(dmet, ovthresh=0.5, bias=0, gids=None, compat='all',
-                          prioritize='iou', ignore_class='ignore'):
+                          prioritize='iou', ignore_classes='ignore',
+                          background_class=ub.NoParam, verbose='auto', workers=0):
         """
         Assigns predicted boxes to the true boxes so we can transform the
         detection problem into a classification problem for scoring.
@@ -168,8 +169,19 @@ class DetectionMetrics(ub.NiceRepr):
                 preferred over descendents of the true class, over unreleated
                 classes.
 
-            ignore_class (str, default='ignore'):
-                class name indicating ignore regions
+            ignore_classes (set, default={'ignore'}):
+                class names indicating ignore regions
+
+            background_class (str, default=ub.NoParam):
+                Name of the background class. If unspecified we try to
+                determine it with heuristics. A value of None means there is no
+                background class.
+
+            verbose (int, default='auto'): verbosity flag. In auto mode,
+                verbose=1 if len(gids) > 10.
+
+            workers (int, default=0):
+                number of parallel assignment processes
 
         Ignore:
             globals().update(xdev.get_func_kwargs(dmet.confusion_vectors))
@@ -183,29 +195,62 @@ class DetectionMetrics(ub.NiceRepr):
 
         if gids is None:
             gids = sorted(dmet._imgname_to_gid.values())
-        for gid in gids:
+
+        if verbose == 'auto':
+            verbose = 1 if len(gids) > 10 else 0
+
+        if background_class is ub.NoParam:
+            # Try to autodetermine background class name,
+            # otherwise fall back to None
+            background_class = None
+            if dmet.classes is not None:
+                lower_classes = [c.lower() for c in dmet.classes]
+                try:
+                    idx = lower_classes.index('background')
+                    background_class = dmet.classes[idx]
+                    # TODO: if we know the background class name should we
+                    # change bg_cidx in assignment?
+                except ValueError:
+                    pass
+
+        from ndsampler.utils import util_futures
+        workers = 0
+        jobs = util_futures.JobPool(mode='process', max_workers=workers)
+
+        for gid in ub.ProgIter(gids, desc='submit assign jobs',
+                               verbose=verbose):
             true_dets = dmet.true_detections(gid)
             pred_dets = dmet.pred_detections(gid)
-
-            y = _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1,
-                                          ovthresh=ovthresh, bg_cidx=-1,
-                                          bias=bias, classes=dmet.classes,
-                                          compat=compat, prioritize=prioritize,
-                                          ignore_class=ignore_class)
+            job = jobs.submit(
+                _assign_confusion_vectors, true_dets, pred_dets,
+                bg_weight=1, ovthresh=ovthresh, bg_cidx=-1, bias=bias,
+                classes=dmet.classes, compat=compat, prioritize=prioritize,
+                ignore_classes=ignore_classes)
+            job.gid = gid
+
+        for job in ub.ProgIter(jobs.jobs, desc='assign detections',
+                               verbose=verbose):
+            y = job.result()
+            gid = job.gid
 
             if TRACK_PROBS:
                 # Keep track of per-class probs
+                pred_dets = dmet.pred_detections(gid)
                 try:
                     pred_probs = pred_dets.probs
                 except KeyError:
                     TRACK_PROBS = False
                 else:
                     pxs = np.array(y['pxs'], dtype=np.int)
+
+                    # For unassigned truths, we need to create dummy probs
+                    # where a background class has probability 1.
                     flags = pxs > -1
                     probs = np.zeros((len(pxs), pred_probs.shape[1]),
                                      dtype=np.float32)
-                    bg_idx = dmet.classes.node_to_idx['background']
-                    probs[:, bg_idx] = 1
+                    if background_class is not None:
+                        bg_idx = dmet.classes.index(background_class)
+                        probs[:, bg_idx] = 1
                     probs[flags] = pred_probs[pxs[flags]]
                     prob_accum.append(probs)
 
@@ -213,8 +258,60 @@ class DetectionMetrics(ub.NiceRepr):
             for k, v in y.items():
                 y_accum[k].extend(v)
 
+        # else:
+        #     for gid in ub.ProgIter(gids, desc='assign detections', verbose=verbose):
+        #         true_dets = dmet.true_detections(gid)
+        #         pred_dets = dmet.pred_detections(gid)
+
+        #         y = _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1,
+        #                                       ovthresh=ovthresh, bg_cidx=-1,
+        #                                       bias=bias, classes=dmet.classes,
+        #                                       compat=compat, prioritize=prioritize,
+        #                                       ignore_classes=ignore_classes)
+
+        #         if TRACK_PROBS:
+        #             # Keep track of per-class probs
+        #             try:
+        #                 pred_probs = pred_dets.probs
+        #             except KeyError:
+        #                 TRACK_PROBS = False
+        #             else:
+        #                 pxs = np.array(y['pxs'], dtype=np.int)
+        #                 flags = pxs > -1
+        #                 probs = np.zeros((len(pxs), pred_probs.shape[1]),
+        #                                  dtype=np.float32)
+        #                 bg_idx = dmet.classes.node_to_idx['background']
+        #                 probs[:, bg_idx] = 1
+        #                 probs[flags] = pred_probs[pxs[flags]]
+        #                 prob_accum.append(probs)
+
+        #         y['gid'] = [gid] * len(y['pred'])
+        #         for k, v in y.items():
+        #             y_accum[k].extend(v)
+
+        _data = {}
+        for k, v in ub.ProgIter(list(y_accum.items()), desc='ndarray convert', verbose=verbose):
+            # Try to use 32 bit types for large evaluation problems
+            kw = dict()
+            if k in {'iou', 'score', 'weight'}:
+                kw['dtype'] = np.float32
+            if k in {'pxs', 'txs', 'gid', 'pred', 'true', 'pred_raw'}:
+                kw['dtype'] = np.int32
+            try:
+                _data[k] = np.asarray(v, **kw)
+            except TypeError:
+                _data[k] = np.asarray(v)
+
         # Avoid pandas when possible
-        cfsn_data = kwarray.DataFrameArray(ub.map_vals(np.array, y_accum))
+        cfsn_data = kwarray.DataFrameArray(_data)
+
+        if 0:
+            import xdev
+            nbytes = 0
+            for k, v in _data.items():
+                nbytes += v.size * v.dtype.itemsize
+            print(xdev.byte_str(nbytes))
+
         if TRACK_PROBS:
             y_prob = np.vstack(prob_accum)
         else:
@@ -334,14 +431,15 @@ class DetectionMetrics(ub.NiceRepr):
         return info
 
     def score_voc(dmet, ovthresh=0.5, bias=1, method='voc2012', gids=None,
-                  ignore_class='ignore'):
+                  ignore_classes='ignore'):
         """
         score using voc method
 
         Example:
             >>> # xdoctest: +REQUIRES(module:ndsampler)
             >>> dmet = DetectionMetrics.demo(
-            >>>     nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8, score_noise=.5)
+            >>>     nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8,
+            >>>     score_noise=.5)
             >>> print(dmet.score_voc()['mAP'])
             0.9399...
         """
@@ -356,10 +454,10 @@ class DetectionMetrics(ub.NiceRepr):
             true_dets = dmet.true_detections(gid)
             pred_dets = dmet.pred_detections(gid)
 
-            if ignore_class is not None:
+            if ignore_classes is not None:
                 true_ignore_flags, pred_ignore_flags = _filter_ignore_regions(
                     true_dets, pred_dets, ovthresh=ovthresh,
-                    ignore_class=ignore_class)
+                    ignore_classes=ignore_classes)
                 true_dets = true_dets.compress(~true_ignore_flags)
                 pred_dets = pred_dets.compress(~pred_ignore_flags)
 
@@ -482,6 +580,11 @@ class DetectionMetrics(ub.NiceRepr):
             cls_noise (float, default=0): probability that a class label will
                 change. Must be within 0 and 1.
             anchors (ndarray, default=None): used to create random boxes
+            null_pred (bool, default=0):
+                if True, predicted classes are returned as null, which means
+                only localization scoring is suitable.
+            with_probs (bool, default=1):
+                if True, includes per-class probabilities with predictions
 
         Example:
             >>> # xdoctest: +REQUIRES(module:ndsampler)
@@ -504,6 +607,27 @@ class DetectionMetrics(ub.NiceRepr):
             <Detections(4)>
             >>> print(dmet.pred_detections(gid=0))
             <Detections(7)>
+
+        Example:
+            >>> # xdoctest: +REQUIRES(module:ndsampler)
+            >>> # Test case with null predicted categories
+            >>> dmet = DetectionMetrics.demo(nimgs=30, null_pred=1, nclasses=3,
+            >>>                              nboxes=10, n_fp=10, box_noise=0.3,
+            >>>                              with_probs=False)
+            >>> dmet.gid_to_pred_dets[0].data
+            >>> dmet.gid_to_true_dets[0].data
+            >>> cfsn_vecs = dmet.confusion_vectors()
+            >>> binvecs_ovr = cfsn_vecs.binarize_ovr()
+            >>> binvecs_per = cfsn_vecs.binarize_peritem()
+            >>> pr_per = binvecs_per.precision_recall()
+            >>> pr_ovr = binvecs_ovr.precision_recall()
+            >>> print('pr_per = {!r}'.format(pr_per))
+            >>> print('pr_ovr = {!r}'.format(pr_ovr))
+            >>> # xdoctest: +REQUIRES(--show)
+            >>> import kwplot
+            >>> kwplot.autompl()
+            >>> pr_per.draw(fnum=1)
+            >>> pr_ovr['perclass'].draw(fnum=2)
         """
         import kwimage
         import kwarray
@@ -515,6 +639,9 @@ class DetectionMetrics(ub.NiceRepr):
         box_noise = kwargs.get('box_noise', 0)
         cls_noise = kwargs.get('cls_noise', 0)
 
+        null_pred = kwargs.get('null_pred', False)
+        with_probs = kwargs.get('with_probs', True)
+
         # specify an amount of overlap between true and false scores
         score_noise = kwargs.get('score_noise', 0.2)
 
@@ -550,8 +677,10 @@ class DetectionMetrics(ub.NiceRepr):
         true_mean  = _interp(0.5, .8, score_noise)
         false_mean = _interp(0.5, .2, score_noise)
 
-        true_score_RV = distributions.TruncNormal(mean=true_mean, std=.5, low=true_low, high=true_high, rng=rng)
-        false_score_RV = distributions.TruncNormal(mean=false_mean, std=.5, low=0, high=false_high, rng=rng)
+        true_score_RV = distributions.TruncNormal(
+            mean=true_mean, std=.5, low=true_low, high=true_high, rng=rng)
+        false_score_RV = distributions.TruncNormal(
+            mean=false_mean, std=.5, low=0, high=false_high, rng=rng)
 
         frgnd_cx_RV = distributions.DiscreteUniform(
             1, nclasses + 1, rng=rng)
@@ -640,7 +769,12 @@ class DetectionMetrics(ub.NiceRepr):
                                            scores=pred_scores)
 
             # Hack in the probs
-            pred_dets.data['probs'] = class_probs
+            if with_probs:
+                pred_dets.data['probs'] = class_probs
+
+            if null_pred:
+                pred_dets.data['class_idxs'] = np.array(
+                    [None] * len(pred_dets), dtype=object)
 
             dmet.add_truth(true_dets, imgname=imgname)
             dmet.add_predictions(pred_dets, imgname=imgname)
diff --git a/netharn/metrics/drawing.py b/netharn/metrics/drawing.py
index 5875280cdad6fff1d39d67a50f0ccf96464bec28..ffa27733bde5a22a251d7c95a740a2bbc125f704 100644
--- a/netharn/metrics/drawing.py
+++ b/netharn/metrics/drawing.py
@@ -80,13 +80,9 @@ def draw_perclass_roc(cx_to_rocinfo, classes=None, prefix='', fnum=1,
     for cx in cxs:
         peritem = cx_to_rocinfo[cx]
 
-        if isinstance(cx, int):
-            catname = classes[cx]
-        else:
-            catname = cx
+        catname = classes[cx] if isinstance(cx, int) else cx
 
         auc = peritem['auc']
-
         tpr = peritem['tpr']
 
         nsupport = int(peritem['nsupport'])
@@ -95,7 +91,7 @@ def draw_perclass_roc(cx_to_rocinfo, classes=None, prefix='', fnum=1,
             if abs(z - int(z)) < 1e-8:
                 label = 'auc={:0.2f}: {} ({:d}/{:d})'.format(auc, catname, int(peritem['realpos_total']), round(nsupport, 2))
             else:
-                label = 'auc={:0.2f}: {} ({}/{:d})'.format(auc, catname, round(peritem['realpos_total'], 2), round(nsupport, 2))
+                label = 'auc={:0.2f}: {} ({:.2f}/{:d})'.format(auc, catname, round(peritem['realpos_total'], 2), round(nsupport, 2))
         else:
             label = 'auc={:0.2f}: {} ({:d})'.format(auc, catname, round(nsupport, 2))
 
@@ -144,10 +140,7 @@ def draw_perclass_prcurve(cx_to_peritem, classes=None, prefix='', fnum=1, **kw):
     xydata = ub.odict()
     for cx in cxs:
         peritem = cx_to_peritem[cx]
-        if isinstance(cx, int):
-            catname = classes[cx]
-        else:
-            catname = cx
+        catname = classes[cx] if isinstance(cx, int) else cx
         ap = peritem['ap']
         if 'pr' in peritem:
             pr = peritem['pr']
@@ -173,11 +166,11 @@ def draw_perclass_prcurve(cx_to_peritem, classes=None, prefix='', fnum=1, **kw):
         if 'realpos_total' in peritem:
             z = peritem['realpos_total']
             if abs(z - int(z)) < 1e-8:
-                label = 'ap={:0.2f}: {} ({:d}/{:d})'.format(ap, catname, int(peritem['realpos_total']), nsupport)
+                label = 'ap={:0.2f}: {} ({:d}/{:d})'.format(ap, catname, int(peritem['realpos_total']), round(nsupport, 2))
             else:
-                label = 'ap={:0.2f}: {} ({}/{:d})'.format(ap, catname, round(peritem['realpos_total'], 2), nsupport)
+                label = 'ap={:0.2f}: {} ({:.2f}/{:d})'.format(ap, catname, round(peritem['realpos_total'], 2), round(nsupport, 2))
         else:
-            label = 'ap={:0.2f}: {} ({:d})'.format(ap, catname, nsupport)
+            label = 'ap={:0.2f}: {} ({:d})'.format(ap, catname, round(nsupport, 2))
         xydata[label] = (recall, precision)
 
     with warnings.catch_warnings():
@@ -195,7 +188,80 @@ def draw_perclass_prcurve(cx_to_peritem, classes=None, prefix='', fnum=1, **kw):
     return ax
 
 
-def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw):
+def draw_perclass_thresholds(cx_to_peritem, key='mcc', classes=None, prefix='', fnum=1, **kw):
+    """
+    Notes:
+        Each category is inspected independently of the others; there is no
+        notion of confusion.
+
+    Example:
+        >>> # xdoctest: +REQUIRES(module:ndsampler)
+        >>> # xdoctest: +REQUIRES(module:kwplot)
+        >>> from netharn.metrics.drawing import *  # NOQA
+        >>> from netharn.metrics import ConfusionVectors
+        >>> cfsn_vecs = ConfusionVectors.demo()
+        >>> classes = cfsn_vecs.classes
+        >>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name')
+        >>> cx_to_peritem = ovr_cfsn.threshold_curves()['perclass']
+        >>> import kwplot
+        >>> kwplot.autompl()
+        >>> key = 'mcc'
+        >>> draw_perclass_thresholds(cx_to_peritem, key, classes)
+        >>> # xdoctest: +REQUIRES(--show)
+        >>> kwplot.show_if_requested()
+    """
+    import kwplot
+    # Sort by descending "best value"
+    cxs = list(cx_to_peritem.keys())
+
+    try:
+        priority = np.array([item['_max_' + key][0] for item in cx_to_peritem.values()])
+        priority[np.isnan(priority)] = -np.inf
+        cxs = list(ub.take(cxs, np.argsort(priority)))[::-1]
+    except KeyError:
+        pass
+
+    xydata = ub.odict()
+    for cx in cxs:
+        peritem = cx_to_peritem[cx]
+        catname = classes[cx] if isinstance(cx, int) else cx
+
+        thresholds = peritem['thresholds']
+        measure = peritem[key]
+        try:
+            best_label = peritem['max_{}'.format(key)]
+        except KeyError:
+            max_idx = measure.argmax()
+            best_thresh = thresholds[max_idx]
+            best_measure = measure[max_idx]
+            best_label = '{}={:0.2f}@{:0.2f}'.format(key, best_measure, best_thresh)
+
+        nsupport = int(peritem['nsupport'])
+        if 'realpos_total' in peritem:
+            z = peritem['realpos_total']
+            if abs(z - int(z)) < 1e-8:
+                label = '{}: {} ({:d}/{:d})'.format(best_label, catname, int(peritem['realpos_total']), round(nsupport, 2))
+            else:
+                label = '{}: {} ({:.2f}/{:d})'.format(best_label, catname, round(peritem['realpos_total'], 2), round(nsupport, 2))
+        else:
+            label = '{}: {} ({:d})'.format(best_label, catname, round(nsupport, 2))
+        xydata[label] = (thresholds, measure)
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', 'Mean of empty slice', RuntimeWarning)
+
+    ax = kwplot.multi_plot(
+        xydata=xydata, fnum=fnum,
+        xlim=(0, 1), ylim=(0, 1), xpad=0.01, ypad=0.01,
+        xlabel='threshold', ylabel=key,
+        title=prefix + 'perclass {}'.format(key),
+        legend_loc='lower right',
+        color='distinct', linestyle='cycle', marker='cycle', **kw
+    )
+    return ax
+
+
+def draw_prcurve(peritem, prefix='', fnum=1, **kw):
     """
     TODO: rename to draw prcurve. Just draws a single pr curve.
 
@@ -211,7 +277,7 @@ def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw):
         >>> peritem = cfsn_vecs.binarize_peritem().precision_recall()
         >>> import kwplot
         >>> kwplot.autompl()
-        >>> draw_peritem_prcurve(peritem)
+        >>> draw_prcurve(peritem)
         >>> # xdoctest: +REQUIRES(--show)
         >>> kwplot.show_if_requested()
     """
@@ -239,9 +305,9 @@ def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw):
     if 'realpos_total' in peritem:
         z = peritem['realpos_total']
         if abs(z - int(z)) < 1e-8:
-            label = 'ap={:0.2f}: ({:d}/{:d})'.format(ap, int(peritem['realpos_total']), nsupport)
+            label = 'ap={:0.2f}: ({:d}/{:d})'.format(ap, int(peritem['realpos_total']), round(nsupport, 2))
         else:
-            label = 'ap={:0.2f}: ({}/{:d})'.format(ap, peritem['realpos_total'], nsupport)
+            label = 'ap={:0.2f}: ({:.2f}/{:d})'.format(ap, round(peritem['realpos_total'], 2), round(nsupport, 2))
     else:
         label = 'ap={:0.2f}: ({:d})'.format(ap, nsupport)
 
@@ -254,3 +320,80 @@ def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw):
         color='distinct', linestyle='cycle', marker='cycle', **kw
     )
     return ax
+
+
+def draw_threshold_curves(info, keys=None, prefix='', fnum=1, **kw):
+    """
+    Example:
+        >>> # xdoctest: +REQUIRES(module:ndsampler)
+        >>> # xdoctest: +REQUIRES(module:kwplot)
+        >>> import sys, ubelt
+        >>> sys.path.append(ubelt.expandpath('~/code/netharn'))
+        >>> from netharn.metrics.drawing import *  # NOQA
+        >>> from netharn.metrics import DetectionMetrics
+        >>> dmet = DetectionMetrics.demo(
+        >>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
+        >>> cfsn_vecs = dmet.confusion_vectors()
+        >>> info = cfsn_vecs.binarize_peritem().threshold_curves()
+        >>> keys = None
+        >>> import kwplot
+        >>> kwplot.autompl()
+        >>> draw_threshold_curves(info, keys)
+        >>> # xdoctest: +REQUIRES(--show)
+        >>> kwplot.show_if_requested()
+    """
+    import kwplot
+    import kwimage
+    thresh = info['thresholds']
+
+    if keys is None:
+        keys = {'g1', 'f1', 'acc', 'mcc'}
+
+    idx_to_colors = kwimage.Color.distinct(len(keys), space='rgba')
+    idx_to_best_pt = {}
+
+    xydata = {}
+    colors = {}
+    for idx, key in enumerate(keys):
+        color = idx_to_colors[idx]
+        measure = info[key]
+        max_idx = measure.argmax()
+        best_thresh = thresh[max_idx]
+        best_measure = measure[max_idx]
+        best_label = '{}={:0.2f}@{:0.2f}'.format(key, best_measure, best_thresh)
+
+        nsupport = int(info['nsupport'])
+        if 'realpos_total' in info:
+            z = info['realpos_total']
+            if abs(z - int(z)) < 1e-8:
+                label = '{}: ({:d}/{:d})'.format(best_label, int(info['realpos_total']), round(nsupport, 2))
+            else:
+                label = '{}: ({:.2f}/{:d})'.format(best_label, round(info['realpos_total'], 2), round(nsupport, 2))
+        else:
+            label = '{}: ({:d})'.format(best_label, nsupport)
+        xydata[label] = (thresh, measure)
+        colors[label] = color
+        idx_to_best_pt[idx] = (best_thresh, best_measure)
+
+    ax = kwplot.multi_plot(
+        xydata=xydata, fnum=fnum,
+        xlim=(0, 1), ylim=(0, 1), xpad=0.01, ypad=0.01,
+        xlabel='threshold', ylabel=key,
+        title=prefix + 'threshold curves',
+        legend_loc='lower right',
+        color=colors,
+        linestyle='cycle', marker='cycle', **kw
+    )
+    for idx, best_pt in idx_to_best_pt.items():
+        best_thresh, best_measure = best_pt
+        color = idx_to_colors[idx]
+        ax.plot(best_thresh, best_measure, '*', color=color)
+    return ax
+
+if __name__ == '__main__':
+    """
+    CommandLine:
+        python ~/code/netharn/netharn/metrics/drawing.py
+    """
+    import xdoctest
+    xdoctest.doctest_module(__file__)
diff --git a/netharn/mixins.py b/netharn/mixins.py
index 51ab82afc51f833b8ef4fd83118d83f7dfae40ba..1b4b8fd244f0db82ab69004fa6491c524ca52cef 100644
--- a/netharn/mixins.py
+++ b/netharn/mixins.py
@@ -130,7 +130,7 @@ def _redump_measures(dpath):
     from os.path import join
 
     import kwplot
-    kwplot.set_mpl_backend('agg')
+    kwplot.autompl(force='agg')
 
     try:
         import seaborn as sns
@@ -172,14 +172,15 @@ def _dump_measures(tb_data, out_dpath, mode=None, smoothing=0.0,
         >>> mode = ['epoch', 'iter']
         >>> fpath = join(out_dpath, 'tb_data.json')
         >>> tb_data = json.load(open(fpath, 'r'))
+        >>> import kwplot
+        >>> kwplot.autompl()
         >>> _dump_measures(tb_data,  out_dpath, smoothing=0)
     """
     import ubelt as ub
     from os.path import join
     import numpy as np
-
     import kwplot
-    kwplot.autompl()
+    # kwplot.autompl()
 
     # TODO: Is it possible to get htop to show this process with some name that
     # distinguishes it from the dataloader workers?
diff --git a/netharn/models/resnet.py b/netharn/models/resnet.py
index 0e2b654cf15a2b84fd8d7fec63e55b182169e4f3..99abc31171c848960014897606739f6925b5452f 100644
--- a/netharn/models/resnet.py
+++ b/netharn/models/resnet.py
@@ -84,7 +84,8 @@ class ResNet(nn.Module):
         >>> print(tuple(y.size()))
         (1, 10)
     """
-    def __init__(self, num_blocks, num_classes=10, block='Bottleneck'):
+    def __init__(self, num_blocks=[3, 4, 6, 3], num_classes=10,
+                 in_channels=3, block='Bottleneck'):
         super(ResNet, self).__init__()
         self.in_planes = 64
 
@@ -95,7 +96,7 @@ class ResNet(nn.Module):
         else:
             raise KeyError('Unknown block={}'.format(block))
 
-        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3,
                                stride=1, padding=1, bias=False)
         self.bn1 = nn.BatchNorm2d(64)
         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
diff --git a/netharn/models/yolo2/light_yolo.py b/netharn/models/yolo2/light_yolo.py
index c8402eb6c39bdad61a09aff4d359aba83e092b73..9eb6cc3da7eb9a201129812f4ab98a03cf18315d 100644
--- a/netharn/models/yolo2/light_yolo.py
+++ b/netharn/models/yolo2/light_yolo.py
@@ -251,18 +251,19 @@ class Yolo(nn.Module):
             >>> dets = batch_dets[0]
             >>> # xdoc: +REQUIRES(--show)
             >>> import netharn as nh
-            >>> nh.util.autompl()  # xdoc: +SKIP
+            >>> import kwplot
+            >>> kwplot.autompl()  # xdoc: +SKIP
             >>> dets.meta['classes'] = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
             >>>  'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
             >>>  'dog', 'horse', 'motorbike', 'person',
             >>>  'pottedplant', 'sheep', 'sofa', 'train',
             >>>  'tvmonitor')
-            >>> nh.util.figure(fnum=1, doclf=True)
+            >>> kwplot.figure(fnum=1, doclf=True)
             >>> sf = orig_sizes[0]
             >>> dets.boxes.scale(sf, inplace=True)
-            >>> nh.util.imshow(rgb255, colorspace='rgb')
+            >>> kwplot.imshow(rgb255, colorspace='rgb')
             >>> dets.draw()
-            >>> nh.util.show_if_requested()
+            >>> kwplot.show_if_requested()
         """
         outputs = []
 
@@ -292,10 +293,10 @@ def find_anchors(dset):
         >>> xy = -anchors / 2
         >>> wh = anchors
         >>> show_boxes = np.hstack([xy, wh])
-        >>> import netharn as nh
-        >>> nh.util.figure(doclf=True, fnum=1)
-        >>> nh.util.autompl()  # xdoc: +SKIP
-        >>> nh.util.draw_boxes(show_boxes, box_format='tlwh')
+        >>> import kwplot
+        >>> kwplot.figure(doclf=True, fnum=1)
+        >>> kwplot.autompl()  # xdoc: +SKIP
+        >>> kwplot.draw_boxes(show_boxes, box_format='tlwh')
         >>> from matplotlib import pyplot as plt
         >>> plt.gca().set_xlim(xy.min() - 1, wh.max() / 2 + 1)
         >>> plt.gca().set_ylim(xy.min() - 1, wh.max() / 2 + 1)
diff --git a/netharn/plots/__init__.py b/netharn/plots/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/netharn/plots/weight_scatter.py b/netharn/plots/weight_scatter.py
new file mode 100644
index 0000000000000000000000000000000000000000..79bcf37d71f77caf0d9a7822db555854669b3ce3
--- /dev/null
+++ b/netharn/plots/weight_scatter.py
@@ -0,0 +1,86 @@
+import numpy as np
+from os.path import join
+
+
+def plot_weight_scatter(harn):
+    """
+    Draw a scatter plot of the initial weights versus the final weights of a
+    network.
+
+    The initial state is loaded from ``initial_state/initial_state.pt`` under
+    ``harn.train_dpath`` and the trained state from ``harn.best_snapshot()``.
+    Every entry of both ``model_state_dict`` mappings is flattened and
+    concatenated, so each point is one scalar (x=initial value, y=trained
+    value). The cosine of the angle between the two flattened vectors is also
+    printed.
+
+    Args:
+        harn: harness providing ``train_dpath`` and ``best_snapshot()``; both
+            state files must already exist on disk (e.g. after ``harn.run()``).
+
+    Returns:
+        the axes (from ``plt.gca()``) that the scatter plot was drawn on
+
+    Raises:
+        AssertionError: if the trained state has keys that are missing from
+            the initial state.
+
+    Example:
+        >>> import netharn as nh
+        >>> harn = nh.FitHarn.demo()
+        >>> harn.run()
+
+    Ignore:
+        >>> from netharn.plots.weight_scatter import *  # NOQA
+        >>> from netharn.examples import mnist
+        >>> import kwplot
+        >>> harn = mnist.setup_harn()
+        >>> harn.preferences['timeout'] = 60 * 1
+        >>> kwplot.autompl(force='agg')
+        >>> harn.run()
+        >>> kwplot.autompl(force='auto')
+        >>> plot_weight_scatter(harn)
+    """
+    import netharn as nh
+    cpu = nh.XPU.coerce('cpu')
+
+    path1 = join(harn.train_dpath, 'initial_state', 'initial_state.pt')
+    state1 = cpu.load(path1)
+    weights1 = state1['model_state_dict']
+
+    path2 = harn.best_snapshot()
+    state2 = cpu.load(path2)
+    weights2 = state2['model_state_dict']
+
+    # Raise explicitly (a bare assert is stripped under ``python -O``).
+    missing = set(weights2.keys()) - set(weights1.keys())
+    if missing:
+        raise AssertionError(
+            'trained state has keys missing from the initial state: '
+            '{!r}'.format(sorted(missing)))
+
+    # Sort so both vectors are assembled in the same, reproducible order.
+    keys = sorted(weights2.keys())
+    points1 = np.hstack([weights1[key].numpy().ravel() for key in keys])
+    points2 = np.hstack([weights2[key].numpy().ravel() for key in keys])
+
+    # scipy's ``cosine`` is the cosine *distance*, i.e. 1 - cos(angle), so
+    # convert it back to report the actual cosine of the angle. The submodule
+    # is imported explicitly because a bare ``import scipy`` is not guaranteed
+    # to expose ``scipy.spatial``.
+    from scipy.spatial import distance
+    cosangle = 1.0 - distance.cosine(points1, points2)
+    print('cosangle = {!r}'.format(cosangle))
+
+    import kwplot
+    import seaborn
+    seaborn.set()
+    plt = kwplot.autoplt()
+
+    # Start from an empty figure and draw on a single fresh axes.
+    plt.clf()
+    ax = plt.gca()
+    ax.scatter(points1, points2, s=1, alpha=0.1, c='blue')
+    ax.set_xlabel('initial weights')
+    ax.set_ylabel('trained weights')
+    return ax
diff --git a/netharn/schedulers/core.py b/netharn/schedulers/core.py
index af0b43f29ce19caed95a0187d22f6bcd12a23405..2bb2e02a2036205a436faf0c9e0a6be3b2a267b4 100644
--- a/netharn/schedulers/core.py
+++ b/netharn/schedulers/core.py
@@ -132,11 +132,12 @@ class YOLOScheduler(NetharnScheduler):
         >>>         self.step_batch()
         >>> #print('ydata = {}'.format(ub.repr2(ydata, precision=5, nl=0)))
         >>> # xdoc: +REQUIRES(--show)
-        >>> nh.util.autompl()
+        >>> import kwplot
+        >>> kwplot.autompl()
         >>> xticklabels = sorted({1, 20} | set(points.keys()))
-        >>> nh.util.multi_plot(xdata=xdata['epoch'], ydata=ydata, xlabel='epoch', fnum=1,
+        >>> kwplot.multi_plot(xdata=xdata['epoch'], ydata=ydata, xlabel='epoch', fnum=1,
         >>>                    ylabel='lr', xticklabels=xticklabels, xticks=xticklabels)
-        >>> nh.util.show_if_requested()
+        >>> kwplot.show_if_requested()
 
     """
     __batchaware__ = True
diff --git a/super_setup.py b/super_setup.py
index 1bda283b3de8db6e7aafdc2b479ca32fc52e0539..1823ed81bdad9a1e31207e3121dbe74c1c17f126 100755
--- a/super_setup.py
+++ b/super_setup.py
@@ -686,11 +686,11 @@ def make_netharn_registry():
 
         # The util libs
         CommonRepo(
-            name='kwarray', branch='dev/0.5.7', remote='public',
+            name='kwarray', branch='dev/0.5.9', remote='public',
             remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwarray.git'},
         ),
         CommonRepo(
-            name='kwimage', branch='dev/0.6.2', remote='public',
+            name='kwimage', branch='dev/0.6.3', remote='public',
             remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwimage.git'},
         ),
         # CommonRepo(  # TODO
@@ -698,7 +698,7 @@ def make_netharn_registry():
         #     remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwannot.git'},
         # ),
         CommonRepo(
-            name='kwcoco', branch='dev/0.1.1', remote='public',
+            name='kwcoco', branch='dev/0.1.2', remote='public',
             remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwcoco.git'},
         ),
         CommonRepo(
@@ -719,17 +719,17 @@ def make_netharn_registry():
 
         # For example data and CLI
         CommonRepo(
-            name='scriptconfig', branch='dev/0.5.6', remote='public',
+            name='scriptconfig', branch='dev/0.5.7', remote='public',
             remotes={'public': 'git@gitlab.kitware.com:utils/scriptconfig.git'},
         ),
         CommonRepo(
-            name='ndsampler', branch='dev/0.5.8', remote='public',
+            name='ndsampler', branch='dev/0.5.10', remote='public',
             remotes={'public': 'git@gitlab.kitware.com:computer-vision/ndsampler.git'},
         ),
 
         # netharn - training harness
         CommonRepo(
-            name='netharn', branch='dev/0.5.6', remote='public',
+            name='netharn', branch='dev/0.5.7', remote='public',
             remotes={'public': 'git@gitlab.kitware.com:computer-vision/netharn.git'},
         ),
     ]