diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c57876f1838eeca8c03ac0b7d7c9df9e03bc7d13..b377b73d6cd631a935ad9229ce80a2ef1e189e26 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -273,20 +273,6 @@ test_full/cp37-cp37m-linux: python:3.7 -# for universal builds we only need to gpg sign once -gpgsign/cp37-cp37m-linux: - <<: - - *gpgsign_template - image: - python:3.7 - -deploy/cp37-cp37m-linux: - <<: - - *deploy_template - image: - python:3.7 - - # --------------- # Python 3.6 Jobs diff --git a/CHANGELOG.md b/CHANGELOG.md index bf5f8eab2bafff8f6400576958f723714dae2300..5f314384168c9579b5bfbc2bf198b4280fe11ff2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,34 @@ This changelog follows the specifications detailed in: [Keep a Changelog](https: This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release. -## Version 0.5.6 - Unreleased +## Version 0.5.7 - Unreleased + +### Changed +* `harn.deploy_fpath` is now populated when the model is deployed. +* Improved docs on `netharn/data/toydata.py` +* Changed name of `torch_snapshots` directory to `checkpoints`. + +### Added +* Ported experimental `ChannelSpec` and `DataContainer` from bioharn to netharn.data. +* Added basic classification example that works on generic coco datasets +* Threshold curves to ConfusionVector metrics +* Initial weights are now saved in `initial_state` directory. +* New `plots` submodule. + +### Fixed +* Fixed bug in XPU auto mode which caused it always to choose GPU 0. +* Bug in hyperparams where dict-based loader spec was not working. +* Display intervals were not working correctly with ProgIter, hacked in a temporary fix. 
+ + +## Version 0.5.6 - Released 2020-04-16 + +### Changed +* Enhanced VOC ensure data + + +### Fixed +* Version issues from last release ## Version 0.5.5 diff --git a/README.rst b/README.rst index 1172769cfa581794105da960cc35a24901f8656a..bb1ac3b7c6eba7a89860d89f640869ff750aaa50 100644 --- a/README.rst +++ b/README.rst @@ -130,7 +130,6 @@ Features (continued) ``kwplot``. - Installation ============ @@ -262,50 +261,50 @@ useful to look at. Its complexity is more than CIFAR but less than YOLO. >>> hyper = netharn.HyperParams(**{ >>> # ================ >>> # Environment Components + >>> 'name' : 'demo', >>> 'workdir' : ub.ensure_app_cache_dir('netharn/demo'), - >>> 'nice' : 'demo', - >>> 'xpu' : netharn.XPU.cast('auto'), + >>> 'xpu' : netharn.XPU.coerce('auto'), >>> # workdir is a directory where intermediate results can be saved - >>> # nice symlinks /fit/nice/ -> ../runs/ + >>> # "nice" symlinks /fit/name/ -> ../runs/ >>> # XPU auto select a gpu if idle and VRAM>6GB else a cpu >>> # ================ >>> # Data Components >>> 'datasets' : { # dict of plain ol torch.data.Dataset instances >>> 'train': netharn.data.ToyData2d(size=3, border=1, n=256, rng=0), - >>> 'vali': netharn.data.ToyData2d(size=3, border=1, n=128, rng=1), - >>> 'test': netharn.data.ToyData2d(size=3, border=1, n=128, rng=2), + >>> 'vali': netharn.data.ToyData2d(size=3, border=1, n=64, rng=1), + >>> 'test': netharn.data.ToyData2d(size=3, border=1, n=64, rng=2), >>> }, - >>> 'loaders' : {'batch_size': 64}, # DataLoader instances or kw + >>> 'loaders' : {'batch_size': 4}, # DataLoader instances or kw >>> # ================ >>> # Algorithm Components >>> # Note the (cls, kw) tuple formatting >>> 'model' : (netharn.models.ToyNet2d, {}), >>> 'optimizer' : (netharn.optimizers.SGD, { - >>> 'lr': 0.0001 + >>> 'lr': 0.01 >>> }), >>> # focal loss is usually better than netharn.criterions.CrossEntropyLoss >>> 'criterion' : (netharn.criterions.FocalLoss, {}), >>> 'initializer' : 
(netharn.initializers.KaimingNormal, { >>> 'param': 0, >>> }), - >>> # these may receive an overhaul soon + >>> # The scheduler adjusts learning rate over the training run >>> 'scheduler' : (netharn.schedulers.ListedScheduler, { - >>> 'points': {'lr': {0: .0001, 2: .01, 5: .015, 6: .005, 9: .001}}, + >>> 'points': {'lr': {0: 0.1, 2: 10.0, 4: .15, 6: .05, 9: .01}}, >>> 'interpolation': 'linear', >>> }), >>> 'monitor' : (netharn.Monitor, { >>> 'max_epoch': 10, + >>> 'patience': 7, >>> }), >>> # dynamics are a config option that modify the behavior of the main >>> # training loop. These parameters effect the learned model. - >>> 'dynamics' : {'batch_step': 4}, + >>> 'dynamics' : {'batch_step': 2}, >>> }) >>> harn = netharn.FitHarn(hyper) - >>> # non-algorithmic behavior configs (do not change learned models) - >>> harn.preferences['prog_backend'] = 'progiter' # alternative: 'tqdm' + >>> # non-algorithmic behavior preferences (do not change learned models) >>> harn.preferences['num_keep'] = 10 >>> # start training. - >>> harn.initialize(reset='delete') + >>> harn.initialize(reset='delete') # delete removes an existing run >>> harn.run() # note: run calls initialize it hasn't already been called. 
>>> # xdoc: +IGNORE_WANT diff --git a/analytic/analytic_for.py b/analytic/analytic_for.py deleted file mode 100644 index 5ef73f1c5554c060bcbbab8121559822b7fa9564..0000000000000000000000000000000000000000 --- a/analytic/analytic_for.py +++ /dev/null @@ -1,2 +0,0 @@ -# TODO: new api -from netharn.analytic.analytic_for import * diff --git a/analytic/output_shape_for.py b/analytic/output_shape_for.py deleted file mode 100644 index b792abb86ef2ef5e2e1ccb7383600a108b593717..0000000000000000000000000000000000000000 --- a/analytic/output_shape_for.py +++ /dev/null @@ -1,2 +0,0 @@ -# TODO: new api -from netharn.analytic.output_shape_for import * diff --git a/analytic/receptive_field_for.py b/analytic/receptive_field_for.py deleted file mode 100644 index a8f17dd69bdeef3da21f6375817542467bf6a162..0000000000000000000000000000000000000000 --- a/analytic/receptive_field_for.py +++ /dev/null @@ -1,2 +0,0 @@ -# TODO: new api -from netharn.analytic.receptive_field_for import * diff --git a/dev/ggr_matching.py b/dev/ggr_matching.py index 7f54c206e20940d467af4d0d1aece3ce02d6c00b..4f149e8309b367418da4d2c66856611335b53f7c 100644 --- a/dev/ggr_matching.py +++ b/dev/ggr_matching.py @@ -21,6 +21,8 @@ import torch import torchvision # NOQA import ndsampler from sklearn import metrics +import kwimage +import kwarray class MatchingHarness(nh.FitHarn): @@ -41,7 +43,7 @@ class MatchingHarness(nh.FitHarn): harn.POS_LABEL = 1 harn.NEG_LABEL = 0 # BUG: should have one for each tag - harn.confusion_vectors = nh.util.DataFrameLight( + harn.confusion_vectors = kwarray.DataFrameLight( columns=['y_true', 'y_dist'] ) @@ -169,7 +171,7 @@ class MatchingHarness(nh.FitHarn): stacked = harn._draw_batch(batch, decoded) dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag)) fpath = join(dpath, 'batch_{}_epoch_{}.jpg'.format(bx, harn.epoch)) - nh.util.imwrite(fpath, stacked) + kwimage.imwrite(fpath, stacked) # Record metrics for epoch scores n = len(outputs['distAP']) @@ -260,10 +262,10 @@ class 
MatchingHarness(nh.FitHarn): >>> decoded = harn._decode(outputs) >>> stacked = harn._draw_batch(batch, decoded, limit=42) >>> # xdoctest: +REQUIRES(--show) - >>> import netharn as nh - >>> nh.util.autompl() - >>> nh.util.imshow(stacked, colorspace='rgb', doclf=True) - >>> nh.util.show_if_requested() + >>> import kwplot + >>> kwplot.autompl() + >>> kwplot.imshow(stacked, colorspace='rgb', doclf=True) + >>> kwplot.show_if_requested() """ tostack = [] fontkw = { @@ -275,7 +277,7 @@ class MatchingHarness(nh.FitHarn): for i in range(n): ims = [g[i].transpose(1, 2, 0) for g in decoded['triple_imgs']] ims = [cv2.resize(g, dsize) for g in ims] - ims = [nh.util.atleast_3channels(g) for g in ims] + ims = [kwimage.atleast_3channels(g) for g in ims] triple_nxs = [n[i] for n in decoded['triple_nxs']] text = 'distAP={:.3g} -- distAN={:.3g} -- {}'.format( @@ -287,17 +289,17 @@ class MatchingHarness(nh.FitHarn): 'dodgerblue' if decoded['distAP'][i] < decoded['distAN'][i] else 'orangered') - img = nh.util.stack_images( + img = kwimage.stack_images( ims, overlap=-2, axis=1, bg_value=(10 / 255, 40 / 255, 30 / 255) ) img = (img * 255).astype(np.uint8) - img = nh.util.draw_text_on_image(img, text, + img = kwimage.draw_text_on_image(img, text, org=(2, img.shape[0] - 2), color=color, **fontkw) tostack.append(img) - stacked = nh.util.stack_images_grid(tostack, overlap=-10, + stacked = kwimage.stack_images_grid(tostack, overlap=-10, bg_value=(30, 10, 40), axis=1, chunksize=3) return stacked @@ -319,12 +321,12 @@ class AnnotCocoDataset(torch.utils.data.Dataset, ub.NiceRepr): >>> index = 0 >>> item = torch_dset[index] >>> import netharn as nh - >>> nh.util.autompl() - >>> nh.util.imshow(item['chip']) + >>> kwplot.autompl() + >>> kwplot.util.imshow(item['chip']) >>> torch_loader = torch_dset.make_loader() >>> raw_batch = ub.peek(torch_loader) - >>> stacked = nh.util.stack_images_grid(raw_batch['chip'].numpy().transpose(0, 2, 3, 1), overlap=-1) - >>> nh.util.imshow(stacked) + >>> stacked = 
kwplot.stack_images_grid(raw_batch['chip'].numpy().transpose(0, 2, 3, 1), overlap=-1) + >>> kwplot.imshow(stacked) for batch_idxs in torch_loader.batch_sampler: print('batch_idxs = {!r}'.format(batch_idxs)) @@ -360,7 +362,7 @@ class AnnotCocoDataset(torch.utils.data.Dataset, ub.NiceRepr): self.window_dim = window_dim self.dims = (window_dim, window_dim) - self.rng = nh.util.ensure_rng(0) + self.rng = kwarray.ensure_rng(0) if augment: import imgaug.augmenters as iaa self.independent = iaa.Sequential([ @@ -792,7 +794,8 @@ def main(): ns['lr'] = 1e-99 if args.interact: - nh.util.autompl() + import kwplot + kwplot.autompl() import matplotlib.pyplot as plt harn = setup_harn(**ns) diff --git a/dev/manage_snapshots.py b/dev/manage_snapshots.py index 44bef23898ea706e74e7d1e536a6a8bb953fc741..f878efdc6e6b3557fd0e580830ea7c28496e4f65 100755 --- a/dev/manage_snapshots.py +++ b/dev/manage_snapshots.py @@ -139,13 +139,13 @@ def session_info(dpath): dpath = realpath(dpath) if True: - # Determine if we are pointed to by a nice directory or not - nice = basename(dirname(dpath)) - info['nice'] = nice + # Determine if we are pointed to by a "name" directory or not + name = basename(dirname(dpath)) + info['name'] = name fitdir = dirname(dirname(dirname(dpath))) - nice_dpath = join(fitdir, 'nice', nice) + name_dpath = join(fitdir, 'name', name) try: - target = realpath(ub.util_links._readlink(nice_dpath)) + target = realpath(ub.util_links._readlink(name_dpath)) except Exception: target = None info['linked'] = (target == dpath) @@ -206,10 +206,10 @@ def _devcheck_remove_dead_runs(workdir, dry=True, dead_num_snap_thresh=10, else: session['decision'] = 'good' - nice_groups = ub.group_items(all_sessions, lambda x: x['nice']) + nice_groups = ub.group_items(all_sessions, lambda x: x['name']) - for nice, group in nice_groups.items(): - print(' --- {} --- '.format(nice)) + for name, group in nice_groups.items(): + print(' --- {} --- '.format(name)) group = sorted(group, key=lambda x: 
x['size']) group_ = copy.deepcopy(group) for item in group_: @@ -218,12 +218,12 @@ def _devcheck_remove_dead_runs(workdir, dry=True, dead_num_snap_thresh=10, item['size'] = byte_str(item['size']) print(ub.repr2(group_, nl=1)) - # Partion your "nice" sessions into broken and live symlinks. + # Partion your "name" sessions into broken and live symlinks. # For each live link remember what the real path is. broken_links = [] - nice_dpath = join(workdir, 'fit', 'nice') - for dname in os.listdir(nice_dpath): - dpath = join(nice_dpath, dname) + name_dpath = join(workdir, 'fit', 'name') + for dname in os.listdir(name_dpath): + dpath = join(name_dpath, dname) if is_symlink_broken(dpath): broken_links.append(dpath) diff --git a/dev/mnist_matching.py b/dev/mnist_matching.py index 196220dbda785afbda10ae81170e9147064ae61f..f7be6aab3551e1b91312c4624bb11f13cad5d0a5 100644 --- a/dev/mnist_matching.py +++ b/dev/mnist_matching.py @@ -7,6 +7,8 @@ import torchvision import ubelt as ub from torch import nn from sklearn import metrics +import kwimage +import kwarray class MNISTEmbeddingNet(nh.layers.Module): @@ -107,7 +109,7 @@ class MNIST_MatchingHarness(nh.FitHarn): harn._has_preselected = False harn.POS_LABEL = 1 harn.NEG_LABEL = 0 - harn.confusion_vectors = nh.util.DataFrameLight( + harn.confusion_vectors = kwarray.DataFrameLight( columns=['y_true', 'y_dist'] ) @@ -158,7 +160,6 @@ class MNIST_MatchingHarness(nh.FitHarn): batch['cpu_chips'] = image return batch - @nh.util.profile def run_batch(harn, batch): """ Two - run the batch @@ -241,7 +242,6 @@ class MNIST_MatchingHarness(nh.FitHarn): outputs['distAN'] = neg_dists return outputs, loss - @nh.util.profile def on_batch(harn, batch, outputs, loss): """ custom netharn callback @@ -253,9 +253,10 @@ class MNIST_MatchingHarness(nh.FitHarn): >>> decoded = harn._decode(outputs) >>> stacked = harn._draw_batch(decoded, limit=42) >>> # xdoctest: +REQUIRES(--show) - >>> nh.util.autompl() - >>> nh.util.imshow(stacked) - >>> 
nh.util.show_if_requested() + >>> import kwplot + >>> kwplot.autompl() + >>> kwplot.imshow(stacked) + >>> kwplot.show_if_requested() """ batch_metrics = ub.odict() for key, value in harn._loss_parts.items(): @@ -270,7 +271,7 @@ class MNIST_MatchingHarness(nh.FitHarn): stacked = harn._draw_batch(decoded) dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag)) fpath = join(dpath, 'batch_{}_epoch_{}.jpg'.format(bx, harn.epoch)) - nh.util.imwrite(fpath, stacked) + kwimage.imwrite(fpath, stacked) # Record metrics for epoch scores n = len(outputs['distAP']) @@ -282,7 +283,6 @@ class MNIST_MatchingHarness(nh.FitHarn): harn.confusion_vectors._data['y_dist'].extend(outputs['distAN'].data.cpu().numpy().tolist()) return batch_metrics - @nh.util.profile def on_epoch(harn): """ custom netharn callback @@ -345,7 +345,6 @@ class MNIST_MatchingHarness(nh.FitHarn): harn.confusion_vectors.clear() return epoch_metrics - @nh.util.profile def _decode(harn, outputs): """ Convert raw network outputs to something interpretable @@ -366,7 +365,6 @@ class MNIST_MatchingHarness(nh.FitHarn): decoded['distAN'] = outputs['distAN'].data.cpu().numpy() return decoded - @nh.util.profile def _draw_batch(harn, decoded, limit=12): """ Example: @@ -376,10 +374,10 @@ class MNIST_MatchingHarness(nh.FitHarn): >>> decoded = harn._decode(outputs) >>> stacked = harn._draw_batch(decoded) >>> # xdoctest: +REQUIRES(--show) - >>> import netharn as nh - >>> nh.util.autompl() - >>> nh.util.imshow(stacked, colorspace='rgb', doclf=True) - >>> nh.util.show_if_requested() + >>> import kwplot + >>> kwplot.autompl() + >>> kwplot.imshow(stacked, colorspace='rgb', doclf=True) + >>> kwplot.show_if_requested() """ tostack = [] fontkw = { @@ -391,7 +389,7 @@ class MNIST_MatchingHarness(nh.FitHarn): for i in range(n): ims = [g[i].transpose(1, 2, 0) for g in decoded['triple_imgs']] ims = [cv2.resize(g, dsize) for g in ims] - ims = [nh.util.atleast_3channels(g) for g in ims] + ims = [kwimage.atleast_3channels(g) 
for g in ims] triple_nxs = [n[i] for n in decoded['triple_nxs']] text = 'dAP={:.3g} -- dAN={:.3g} -- {}'.format( @@ -403,16 +401,16 @@ class MNIST_MatchingHarness(nh.FitHarn): 'dodgerblue' if decoded['distAP'][i] < decoded['distAN'][i] else 'orangered') - img = nh.util.stack_images( + img = kwimage.stack_images( ims, overlap=-2, axis=1, bg_value=(10 / 255, 40 / 255, 30 / 255) ) img = (img * 255).astype(np.uint8) - img = nh.util.draw_text_on_image(img, text, + img = kwimage.draw_text_on_image(img, text, org=(2, img.shape[0] - 2), color=color, **fontkw) tostack.append(img) - stacked = nh.util.stack_images_grid(tostack, overlap=-10, + stacked = kwimage.stack_images_grid(tostack, overlap=-10, bg_value=(30, 10, 40), axis=1, chunksize=3) return stacked @@ -464,7 +462,7 @@ def setup_datasets(workdir=None): labels = dset.dataset.train_labels[dset.indices] else: labels = dset.labels - unique_labels, groupxs = nh.util.group_indices(labels.numpy()) + unique_labels, groupxs = kwarray.group_indices(labels.numpy()) dset.pccs = [xs.tolist() for xs in groupxs] # Give the training dataset an input_id @@ -637,7 +635,8 @@ def main(): ns['lr'] = 1e-99 if args.interact: - nh.util.autompl() + import kwplot + kwplot.autompl() import matplotlib.pyplot as plt harn = setup_harn(**ns) diff --git a/netharn/__init__.py b/netharn/__init__.py index dc31d9ca4f5d3f3162a6231b64a5c8f4ca1de4b1..2ebad78468af8c375db78358f6dab99e9f301404 100644 --- a/netharn/__init__.py +++ b/netharn/__init__.py @@ -4,7 +4,7 @@ mkinit netharn --noattrs --dry mkinit netharn --noattrs """ -__version__ = '0.5.6' +__version__ = '0.5.7' try: # PIL 7.0.0 removed PIL_VERSION, which breaks torchvision, monkey patch it diff --git a/netharn/analytic/output_shape_for.py b/netharn/analytic/output_shape_for.py index a8398b07525f4d16ac40681346a162d8c6c7feb4..0f4123f121282aece504fec7c40842a74c0ecbfa 100644 --- a/netharn/analytic/output_shape_for.py +++ b/netharn/analytic/output_shape_for.py @@ -9,10 +9,7 @@ import torchvision from 
collections import OrderedDict from six.moves import builtins from netharn.analytic import analytic_for -# try: from netharn.device import DataSerial -# except ImportError: -# DataSerial = None REGISTERED_TYPES = [] diff --git a/netharn/analytic/receptive_field_for.py b/netharn/analytic/receptive_field_for.py index 471b95c315811597613c75d816a65752fe108536..0d10905732d49252b0b58cb671e9c8502c423290 100644 --- a/netharn/analytic/receptive_field_for.py +++ b/netharn/analytic/receptive_field_for.py @@ -10,6 +10,7 @@ import numpy as np from collections import OrderedDict from netharn.analytic.output_shape_for import OutputShapeFor from netharn.analytic import analytic_for +from distutils.version import LooseVersion # try: # from netharn.device import MountedModel # except ImportError: @@ -18,6 +19,12 @@ from netharn.analytic import analytic_for REGISTERED_TYPES = [] +if LooseVersion(torch.__version__) >= LooseVersion('1.5.0'): + CONV_TRANSPOSE_TYPES = (nn.modules.conv._ConvTransposeNd,) +else: + CONV_TRANSPOSE_TYPES = (nn.modules.conv._ConvTransposeMixin,) + + def ensure_array_nd(data, n): if ub.iterable(data): return np.array(data) @@ -494,7 +501,7 @@ class _TorchMixin(object): return field # raise NotImplementedError('todo') - @compute_type(nn.modules.conv._ConvTransposeMixin) + @compute_type(*CONV_TRANSPOSE_TYPES) def convT(module, input_field=None): return ReceptiveFieldFor._kernelized_tranpose(module, input_field) diff --git a/netharn/data/__init__.py b/netharn/data/__init__.py index 7ed7517a713c650d15dfc18c200a11989f648917..ee84408674dc52dc1b515991be34cc536e06bdb6 100644 --- a/netharn/data/__init__.py +++ b/netharn/data/__init__.py @@ -3,33 +3,28 @@ mkinit netharn.data """ # flake8: noqa -__DYNAMIC__ = False -if __DYNAMIC__: - from mkinit import dynamic_init - exec(dynamic_init(__name__)) -else: - # - from netharn.data import base - from netharn.data import batch_samplers - from netharn.data import coco_api - from netharn.data import collate - from netharn.data 
import mnist - from netharn.data import toydata - from netharn.data import transforms - from netharn.data import voc +# +from netharn.data import base +from netharn.data import batch_samplers +from netharn.data import coco_api +from netharn.data import collate +from netharn.data import mnist +from netharn.data import toydata +from netharn.data import transforms +from netharn.data import voc - from netharn.data.base import (DataMixin,) - from netharn.data.batch_samplers import (MatchingSamplerPK,) - from netharn.data.coco_api import (CocoDataset,) - from netharn.data.collate import (CollateException, default_collate, - list_collate, numpy_type_map, - padded_collate,) - from netharn.data.mnist import (MNIST,) - from netharn.data.toydata import (ToyData1d, ToyData2d,) - from netharn.data.voc import (VOCDataset,) +from netharn.data.base import (DataMixin,) +from netharn.data.batch_samplers import (MatchingSamplerPK,) +from netharn.data.coco_api import (CocoDataset,) +from netharn.data.collate import (CollateException, default_collate, + list_collate, numpy_type_map, + padded_collate,) +from netharn.data.mnist import (MNIST,) +from netharn.data.toydata import (ToyData1d, ToyData2d,) +from netharn.data.voc import (VOCDataset,) - __all__ = ['CocoDataset', 'CollateException', 'DataMixin', 'MNIST', - 'MatchingSamplerPK', 'ToyData1d', 'ToyData2d', 'VOCDataset', 'base', - 'batch_samplers', 'coco_api', 'collate', 'default_collate', - 'list_collate', 'mnist', 'numpy_type_map', 'padded_collate', - 'toydata', 'transforms', 'voc'] +__all__ = ['CocoDataset', 'CollateException', 'DataMixin', 'MNIST', + 'MatchingSamplerPK', 'ToyData1d', 'ToyData2d', 'VOCDataset', 'base', + 'batch_samplers', 'coco_api', 'collate', 'default_collate', + 'list_collate', 'mnist', 'numpy_type_map', 'padded_collate', + 'toydata', 'transforms', 'voc'] diff --git a/netharn/data/base.py b/netharn/data/base.py index 57a88ead88b23118d7695040562129d2d6d94ad2..7f4e341fb4b88b7b8fc194d5b48cbd21d356c51d 100644 --- 
a/netharn/data/base.py +++ b/netharn/data/base.py @@ -1,3 +1,6 @@ +""" +DEPRECATE +""" from torch.utils import data as torch_data diff --git a/netharn/data/batch_samplers.py b/netharn/data/batch_samplers.py index 88758ad974019bfea7f26cd1adbf61e47f572db2..5dc3300386622a3f2d7a25204f232b032d41e0d9 100644 --- a/netharn/data/batch_samplers.py +++ b/netharn/data/batch_samplers.py @@ -273,6 +273,8 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam num_batches (int | str, default='auto'): number of batches to generate shuffle (bool, default=False): if True randomize batch ordering drop_last (bool): unused, exists for compatibility + label_to_weight (dict, default=None): + mapping from labels to user-specified weights rng (RandomState, default=None): random seed References: @@ -289,17 +291,17 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam >>> # Create a rare class >>> index_to_labels[0][0] = 42 >>> self = GroupedBalancedBatchSampler(index_to_labels, batch_size=4) - >>> print('self.label_to_freq = {!r}'.format(self.label_to_freq)) + >>> print('self.label_to_freq = {}'.format(ub.repr2(self.label_to_freq, nl=1))) >>> indices = list(self) >>> print('indices = {!r}'.format(indices)) >>> # Print the epoch / item label frequency per epoch >>> label_sequence = [] >>> index_sequence = [] - >>> for item_indices in self: + >>> for item_indices, _ in zip(self, range(1000)): >>> item_indices = np.array(item_indices) >>> item_labels = list(ub.flatten(ub.take(index_to_labels, item_indices))) >>> index_sequence.extend(item_indices) - >>> label_sequence.extend(item_labels) + >>> label_sequence.extend(ub.unique(item_labels)) >>> label_hist = ub.dict_hist(label_sequence) >>> index_hist = ub.dict_hist(index_sequence) >>> label_hist = ub.sorted_vals(label_hist, reverse=True) @@ -310,7 +312,7 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam """ def __init__(self, index_to_labels, 
batch_size=1, num_batches='auto', - shuffle=False, rng=None): + label_to_weight=None, shuffle=False, rng=None): import kwarray rng = kwarray.ensure_rng(rng, api='python') @@ -322,20 +324,49 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam for label in item_labels: label_to_indices[label].add(index) flat_labels = np.hstack(index_to_labels) - self.label_to_freq = ub.dict_hist(flat_labels) + label_to_freq = ub.dict_hist(flat_labels) # Use tf-idf based scheme to compute sample probabilities + label_to_idf = {} label_to_tfidf = {} labels = sorted(set(flat_labels)) for label in labels: + # tf for each img, is the number of times the label appears index_to_tf = np.zeros(len(index_to_labels)) for index, item_labels in enumerate(index_to_labels): index_to_tf[index] = (label == item_labels).sum() + # idf is the #imgs / #imgs-with-label idf = len(index_to_tf) / (index_to_tf > 0).sum() + if label_to_weight: + idf = idf * label_to_weight[label] + label_to_idf[label] = idf label_to_tfidf[label] = np.maximum(index_to_tf * idf, 1) index_to_weight = sum(label_to_tfidf.values()) index_to_prob = index_to_weight / index_to_weight.sum() + if 0: + index_to_unique_labels = list(map(set, index_to_labels)) + unique_freq = ub.dict_hist(ub.flatten(index_to_unique_labels)) + tot = sum(unique_freq.values()) + unweighted_odds = ub.map_vals(lambda x: x / tot, unique_freq) + + label_to_indices = ub.ddict(set) + for index, item_labels in enumerate(index_to_labels): + for label in item_labels: + label_to_indices[label].add(index) + ub.map_vals(len, label_to_indices) + + label_to_odds = ub.ddict(lambda: 0) + for label, indices in label_to_indices.items(): + for idx in indices: + label_to_odds[label] += index_to_prob[idx] + + coi = {x for x, w in label_to_weight.items() if w > 0} + coi_weighted = ub.dict_subset(label_to_odds, coi) + coi_unweighted = ub.dict_subset(unweighted_odds, coi) + print('coi_weighted = {}'.format(ub.repr2(coi_weighted, nl=1))) + 
print('coi_unweighted = {}'.format(ub.repr2(coi_unweighted, nl=1))) + self.index_to_prob = index_to_prob self.indices = np.arange(len(index_to_prob)) @@ -344,6 +375,7 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam else: self.num_batches = num_batches + self.label_to_freq = label_to_freq self.index_to_labels = index_to_labels self.batch_size = batch_size self.shuffle = shuffle @@ -356,6 +388,25 @@ class GroupedBalancedBatchSampler(ub.NiceRepr, torch.utils.data.sampler.BatchSam 'label_to_freq': self.label_to_freq, }, nl=0) + def _balance_report(self, limit=None): + # Print the epoch / item label frequency per epoch + label_sequence = [] + index_sequence = [] + if limit is None: + limit = self.num_batches + for item_indices, _ in zip(self, range(limit)): + item_indices = np.array(item_indices) + item_labels = list(ub.flatten(ub.take(self.index_to_labels, item_indices))) + index_sequence.extend(item_indices) + label_sequence.extend(ub.unique(item_labels)) + label_hist = ub.dict_hist(label_sequence) + index_hist = ub.dict_hist(index_sequence) + label_hist = ub.sorted_vals(label_hist, reverse=True) + index_hist = ub.sorted_vals(index_hist, reverse=True) + index_hist = ub.dict_subset(index_hist, list(index_hist.keys())[0:5]) + print('label_hist = {}'.format(ub.repr2(label_hist, nl=1))) + print('index_hist = {}'.format(ub.repr2(index_hist, nl=1))) + def _auto_num_batches(self): # The right way to calculate num samples would be using a generalized # solutions to the coupon collector problem, but in practice that diff --git a/netharn/data/channel_spec.py b/netharn/data/channel_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..08e2f21c57731a3d1b47b929e83839049e3c2bea --- /dev/null +++ b/netharn/data/channel_spec.py @@ -0,0 +1,323 @@ +import ubelt as ub +import six + + +class ChannelSpec(ub.NiceRepr): + """ + Parse and extract information about network input channel specs for + early or late fusion networks. 
+ + Notes: + The pipe ('|') character represents an early-fused input stream, and + order matters (it is non-commutative). + + The comma (',') character separates different input streams/branches + for a multi-stream/branch network which will be late fused. Order does + not matter + + TODO: + - [ ] : normalize representations? e.g: rgb = r|g|b? + - [ ] : rename to BandsSpec or SensorSpec? + + Example: + >>> # Integer spec + >>> ChannelSpec.coerce(3) + + + >>> # single mode spec + >>> ChannelSpec.coerce('rgb') + + + >>> # early fused input spec + >>> ChannelSpec.coerce('rgb|disprity') + + + >>> # late fused input spec + >>> ChannelSpec.coerce('rgb,disprity') + + + >>> # early and late fused input spec + >>> ChannelSpec.coerce('rgb|ir,disprity') + + + Example: + >>> from netharn.data.channel_spec import * # NOQA + >>> self = ChannelSpec('gray') + >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1))) + >>> self = ChannelSpec('rgb') + >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1))) + >>> self = ChannelSpec('rgb|disparity') + >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1))) + >>> self = ChannelSpec('rgb|disparity,disparity') + >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1))) + >>> self = ChannelSpec('rgb,disparity,flowx|flowy') + >>> print('self.info = {}'.format(ub.repr2(self.info, nl=1))) + + Example: + >>> from netharn.data.channel_spec import * # NOQA + >>> specs = [ + >>> 'rgb', # an rgb input + >>> 'rgb|disprity', # rgb early fused with disparity + >>> 'rgb,disprity', # rgb late fused with disparity + >>> 'rgb|ir,disprity', # rgb early fused with ir and late fused with disparity + >>> 3, # 3 unknown channels + >>> ] + >>> for spec in specs: + >>> print('=======================') + >>> print('spec = {!r}'.format(spec)) + >>> # + >>> self = ChannelSpec.coerce(spec) + >>> print('self = {!r}'.format(self)) + >>> sizes = self.sizes() + >>> print('sizes = {!r}'.format(sizes)) + >>> print('self.info = 
{}'.format(ub.repr2(self.info, nl=1))) + >>> # + >>> item = self._demo_item((1, 1), rng=0) + >>> inputs = self.encode(item) + >>> components = self.decode(inputs) + >>> input_shapes = ub.map_vals(lambda x: x.shape, inputs) + >>> component_shapes = ub.map_vals(lambda x: x.shape, components) + >>> print('item = {}'.format(ub.repr2(item, precision=1))) + >>> print('inputs = {}'.format(ub.repr2(inputs, precision=1))) + >>> print('input_shapes = {}'.format(ub.repr2(input_shapes))) + >>> print('components = {}'.format(ub.repr2(components, precision=1))) + >>> print('component_shapes = {}'.format(ub.repr2(component_shapes, nl=1))) + + """ + + _known = { + 'rgb': 'r|g|b' + } + + _size_lut = { + 'rgb': 3, + } + + def __init__(self, spec): + # TODO: allow integer specs + self.spec = spec + self._info = {} + + def __nice__(self): + return self.spec + + def __json__(self): + return self.spec + + def __contains__(self, key): + """ + Example: + >>> 'disparity' in ChannelSpec('rgb,disparity,flowx|flowy') + True + >>> 'gray' in ChannelSpec('rgb,disparity,flowx|flowy') + False + """ + return key in self.unique() + + @property + def info(self): + self._info = { + 'spec': self.spec, + 'parsed': self.parse(), + 'unique': self.unique(), + 'normed': self.normalize(), + } + return self._info + + @classmethod + def coerce(cls, data): + if isinstance(data, cls): + self = data + return self + else: + if isinstance(data, int): + # we know the number of channels, but not their names + spec = '|'.join(['u{}'.format(i) for i in range(data)]) + elif isinstance(data, six.string_types): + spec = data + else: + raise TypeError(type(data)) + + self = cls(spec) + return self + + def parse(self): + """ + Build internal representation + """ + # commas break inputs into multiple streams + stream_specs = self.spec.split(',') + parsed = {ss: ss.split('|') for ss in stream_specs} + return parsed + + def normalize(self): + spec = self.spec + stream_specs = spec.split(',') + parsed = {ss: ss for ss in 
stream_specs} + for k1 in parsed.keys(): + for k, v in self._known.items(): + parsed[k1] = parsed[k1].replace(k, v) + parsed = {k: v.split('|') for k, v in parsed.items()} + return parsed + + def keys(self): + spec = self.spec + stream_specs = spec.split(',') + for spec in stream_specs: + yield spec + + def sizes(self): + """ + Number of dimensions for each fused stream channel + + IE: The EARLY-FUSED channel sizes + + Example: + >>> self = ChannelSpec('rgb|disparity,flowx|flowy') + >>> self.sizes() + """ + sizes = { + key: sum(self._size_lut.get(part, 1) for part in vals) + for key, vals in self.parse().items() + } + return sizes + + def unique(self): + """ + Returns the unique channels that will need to be given or loaded + """ + return set(ub.flatten(self.parse().values())) + + def _item_shapes(self, dims): + """ + Expected shape for an input item + + Args: + dims (Tuple[int, int]): the spatial dimension + + Returns: + Dict[int, tuple] + """ + item_shapes = {} + parsed = self.parse() + # normed = self.normalize() + fused_keys = list(self.keys()) + for fused_key in fused_keys: + components = parsed[fused_key] + for mode_key in components: + c = self._size_lut.get(mode_key, 1) + shape = (c,) + tuple(dims) + item_shapes[mode_key] = shape + return item_shapes + + def _demo_item(self, dims=(4, 4), rng=None): + """ + Create an input that satisfies this spec + + Returns: + dict: an item like it might appear when its returned from the + `__getitem__` method of a :class:`torch...Dataset`. 
+ + Example: + >>> dims = (1, 1) + >>> ChannelSpec.coerce(3)._demo_item(dims, rng=0) + >>> ChannelSpec.coerce('r|g|b|disaprity')._demo_item(dims, rng=0) + >>> ChannelSpec.coerce('rgb|disaprity')._demo_item(dims, rng=0) + >>> ChannelSpec.coerce('rgb,disaprity')._demo_item(dims, rng=0) + >>> ChannelSpec.coerce('rgb')._demo_item(dims, rng=0) + >>> ChannelSpec.coerce('gray')._demo_item(dims, rng=0) + """ + import torch + import kwarray + rng = kwarray.ensure_rng(rng) + item_shapes = self._item_shapes(dims) + item = { + key: torch.from_numpy(rng.rand(*shape)) + for key, shape in item_shapes.items() + } + return item + + def encode(self, item, axis=0): + """ + Given a dictionary containing preloaded components of the network + inputs, build a concatenated network representations of each input + stream. + + Args: + item (dict): a batch item + axis (int, default=0): concatenation dimension + + Returns: + Dict[str, Tensor]: mapping between input stream and its early fused + tensor input. + + Example: + >>> import torch + >>> dims = (4, 4) + >>> item = { + >>> 'rgb': torch.rand(3, *dims), + >>> 'disparity': torch.rand(1, *dims), + >>> 'flowx': torch.rand(1, *dims), + >>> 'flowy': torch.rand(1, *dims), + >>> } + >>> # Complex Case + >>> self = ChannelSpec('rgb,disparity,rgb|disparity|flowx|flowy,flowx|flowy') + >>> inputs = self.encode(item) + >>> input_shapes = ub.map_vals(lambda x: x.shape, inputs) + >>> print('input_shapes = {}'.format(ub.repr2(input_shapes, nl=1))) + >>> # Simpler case + >>> self = ChannelSpec('rgb|disparity') + >>> inputs = self.encode(item) + >>> input_shapes = ub.map_vals(lambda x: x.shape, inputs) + >>> print('input_shapes = {}'.format(ub.repr2(input_shapes, nl=1))) + """ + import torch + inputs = dict() + parsed = self.parse() + unique = self.unique() + components = {k: item[k] for k in unique} + for key, parts in parsed.items(): + inputs[key] = torch.cat([components[k] for k in parts], dim=axis) + return inputs + + def decode(self, inputs, axis=1): 
+ """ + break an early fused item into its components + + Example: + >>> import torch + >>> dims = (4, 4) + >>> components = { + >>> 'rgb': torch.rand(3, *dims), + >>> 'ir': torch.rand(1, *dims), + >>> } + >>> self = ChannelSpec('rgb|ir') + >>> inputs = self.encode(components) + >>> from netharn.data import data_containers + >>> item = {k: data_containers.ItemContainer(v, stack=True) + >>> for k, v in inputs.items()} + >>> batch = data_containers.container_collate([item, item]) + >>> components = self.decode(batch) + """ + parsed = self.parse() + components = dict() + for key, parts in parsed.items(): + idx1 = 0 + for part in parts: + size = self._size_lut.get(part, 1) + idx2 = idx1 + size + fused = inputs[key] + index = ([slice(None)] * axis + [slice(idx1, idx2)]) + component = fused[index] + components[part] = component + idx1 = idx2 + return components + + +if __name__ == '__main__': + """ + CommandLine: + python ~/code/netharn/netharn/data/channel_spec.py all + """ + import xdoctest + xdoctest.doctest_module(__file__) diff --git a/netharn/data/coco_api.py b/netharn/data/coco_api.py index 5024973fd6b79cfa95e88f58332df8fdd570b94a..207750b8230ef8925b78a1d87013e76eab687c81 100644 --- a/netharn/data/coco_api.py +++ b/netharn/data/coco_api.py @@ -3,7 +3,7 @@ DEPRECATED NOTE: - THIS IS DEPRECATED IN FAVOR OF COCO_DATASET IN NDSAMPLER + THIS IS DEPRECATED IN FAVOR OF COCO_DATASET IN KWCOCO Extended MS-COCO API. Currently only supports keypoints and bounding boxes. diff --git a/netharn/data/data_containers.py b/netharn/data/data_containers.py new file mode 100644 index 0000000000000000000000000000000000000000..c8abfdc3fc273e00b7db7342f07dc55cb28f483e --- /dev/null +++ b/netharn/data/data_containers.py @@ -0,0 +1,846 @@ +""" +Proof-of-concept for porting mmcv DataContainer concept to netharn. Depending +on how well this works these features might be useful as a standalone module or +to contribute to torch proper. 
+ +References: + https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py + https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/collate.py + https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/scatter_gather.py + +FIXME 0 dimension tensors +""" +import torch.utils.data as torch_data +import torch +import ubelt as ub +import numpy as np # NOQA +import re +import collections +import torch.nn.functional as F +# from torch.nn.parallel import DataParallel +from itertools import chain +from netharn.device import DataParallel, DataSerial, XPU +from torch.nn.parallel._functions import _get_stream +from torch.nn.parallel._functions import Scatter as OrigScatter +from torch.nn.parallel._functions import Gather as OrigGather +from torch._six import container_abcs +from torch._six import int_classes, string_classes +default_collate = torch_data.dataloader.default_collate + + +# numpy_type_map = torch_data.dataloader.numpy_type_map # moved in torch 1.1.0 +numpy_type_map = { + 'float64': torch.DoubleTensor, + 'float32': torch.FloatTensor, + 'float16': torch.HalfTensor, + 'int64': torch.LongTensor, + 'int32': torch.IntTensor, + 'int16': torch.ShortTensor, + 'int8': torch.CharTensor, + 'uint8': torch.ByteTensor, +} + + +class CollateException(Exception): + pass + + +_DEBUG = False + + +class BatchContainer(ub.NiceRepr): + """ + A container for a set of items in a batch. Usually this is for network + outputs or a set of items that have already been collated. + + Attributes: + data (List): Unlike ItemContainer, data is always a list where + len(data) is the number of devices this batch will run on. 
+ """ + def __init__(self, data, stack=False, padding_value=-1, cpu_only=False, + pad_dims=2): + self.data = data + self.meta = { + 'stack': stack, + 'padding_value': padding_value, + 'cpu_only': cpu_only, + 'pad_dims': pad_dims, + } + + def __nice__(self): + shape_repr = ub.repr2(nestshape(self.data), nl=-2) + # return 'nestshape(data)={}, **{}'.format(shape_repr, ub.repr2(self.meta, nl=0)) + return 'nestshape(data)={}'.format(shape_repr) + + def __getitem__(self, index): + cls = self.__class__ + return cls([d[index] for d in self.data], **self.meta) + + @property + def cpu_only(self): + return self.meta['cpu_only'] + + @property + def stack(self): + return self.meta['stack'] + + @property + def padding_value(self): + return self.meta['padding_value'] + + @property + def pad_dims(self): + return self.meta['pad_dims'] + + @classmethod + def cat(cls, items, dim=0): + """ + Concatenate data in multiple BatchContainers + + Example: + d1 = BatchContainer([torch.rand(3, 3, 1, 1), torch.rand(2, 3, 1, 1)]) + d2 = BatchContainer([torch.rand(3, 1, 1, 1), torch.rand(2, 1, 1, 1)]) + items = [d1, d2] + self = BatchContainer.cat(items, dim=1) + """ + newdata = [] + num_devices = len(items[0].data) + for device_idx in range(num_devices): + parts = [item.data[device_idx] for item in items] + newpart = torch.cat(parts, dim=dim) + newdata.append(newpart) + self = cls(newdata, **items[0].meta) + return self + + +class ItemContainer(ub.NiceRepr): + """ + A container for uncollated items that defines a specific collation + strategy. Based on mmdetections ItemContainer. 
+ """ + + def __init__( + self, + data, + stack=False, + padding_value=-1, + cpu_only=False, + pad_dims=2 + ): + self._data = data + assert pad_dims in [None, 1, 2, 3] + self.meta = { + 'stack': stack, + 'padding_value': padding_value, + 'cpu_only': cpu_only, + 'pad_dims': pad_dims, + } + + def __nice__(self): + shape_repr = ub.repr2(nestshape(self.data), nl=-2) + return 'nestshape(data)={}'.format(shape_repr) + # return 'nestshape(data)={}, **{}'.format(shape_repr, ub.repr2(self.meta, nl=0)) + + @classmethod + def demo(cls, key='img', rng=None, **kwargs): + """ + Create data for tests + """ + import kwarray + rng = kwarray.ensure_rng(rng) + if key == 'img': + shape = kwargs.get('shape', (3, 512, 512)) + data = rng.rand(*shape).astype(np.float32) + data = torch.from_numpy(data) + self = cls(data, stack=True) + elif key == 'labels': + n = rng.randint(0, 10) + data = rng.randint(0, 10, n) + data = torch.from_numpy(data) + self = cls(data, stack=False) + else: + raise KeyError(key) + return self + + def __getitem__(self, index): + assert self.stack, 'can only index into stackable items' + cls = self.__class__ + return cls(self.data[index], **self.meta) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def cpu_only(self): + return self.meta['cpu_only'] + + @property + def stack(self): + return self.meta['stack'] + + @property + def padding_value(self): + return self.meta['padding_value'] + + @property + def pad_dims(self): + return self.meta['pad_dims'] + + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @property + def shape(self): + return self.data.shape + + def dim(self): + return self.data.dim() + + @classmethod + def _collate(cls, inbatch, num_devices=None): + """ + Collates a sequence of DataContainers + + Args: + inbatch (Sequence[ItemContainer]): datacontainers with the same + 
parameters. + + num_devices (int): number of groups, if None, then uses one group. + + Example: + >>> print('Collate Image ItemContainer') + >>> inbatch = [ItemContainer.demo('img') for _ in range(5)] + >>> print('inbatch = {}'.format(ub.repr2(inbatch))) + >>> result = ItemContainer._collate(inbatch, 2) + >>> print('result1 = {}'.format(ub.repr2(result, nl=1))) + >>> result = ItemContainer._collate(inbatch, 1) + >>> print('result2 = {}'.format(ub.repr2(result, nl=1))) + >>> result = ItemContainer._collate(inbatch, None) + >>> print('resultN = {}'.format(ub.repr2(result, nl=1))) + + >>> print('Collate Label ItemContainer') + >>> inbatch = [ItemContainer.demo('labels') for _ in range(5)] + >>> print('inbatch = {}'.format(ub.repr2(inbatch, nl=1))) + >>> result = ItemContainer._collate(inbatch, 1) + >>> print('result1 = {}'.format(ub.repr2(result, nl=1))) + >>> result = ItemContainer._collate(inbatch, 2) + >>> print('result2 = {}'.format(ub.repr2(result, nl=1))) + >>> result = ItemContainer._collate(inbatch, None) + >>> print('resultN = {}'.format(ub.repr2(result, nl=1))) + """ + item0 = inbatch[0] + bsize = len(inbatch) + if num_devices is None: + num_devices = 1 + + samples_per_device = int(np.ceil(bsize / num_devices)) + + # assert bsize % samples_per_device == 0 + stacked = [] + if item0.cpu_only: + # chunking logic + stacked = [] + for i in range(0, bsize, samples_per_device): + stacked.append( + [sample.data for sample in inbatch[i:i + samples_per_device]]) + + elif item0.stack: + for i in range(0, bsize, samples_per_device): + item = inbatch[i] + pad_dims_ = item.pad_dims + assert isinstance(item.data, torch.Tensor) + + if pad_dims_ is not None: + # Note: can probably reimplement this using padded collate + # logic + ndim = item.dim() + assert ndim > pad_dims_ + max_shape = [0 for _ in range(pad_dims_)] + for dim in range(1, pad_dims_ + 1): + max_shape[dim - 1] = item.shape[-dim] + for sample in inbatch[i:i + samples_per_device]: + for dim in range(0, ndim - 
pad_dims_): + assert item.shape[dim] == sample.shape[dim] + for dim in range(1, pad_dims_ + 1): + max_shape[dim - 1] = max(max_shape[dim - 1], sample.shape[-dim]) + padded_samples = [] + for sample in inbatch[i:i + samples_per_device]: + pad = [0 for _ in range(pad_dims_ * 2)] + for dim in range(1, pad_dims_ + 1): + pad[2 * dim - 1] = max_shape[dim - 1] - sample.shape[-dim] + padded_samples.append( + F.pad(sample.data, pad, value=sample.padding_value)) + stacked.append(default_collate(padded_samples)) + + elif pad_dims_ is None: + stacked.append( + default_collate([ + sample.data + for sample in inbatch[i:i + samples_per_device] + ])) + else: + raise ValueError( + 'pad_dims should be either None or integers (1-3)') + + else: + for i in range(0, bsize, samples_per_device): + stacked.append( + [sample.data for sample in inbatch[i:i + samples_per_device]]) + result = BatchContainer(stacked, **item0.meta) + return result + + +def container_collate(inbatch, num_devices=None): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for + :type:`~mmcv.parallel.DataContainer`. There are 3 cases. + + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. 
cpu_only = False, stack = False, e.g., gt bboxes + + Ignore: + >>> # DISABLE_DOCTSET + >>> dataset = DetectFitDataset.demo(key='shapes8', augment='complex', window_dims=(512, 512), gsize=(1920, 1080)) + + >>> inbatch = [dataset[0], dataset[1], dataset[2]] + >>> raw_batch = container_collate(inbatch) + + >>> target_gpus = [0] + >>> inputs, kwargs = container_scatter_kwargs(raw_batch, {}, target_gpus) + + >>> loader = torch.utils.data.DataLoader(dataset, collate_fn=container_collate, num_workers=0) + + + Example: + >>> item1 = { + >>> 'im': torch.rand(3, 512, 512), + >>> 'label': torch.rand(3), + >>> } + >>> item2 = { + >>> 'im': torch.rand(3, 512, 512), + >>> 'label': torch.rand(3), + >>> } + >>> item3 = { + >>> 'im': torch.rand(3, 512, 512), + >>> 'label': torch.rand(3), + >>> } + >>> batch = batch_items = [item1, item2, item3] + >>> raw_batch = container_collate(batch_items) + >>> print('batch_items = {}'.format(ub.repr2(batch_items, nl=2))) + >>> print('raw_batch = {}'.format(ub.repr2(raw_batch, nl=2))) + + >>> batch = batch_items = [ + >>> {'im': ItemContainer.demo('img'), 'label': ItemContainer.demo('labels')}, + >>> {'im': ItemContainer.demo('img'), 'label': ItemContainer.demo('labels')}, + >>> {'im': ItemContainer.demo('img'), 'label': ItemContainer.demo('labels')}, + >>> ] + >>> raw_batch = container_collate(batch, num_devices=2) + >>> print('batch_items = {}'.format(ub.repr2(batch_items, nl=2))) + >>> print('raw_batch = {}'.format(ub.repr2(raw_batch, nl=2))) + + >>> raw_batch = container_collate(batch, num_devices=6) + >>> raw_batch = container_collate(batch, num_devices=3) + >>> raw_batch = container_collate(batch, num_devices=4) + >>> raw_batch = container_collate(batch, num_devices=1) + >>> print('batch = {}'.format(ub.repr2(batch, nl=1))) + """ + + if not isinstance(inbatch, collections.Sequence): + raise TypeError("{} is not supported.".format(inbatch.dtype)) + item0 = inbatch[0] + if isinstance(item0, ItemContainer): + return 
item0.__class__._collate(inbatch, num_devices=num_devices) + elif isinstance(item0, collections.Sequence): + transposed = zip(*inbatch) + return [container_collate(samples, + num_devices=num_devices) + for samples in transposed] + elif isinstance(item0, collections.Mapping): + return { + key: container_collate([d[key] for d in inbatch], + num_devices=num_devices) + for key in item0 + } + else: + return default_collate(inbatch) + # return _collate_else(inbatch, container_collate) + + +def _collate_else(batch, collate_func): + """ + Handles recursion in the else case for these special collate functions + + This is duplicates all non-tensor cases from `torch_data.dataloader.default_collate` + This also contains support for collating slices. + """ + error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" + elem_type = type(batch[0]) + if elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ + and elem_type.__name__ != 'string_': + elem = batch[0] + if elem_type.__name__ == 'ndarray': + # array of string classes and object + if re.search('[SaUO]', elem.dtype.str) is not None: + raise TypeError(error_msg.format(elem.dtype)) + + return torch.stack([torch.from_numpy(b) for b in batch], 0) + if elem.shape == (): # scalars + py_type = float if elem.dtype.name.startswith('float') else int + return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) + elif isinstance(batch[0], slice): + batch = default_collate([{ + 'start': sl.start, + 'stop': sl.stop, + 'step': 1 if sl.step is None else sl.step + } for sl in batch]) + return batch + elif isinstance(batch[0], int_classes): + return torch.LongTensor(batch) + elif isinstance(batch[0], float): + return torch.DoubleTensor(batch) + elif isinstance(batch[0], string_classes): + return batch + elif isinstance(batch[0], container_abcs.Mapping): + # Hack the mapping collation implementation to print error info + if _DEBUG: + collated = {} + try: + for key in batch[0]: + collated[key] = 
collate_func([d[key] for d in batch]) + except Exception: + print('\n!!Error collating key = {!r}\n'.format(key)) + raise + return collated + else: + return {key: collate_func([d[key] for d in batch]) for key in batch[0]} + elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'): # namedtuple + return type(batch[0])(*(default_collate(samples) for samples in zip(*batch))) + elif isinstance(batch[0], container_abcs.Sequence): + transposed = zip(*batch) + return [collate_func(samples) for samples in transposed] + else: + raise TypeError((error_msg.format(type(batch[0])))) + + +# ---- + + +def _fn_scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs. + + from mmcv.parallel._functions + """ + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + _fn_scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + return output + else: + raise Exception('Unknown type {}.'.format(type(input))) + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception('Unknown type {}.'.format(type(output))) + + +def get_input_device(input): + 
if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception('Unknown type {}.'.format(type(input))) + + +class ContainerScatter(object): + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = _fn_scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) + +# ---- + + +class ContainerDataParallel(DataParallel): + """ + + Ignore: + import torch + from torch.nn.parallel import DataParallel + + # First lets create a simple model where the forward function accepts + # kwargs. I don't really care what they do for this example, but imaging + # they are flags that change the behavior of forward. + + class MyModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(1, 1, 1) + + def forward(self, im, **kwargs): + return self.conv(im) + + raw_model = MyModel() + raw_model = raw_model.to(0) + + # Next create some dummy input and verify the model works by itself + im = torch.zeros(1, 1, 1, 1).to(0) + raw_model.forward(im) + + # Now create a DataParallel object to map the input across two devices + par_model = DataParallel(raw_model, device_ids=[0, 1], output_device=0) + + # In the case where kwargs are not specified DataParallel correctly + # understands that there is only one item in the batch and applies the + # operation on only one GPU. 
+ par_model.forward(im) + + # Howver, if you pass kwargs, then data parallel breaks + par_model.forward(im, flag1=True) + + inputs = (im,) + kwargs = dict(flag1=True, flag2=False) + s1, k1 = par_model.scatter(inputs, kwargs, [0, 1]) + replicas = par_model.replicate(par_model.module, par_model.device_ids[:len(s1)]) + outputs = par_model.parallel_apply(replicas, s1, k1) + + container_scatter(inputs, [0, 1])[0] + + inbatch = [ItemContainer.demo('img', shape=(1, 1, 1)) for _ in range(5)] + im = ItemContainer._collate(inbatch, 5) + + im = torch.zeros(1, 1, 1, 1).to(0) + inputs = (im,) + self = ContainerDataParallel(raw_model, device_ids=[0, 1], output_device=0) + self.forward(*inputs, **kwargs) + """ + + def forward(self, *inputs, **kwargs): + """ + Unchanged version for torch.nn.DataParallel + """ + if not self.device_ids: + return self.module(*inputs, **kwargs) + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError("module must have its parameters and buffers " + "on device {} (device_ids[0]) but found one of " + "them on device: {}".format(self.src_device_obj, t.device)) + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + return self.module(*inputs[0], **kwargs[0]) + replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) + outputs = self.parallel_apply(replicas, inputs, kwargs) + return self.gather(outputs, self.output_device) + + def scatter(self, inputs, kwargs, device_ids): + return container_scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def gather(self, outputs, output_device): + # not part of mmcv's original impl + return container_gather(outputs, output_device, dim=self.dim) + +# ---- + + +def container_scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. 
+ + from mmcv.parallel.scatter_gather + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + return OrigScatter.apply(target_gpus, None, dim, obj) + if isinstance(obj, BatchContainer): + if obj.cpu_only: + return obj.data + else: + return ContainerScatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). 
To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def container_scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """ + Scatter with support for kwargs dictionary + + Example: + >>> # xdoctest: +REQUIRES(--multi-gpu) + >>> inputs = [torch.rand(1, 1, 1, 1)] + >>> kwargs = dict(a=1, b=2) + >>> target_gpus = [0, 1] + >>> a1, k1 = container_scatter_kwargs(inputs, kwargs, target_gpus) + + >>> # xdoctest: +REQUIRES(--multi-gpu) + >>> inputs = [torch.rand(1, 1, 1, 1)] + >>> kwargs = dict(a=torch.rand(1, 1, 1, 1), b=2) + >>> target_gpus = [0, 1] + >>> a1, k1 = container_scatter_kwargs(inputs, kwargs, target_gpus) + """ + inputs = container_scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = container_scatter(kwargs, target_gpus, dim) if kwargs else [] + + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + + # patch for cases where #inputs < len(target_gpus) and len(kwargs) > 0 + PATCH = 1 + if PATCH: + is_empty = [len(p) == 0 for p in inputs] + num_empty = sum(is_empty) + num_full = len(inputs) - num_empty + if num_full > 0 and num_empty > 0: + kwargs = kwargs[0:num_full] + inputs = inputs[0:num_full] + + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs + + +def container_gather(outputs, target_device, dim=0): + r""" + Gathers tensors from different GPUs on a specified device + (-1 means the CPU). + + The only difference from original :func:`gather` is to add support for + :type:`BatchContainer`. 
+ + Ignore: + >>> import kwarray + >>> rng = kwarray.ensure_rng(0) + >>> outputs = [ + >>> { + >>> 'batch_results': BatchContainer([ + >>> torch.rand(rng.randint(0, 10), 5).to(0) + >>> for _ in range(4) + >>> ], stack=False), + >>> 'loss_parts': { + >>> 'part1': torch.rand(2).sum().to(0), + >>> 'part2': torch.rand(3).sum().to(0), + >>> }, + >>> }, + >>> { + >>> 'batch_results': BatchContainer([ + >>> torch.rand(rng.randint(0, 10), 5).to(1) + >>> for _ in range(4) + >>> ], stack=False), + >>> 'loss_parts': { + >>> 'part1': torch.rand(2).sum().to(1), + >>> 'part2': torch.rand(3).sum().to(1), + >>> } + >>> } + >>> ] + >>> _report_data_shape(outputs) + >>> target_device = 0 + >>> dim = 0 + >>> gathered = container_gather(outputs, target_device, dim) + >>> _report_data_shape(gathered) + """ + def gather_map(outputs_): + out = outputs_[0] + if isinstance(out, torch.Tensor): + # if all(t.dim() == 0 for t in outputs_) and dim == 0: + # # unsqueeze warnings will trigger + # import xdev + # xdev.embed() + return OrigGather.apply(target_device, dim, *outputs_) + if isinstance(out, BatchContainer): + # if out.datatype is list: + newdata = [d for dc in outputs_ for d in dc.data] + if not out.cpu_only: + import netharn as nh + target_xpu = nh.XPU(target_device) + newdata = target_xpu.move(newdata) + return newdata + # else: + # raise NotImplementedError(repr(out.datatype)) + if out is None: + return None + if isinstance(out, dict): + out0_keys = set(out.keys()) + output_keys = [set(d.keys()) for d in outputs_] + if not all(out0_keys == k for k in output_keys): + problem_keys = ( + set.union(*output_keys) - set.intersection(*output_keys) + ) + raise ValueError( + 'All dicts must have the same keys. ' + 'problem_keys={}'.format(problem_keys)) + return type(out)(((k, gather_map([d[k] for d in outputs_])) + for k in out)) + return type(out)(map(gather_map, zip(*outputs_))) + + # Recursive function calls like this create reference cycles. 
+ # Setting the function to None clears the refcycle. + try: + res = gather_map(outputs) + finally: + gather_map = None + return res + + +# --- + + +class ContainerXPU(XPU): + + def mount(xpu, model): + """ + Like move, but only for models. + Note that this works inplace for non-Tensor objects. + + Args: + model (torch.nn.Module): the model to mount + + Returns: + DataSerial | DataParallel : + the model mounted on the XPU (which may be multiple GPUs) + + Example: + >>> model = torch.nn.Conv2d(1, 1, 1) + >>> xpu = XPU() + """ + # Unwrap the core model if necessary + model = xpu.raw(model) + model = xpu.move(model) + if xpu._device_ids and len(xpu._device_ids) > 1: + model = ContainerDataParallel( + model, device_ids=xpu._device_ids, + output_device=xpu._main_device_id) + else: + model = DataSerial(model) + return model + + +def nestshape(data): + import ubelt as ub + + def _recurse(d): + import torch + import numpy as np + if isinstance(d, dict): + return ub.odict(sorted([(k, _recurse(v)) for k, v in d.items()])) + elif 'Container' in type(d).__name__: + meta = ub.odict(sorted([ + ('stack', d.stack), + # ('padding_value', d.padding_value), + # ('pad_dims', d.pad_dims), + # ('datatype', d.datatype), + ('cpu_only', d.cpu_only), + ])) + meta = ub.repr2(meta, nl=0) + return {type(d).__name__ + meta: _recurse(d.data)} + elif isinstance(d, list): + return [_recurse(v) for v in d] + elif isinstance(d, tuple): + return tuple([_recurse(v) for v in d]) + elif isinstance(d, torch.Tensor): + return d.shape + elif isinstance(d, np.ndarray): + return d.shape + elif isinstance(d, (str, bytes)): + return d + elif isinstance(d, (int, float)): + return d + elif isinstance(d, slice): + return d + else: + raise TypeError(type(d)) + + # globals()['_recurse'] = _recurse + d = _recurse(data) + return d + + +def _report_data_shape(data): + d = nestshape(data) + print('d = {}'.format(ub.repr2(d, nl=-2))) + + +def _debug_inbatch_shapes(inbatch): + import ubelt as ub + print('len(inbatch) = 
{}'.format(len(inbatch))) + extensions = ub.util_format.FormatterExtensions() + + @extensions.register((torch.Tensor, np.ndarray)) + def format_shape(data, **kwargs): + return ub.repr2(dict(type=str(type(data)), shape=data.shape), nl=1, sv=1) + + print('inbatch = ' + ub.repr2(inbatch, extensions=extensions, nl=True)) diff --git a/netharn/data/toydata.py b/netharn/data/toydata.py index b633cd0197d0d34c7e90edfb082e3ff5472ddf25..23b1e37fc646c8b40eb1cd783bcb4c5792f2f67e 100644 --- a/netharn/data/toydata.py +++ b/netharn/data/toydata.py @@ -1,49 +1,61 @@ -import torch +""" +Simple arbitrary-sized datasets for testing / demo purposes +""" import numpy as np import itertools as it -from torch.utils import data as torch_data -from netharn.data import base -from netharn import util import ubelt as ub +import torch +from torch.utils import data as torch_data + +import kwarray + + +class ToyData1d(torch_data.Dataset): + """ + Spiral xy-data points + + Args: + n (int, default=2000): dataset size + rng (RandomCoercable, default=None): seed or random state + + Note: + this is 1d in the sense that each data point has shape with len(1), + even though they can be interpreted as 2d vector points. 
+ CommandLine: + python -m netharn.data.toydata ToyData1d --show + + Example: + >>> dset = ToyData1d() + >>> data, labels = next(iter(dset.make_loader(batch_size=2000))) + >>> # xdoctest: +REQUIRES(--show) + >>> import kwplot + >>> plt = kwplot.autoplt() + >>> kwplot.figure(fnum=1, doclf=True) + >>> cls1 = data[labels == 0] + >>> cls2 = data[labels == 1] + >>> a, b = cls1.T.numpy() + >>> c, d = cls2.T.numpy() + >>> plt.plot(a, b, 'rx') + >>> plt.plot(c, d, 'bx') + >>> kwplot.show_if_requested() + """ -class ToyData1d(torch_data.Dataset, base.DataMixin): - def __init__(self, rng=None): - """ - Spiral 2d data points - - CommandLine: - python ~/code/netharn/netharn/data/toydata.py ToyData1d --show - - Example: - >>> dset = ToyData1d() - >>> data, labels = next(iter(dset.make_loader(batch_size=2000))) - >>> # xdoctest: +REQUIRES(--show) - >>> from netharn.util import mplutil - >>> mplutil.qtensure() # xdoc: +SKIP - >>> mplutil.figure(fnum=1, doclf=True) - >>> cls1 = data[labels == 0] - >>> cls2 = data[labels == 1] - >>> from matplotlib import pyplot as plt - >>> a, b = cls1.T.numpy() - >>> c, d = cls2.T.numpy() - >>> plt.plot(a, b, 'rx') - >>> plt.plot(c, d, 'bx') - >>> mplutil.show_if_requested() - """ - rng = util.ensure_rng(rng) + def __init__(self, n=2000, rng=None): + rng = kwarray.ensure_rng(rng) # spiral equation in parameteric form: # x(t) = r(t) * cos(t) # y(t) = r(t) * sin(t) # class 1 - n = 1000 - theta1 = rng.rand(n) * 10 + n1 = n // 2 + theta1 = rng.rand(n1) * 10 x1 = theta1 * np.cos(theta1) y1 = theta1 * np.sin(theta1) - theta2 = rng.rand(n) * 10 + n2 = n - n1 + theta2 = rng.rand(n2) * 10 x2 = -theta2 * np.cos(theta2) y2 = -theta2 * np.sin(theta2) @@ -51,10 +63,10 @@ class ToyData1d(torch_data.Dataset, base.DataMixin): labels = [] data.extend(list(zip(x1, y1))) - labels.extend([0] * n) + labels.extend([0] * n1) data.extend(list(zip(x2, y2))) - labels.extend([1] * n) + labels.extend([1] * n2) data = np.array(data) labels = np.array(labels) @@ -62,7 +74,8 
@@ class ToyData1d(torch_data.Dataset, base.DataMixin): self.data = data self.labels = labels - suffix = ub.hash_data([rng], base='abc', hasher='sha1')[0:16] + suffix = ub.hash_data([ + rng], base='abc', hasher='sha1')[0:16] self.input_id = 'TD1D_{}_'.format(n) + suffix def __len__(self): @@ -73,26 +86,38 @@ class ToyData1d(torch_data.Dataset, base.DataMixin): label = int(self.labels[index]) return data, label + def make_loader(self, *args, **kwargs): + loader = torch_data.DataLoader(self, *args, **kwargs) + return loader + -class ToyData2d(torch_data.Dataset, base.DataMixin): +class ToyData2d(torch_data.Dataset): """ + Simple black-on-white and white-on-black images. + + Args: + n (int, default=100): dataset size + size (int, default=4): width / height + border (int, default=1): border mode + rng (RandomCoercable, default=None): seed or random state + CommandLine: - python ~/code/netharn/netharn/data/toydata.py ToyData2d --show + python -m netharn.data.toydata ToyData2d --show Example: >>> self = ToyData2d() >>> data1, label1 = self[0] >>> data2, label2 = self[-1] >>> # xdoctest: +REQUIRES(--show) - >>> from netharn.util import mplutil - >>> mplutil.qtensure() - >>> mplutil.figure(fnum=1, doclf=True) - >>> mplutil.imshow(data1.numpy().squeeze(), pnum=(1, 2, 1)) - >>> mplutil.imshow(data2.numpy().squeeze(), pnum=(1, 2, 2)) - >>> mplutil.show_if_requested() + >>> import kwplot + >>> plt = kwplot.autoplt() + >>> kwplot.figure(fnum=1, doclf=True) + >>> kwplot.imshow(data1.numpy().squeeze(), pnum=(1, 2, 1)) + >>> kwplot.imshow(data2.numpy().squeeze(), pnum=(1, 2, 2)) + >>> kwplot.show_if_requested() """ def __init__(self, size=4, border=1, n=100, rng=None): - rng = util.ensure_rng(rng) + rng = kwarray.ensure_rng(rng) h = w = size @@ -130,6 +155,10 @@ class ToyData2d(torch_data.Dataset, base.DataMixin): label = int(self.labels[index]) return data, label + def make_loader(self, *args, **kwargs): + loader = torch_data.DataLoader(self, *args, **kwargs) + return loader + if 
__name__ == '__main__': """ diff --git a/netharn/device.py b/netharn/device.py index c302e3b71ff6baa1f1520a94e64bdfda2bf478ed..34fbdad15e71f0cf736c7df1119e3eab5ed4f396 100644 --- a/netharn/device.py +++ b/netharn/device.py @@ -408,7 +408,6 @@ class XPU(ub.NiceRepr): } """ - gpus = gpu_info() info = { 'available': 0, 'total': 0, @@ -427,10 +426,11 @@ class XPU(ub.NiceRepr): info['used'] += tup.used * MB info['available'] += tup.available * MB else: - for id in self._device_ids: - info['total'] += gpus[id]['mem_total'] - info['used'] += gpus[id]['mem_used'] - info['available'] += gpus[id]['mem_avail'] + gpus = gpu_info() + for index in self._device_ids: + info['total'] += gpus[index]['mem_total'] + info['used'] += gpus[index]['mem_used'] + info['available'] += gpus[index]['mem_avail'] return info def __str__(xpu): @@ -673,7 +673,11 @@ def find_unused_gpu(min_memory=0): >>> item = find_unused_gpu() >>> assert item is None or isinstance(item, int) """ - gpus = gpu_info() + try: + gpus = gpu_info() + except NvidiaSMIError: + gpus = None + if not gpus: return None @@ -725,6 +729,10 @@ def _query_nvidia_smi(mode, fields): return rows +class NvidiaSMIError(Exception): + pass + + def gpu_info(new_mode=True): """ Run nvidia-smi and parse output @@ -852,7 +860,7 @@ def gpu_info(new_mode=True): print(info['err']) warnings.warn('Problem running nvidia-smi: ret='.format( info['ret'])) - return None + raise NvidiaSMIError xml_string = info['out'] root = ET.fromstring(xml_string) @@ -900,7 +908,7 @@ def gpu_info(new_mode=True): gpu_rows = _query_nvidia_smi(mode, fields) except Exception as ex: warnings.warn('Problem running nvidia-smi: {!r}'.format(ex)) - return None + raise NvidiaSMIError fields = ['pid', 'name', 'gpu_uuid', 'used_memory'] mode = 'query-compute-apps' @@ -918,7 +926,7 @@ def gpu_info(new_mode=True): gpu['procs'] = [] gpus[num] = gpu - gpu_uuid_to_num = {g['gpu_uuid']: gpu['num'] for g in gpus.values()} + gpu_uuid_to_num = {gpu['gpu_uuid']: gpu['num'] for gpu in 
gpus.values()} for row in proc_rows: # Give each GPU info on which processes are using it @@ -974,10 +982,10 @@ def gpu_info(new_mode=True): result = ub.cmd('nvidia-smi') if result['ret'] != 0: warnings.warn('Problem running nvidia-smi.') - return None + raise NvidiaSMIError except Exception: warnings.warn('Could not run nvidia-smi.') - return {} + raise NvidiaSMIError lines = result['out'].splitlines() diff --git a/netharn/examples/classification.py b/netharn/examples/classification.py new file mode 100644 index 0000000000000000000000000000000000000000..ff6a1c860254b53cf6246dca5e698e134415e8d5 --- /dev/null +++ b/netharn/examples/classification.py @@ -0,0 +1,862 @@ +# -*- coding: utf-8 -*- +""" +This is a simple generalized harness for training a classifier on a coco dataset. + +Given a COCO-style dataset data (you can create a sample coco dataset using the +kwcoco CLI), this module trains a classifier on chipped regions denoted by the +coco annotations. These chips are cropped from the image and resized to the +specified ``input_dims``. The default network architecture is resnet50. Other +settings like augmentation, learning rate, batch size, etc can all be specified +via the command line, a config file, or a Python dictionary (see +:class:`ClfConfig` for all available arguments). + +For details see the other docstrings in this file and / or try running +yourself. + +.. 
code-block:: bash + + # Install netharn + # pip3 install netharn # TODO: uncomment once 0.5.7 is live + pip3 install git+https://gitlab.kitware.com/computer-vision/netharn.git@dev/0.5.7 + + # Install kwcoco and autogenerate a image toy datasets + pip3 install kwcoco + kwcoco toydata --dst ./toydata_train.json --key shapes1024 + kwcoco toydata --dst ./toydata_vali.json --key shapes128 # optional + kwcoco toydata --dst ./toydata_test.json --key shapes256 # optional + + # Train a classifier on your dataset + python3 -m netharn.examples.classification \ + --name="My Classification Example" \ + --train_dataset=./toydata_train.json \ + --vali_dataset=./toydata_vali.json \ + --test_dataset=./toydata_test.json \ + --input_dims=224,244 \ + --batch_size=32 \ + --max_epoch=100 \ + --patience=40 \ + --xpu=gpu0 \ + --schedule=ReduceLROnPlateau-p10-c10 \ + --augmenter=medium \ + --lr=1e-3 + +# TODO: describe what the output of this should look like. + +""" +from __future__ import absolute_import, division, print_function, unicode_literals +from os.path import join +import numpy as np +import sys +import torch +import ubelt as ub + +import netharn as nh +import kwarray +import scriptconfig as scfg +from netharn.data.channel_spec import ChannelSpec + + +class ClfConfig(scfg.Config): + """ + This is the default configuration for running the classification example. + + Instances of this class behave like a dictionary. However, they can also be + specified on the command line, via kwargs, or by pointing to a YAML/json + file. See :module:``scriptconfig`` for details of how to use + :class:`scriptconfig.Config` objects. + """ + default = { + 'name': scfg.Value('clf_example', help='A human readable tag that is "name" for humans'), + 'workdir': scfg.Path('~/work/netharn', help='Dump all results in your workdir'), + + 'workers': scfg.Value(2, help='number of parallel dataloading jobs'), + 'xpu': scfg.Value('auto', help='See netharn.XPU for details. 
can be auto/cpu/xpu/cuda0/0,1,2,3)'), + + 'datasets': scfg.Value('special:shapes256', help='Either a special key or a coco file'), + 'train_dataset': scfg.Value(None), + 'vali_dataset': scfg.Value(None), + 'test_dataset': scfg.Value(None), + + 'sampler_backend': scfg.Value(None, help='ndsampler backend'), + + 'channels': scfg.Value('rgb', help='special channel code. See ChannelSpec'), + + 'arch': scfg.Value('resnet50', help='Network architecture code'), + 'optim': scfg.Value('adam', help='Weight optimizer. Can be SGD, ADAM, ADAMW, etc..'), + + 'input_dims': scfg.Value((224, 224), help='Window size to input to the network'), + 'normalize_inputs': scfg.Value(True, help=( + 'if True, precompute training mean and std for data whitening')), + + 'balance': scfg.Value(None, help='balance strategy. Can be category or None'), + + 'augmenter': scfg.Value('simple', help='type of training dataset augmentation'), + + 'batch_size': scfg.Value(3, help='number of items per batch'), + 'num_batches': scfg.Value('auto', help='Number of batches per epoch (mainly for balanced batch sampling)'), + + 'max_epoch': scfg.Value(140, help='Maximum number of epochs'), + 'patience': scfg.Value(140, help='Maximum "bad" validation epochs before early stopping'), + + 'lr': scfg.Value(1e-4, help='Base learning rate'), + 'decay': scfg.Value(1e-5, help='Base weight decay'), + 'schedule': scfg.Value( + 'step90-120', help=( + 'Special coercible netharn code. Eg: onecycle50, step50, gamma, ReduceLROnPlateau-p10-c10')), + + 'init': scfg.Value('noop', help='How to initialized weights: e.g. noop, kaiming_normal, path-to-a-pretrained-model)'), + 'pretrained': scfg.Path(help=('alternative way to specify a path to a pretrained model')), + } + + def normalize(self): + if self['pretrained'] in ['null', 'None']: + self['pretrained'] = None + + if self['pretrained'] is not None: + self['init'] = 'pretrained' + + +class ClfModel(nh.layers.Module): + """ + A simple pytorch classification model. 
+ + Note what I consider as "reproducibility" conventions present in this + model: + + (1) classes can be specified as a list of class names (or + technically anything that is :class:`ndsampler.CategoryTree` + coercible). This helps anyone with your pretrained model to + understand what it's predicting. + + (2) The expected input channels are specified, as a + :class:`netharn.data.ChannelSpec` coercible (e.g. a number, a + code like "rgb" or "rgb|disparity", or a dict like structure) + + # TODO: properly define the dict structure, for now just use + # strings. + + (3) The input statistics are specified as a dict and applied at runtime + + { + 'mean': per_channel_mean, + 'std': per_channel_std, + } + + This means you don't have to remember these values when loading + data at test time, the network remembers them instead. + + # TODO: this has to be better rectified with channel specifications + # for now assume only one early fused stream like rgb. + + (4) The inputs and outputs to the network are dictionaries with + keys hinting at the proper interpretation of the values. + + The inputs provide a mapping from channel spec keys to early-fused + tensors, which can be used in specific ways (e.g. to connect input + rgb and disparity signals into late fused network components). + + The outputs provide a mapping to whatever type of output you want + to provide. DON'T JUST RETURN A SOMETIMES TUPLE OF LOSS AND OUTPUTS + IN SOME RANDOM FORMAT! Instead if your network sometimes returns + loss then sometimes add the value ``outputs['loss'] = loss``. And maybe you do some decoding of the outputs to + probabilities, in that case add the value ``outputs['class_probs'] + = probs``. Or maybe you return the logits, so return + ``outputs['class_logits']``. This is far easier to use than + returning tuples of data. + + (5) A coder that performs postprocessing on batch outputs to + obtain a usable form for the predictions.
+ + Example: + >>> from netharn.examples.classification import * # NOQA + >>> classes = ['a', 'b', 'c'] + >>> input_stats = { + >>> 'mean': torch.Tensor([[[0.1]], [[0.2]], [[0.2]]]), + >>> 'std': torch.Tensor([[[0.3]], [[0.3]], [[0.3]]]), + >>> } + >>> channels = 'rgb' + >>> self = ClfModel( + >>> arch='resnet50', channels=channels, + >>> input_stats=input_stats, classes=classes) + >>> inputs = torch.rand(4, 1, 256, 256) + >>> outputs = self(inputs) + >>> self.coder.decode_batch(outputs) + """ + + def __init__(self, arch='resnet50', classes=1000, channels='rgb', + input_stats=None): + super(ClfModel, self).__init__() + + import ndsampler + if input_stats is None: + input_stats = {} + input_norm = nh.layers.InputNorm(**input_stats) + + self.classes = ndsampler.CategoryTree.coerce(classes) + + self.channels = ChannelSpec.coerce(channels) + chann_norm = self.channels.normalize() + assert len(chann_norm) == 1 + in_channels = len(ub.peek(chann_norm.values())) + num_classes = len(self.classes) + + if arch == 'resnet50': + from torchvision import models + model = models.resnet50() + new_conv1 = torch.nn.Conv2d(in_channels, 64, kernel_size=7, + stride=3, padding=3, bias=False) + new_fc = torch.nn.Linear(2048, num_classes, bias=True) + new_conv1.weight.data[:, 0:in_channels, :, :] = model.conv1.weight.data[0:, 0:in_channels, :, :] + new_fc.weight.data[0:num_classes, :] = model.fc.weight.data[0:num_classes, :] + new_fc.bias.data[0:num_classes] = model.fc.bias.data[0:num_classes] + model.fc = new_fc + model.conv1 = new_conv1 + else: + raise KeyError(arch) + + self.input_norm = input_norm + self.model = model + + self.coder = ClfCoder(self.classes) + + def forward(self, inputs): + """ + Args: + inputs (Tensor | dict): Either the input images (as a regular + pytorch BxCxHxW Tensor) or a dictionary mapping input + modalities to the input images. + + Returns: + Dict[str, Tensor]: model output wrapped in a dictionary so it's + clear what the return type is.
In this case "energy" is class + probabilities **before** softmax / normalization is applied. + """ + if isinstance(inputs, dict): + # TODO: handle channel modalities later + assert len(inputs) == 1, ( + 'only support one fused stream: e.g. rgb for now ') + im = ub.peek(inputs.values()) + else: + im = inputs + + im = self.input_norm(im) + class_energy = self.model(im) + outputs = { + 'class_energy': class_energy, + } + return outputs + + +class ClfCoder(object): + """ + The coder take the output of the classifier and transforms it into a + standard format. Currently there is no standard "classification" format + that I use other than a dictionary with special keys. + """ + def __init__(self, classes): + self.classes = classes + + def decode_batch(self, outputs): + class_energy = outputs['class_energy'] + class_probs = self.classes.hierarchical_softmax(class_energy, dim=1) + pred_cxs, pred_conf = self.classes.decision( + class_probs, dim=1, thresh=0.1, + criterion='entropy', + ) + decoded = { + 'class_probs': class_probs, + 'pred_cxs': pred_cxs, + 'pred_conf': pred_conf, + } + return decoded + + +class ClfDataset(torch.utils.data.Dataset): + """ + Efficient loader for classification training on coco samplers. + + This is a normal torch dataset that uses :module:`ndsampler` and + :module:`imgaug` for data loading an augmentation. + + It also contains a ``make_loader`` method for creating a class balanced + DataLoader. There is little netharn-specific about this class. 
+ + Example: + >>> import ndsampler + >>> sampler = ndsampler.CocoSampler.demo() + >>> self = ClfDataset(sampler) + >>> index = 0 + >>> self[index]['inputs']['rgb'].shape + >>> loader = self.make_loader(batch_size=8, shuffle=True, num_workers=0, num_batches=10) + >>> for batch in ub.ProgIter(iter(loader), total=len(loader)): + >>> break + >>> print('batch = {}'.format(ub.repr2(batch, nl=1))) + >>> # xdoctest: +REQUIRES(--show) + >>> import kwplot + >>> kwplot.autompl() + >>> kwplot.imshow(batch['inputs']['rgb'][0]) + """ + def __init__(self, sampler, input_dims=(256, 256), augmenter=None): + self.sampler = sampler + self.augmenter = None + self.conditional_augmentors = None + self.input_dims = input_dims + self.classes = self.sampler.catgraph + + self.augmenter = self._coerce_augmenter(augmenter) + + def __len__(self): + return self.sampler.n_positives + + @ub.memoize_property + def input_id(self): + def imgaug_json_id(aug): + import imgaug + if isinstance(aug, tuple): + return [imgaug_json_id(item) for item in aug] + elif isinstance(aug, imgaug.parameters.StochasticParameter): + return str(aug) + else: + try: + info = ub.odict() + info['__class__'] = aug.__class__.__name__ + params = aug.get_parameters() + if params: + info['params'] = [imgaug_json_id(p) for p in params] + if isinstance(aug, list): + children = aug[:] + children = [imgaug_json_id(c) for c in children] + info['children'] = children + return info + except Exception: + # imgaug is weird and buggy + return str(aug) + depends = [ + self.sampler._depends(), + self.augmenter and imgaug_json_id(self.augmenter), + ] + _input_id = ub.hash_data(depends, hasher='sha512', base='abc')[0:40] + return _input_id + + def __getitem__(self, index): + import kwimage + + # Load sample image and category + sample = self.sampler.load_positive(index, with_annots=False) + image = kwimage.atleast_3channels(sample['im'])[:, :, 0:3] + target = sample['tr'] + + image = kwimage.ensure_uint255(image) + if self.augmenter is not 
None: + det = self.augmenter.to_deterministic() + image = det.augment_image(image) + + # Resize to input dimensions + if self.input_dims is not None: + dsize = tuple(self.input_dims[::-1]) + image = kwimage.imresize(image, dsize=dsize, letterbox=True) + + class_id_to_idx = self.sampler.classes.id_to_idx + cid = target['category_id'] + cidx = class_id_to_idx[cid] + + im_chw = image.transpose(2, 0, 1) / 255.0 + inputs = { + 'rgb': torch.FloatTensor(im_chw), + } + labels = { + 'class_idxs': cidx, + } + batch = { + 'inputs': inputs, + 'labels': labels, + } + return batch + + def _coerce_augmenter(self, augmenter): + import netharn as nh + import imgaug.augmenters as iaa + if augmenter is True: + augmenter = 'simple' + if not augmenter: + augmenter = None + elif augmenter == 'simple': + augmenter = iaa.Sequential([ + iaa.Crop(percent=(0, .2)), + iaa.Fliplr(p=.5) + ]) + elif augmenter == 'medium': + augmenter = iaa.Sequential([ + iaa.Sometimes(0.2, nh.data.transforms.HSVShift(hue=0.1, sat=1.5, val=1.5)), + iaa.Crop(percent=(0, .2)), + iaa.Fliplr(p=.5) + ]) + else: + raise KeyError('Unknown augmentation {!r}'.format(augmenter)) + return augmenter + + def make_loader(self, batch_size=16, num_batches='auto', num_workers=0, + shuffle=False, pin_memory=False, drop_last=False, + balance=None): + + if len(self) == 0: + raise Exception('must have some data') + + def worker_init_fn(worker_id): + for i in range(worker_id + 1): + seed = np.random.randint(0, int(2 ** 32) - 1) + seed = seed + worker_id + kwarray.seed_global(seed) + if self.augmenter: + rng = kwarray.ensure_rng(None) + self.augmenter.seed_(rng) + + loaderkw = { + 'num_workers': num_workers, + 'pin_memory': pin_memory, + 'worker_init_fn': worker_init_fn, + } + if balance is None: + loaderkw['shuffle'] = shuffle + loaderkw['batch_size'] = batch_size + loaderkw['drop_last'] = drop_last + elif balance == 'classes': + from netharn.data.batch_samplers import BalancedBatchSampler + index_to_cid = [ + cid for cid in
self.sampler.regions.targets['category_id'] + ] + batch_sampler = BalancedBatchSampler( + index_to_cid, batch_size=batch_size, + shuffle=shuffle, num_batches=num_batches) + loaderkw['batch_sampler'] = batch_sampler + else: + raise KeyError(balance) + + loader = torch.utils.data.DataLoader(self, **loaderkw) + return loader + + +class ClfHarn(nh.FitHarn): + """ + The Classification Harness + ========================== + + The concept of a "Harness" is at the core of netharn. This is our custom + :class:`netharn.FitHarn` object for a classification problem. + + The Harness provides the important details to the training loop via the + `run_batch` method. The rest of the loop boilerplate is taken care of by + `nh.FitHarn` internals. In addition to `run_batch`, we also define several + callbacks to perform customized monitoring of training progress. + """ + + def after_initialize(harn, **kw): + harn._accum_confusion_vectors = { + 'y_true': [], + 'y_pred': [], + 'probs': [], + } + + def prepare_batch(harn, raw_batch): + return raw_batch + + def run_batch(harn, batch): + """ + Example: + >>> # xdoctest: +SKIP + >>> harn = setup_harn(datasets='special:shapes256', batch_size=4).initialize() + >>> batch = harn._demo_batch(0, tag='train') + >>> outputs, loss = harn.run_batch(batch) + >>> harn.on_batch(batch, outputs, loss) + """ + classes = harn.raw_model.classes + inputs = harn.xpu.move(batch['inputs']) + labels = harn.xpu.move(batch['labels']) + + outputs = harn.model(inputs) + + class_energy = outputs['class_energy'] + class_logprobs = classes.hierarchical_log_softmax( + class_energy, dim=1) + + class_idxs = labels['class_idxs'] + loss = nh.criterions.focal.nll_focal_loss( + class_logprobs, class_idxs, focus=2.0, reduction='mean') + + loss_parts = {} + loss_parts['clf'] = loss + + decoded = harn.raw_model.coder.decode_batch(outputs) + + outputs['class_probs'] = decoded['class_probs'] + outputs['pred_cxs'] = decoded['pred_cxs'] + outputs['true_cxs'] = class_idxs + return outputs,
loss_parts + + def on_batch(harn, batch, outputs, loss): + """ + Custom code executed at the end of each batch. + """ + bx = harn.bxs[harn.current_tag] + if bx < 3: + stacked = harn._draw_batch(batch, outputs) + dpath = ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag)) + fpath = join(dpath, 'batch_{}_epoch_{}.jpg'.format(bx, harn.epoch)) + import kwimage + kwimage.imwrite(fpath, stacked) + + y_pred = kwarray.ArrayAPI.numpy(outputs['pred_cxs']) + y_true = outputs['true_cxs'].data.cpu().numpy() + probs = outputs['class_probs'].data.cpu().numpy() + harn._accum_confusion_vectors['y_true'].append(y_true) + harn._accum_confusion_vectors['y_pred'].append(y_pred) + harn._accum_confusion_vectors['probs'].append(probs) + + def _draw_batch(harn, batch, outputs, limit=32): + """ + Example: + >>> # xdoctest: +REQUIRES(--download) + >>> harn = setup_harn(batch_size=3).initialize() + >>> batch = harn._demo_batch(0, tag='train') + >>> outputs, loss = harn.run_batch(batch) + >>> stacked = harn._draw_batch(batch, outputs, limit=12) + >>> # xdoctest: +REQUIRES(--show) + >>> import kwplot + >>> kwplot.autompl() + >>> kwplot.imshow(stacked, colorspace='rgb', doclf=True) + >>> kwplot.show_if_requested() + """ + import kwimage + inputs = batch['inputs']['rgb'][0:limit].data.cpu().numpy() + true_cxs = batch['labels']['class_idxs'].data.cpu().numpy() + class_probs = outputs['class_probs'].data.cpu().numpy() + pred_cxs = kwarray.ArrayAPI.numpy(outputs['pred_cxs']) + + dset = harn.datasets[harn.current_tag] + classes = dset.classes + + todraw = [] + for im, pcx, tcx, probs in zip(inputs, pred_cxs, true_cxs, class_probs): + im_ = im.transpose(1, 2, 0) + + # Renormalize and resize image for drawing + min_, max_ = im_.min(), im_.max() + im_ = ((im_ - min_) / (max_ - min_) * 255).astype(np.uint8) + im_ = np.ascontiguousarray(im_) + im_ = kwimage.imresize(im_, dsize=(200, 200), + interpolation='nearest') + + # Draw classification information on the image + im_ = 
kwimage.draw_clf_on_image(im_, classes=classes, tcx=tcx, + pcx=pcx, probs=probs) + todraw.append(im_) + + stacked = kwimage.stack_images_grid(todraw, overlap=-10, + bg_value=(10, 40, 30), + chunksize=8) + return stacked + + def on_epoch(harn): + """ + Custom code executed at the end of each epoch. + + This function can optionally return a dictionary containing any scalar + quality metrics that you wish to log and monitor. (Note these will be + plotted to tensorboard if that is installed). + + Notes: + It is ok to do some medium lifting in this function because it is + run relatively few times. + + Returns: + dict: dictionary of scalar metrics for netharn to log + + CommandLine: + xdoctest -m netharn.examples.classification ClfHarn.on_epoch + + Example: + >>> harn = setup_harn().initialize() + >>> harn._demo_epoch('vali', max_iter=10) + >>> harn.on_epoch() + """ + from netharn.metrics import clf_report + dset = harn.datasets[harn.current_tag] + + probs = np.vstack(harn._accum_confusion_vectors['probs']) + y_true = np.hstack(harn._accum_confusion_vectors['y_true']) + y_pred = np.hstack(harn._accum_confusion_vectors['y_pred']) + + # _pred = probs.argmax(axis=1) + # assert np.all(_pred == y_pred) + + # from netharn.metrics import confusion_vectors + # cfsn_vecs = confusion_vectors.ConfusionVectors.from_arrays( + # true=y_true, pred=y_pred, probs=probs, classes=dset.classes) + # report = cfsn_vecs.classification_report() + # combined_report = report['metrics'].loc['combined'].to_dict() + + # ovr_cfsn = cfsn_vecs.binarize_ovr() + # Compute multiclass metrics (new way!) + target_names = dset.classes + ovr_report = clf_report.ovr_classification_report( + y_true, probs, target_names=target_names, metrics=[ + 'auc', 'ap', 'mcc', 'brier' + ]) + + # percent error really isn't a great metric, but its easy and standard. 
+ errors = (y_true != y_pred) + acc = 1.0 - errors.mean() + percent_error = (1.0 - acc) * 100 + + metrics_dict = ub.odict() + metrics_dict['ave_brier'] = ovr_report['ave']['brier'] + metrics_dict['ave_mcc'] = ovr_report['ave']['mcc'] + metrics_dict['ave_auc'] = ovr_report['ave']['auc'] + metrics_dict['ave_ap'] = ovr_report['ave']['ap'] + metrics_dict['percent_error'] = percent_error + metrics_dict['acc'] = acc + + harn.info(ub.color_text('ACC FOR {!r}: {!r}'.format(harn.current_tag, acc), 'yellow')) + + # Clear confusion vectors accumulator for the next epoch + harn._accum_confusion_vectors = { + 'y_true': [], + 'y_pred': [], + 'probs': [], + } + return metrics_dict + + +def setup_harn(cmdline=True, **kw): + """ + This creates the "The Classification Harness" (i.e. core ClfHarn object). + This is where we programmatically connect our program arguments with the + netharn HyperParameter standards. We are using :module:`scriptconfig` to + capture these, but you could use click / argparse / etc. + + This function has the responsibility of creating our torch datasets, + lazy computing input statistics, specifying our model architecture, + schedule, initialization, optimizer, dynamics, XPU etc. These can usually + be coerced using netharn API helpers and a "standardized" config dict. See + the function code for details. + + Args: + cmdline (bool, default=True): + if True, behavior will be modified based on ``sys.argv``. + Note this will activate the scriptconfig ``--help``, ``--dump`` and + ``--config`` interactions. + + Kwargs: + **kw: the overrides the default config for :class:`ClfConfig`. + Note, command line flags have precedence if cmdline=True. + + Returns: + ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn` + object. 
+ + Example: + >>> # xdoctest: +SKIP + >>> kw = {'datasets': 'special:shapes256'} + >>> cmdline = False + >>> harn = setup_harn(cmdline, **kw) + >>> harn.initialize() + """ + import ndsampler + config = ClfConfig(default=kw) + config.load(cmdline=cmdline) + print('config = {}'.format(ub.repr2(config.asdict()))) + + nh.configure_hacks(config) + coco_datasets = nh.api.Datasets.coerce(config) + + print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1))) + for tag, dset in coco_datasets.items(): + dset._build_hashid(hash_pixels=False) + + workdir = ub.ensuredir(ub.expandpath(config['workdir'])) + samplers = { + tag: ndsampler.CocoSampler(dset, workdir=workdir, backend=config['sampler_backend']) + for tag, dset in coco_datasets.items() + } + + for tag, sampler in ub.ProgIter(list(samplers.items()), desc='prepare frames'): + sampler.frames.prepare(workers=config['workers']) + + torch_datasets = { + 'train': ClfDataset( + samplers['train'], + input_dims=config['input_dims'], + augmenter=config['augmenter'], + ), + 'vali': ClfDataset( + samplers['vali'], + input_dims=config['input_dims'], + augmenter=False), + } + + if config['normalize_inputs']: + # Get stats on the dataset (todo: turn off augmentation for this) + _dset = torch_datasets['train'] + stats_idxs = kwarray.shuffle(np.arange(len(_dset)), rng=0)[0:min(1000, len(_dset))] + stats_subset = torch.utils.data.Subset(_dset, stats_idxs) + + cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3') + input_stats = cacher.tryload() + + channels = ChannelSpec.coerce(config['channels']) + + if input_stats is None: + # Use parallel workers to load data faster + from netharn.data.data_containers import container_collate + from functools import partial + collate_fn = partial(container_collate, num_devices=1) + + loader = torch.utils.data.DataLoader( + stats_subset, + collate_fn=collate_fn, + num_workers=config['workers'], + shuffle=True, + batch_size=config['batch_size']) + + # Track moving average of each fused 
channel stream + channel_stats = {key: nh.util.RunningStats() + for key in channels.keys()} + assert len(channel_stats) == 1, ( + 'only support one fused stream for now') + for batch in ub.ProgIter(loader, desc='estimate mean/std'): + for key, val in batch['inputs'].items(): + try: + for part in val.numpy(): + channel_stats[key].update(part) + except ValueError: # final batch broadcast error + pass + + perchan_input_stats = {} + for key, running in channel_stats.items(): + running = ub.peek(channel_stats.values()) + perchan_stats = running.simple(axis=(1, 2)) + perchan_input_stats[key] = { + 'std': perchan_stats['std'].round(3), + 'mean': perchan_stats['mean'].round(3), + } + + input_stats = ub.peek(perchan_input_stats.values()) + cacher.save(input_stats) + else: + input_stats = {} + + torch_loaders = { + tag: dset.make_loader( + batch_size=config['batch_size'], + num_batches=config['num_batches'], + num_workers=config['workers'], + shuffle=(tag == 'train'), + balance=(config['balance'] if tag == 'train' else None), + pin_memory=True) + for tag, dset in torch_datasets.items() + } + + initializer_ = None + classes = torch_datasets['train'].classes + + modelkw = { + 'arch': config['arch'], + 'input_stats': input_stats, + 'classes': classes.__json__(), + 'channels': ChannelSpec.coerce(config['channels']), # bound here so normalize_inputs=False also works + } + model = ClfModel(**modelkw) + model._initkw = modelkw + + if initializer_ is None: + initializer_ = nh.Initializer.coerce(config) + + hyper = nh.HyperParams( + name=config['name'], + + workdir=config['workdir'], + xpu=nh.XPU.coerce(config['xpu']), + + datasets=torch_datasets, + loaders=torch_loaders, + + model=model, + criterion=None, + + optimizer=nh.Optimizer.coerce(config), + dynamics=nh.Dynamics.coerce(config), + scheduler=nh.Scheduler.coerce(config), + + initializer=initializer_, + + monitor=(nh.Monitor, { + 'minimize': ['loss'], + 'patience': config['patience'], + 'max_epoch': config['max_epoch'], + 'smoothing': 0.0, + }), + other={ + 'name': config['name'], + 'batch_size':
config['batch_size'], + 'balance': config['balance'], + }, + extra={ + 'argv': sys.argv, + 'config': ub.repr2(config.asdict()), + } + ) + harn = ClfHarn(hyper=hyper) + harn.preferences.update({ + 'num_keep': 3, + 'keep_freq': 10, + 'tensorboard_groups': ['loss'], + 'eager_dump_tensorboard': True, + }) + harn.intervals.update({}) + harn.script_config = config + return harn + + +def main(): + """ + Main function for the generic classification example with an undocumented + hack for the lrtest. + """ + harn = setup_harn() + harn.initialize() + + if ub.argflag('--lrtest'): + # Undocumented hidden feature, + # Perform an LR-test, then set up the harness again. Optionally draw the + # results using matplotlib. + from netharn.prefit.lr_tests import lr_range_test + result = lr_range_test( + harn, init_value=1e-4, final_value=0.5, beta=0.3, + explode_factor=10, num_iters=200) + if ub.argflag('--show'): + import kwplot + plt = kwplot.autoplt() + result.draw() + plt.show() + # Recreate a new version of the harness with the recommended LR. + config = harn.script_config.asdict() + config['lr'] = (result.recommended_lr * 10) + harn = setup_harn(**config) + harn.initialize() + # This starts the main loop which will run until the monitor's terminator + # criterion is satisfied. If the initialize step loaded a checkpoint that + # already met the termination criterion, then this will simply return. + deploy_fpath = harn.run() + + # The returned deploy_fpath is the path to an exported netharn model. + # This model is the one with the best weights according to the monitor.
+ print('deploy_fpath = {!r}'.format(deploy_fpath)) + return harn + + +if __name__ == '__main__': + """ + python -m netharn.examples.classification --datasets=shapes5000 --name=shapes_clf5000 --batch_size=32 + """ + main() diff --git a/netharn/examples/mnist.py b/netharn/examples/mnist.py index 0821076d3d9c7c32baab302c51c0f10b6bfcde6c..f67df39cec70e2e4e6fb7a41e5136a690a3de445 100644 --- a/netharn/examples/mnist.py +++ b/netharn/examples/mnist.py @@ -246,7 +246,7 @@ def setup_harn(**kw): # They nh.HyperParams object keeps track of and helps log all declarative # info related to training a model. hyper = nh.hyperparams.HyperParams( - nice='my-mnist-demo', + name='my-mnist-demo', xpu=xpu, workdir=workdir, datasets=datasets, diff --git a/netharn/examples/object_detection.py b/netharn/examples/object_detection.py index b8d78b2f8bafa9e21320714ff3591e09a64065e0..741167de5ea6f707487717042b046756daacf26e 100644 --- a/netharn/examples/object_detection.py +++ b/netharn/examples/object_detection.py @@ -8,6 +8,7 @@ import os import torch import ubelt as ub import kwarray +import kwimage import scriptconfig as scfg from netharn.models.yolo2 import multiscale_batch_sampler # NOQA from netharn.models.yolo2 import yolo2 @@ -391,10 +392,10 @@ class DetectHarn(nh.FitHarn): >>> harn.on_batch(batch, outputs, losses) >>> # xdoc: +REQUIRES(--show) >>> batch_dets = harn.model.module.postprocess(outputs) - >>> nh.util.autompl() # xdoc: +SKIP + >>> kwplot.autompl() # xdoc: +SKIP >>> stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.01) - >>> nh.util.imshow(stacked) - >>> nh.util.show_if_requested() + >>> kwplot.imshow(stacked) + >>> kwplot.show_if_requested() """ dmet = harn.dmets[harn.current_tag] inputs = batch['im'] @@ -406,12 +407,12 @@ class DetectHarn(nh.FitHarn): bx = harn.bxs[harn.current_tag] if bx < 4: stacked = harn.draw_batch(batch, outputs, detections, thresh=0.1) - # img = nh.util.render_figure_to_image(fig) + # img = kwplot.render_figure_to_image(fig) dump_dpath 
= ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag, 'batch')) dump_fname = 'pred_bx{:04d}_epoch{:08d}.png'.format(bx, harn.epoch) fpath = os.path.join(dump_dpath, dump_fname) harn.debug('dump viz fpath = {}'.format(fpath)) - nh.util.imwrite(fpath, stacked) + kwimage.imwrite(fpath, stacked) except Exception as ex: harn.error('\n\n\n') harn.error('ERROR: FAILED TO POSTPROCESS OUTPUTS') @@ -583,9 +584,9 @@ class DetectHarn(nh.FitHarn): >>> stacked = harn.draw_batch(batch, outputs, batch_dets) >>> # xdoc: +REQUIRES(--show) - >>> nh.util.autompl() # xdoc: +SKIP - >>> nh.util.imshow(stacked) - >>> nh.util.show_if_requested() + >>> kwplot.autompl() # xdoc: +SKIP + >>> kwplot.imshow(stacked) + >>> kwplot.show_if_requested() """ import cv2 inputs = batch['im'] @@ -647,8 +648,8 @@ class DetectHarn(nh.FitHarn): pred_dets.boxes, orig_size, target_size) # shift, scale, embed_size = letterbox._letterbox_transform(orig_size, target_size) - # fig = nh.util.figure(doclf=True, fnum=1) - # nh.util.imshow(img, colorspace='rgb') + # fig = kwplot.figure(doclf=True, fnum=1) + # kwplot.imshow(img, colorspace='rgb') canvas = (img * 255).astype(np.uint8) canvas = true_dets.draw_on(canvas, color='green') canvas = pred_dets.draw_on(canvas, color='blue') @@ -656,7 +657,7 @@ class DetectHarn(nh.FitHarn): canvas = cv2.resize(canvas, (300, 300)) imgs.append(canvas) - stacked = imgs[0] if len(imgs) == 1 else nh.util.stack_images_grid(imgs) + stacked = imgs[0] if len(imgs) == 1 else kwimage.stack_images_grid(imgs) return stacked diff --git a/netharn/examples/yolo_voc.py b/netharn/examples/yolo_voc.py index 4bd004466788150f035917441b967f5b30e6cc23..1f89e0650881b4163fffdf8d176511d6bbba6f2d 100644 --- a/netharn/examples/yolo_voc.py +++ b/netharn/examples/yolo_voc.py @@ -98,13 +98,13 @@ class YoloVOCDataset(nh.data.voc.VOCDataset): >>> norm_boxes = label['targets'].numpy().reshape(-1, 5)[:, 1:5] >>> inp_size = hwc01.shape[-2::-1] >>> # xdoc: +REQUIRES(--show) - >>> import netharn as nh - >>> 
nh.util.figure(doclf=True, fnum=1) - >>> nh.util.autompl() # xdoc: +SKIP - >>> nh.util.imshow(hwc01, colorspace='rgb') + >>> import kwplot + >>> kwplot.figure(doclf=True, fnum=1) + >>> kwplot.autompl() # xdoc: +SKIP + >>> kwplot.imshow(hwc01, colorspace='rgb') >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size) >>> inp_boxes.draw() - >>> nh.util.show_if_requested() + >>> kwplot.show_if_requested() Example: >>> # DISABLE_DOCTSET @@ -119,12 +119,13 @@ class YoloVOCDataset(nh.data.voc.VOCDataset): >>> norm_boxes = label[0].numpy().reshape(-1, 5)[:, 1:5] >>> inp_size = hwc01.shape[-2::-1] >>> # xdoc: +REQUIRES(--show) - >>> nh.util.figure(doclf=True, fnum=1) - >>> nh.util.autompl() # xdoc: +SKIP - >>> nh.util.imshow(hwc01, colorspace='rgb') + >>> import kwplot + >>> kwplot.autompl() # xdoc: +SKIP + >>> kwplot.figure(doclf=True, fnum=1) + >>> kwplot.imshow(hwc01, colorspace='rgb') >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size) >>> inp_boxes.draw() - >>> nh.util.show_if_requested() + >>> kwplot.show_if_requested() """ if isinstance(index, tuple): # Get size index from the batch loader @@ -368,11 +369,12 @@ class YoloHarn(nh.FitHarn): >>> outputs, loss = harn.run_batch(batch) >>> harn.on_batch(batch, outputs, loss) >>> # xdoc: +REQUIRES(--show) + >>> import kwplot >>> batch_dets = harn.model.module.postprocess(outputs) - >>> nh.util.autompl() # xdoc: +SKIP + >>> kwplot.autompl() # xdoc: +SKIP >>> stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.01) - >>> nh.util.imshow(stacked) - >>> nh.util.show_if_requested() + >>> kwplot.imshow(stacked) + >>> kwplot.show_if_requested() """ dmet = harn.dmets[harn.current_tag] inputs, labels = batch @@ -386,13 +388,14 @@ class YoloHarn(nh.FitHarn): bx = harn.bxs[harn.current_tag] if bx < 4: + import kwimage stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.1) - # img = nh.util.render_figure_to_image(fig) + # img = kwplot.render_figure_to_image(fig) dump_dpath = 
ub.ensuredir((harn.train_dpath, 'monitor', harn.current_tag, 'batch')) dump_fname = 'pred_bx{:04d}_epoch{:08d}.png'.format(bx, harn.epoch) fpath = os.path.join(dump_dpath, dump_fname) harn.debug('dump viz fpath = {}'.format(fpath)) - nh.util.imwrite(fpath, stacked) + kwimage.imwrite(fpath, stacked) except Exception as ex: harn.error('\n\n\n') harn.error('ERROR: FAILED TO POSTPROCESS OUTPUTS') @@ -573,11 +576,12 @@ class YoloHarn(nh.FitHarn): >>> outputs, loss = harn.run_batch(batch) >>> harn.on_batch(batch, outputs, loss) >>> # xdoc: +REQUIRES(--show) + >>> import kwplot >>> batch_dets = harn.model.module.postprocess(outputs) - >>> nh.util.autompl() # xdoc: +SKIP + >>> kwplot.autompl() # xdoc: +SKIP >>> stacked = harn.draw_batch(batch, outputs, batch_dets, thresh=0.01) - >>> nh.util.imshow(stacked) - >>> nh.util.show_if_requested() + >>> kwplot.imshow(stacked) + >>> kwplot.show_if_requested() """ import cv2 inputs, labels = batch diff --git a/netharn/export/deployer.py b/netharn/export/deployer.py index 65d20a8d9e0e0a9a5f3c52218c207ed5fbfeeab2..4d37988d687381d62487ab4cb1d32731a19847bb 100644 --- a/netharn/export/deployer.py +++ b/netharn/export/deployer.py @@ -27,7 +27,7 @@ Example: >>> # This will train a toy model with toy data using netharn >>> hyper = nh.HyperParams(**{ >>> 'workdir' : ub.ensure_app_cache_dir('netharn/tests/deploy'), - >>> 'nice' : 'deploy_demo', + >>> 'name' : 'deploy_demo', >>> 'xpu' : nh.XPU.coerce('cpu'), >>> 'datasets' : { >>> 'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0), @@ -61,7 +61,7 @@ Example: INFO: Exported model topology to .../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/ToyNet2d_2a3f49.py INFO: Initializing model weights with: INFO: * harn.train_dpath = '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww' - INFO: * harn.nice_dpath = '.../.cache/netharn/tests/deploy/fit/nice/deploy_demo' + INFO: * harn.name_dpath = '.../.cache/netharn/tests/deploy/fit/name/deploy_demo' INFO: Snapshots will save 
to harn.snapshot_dpath = '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/torch_snapshots' INFO: ARGV: .../.local/conda/envs/py36/bin/python .../.local/conda/envs/py36/bin/ipython @@ -84,9 +84,9 @@ Example: INFO: INFO: training completed INFO: harn.train_dpath = '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww' - INFO: harn.nice_dpath = '.../.cache/netharn/tests/deploy/fit/nice/deploy_demo' + INFO: harn.name_dpath = '.../.cache/netharn/tests/deploy/fit/name/deploy_demo' INFO: view tensorboard results for this run via: - tensorboard --logdir ~/.cache/netharn/tests/deploy/fit/nice + tensorboard --logdir ~/.cache/netharn/tests/deploy/fit/name [DEPLOYER] Deployed zipfpath=.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/deploy_ToyNet2d_onnxqaww_002_TXZBYL.zip INFO: wrote single-file deployment to: '.../.cache/netharn/tests/deploy/fit/runs/deploy_demo/onnxqaww/deploy_ToyNet2d_onnxqaww_002_TXZBYL.zip' INFO: exiting fit harness. @@ -313,7 +313,7 @@ def _package_deploy2(dpath, info, name=None): Ignore: dpath = '/home/joncrall/.cache/netharn/tests/_package_custom' - path = '/home/joncrall/work/opir/fit/nice/_Sim3-kw6-99-finetune_ML3D_BEST_2018-9-20_LR1e-4_f2_vel0.0_hn0.25_bs64_nr5.0' + path = '/home/joncrall/work/opir/fit/name/_Sim3-kw6-99-finetune_ML3D_BEST_2018-9-20_LR1e-4_f2_vel0.0_hn0.25_bs64_nr5.0' info = unpack_model_info(path) zipfpath = _package_deploy2(dpath, info) @@ -699,7 +699,7 @@ def _demodata_toy_harn(): import netharn as nh hyper = nh.HyperParams(**{ 'workdir' : ub.ensure_app_cache_dir('netharn/tests/deploy'), - 'nice' : 'deploy_demo_static', + 'name' : 'deploy_demo_static', 'xpu' : nh.XPU.coerce('cpu'), 'datasets' : {'train': nh.data.ToyData2d(size=3, rng=0)}, 'loaders' : {'batch_size': 64}, diff --git a/netharn/fit_harn.py b/netharn/fit_harn.py index dfaa6c920dd38b1f85af6827c73d355d51b4fbfa..cfea3d088108c6617ddbe9eb762d2fcb4a69c5c1 100644 --- a/netharn/fit_harn.py +++ b/netharn/fit_harn.py @@ -50,16 +50,16 @@ 
Example: >>> 'name' : 'demo', >>> 'xpu' : nh.XPU.coerce('argv'), >>> # workdir is a directory where intermediate results can be saved - >>> # nice symlinks /fit/nice/ -> ../runs/ + >>> # name symlinks /fit/name/ -> ../runs/ >>> # XPU auto select a gpu if idle and VRAM>6GB else a cpu >>> # ================ >>> # Data Components >>> 'datasets' : { # dict of plain ol torch.data.Dataset instances >>> 'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0), - >>> 'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1), - >>> 'test': nh.data.ToyData2d(size=3, border=1, n=128, rng=1), + >>> 'vali': nh.data.ToyData2d(size=3, border=1, n=64, rng=1), + >>> 'test': nh.data.ToyData2d(size=3, border=1, n=64, rng=1), >>> }, - >>> 'loaders' : {'batch_size': 64}, # DataLoader instances or kw + >>> 'loaders' : {'batch_size': 8}, # DataLoader instances or kw >>> # ================ >>> # Algorithm Components >>> # Note the (cls, kw) tuple formatting @@ -82,7 +82,7 @@ Example: >>> }), >>> # dynamics are a config option that modify the behavior of the main >>> # training loop. These parameters effect the learned model. - >>> 'dynamics' : {'batch_step': 4}, + >>> 'dynamics' : {'batch_step': 2}, >>> }) >>> harn = nh.FitHarn(hyper) >>> # non-algorithmic behavior configs (do not change learned models) @@ -93,7 +93,7 @@ Example: >>> harn.run() # note: run calls initialize it hasn't already been called. >>> # xdoc: +IGNORE_WANT RESET HARNESS BY DELETING EVERYTHING IN TRAINING DIR - Symlink: ...tests/demo/fit/runs/demo/keyeewlr -> ...tests/demo/fit/nice/demo + Symlink: ...tests/demo/fit/runs/demo/keyeewlr -> ...tests/demo/fit/name/demo .... already exists .... 
and points to the right place Initializing tensorboard (dont forget to start the tensorboard server) @@ -101,10 +101,10 @@ Example: Mounting ToyNet2d model on CPU Initializing model weights * harn.train_dpath = '...tests/demo/fit/runs/demo/keyeewlr' - * harn.nice_dpath = '...tests/demo/fit/nice/demo' + * harn.name_dpath = '...tests/demo/fit/name/demo' Snapshots will save to harn.snapshot_dpath = '...tests/demo/fit/runs/demo/keyeewlr/torch_snapshots' dont forget to start: - tensorboard --logdir ...tests/demo/fit/nice + tensorboard --logdir ...tests/demo/fit/name === begin training === epoch lr:0.001 │ vloss: 0.1409 (n_bad_epochs=00, best=0.1409): 100%|█| 10/10 [00:01<00:00, 9.95it/s] 0:00/fit/name/ -> ../runs/ + # XPU auto select a gpu if idle and VRAM>6GB else a cpu + # ================ + # Data Components + 'datasets' : { # dict of plain ol torch.data.Dataset instances + 'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0), + 'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1), + 'test': nh.data.ToyData2d(size=3, border=1, n=128, rng=1), + }, + 'loaders' : {'batch_size': 64}, # DataLoader instances or kw + # ================ + # Algorithm Components + # Note the (cls, kw) tuple formatting + 'model' : (nh.models.ToyNet2d, {}), + 'optimizer' : (nh.optimizers.SGD, { + 'lr': 0.0001 + }), + # focal loss is usually better than nh.criterions.CrossEntropyLoss + 'criterion' : (nh.criterions.FocalLoss, {}), + 'initializer' : (nh.initializers.KaimingNormal, { + 'param': 0, + }), + # these may receive an overhaul soon + 'scheduler' : (nh.schedulers.ListedLR, { + 'points': {0: .0001, 2: .01, 5: .015, 6: .005, 9: .001}, + 'interpolate': True, + }), + 'monitor' : (nh.Monitor, { + 'max_epoch': 10, + }), + # dynamics are a config option that modify the behavior of the main + # training loop. These parameters effect the learned model. 
+ 'dynamics' : {'batch_step': 4}, + }) + harn = cls(hyper) + # non-algorithmic behavior configs (do not change learned models) + harn.preferences['use_tensorboard'] = False + harn.preferences['timeout'] = 0.5 + return harn + def _demo_epoch(harn, tag='vali', learn=False, max_iter=np.inf, call_on_epoch=False): """ @@ -329,7 +383,7 @@ class InitializeMixin(object): # train info, keep a backup of the old ones. if harn.train_dpath and overwrite: train_info_fpath = join(harn.train_dpath, 'train_info.json') - if os.path.exists(train_info_fpath): + if exists(train_info_fpath): if overwrite: import json try: @@ -367,14 +421,14 @@ class InitializeMixin(object): raise CannotResume harn.resume_from_previous_snapshots() except CannotResume: - # Abstract logic into a reset_state function? + # This step is only run on a fresh start. harn.reset_weights() for group in harn.optimizer.param_groups: group.setdefault('initial_lr', group['lr']) if harn.train_dpath: harn.info(' * harn.train_dpath = {!r}'.format(harn.train_dpath)) - harn.info(' * harn.nice_dpath = {!r}'.format(harn.nice_dpath)) + harn.info(' * harn.name_dpath = {!r}'.format(harn.name_dpath)) harn.info('Snapshots will save to harn.snapshot_dpath = {!r}'.format( harn.snapshot_dpath)) else: @@ -393,8 +447,8 @@ class InitializeMixin(object): train_info = harn.hyper.train_info(harn.train_dpath) ub.ensuredir(train_info['train_dpath']) - if train_info['nice_dpath']: - ub.ensuredir(os.path.dirname(train_info['nice_dpath'])) + if train_info['name_dpath']: + ub.ensuredir(dirname(train_info['name_dpath'])) # Make a very simple MRU (most recently used) link mru_dpath = join(harn.hyper.workdir, '_mru') @@ -404,16 +458,26 @@ class InitializeMixin(object): except OSError as ex: harn.warn('Unable to symlink: {!r}'.format(ex)) - # Link the hashed run dir to the human friendly nice dir + # Link the hashed run dir to the human friendly "name" dir try: ub.symlink(train_info['train_dpath'], - train_info['nice_dpath'], overwrite=True, + 
train_info['name_dpath'], overwrite=True, verbose=3) except OSError as ex: harn.warn('Unable to symlink: {!r}'.format(ex)) + if 'nice_dpath' in train_info: + # backwards compatibility for "nice" dpaths + ub.ensuredir(dirname(train_info['nice_dpath'])) + try: + ub.symlink(train_info['train_dpath'], + train_info['nice_dpath'], overwrite=True, + verbose=0) + except OSError as ex: + harn.warn('Unable to symlink: {!r}'.format(ex)) + harn.train_info = train_info - harn.nice_dpath = train_info['nice_dpath'] + harn.name_dpath = train_info['name_dpath'] harn.train_dpath = train_info['train_dpath'] return harn.train_dpath @@ -473,7 +537,7 @@ class InitializeMixin(object): harn.debug('Initialized logging') if tensorboard_logger and harn.preferences['use_tensorboard']: - # train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath) + # train_base = dirname(harn.name_dpath or harn.train_dpath) # harn.info('dont forget to start:\n tensorboard --logdir ' + train_base) harn.info('Initializing tensorboard (dont forget to start the tensorboard server)') harn._tlog = tensorboard_logger.Logger(harn.train_dpath, @@ -599,6 +663,9 @@ class InitializeMixin(object): else: harn.warn('initializer was not specified') + # Save the original weights for analysis + harn.save_snapshot(mode='initial') + @profiler.profile def resume_from_previous_snapshots(harn): """ @@ -666,7 +733,8 @@ class ProgMixin(object): import tqdm # NOQA Prog = tqdm.tqdm elif harn.preferences['prog_backend'] == 'progiter': - Prog = functools.partial(ub.ProgIter, chunksize=chunksize, verbose=1) + Prog = functools.partial( + ub.ProgIter, chunksize=chunksize, verbose=1, time_thresh=2.0) else: raise KeyError(harn.preferences['prog_backend']) return Prog(*args, **kw) @@ -750,14 +818,24 @@ class LogMixin(object): except AttributeError: pass - def log(harn, msg): + def log(harn, msg, level='info'): """ - Logs an info message. Alias of :func:LogMixin.info + Logs a message with a specified verbosity level. 
Args: msg (str): an info message to log - """ - harn.info(msg) + level (str): either info, debug, error, or warn + """ + if level == 'info': + harn.info(msg) + elif level == 'debug': + harn.debug(msg) + elif level == 'error': + harn.error(msg) + elif level == 'warn': + harn.warn(msg) + else: + raise KeyError(level) def info(harn, msg): """ @@ -892,7 +970,8 @@ class SnapshotMixin(object): # snapshots or checkpoints for simplicity. if harn.train_dpath is None: raise ValueError('harn.train_dpath is None') - return join(harn.train_dpath, 'torch_snapshots') + # return join(harn.train_dpath, 'torch_snapshots') + return join(harn.train_dpath, 'checkpoints') def _epochs_to_remove(harn, existing_epochs, num_keep_recent, num_keep_best, keep_freq): @@ -1002,44 +1081,104 @@ class SnapshotMixin(object): harn.set_snapshot_state(snapshot_state) harn.info('Previous snapshot loaded...') - def save_snapshot(harn, explicit=False): + def save_snapshot(harn, explicit=False, mode='checkpoint'): """ Checkpoint the current model state in an epoch-tagged snapshot. Args: + mode (str, default='checkpoint'): the type of snapshot this is + (changes the subdirectory where they are stored). Choices + are: checkpoint, explicit, and initial. + explicit (bool, default=False): if True, the snapshot is also tagged by a hash and saved to the explit_checkpoints directory. + DEPRECTATED, use mode. 
Returns: PathLike: save_fpath: the path to the saved snapshot + + Example: + >>> import netharn as nh + >>> harn = nh.FitHarn.demo() + >>> # The "save_snapshot" method is called in initialize + >>> harn.initialize() """ if explicit: - _dpath = join(harn.train_dpath, 'explit_checkpoints') - ub.ensuredir(_dpath) - - try: - stamp = ub.timestamp() - except Exception: - stamp = ub.timestamp() + mode = 'explicit' + if mode == 'explicit': + dpath = ub.ensuredir((harn.train_dpath, 'explit_checkpoints')) + stamp = ub.timestamp() save_fname = '_epoch_{:08d}_{}.pt'.format(harn.epoch, stamp) - save_fpath = join(_dpath, save_fname) + elif mode == 'checkpoint': + # TODO: make the transition smoother + dpath = ub.ensuredir(harn.snapshot_dpath) + _old_snapshot_dpath = join(harn.train_dpath, 'torch_snapshots') + _new_snapshot_dpath = join(harn.train_dpath, 'checkpoints') + + if dpath == _new_snapshot_dpath: + if not exists(_old_snapshot_dpath): + ub.symlink(_new_snapshot_dpath, _old_snapshot_dpath) - harn.info('Saving EXPLICIT snapshot to {}'.format(save_fpath)) - snapshot_state = harn.get_snapshot_state() - torch.save(snapshot_state, save_fpath) - else: - ub.ensuredir(harn.snapshot_dpath) save_fname = '_epoch_{:08d}.pt'.format(harn.epoch) - save_fpath = join(harn.snapshot_dpath, save_fname) + elif mode == 'initial': + dpath = ub.ensuredir((harn.train_dpath, 'initial_state')) + save_fname = 'initial_state.pt'.format(harn.epoch) + else: + raise KeyError(mode) - harn.debug('Saving snapshot to {}'.format(save_fpath)) - snapshot_state = harn.get_snapshot_state() - torch.save(snapshot_state, save_fpath) + save_fpath = join(dpath, save_fname) + level = 'debug' if mode == 'checkpoint' else 'info' + harn.log('Saving {} snapshot to {}'.format(mode.upper(), save_fpath), level) + + snapshot_state = harn.get_snapshot_state() + + try: + import safer + _open = safer.open + except ImportError: + _open = open + + with _open(save_fpath, 'wb') as save_file: + torch.save(snapshot_state, save_file) 
harn.debug('Snapshot saved to {}'.format(save_fpath)) return save_fpath + def best_snapshot(harn): + """ + Return the path to the current "best" snapshot. + """ + # Netharn should populate best_snapshot.pt if there is a validation set. + # Other names are to support older codebases. + train_dpath = harn.train_dpath + expected_names = [ + 'best_snapshot.pt', + 'best_snapshot2.pt', + 'final_snapshot.pt', + 'deploy_snapshot.pt', + ] + for fname in expected_names: + fpath = join(train_dpath, fname) + if exists(fpath): + break + + if not exists(fpath): + fpath = None + + if not fpath: + epoch_to_fpath = { + parse.parse('{}_epoch_{num:d}.pt', path).named['num']: path + for path in harn.prev_snapshots() + } + if epoch_to_fpath: + fpath = epoch_to_fpath[max(epoch_to_fpath)] + + if fpath is None: + raise Exception('cannot find / determine the best snapshot') + + return fpath + @register_mixin class SnapshotCallbacks(object): @@ -1182,7 +1321,7 @@ class ScheduleMixin(object): warmup_lr = [_lr * (1 - k) for _lr in regular_lr] else: raise KeyError(warmup) - harn.debug('warmup_lr = {}'.format(warmup_lr)) + # harn.debug('warmup_lr = {}'.format(warmup_lr)) _set_optimizer_values(harn.optimizer, 'lr', warmup_lr) # TODO: REFACTOR SO NETHARN HAS A PROPER ITERATION MODE @@ -1298,7 +1437,7 @@ class CoreMixin(object): harn.info('ARGV:\n ' + sys.executable + ' ' + ' '.join(sys.argv)) if harn._tlog is not None: - train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath) + train_base = dirname(harn.name_dpath or harn.train_dpath) harn.info('dont forget to start:\n' ' tensorboard --logdir ' + ub.shrinkuser(train_base)) @@ -1456,17 +1595,17 @@ class CoreMixin(object): harn.info('training completed') if harn._tlog is not None: - train_base = os.path.dirname(harn.nice_dpath or harn.train_dpath) + train_base = dirname(harn.name_dpath or harn.train_dpath) harn.info('harn.train_dpath = {!r}'.format(harn.train_dpath)) - harn.info('harn.nice_dpath = {!r}'.format(harn.nice_dpath)) + 
harn.info('harn.name_dpath = {!r}'.format(harn.name_dpath)) harn.info('view tensorboard results for this run via:\n' ' tensorboard --logdir ' + ub.shrinkuser(train_base)) - deploy_fpath = harn._deploy() + harn.deploy_fpath = harn._deploy() harn.on_complete() harn.info('exiting fit harness.') - return deploy_fpath + return harn.deploy_fpath def _export(harn): """ @@ -1526,6 +1665,7 @@ class CoreMixin(object): deploy_fpath = None harn.warn('Failed to deploy: {}'.format(repr(ex))) + harn.deploy_fpath = deploy_fpath return deploy_fpath @profiler.profile @@ -1807,12 +1947,12 @@ class CoreMixin(object): iter_moving_metrics.update(cur_metrics) # display_train training info - if harn.check_interval('display_' + tag, bx): + if harn.check_interval('display_' + tag, bx) or bx == n_batches - 1: ave_metrics = iter_moving_metrics.average() msg = harn._batch_msg({'loss': ave_metrics['loss']}, bsize, learn) - prog.set_description(tag + ' ' + msg) + prog.set_description(tag + ' ' + msg, refresh=False) # log_iter_train, log_iter_test, log_iter_vali if harn.check_interval('log_iter_' + tag, bx, first=True): @@ -1828,7 +1968,14 @@ class CoreMixin(object): harn, 'iter', special_groupers=harn.preferences['tensorboard_groups']) - prog.update(display_interval) + if use_tqdm: + prog.update(display_interval) + else: + # hack to force progiter to reach 100% at the end + # This should be fixed in progiter. 
+ steps_taken = (bx - prog._iter_idx) + 1 + prog.update(steps_taken) + if use_tqdm: harn._update_prog_postfix(prog) @@ -1853,6 +2000,7 @@ class CoreMixin(object): # harn.optimizer.step() # harn.optimizer.zero_grad() + prog.refresh() prog.close() harn.epoch_prog = None @@ -2414,7 +2562,7 @@ class FitHarn(ExtraMixins, InitializeMixin, ProgMixin, LogMixin, SnapshotMixin, if harn.hyper.name is not None: harn.hyper.name = 'DEMO_' + harn.hyper.name else: - raise AssertionError('should have a nice name in demo mode') + raise AssertionError('should have a nice "name" in demo mode') harn.datasets = None harn.loaders = None @@ -2441,7 +2589,7 @@ class FitHarn(ExtraMixins, InitializeMixin, ProgMixin, LogMixin, SnapshotMixin, # Output directories harn.train_dpath = train_dpath - harn.nice_dpath = None + harn.name_dpath = None harn.train_info = None # Progress bars @@ -2514,6 +2662,13 @@ class FitHarn(ExtraMixins, InitializeMixin, ProgMixin, LogMixin, SnapshotMixin, DeprecationWarning) return harn.preferences + @property + def nice_dpath(harn): + import warnings + warnings.warn('harn.nice_dpath is deprecated, use harn.name_dpath instead', + DeprecationWarning) + return harn.name_dpath + def check_interval(harn, tag, idx, first=False): """ check if its time to do something that happens every few iterations diff --git a/netharn/hyperparams.py b/netharn/hyperparams.py index b723b724a778e6501c4b5610b2a3f8ef4a9cb565..740eaffbc1726b8c6aaed0672f87d5c88ba23c7a 100644 --- a/netharn/hyperparams.py +++ b/netharn/hyperparams.py @@ -20,7 +20,7 @@ Example: >>> hyper = nh.HyperParams(**{ >>> # --- Data First >>> 'datasets' : datasets, - >>> 'nice' : 'demo', + >>> 'name' : 'demo', >>> 'loaders' : {'batch_size': 64}, >>> 'xpu' : nh.XPU.coerce('auto'), >>> # --- Algorithm Second @@ -372,6 +372,10 @@ def _rectify_loaders(arg, kw): """ Loaders are handled slightly differently than other classes We construct them eagerly (if they are not already constructed) + + Example: + >>> # test that dict-base 
spec words + >>> _rectify_loaders({'batch_size': 4}, {}) """ if arg is None: arg = {} @@ -395,8 +399,7 @@ def _rectify_loaders(arg, kw): else: # loaders is kwargs for `torch_data.DataLoader` arg = (torch_data.DataLoader, arg) - # cls, kw2 = _rectify_class(None, arg, kw) - rectified = _rectify_class(None, arg, kw) + rectified = _rectify_class(arg, kw) cls = rectified['cls'] kw2 = rectified['cls_kw'] else: @@ -453,15 +456,20 @@ class HyperParams(object): augment=None, other=None, # incorporated into the hash extra=None, # ignored when computing the hash - nice=None, # alias of name + nice=None, # deprecated, alias of name ): kwargs = {} hyper.datasets = datasets if name is None: import warnings - warnings.warn('Specify "name" instead of "nice"') + warnings.warn( + 'The "nice" argument is deprecated and will be removed. ' + 'Specify "name" instead.', DeprecationWarning) name = nice + if name is None: + # raise ValueError('you must specify a name for HyperParams') + name = 'untitled' hyper.name = name hyper.workdir = workdir hyper.xpu = xpu @@ -638,7 +646,7 @@ class HyperParams(object): _append_part('criterion', hyper.criterion_cls, hyper.criterion_params, initkw) # TODO: should other be included in initkw? I think it should. - # probably should also include monitor, xpu, nice + # probably should also include monitor, xpu, name # Loader is a bit hacked _append_part('loader', hyper.loader_cls, hyper.loader_params_nice, initkw) @@ -758,7 +766,7 @@ class HyperParams(object): >>> hyper = nh.hyperparams.HyperParams(**{ >>> # --- Data First >>> 'datasets' : datasets, - >>> 'nice' : 'demo', + >>> 'name' : 'demo', >>> 'workdir' : ub.ensure_app_cache_dir('netharn/demo'), >>> 'loaders' : {'batch_size': 64}, >>> 'xpu' : nh.XPU.coerce('auto'), @@ -860,8 +868,8 @@ class HyperParams(object): When r = 10000, it becomes had to compute the number because of floating point errors, but the probability is likely astronomically low. 
I doubt we will ever run training in the same work directory - (and with the same nice name) 10,000 different times, so using an 8 - character hash seems safe and user friendly for this purpose. + (and with the same nice "name") 10,000 different times, so using an + 8 character hash seems safe and user friendly for this purpose. Perhaps we may move to 12, 16, or 32+ in the future, but for the pre 1.0 netharn, 8 seems fine. @@ -874,13 +882,19 @@ class HyperParams(object): name = hyper.name nice_dpath = None + name_dpath = None if not given_explicit_train_dpath: # setup a cannonical and a linked symlink dir train_dpath = normpath( join(hyper.workdir, 'fit', 'runs', name, train_hashid)) - # also setup a "nice" custom name, which may conflict, but oh well + # also setup a custom "name", which may conflict. This will + # overwrite an existing "name" symlink, but the real runs directory + # is based on a hash, so it wont be overwritten with astronomicaly + # high probability. if name: try: + name_dpath = normpath( + join(hyper.workdir, 'fit', 'name', name)) nice_dpath = normpath( join(hyper.workdir, 'fit', 'nice', name)) except Exception: @@ -913,6 +927,7 @@ class HyperParams(object): ('init_history', init_history), ('init_history_hashid', _hash_data(util.make_idstr(init_history))), + ('name', hyper.name), ('nice', hyper.name), ('old_train_dpath', normpath( @@ -920,11 +935,14 @@ class HyperParams(object): ('train_dpath', train_dpath), # ('link_dpath', link_dpath), + + # "nice" will be deprecated for "name_dpath" ('nice_dpath', nice_dpath), + ('name_dpath', name_dpath), ('given_explicit_train_dpath', given_explicit_train_dpath), - # TODO, add in n_classes if applicable + # TODO, add in classes if applicable # TODO, add in centering if applicable # ('centering', hyper.centering), @@ -950,7 +968,7 @@ class HyperParams(object): 'name' : 'demo', 'xpu' : nh.XPU.coerce('argv'), # workdir is a directory where intermediate results can be saved - # nice symlinks /fit/nice/ -> 
../runs/ + # name symlinks /fit/name/ -> ../runs/ # XPU auto select a gpu if idle and VRAM>6GB else a cpu # ================ # Data Components diff --git a/netharn/metrics/__init__.py b/netharn/metrics/__init__.py index a10f09f542dc129ee0e9e47761a5d8de8602a3f2..316fb5b7edc6b0f0c290c7da610329a89be463d3 100644 --- a/netharn/metrics/__init__.py +++ b/netharn/metrics/__init__.py @@ -1,5 +1,5 @@ """ -mkinit netharn.metrics +mkinit netharn.metrics -w """ # flake8: noqa from __future__ import absolute_import, division, print_function, unicode_literals @@ -17,11 +17,16 @@ from netharn.metrics import voc_metrics from netharn.metrics.clf_report import (classification_report, ovr_classification_report,) from netharn.metrics.confusion_vectors import (BinaryConfusionVectors, - ConfusionVectors, - OneVsRestConfusionVectors,) -from netharn.metrics.detect_metrics import (DetectionMetrics,) + ConfusionVectors, DictProxy, + OneVsRestConfusionVectors, + PR_Result, PerClass_PR_Result, + PerClass_ROC_Result, ROC_Result, + Threshold_Result,) +from netharn.metrics.detect_metrics import (DetectionMetrics, + eval_detections_cli,) from netharn.metrics.drawing import (draw_perclass_prcurve, draw_perclass_roc, - draw_peritem_prcurve, draw_roc,) + draw_prcurve, draw_roc, + draw_threshold_curves,) from netharn.metrics.functional import (fast_confusion_matrix,) from netharn.metrics.sklearn_alts import (class_accuracy_from_confusion, confusion_matrix, @@ -29,11 +34,13 @@ from netharn.metrics.sklearn_alts import (class_accuracy_from_confusion, from netharn.metrics.voc_metrics import (VOC_Metrics,) __all__ = ['BinaryConfusionVectors', 'ConfusionVectors', 'DetectionMetrics', - 'OneVsRestConfusionVectors', 'VOC_Metrics', 'assignment', + 'DictProxy', 'OneVsRestConfusionVectors', 'PR_Result', + 'PerClass_PR_Result', 'PerClass_ROC_Result', 'ROC_Result', + 'Threshold_Result', 'VOC_Metrics', 'assignment', 'class_accuracy_from_confusion', 'classification_report', 'clf_report', 'confusion_matrix', 
'confusion_vectors', 'detect_metrics', 'draw_perclass_prcurve', 'draw_perclass_roc', - 'draw_peritem_prcurve', 'draw_roc', 'drawing', - 'fast_confusion_matrix', 'functional', + 'draw_prcurve', 'draw_roc', 'draw_threshold_curves', 'drawing', + 'eval_detections_cli', 'fast_confusion_matrix', 'functional', 'global_accuracy_from_confusion', 'ovr_classification_report', 'sklearn_alts', 'voc_metrics'] diff --git a/netharn/metrics/assignment.py b/netharn/metrics/assignment.py index 7add77e1f8a325f2d0e8b3c36d713fc1f67e849a..63fa6fd0ffb87325666b036aa74d6ffa6ec2007c 100644 --- a/netharn/metrics/assignment.py +++ b/netharn/metrics/assignment.py @@ -24,7 +24,7 @@ import ubelt as ub def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0, ovthresh=0.5, bg_cidx=-1, bias=0.0, classes=None, compat='all', prioritize='iou', - ignore_class='ignore'): + ignore_classes='ignore'): """ Create confusion vectors for detections by assigning to ground true boxes @@ -75,8 +75,8 @@ def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0, mapping from class indices to class names. Can also contain class heirarchy information. - ignore_class (str): - class name indicating ignore regions + ignore_classes (str | List[str]): + class name(s) indicating ignore regions TODO: - [ ] This is a bottleneck function. 
An implementation in C / C++ / @@ -238,13 +238,13 @@ def _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1.0, y = _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup, cx_to_matchable_txs, bg_weight, prioritize, ovthresh, pdist_priority, cx_to_ancestors, bg_cidx, - ignore_class=ignore_class) + ignore_classes=ignore_classes) return y def _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup, cx_to_matchable_txs, bg_weight, prioritize, ovthresh, - pdist_priority, cx_to_ancestors, bg_cidx, ignore_class): + pdist_priority, cx_to_ancestors, bg_cidx, ignore_classes): # Notes: # * Preallocating numpy arrays does not help # * It might be useful to code this critical loop up in C / Cython @@ -264,10 +264,10 @@ def _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup, _pred_cxs = pred_dets.class_idxs.take(_pred_sortx, axis=0) _pred_scores = _scores.take(_pred_sortx, axis=0) - if ignore_class is not None: + if ignore_classes is not None: # Remove certain ignore regions from scoring true_ignore_flags, pred_ignore_flags = _filter_ignore_regions( - true_dets, pred_dets, ovthresh=ovthresh, ignore_class=ignore_class) + true_dets, pred_dets, ovthresh=ovthresh, ignore_classes=ignore_classes) _pred_keep_flags = ~pred_ignore_flags[_pred_sortx] _pred_sortx = _pred_sortx[_pred_keep_flags] @@ -383,7 +383,7 @@ def _critical_loop(true_dets, pred_dets, iou_lookup, isvalid_lookup, # If the prediction is a finer-grained category than the truth # change the prediction to match the truth (because it is # compatible). This is the key to hierarchical scoring. 
- if true_cx in cx_to_ancestors[pred_cx]: + if pred_cx is not None and true_cx in cx_to_ancestors[pred_cx]: pred_cx = true_cx y_pred_raw.append(raw_pred_cx) @@ -517,7 +517,7 @@ def _fast_pdist_priority(classes, prioritize, _cache={}): def _filter_ignore_regions(true_dets, pred_dets, ovthresh=0.5, - ignore_class='ignore'): + ignore_classes='ignore'): """ Determine which true and predicted detections should be ignored. @@ -529,28 +529,30 @@ def _filter_ignore_regions(true_dets, pred_dets, ovthresh=0.5, >>> from netharn.metrics.assignment import * # NOQA >>> from netharn.metrics.assignment import _filter_ignore_regions >>> import kwimage - >>> pred_dets = kwimage.Detections.random(classes=['a']) + >>> pred_dets = kwimage.Detections.random(classes=['a', 'b', 'c']) >>> true_dets = kwimage.Detections.random( - >>> segmentations=True, classes=['a', 'ignore']) - >>> ignore_class = 'ignore' + >>> segmentations=True, classes=['a', 'b', 'c', 'ignore']) + >>> ignore_classes = {'ignore', 'b'} >>> ovthresh = 0.5 >>> print('true_dets = {!r}'.format(true_dets)) >>> print('pred_dets = {!r}'.format(pred_dets)) >>> flags1, flags2 = _filter_ignore_regions( - >>> true_dets, pred_dets, ovthresh=ovthresh, ignore_class=ignore_class) + >>> true_dets, pred_dets, ovthresh=ovthresh, ignore_classes=ignore_classes) >>> print('flags1 = {!r}'.format(flags1)) >>> print('flags2 = {!r}'.format(flags2)) - >>> flags3, flags4 = _filter_ignore_regions( >>> true_dets, pred_dets, ovthresh=ovthresh, - >>> ignore_class=ignore_class.upper()) + >>> ignore_classes={c.upper() for c in ignore_classes}) >>> assert np.all(flags1 == flags3) >>> assert np.all(flags2 == flags4) """ true_ignore_flags = np.zeros(len(true_dets), dtype=np.bool) pred_ignore_flags = np.zeros(len(pred_dets), dtype=np.bool) + if not ub.iterable(ignore_classes): + ignore_classes = {ignore_classes} + def _normalize_catname(name, classes): if classes is None: return name @@ -560,16 +562,21 @@ def _filter_ignore_regions(true_dets, pred_dets, 
ovthresh=0.5, if cname.lower() == name.lower(): return cname return name - # raise KeyError(name) - ignore_class = _normalize_catname(ignore_class, true_dets.classes) + ignore_classes = {_normalize_catname(c, true_dets.classes) + for c in ignore_classes} + + if true_dets.classes is not None: + ignore_classes = ignore_classes & set(true_dets.classes) # Filter out true detections labeled as "ignore" - if true_dets.classes is not None and ignore_class in true_dets.classes: - ignore_cidx = true_dets.classes.index(ignore_class) - true_ignore_flags = true_dets.class_idxs == ignore_cidx + if true_dets.classes is not None and ignore_classes: + import kwarray + ignore_cidxs = [true_dets.classes.index(c) for c in ignore_classes] + true_ignore_flags = kwarray.isect_flags( + true_dets.class_idxs, ignore_cidxs) - if np.any(true_ignore_flags): + if np.any(true_ignore_flags) and len(pred_dets): ignore_dets = true_dets.compress(true_ignore_flags) pred_boxes = pred_dets.data['boxes'] diff --git a/netharn/metrics/clf_report.py b/netharn/metrics/clf_report.py index d8f41e963896a86bbf2c143bdb7959b9c7ef803d..0ae728a16498f864ac9f4b3a61c80e2c8f8c26bd 100644 --- a/netharn/metrics/clf_report.py +++ b/netharn/metrics/clf_report.py @@ -417,7 +417,7 @@ def ovr_classification_report(mc_y_true, mc_probs, target_names=None, # Index of the true class k_true = ohvec_true.T[k] # Index of the predicted class - k_pred = np.argmax(bin_probs, axis=1) + k_pred = np.argmax(bin_probs, axis=1) # NOTE: ASSUME MUTEX CLASSES # Probabilities for the true class for each label bin_truth = np.eye(2)[k_true] diff --git a/netharn/metrics/confusion_vectors.py b/netharn/metrics/confusion_vectors.py index fc3f89d6eca80e050c56c22a14622ee4ed2c83b2..f94b86b313bbec6a7da4b42f282ace97ccf37cac 100644 --- a/netharn/metrics/confusion_vectors.py +++ b/netharn/metrics/confusion_vectors.py @@ -21,8 +21,8 @@ class ConfusionVectors(ub.NiceRepr): >>> from netharn.metrics import DetectionMetrics >>> dmet = DetectionMetrics.demo( >>> 
nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3) - >>> self = dmet.confusion_vectors() - >>> print(self.data._pandas()) # xdoctest: IGNORE_WANT + >>> cfsn_vecs = dmet.confusion_vectors() + >>> print(cfsn_vecs.data._pandas()) # xdoctest: IGNORE_WANT pred_raw pred true score weight iou txs pxs gid 0 2 2 2 10.0000 1.0000 1.0000 0 4 0 1 2 2 2 7.5025 1.0000 1.0000 1 3 0 @@ -48,33 +48,33 @@ class ConfusionVectors(ub.NiceRepr): ... """ - def __init__(self, data, classes, probs=None): - self.data = data - self.classes = classes - self.probs = probs + def __init__(cfsn_vecs, data, classes, probs=None): + cfsn_vecs.data = data + cfsn_vecs.classes = classes + cfsn_vecs.probs = probs - def __nice__(self): - return self.data.__nice__() + def __nice__(cfsn_vecs): + return cfsn_vecs.data.__nice__() @classmethod - def demo(self): + def demo(cfsn_vecs): """ Example: >>> # xdoctest: +REQUIRES(module:ndsampler) - >>> self = ConfusionVectors.demo() - >>> print('self = {!r}'.format(self)) - >>> cx_to_binvecs = self.binarize_ovr() + >>> cfsn_vecs = ConfusionVectors.demo() + >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs)) + >>> cx_to_binvecs = cfsn_vecs.binarize_ovr() >>> print('cx_to_binvecs = {!r}'.format(cx_to_binvecs)) """ from netharn.metrics import DetectionMetrics dmet = DetectionMetrics.demo( nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3) # print('dmet = {!r}'.format(dmet)) - self = dmet.confusion_vectors() - self.data._data = ub.dict_isect(self.data._data, [ + cfsn_vecs = dmet.confusion_vectors() + cfsn_vecs.data._data = ub.dict_isect(cfsn_vecs.data._data, [ 'true', 'pred', 'score', 'weight', ]) - return self + return cfsn_vecs @classmethod def from_arrays(ConfusionVectors, true, pred=None, score=None, weight=None, @@ -89,8 +89,8 @@ class ConfusionVectors(ub.NiceRepr): >>> rng = kwarray.ensure_rng(0) >>> true = (rng.rand(10) * len(classes)).astype(np.int) >>> probs = rng.rand(len(true), len(classes)) - >>> self = ConfusionVectors.from_arrays(true=true, probs=probs, 
classes=classes) - >>> self.confusion_matrix() + >>> cfsn_vecs = ConfusionVectors.from_arrays(true=true, probs=probs, classes=classes) + >>> cfsn_vecs.confusion_matrix() pred person vehicle object real person 0 0 0 @@ -118,10 +118,10 @@ class ConfusionVectors(ub.NiceRepr): data = {k: v for k, v in data.items() if v is not None} cfsn_data = kwarray.DataFrameArray(data) - self = ConfusionVectors(cfsn_data, probs=probs, classes=classes) - return self + cfsn_vecs = ConfusionVectors(cfsn_data, probs=probs, classes=classes) + return cfsn_vecs - def confusion_matrix(self, raw=False, compress=False): + def confusion_matrix(cfsn_vecs, raw=False, compress=False): """ Builds a confusion matrix from the confusion vectors. @@ -141,17 +141,18 @@ class ConfusionVectors(ub.NiceRepr): >>> from netharn.metrics import DetectionMetrics >>> dmet = DetectionMetrics.demo( >>> nimgs=10, nboxes=(0, 10), n_fp=(0, 1), n_fn=(0, 1), nclasses=3, cls_noise=.2) - >>> self = dmet.confusion_vectors() - >>> cm = self.confusion_matrix() + >>> cfsn_vecs = dmet.confusion_vectors() + >>> cm = cfsn_vecs.confusion_matrix() + ... 
>>> print(cm.to_string(float_format=lambda x: '%.2f' % x)) pred background cat_1 cat_2 cat_3 real - background 0 1 1 1 - cat_1 2 12 0 1 - cat_2 2 0 14 1 - cat_3 1 0 1 17 + background 0.00 1.00 1.00 1.00 + cat_1 2.00 12.00 0.00 1.00 + cat_2 2.00 0.00 14.00 1.00 + cat_3 1.00 0.00 1.00 17.00 """ - data = self.data + data = cfsn_vecs.data y_true = data['true'].copy() if raw: @@ -159,8 +160,9 @@ class ConfusionVectors(ub.NiceRepr): else: y_pred = data['pred'].copy() - if 'background' in self.classes: - bg_idx = self.classes.index('background') + # FIXME: hard-coded background class + if 'background' in cfsn_vecs.classes: + bg_idx = cfsn_vecs.classes.index('background') y_true[y_true < 0] = bg_idx y_pred[y_pred < 0] = bg_idx else: @@ -170,13 +172,13 @@ class ConfusionVectors(ub.NiceRepr): raise IndexError('y_pred contains invalid indices') matrix = fast_confusion_matrix( - y_true, y_pred, n_labels=len(self.classes), + y_true, y_pred, n_labels=len(cfsn_vecs.classes), sample_weight=data.get('weight', None) ) import pandas as pd - cm = pd.DataFrame(matrix, index=list(self.classes), - columns=list(self.classes)) + cm = pd.DataFrame(matrix, index=list(cfsn_vecs.classes), + columns=list(cfsn_vecs.classes)) if compress: iszero = matrix == 0 unused = (np.all(iszero, axis=0) & np.all(iszero, axis=1)) @@ -185,42 +187,42 @@ class ConfusionVectors(ub.NiceRepr): cm.columns.name = 'pred' return cm - def coarsen(self, cxs): + def coarsen(cfsn_vecs, cxs): """ Creates a coarsened set of vectors """ import ndsampler import kwarray - assert self.probs is not None, 'need probs' - if not isinstance(self.classes, ndsampler.CategoryTree): + assert cfsn_vecs.probs is not None, 'need probs' + if not isinstance(cfsn_vecs.classes, ndsampler.CategoryTree): raise TypeError('classes must be a ndsampler.CategoryTree') - descendent_map = self.classes.idx_to_descendants_idxs(include_self=True) + descendent_map = cfsn_vecs.classes.idx_to_descendants_idxs(include_cfsn_vecs=True) valid_descendant_mapping = 
ub.dict_isect(descendent_map, cxs) # mapping from current category indexes to the new coarse ones # Anything without an explicit key will be mapped to background - bg_idx = self.classes.index('background') + bg_idx = cfsn_vecs.classes.index('background') mapping = {v: k for k, vs in valid_descendant_mapping.items() for v in vs} - new_true = np.array([mapping.get(x, bg_idx) for x in self.data['true']]) - new_pred = np.array([mapping.get(x, bg_idx) for x in self.data['pred']]) + new_true = np.array([mapping.get(x, bg_idx) for x in cfsn_vecs.data['true']]) + new_pred = np.array([mapping.get(x, bg_idx) for x in cfsn_vecs.data['pred']]) - new_score = np.array([p[x] for x, p in zip(new_pred, self.probs)]) + new_score = np.array([p[x] for x, p in zip(new_pred, cfsn_vecs.probs)]) new_y_df = { 'true': new_true, 'pred': new_pred, 'score': new_score, - 'weight': self.data['weight'], - 'txs': self.data['txs'], - 'pxs': self.data['pxs'], - 'gid': self.data['gid'], + 'weight': cfsn_vecs.data['weight'], + 'txs': cfsn_vecs.data['txs'], + 'pxs': cfsn_vecs.data['pxs'], + 'gid': cfsn_vecs.data['gid'], } new_y_df = kwarray.DataFrameArray(new_y_df) - coarse_cfsn_vecs = ConfusionVectors(new_y_df, self.classes, self.probs) + coarse_cfsn_vecs = ConfusionVectors(new_y_df, cfsn_vecs.classes, cfsn_vecs.probs) return coarse_cfsn_vecs - def binarize_peritem(self, negative_classes=None): + def binarize_peritem(cfsn_vecs, negative_classes=None): """ Creates a binary representation useful for measuring the performance of detectors. 
It is assumed that scores of "positive" classes should be @@ -236,24 +238,39 @@ class ConfusionVectors(ub.NiceRepr): >>> from netharn.metrics import DetectionMetrics >>> dmet = DetectionMetrics.demo( >>> nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3) - >>> self = dmet.confusion_vectors() + >>> cfsn_vecs = dmet.confusion_vectors() >>> class_idxs = list(dmet.classes.node_to_idx.values()) - >>> binvecs = self.binarize_peritem() + >>> binvecs = cfsn_vecs.binarize_peritem() """ import kwarray # import warnings # warnings.warn('binarize_peritem DOES NOT PRODUCE CORRECT RESULTS') - if negative_classes is None: - negative_cidxs = {-1} - else: - raise NotImplementedError + negative_cidxs = {-1} + if negative_classes is not None: + @ub.memoize + def _lower_classes(): + if cfsn_vecs.classes is None: + raise Exception( + 'classes must be known if negative_classes are strings') + return [c.lower() for c in cfsn_vecs.classes] + for c in negative_classes: + import six + if isinstance(c, six.string_types): + classes = _lower_classes() + try: + cidx = classes.index(c) + except Exception: + continue + else: + cidx = int(c) + negative_cidxs.add(cidx) - is_false = kwarray.isect_flags(self.data['true'], negative_cidxs) + is_false = kwarray.isect_flags(cfsn_vecs.data['true'], negative_cidxs) _data = { 'is_true': ~is_false, - 'pred_score': self.data['score'], + 'pred_score': cfsn_vecs.data['score'], } extra = ub.dict_isect(_data, [ 'txs', 'pxs', 'gid', 'weight']) @@ -262,23 +279,25 @@ class ConfusionVectors(ub.NiceRepr): binvecs = BinaryConfusionVectors(bin_data) return binvecs - def binarize_ovr(self, mode=1, keyby='name'): + def binarize_ovr(cfsn_vecs, mode=1, keyby='name', ignore_classes={'ignore'}): """ - Transforms self into one-vs-rest BinaryConfusionVectors for each category. + Transforms cfsn_vecs into one-vs-rest BinaryConfusionVectors for each category. 
Args: - mode (int): 0 for heirarchy aware or 1 for voc like - keyby : can be cx or name + mode (int, default=1): 0 for heirarchy aware or 1 for voc like. + MODE 0 IS PROBABLY BROKEN + keyby (int | str) : can be cx or name + ignore_classes (Set[str]): category names to ignore Returns: OneVsRestConfusionVectors: which behaves like - Dict[int, BinaryConfusionVectors]: cx_to_binvecs + Dict[int, BinaryConfusionVectors]: cx_to_binvecs Example: >>> # xdoctest: +REQUIRES(module:ndsampler) - >>> self = ConfusionVectors.demo() - >>> print('self = {!r}'.format(self)) - >>> catname_to_binvecs = self.binarize_ovr(keyby='name') + >>> cfsn_vecs = ConfusionVectors.demo() + >>> print('cfsn_vecs = {!r}'.format(cfsn_vecs)) + >>> catname_to_binvecs = cfsn_vecs.binarize_ovr(keyby='name') >>> print('catname_to_binvecs = {!r}'.format(catname_to_binvecs)) Notes: @@ -315,17 +334,17 @@ class ConfusionVectors(ub.NiceRepr): """ import kwarray - classes = self.classes - data = self.data + classes = cfsn_vecs.classes + data = cfsn_vecs.data if mode == 0: - if self.probs is None: + if cfsn_vecs.probs is None: raise ValueError('cannot binarize in mode=0 without probs') pdist = classes.idx_pairwise_distance() cx_to_binvecs = {} for cx in range(len(classes)): - if classes[cx] == 'background': + if classes[cx] == 'background' or classes[cx] in ignore_classes: continue if mode == 0: @@ -335,7 +354,7 @@ class ConfusionVectors(ub.NiceRepr): 'IN THIS FILE WERE, AND I HAVENT CHECKED THIS ONE YET') # Lookup original probability predictions for the class of interest - new_scores = self.probs[:, cx] + new_scores = cfsn_vecs.probs[:, cx] # Determine which truth items have compatible classes # Note: we ignore any truth-label that is COARSER than the @@ -356,9 +375,9 @@ class ConfusionVectors(ub.NiceRepr): 'is_true': is_finer_eq.astype(np.uint8), 'pred_score': new_scores, 'weight': data['weight'] * (np.float32(1.0) - is_coarser), - 'txs': self.data['txs'], - 'pxs': self.data['pxs'], - 'gid': self.data['gid'], 
+ 'txs': cfsn_vecs.data['txs'], + 'pxs': cfsn_vecs.data['pxs'], + 'gid': cfsn_vecs.data['gid'], } bin_data = kwarray.DataFrameArray(bin_data) @@ -371,18 +390,18 @@ class ConfusionVectors(ub.NiceRepr): elif mode == 1: # More VOC-like, not heirarchy friendly - if self.probs is not None: + if cfsn_vecs.probs is not None: # We know the actual score predicted for this category in # this case. - is_true = self.data['true'] == cx - pred_score = self.probs[:, cx] + is_true = cfsn_vecs.data['true'] == cx + pred_score = cfsn_vecs.probs[:, cx] else: import warnings warnings.warn( 'Binarize ovr is only approximate if not all probabilities are known') # If we don't know the probabilities for non-predicted # categories then we have to guess. - is_true = self.data['true'] == cx + is_true = cfsn_vecs.data['true'] == cx # do we know the actual predicted score for this category? score_is_unknown = data['pred'] != cx @@ -391,6 +410,14 @@ class ConfusionVectors(ub.NiceRepr): # These scores were for a different class, so assume # other classes were predicted with a uniform prior approx_score = (1 - pred_score[score_is_unknown]) / (len(classes) - 1) + + # Except in the case where predicted class is -1. In this + # case no prediction was actually made (above a threshold) + # so the assumed score should be significantly lower, we + # conservatively choose zero. 
+ unknown_preds = data['pred'][score_is_unknown] + approx_score[unknown_preds == -1] = 0 + pred_score[score_is_unknown] = approx_score bin_data = { @@ -411,27 +438,27 @@ class ConfusionVectors(ub.NiceRepr): if keyby == 'cx': cx_to_binvecs = cx_to_binvecs elif keyby == 'name': - cx_to_binvecs = ub.map_keys(self.classes, cx_to_binvecs) + cx_to_binvecs = ub.map_keys(cfsn_vecs.classes, cx_to_binvecs) else: raise KeyError(keyby) - ovr_cfns = OneVsRestConfusionVectors(cx_to_binvecs, self.classes) + ovr_cfns = OneVsRestConfusionVectors(cx_to_binvecs, cfsn_vecs.classes) return ovr_cfns - def classification_report(self, verbose=0): + def classification_report(cfsn_vecs, verbose=0): """ Build a classification report with various metrics. Example: >>> from netharn.metrics.confusion_vectors import * # NOQA - >>> self = ConfusionVectors.demo() - >>> report = self.classification_report(verbose=1) + >>> cfsn_vecs = ConfusionVectors.demo() + >>> report = cfsn_vecs.classification_report(verbose=1) """ from netharn.metrics import clf_report - y_true = self.data['true'] - y_pred = self.data['pred'] - sample_weight = self.data.get('weight', None) - target_names = list(self.classes) + y_true = cfsn_vecs.data['true'] + y_pred = cfsn_vecs.data['pred'] + sample_weight = cfsn_vecs.data.get('weight', None) + target_names = list(cfsn_vecs.classes) report = clf_report.classification_report( y_true=y_true, y_pred=y_pred, @@ -449,6 +476,15 @@ class OneVsRestConfusionVectors(ub.NiceRepr): Attributes: cx_to_binvecs classes + + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> from netharn.metrics import DetectionMetrics + >>> dmet = DetectionMetrics.demo( + >>> nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3) + >>> cfsn_vecs = dmet.confusion_vectors() + >>> self = cfsn_vecs.binarize_ovr(keyby='name') + >>> print('self = {!r}'.format(self)) """ def __init__(self, cx_to_binvecs, classes): self.cx_to_binvecs = cx_to_binvecs @@ -458,6 +494,12 @@ class 
OneVsRestConfusionVectors(ub.NiceRepr): # return ub.repr2(ub.map_vals(len, self.cx_to_binvecs)) return ub.repr2(self.cx_to_binvecs, strvals=True) + @classmethod + def demo(cls): + cfsn_vecs = ConfusionVectors.demo() + self = cfsn_vecs.binarize_ovr(keyby='name') + return self + def keys(self): return self.cx_to_binvecs.keys() @@ -490,6 +532,21 @@ class OneVsRestConfusionVectors(ub.NiceRepr): 'perclass': perclass, } + def threshold_curves(self, **kwargs): + """ + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> self = OneVsRestConfusionVectors.demo() + >>> thresh_result = self.threshold_curves()['perclass'] + """ + perclass = PerClass_Threshold_Result({ + cx: binvecs.threshold_curves(**kwargs) + for cx, binvecs in self.cx_to_binvecs.items() + }) + return { + 'perclass': perclass, + } + def ovr_classification_report(self): raise NotImplementedError @@ -677,40 +734,40 @@ class BinaryConfusionVectors(ub.NiceRepr): realpos_total = (y_true * weight).sum() realneg_total = ((1 - y_true) * weight).sum() + """ + Notes: + Apparently, consistent scoring is really hard to get right. + + For detection problems scoring via + confusion_vectors+sklearn produces noticably different + results than the VOC method. There are a few reasons for + this. The VOC method stops counting true positives after + all assigned predicted boxes have been counted. It simply + remembers the amount of original true positives to + normalize the true positive reate. On the other hand, + confusion vectors maintains a list of these unassigned true + boxes and gives them a predicted index of -1 and a score of + zero. This means that this function sees them as having a + y_true of 1 and a y_score of 0, which allows the + scikit-learn fps and tps counts to effectively get up to + 100% recall when the threshold is zero. The VOC method + simply ignores these and handles them implicitly. 
The + problem is that if you remove these from the scikit-learn + inputs, it wont see the correct number of positives and it + will incorrectly normalize the recall. In summary: + + VOC: + * remembers realpos_total + * doesn't count unassigned truths as TP when the + threshold is zero. + + CV+SKL: + * counts unassigned truths as TP with score=0. + * Always ensure tpr=1, ppv=0 and ppv=1, tpr=0 cases + exist. + """ with warnings.catch_warnings(): warnings.filterwarnings('ignore', message='invalid .* true_divide') - """ - Notes: - Apparently, consistent scoring is really hard to get right. - - For detection problems scoring via - confusion_vectors+sklearn produces noticably different - results than the VOC method. There are a few reasons for - this. The VOC method stops counting true positives after - all assigned predicted boxes have been counted. It simply - remembers the amount of original true positives to - normalize the true positive reate. On the other hand, - confusion vectors maintains a list of these unassigned true - boxes and gives them a predicted index of -1 and a score of - zero. This means that this function sees them as having a - y_true of 1 and a y_score of 0, which allows the - scikit-learn fps and tps counts to effectively get up to - 100% recall when the threshold is zero. The VOC method - simply ignores these and handles them implicitly. The - problem is that if you remove these from the scikit-learn - inputs, it wont see the correct number of positives and it - will incorrectly normalize the recall. In summary: - - VOC: - * remembers realpos_total - * doesn't count unassigned truths as TP when the - threshold is zero. - - CV+SKL: - * counts unassigned truths as TP with score=0. - * Always ensure tpr=1, ppv=0 and ppv=1, tpr=0 cases - exist. 
- """ if method.startswith('voc'): y_score_ = y_score[y_score > 0] @@ -778,7 +835,6 @@ class BinaryConfusionVectors(ub.NiceRepr): >>> print('roc = {}'.format(ub.repr2(self.roc()))) >>> self = BinaryConfusionVectors.demo(n=3, p_true=0.5, p_error=0.5) >>> print('roc = {}'.format(ub.repr2(self.roc()))) - """ import sklearn import sklearn.metrics # NOQA @@ -896,6 +952,148 @@ class BinaryConfusionVectors(ub.NiceRepr): }) return ROC_Result(roc_info) + def threshold_curves(self, stabalize_thresh=7, stabalize_pad=7): + """ + Get statistics (F1, G1, MCC) versus thresholds + + Example: + >>> self = BinaryConfusionVectors.demo(n=100) + >>> self.threshold_curves() + """ + # compute tp, fp, tn, fn at each point + # compute mcc, f1, g1, etc + # write plot functions + info = self._binary_clf_curves(stabalize_thresh, stabalize_pad) + + tp = info['tp_count'] + fp = info['fp_count'] + tn = info['tn_count'] + fn = info['fn_count'] + + ppv = tp / (tp + fp) + tpr = tp / (tp + fn) + + # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient + mcc_numer = (tp * tn) - (fp * fn) + mcc_denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) + mcc_denom[np.isnan(mcc_denom) | (mcc_denom == 0)] = 1 + info['mcc'] = mcc_numer / mcc_denom + + # https://erotemic.wordpress.com/2019/10/23/closed-form-of-the-mcc-when-tn-inf/ + info['g1'] = np.sqrt(ppv * tpr) + + f1_numer = (2 * ppv * tpr) + f1_denom = (ppv + tpr) + f1_denom[f1_denom == 0] = 1 + info['f1'] = f1_numer / f1_denom + + tnr_denom = (tn + fp) + tnr_denom[tnr_denom == 0] = 1 + tnr = tn / tnr_denom + + pnv_denom = (tn + fn) + pnv_denom[pnv_denom == 0] = 1 + npv = tn / pnv_denom + + info['ppv'] = ppv + + info['tpr'] = tpr + + info['acc'] = (tp + tn) / (tp + tn + fp + fn) + + info['bm'] = tpr + tnr - 1 # informedness + + info['mk'] = ppv + npv - 1 # markedness + + keys = ['mcc', 'g1', 'f1', 'acc'] + for key in keys: + measure = info[key] + max_idx = measure.argmax() + best_thresh = info['thresholds'][max_idx] + best_measure = 
measure[max_idx] + best_label = '{}={:0.2f}@{:0.2f}'.format(key, best_measure, best_thresh) + info['max_{}'.format(key)] = best_label + info['_max_{}'.format(key)] = (best_measure, best_thresh) + + return Threshold_Result(info) + + def _binary_clf_curves(self, stabalize_thresh=7, stabalize_pad=7): + """ + Code common to ROC, PR, and threshold measures + + TODO: refactor ROC and PR curves to use this code, perhaps even + memoizing it. + """ + try: + from sklearn.metrics._ranking import _binary_clf_curve + except ImportError: + from sklearn.metrics.ranking import _binary_clf_curve + data = self.data + y_true = data['is_true'].astype(np.uint8) + y_score = data['pred_score'] + sample_weight = data._data.get('weight', None) + + npad = 0 + if len(self) == 0: + fps = [np.nan] + fns = [np.nan] + tps = [np.nan] + thresholds = [np.nan] + + realpos_total = 0 + realneg_total = 0 + nsupport = 0 + else: + if len(self) <= stabalize_thresh: + # add dummy data to stabalize the computation + if sample_weight is None: + sample_weight = np.ones(len(self)) + npad = stabalize_pad + y_true, y_score, sample_weight = _stabalilze_data( + y_true, y_score, sample_weight, npad=npad) + + # Get the total weight (typically number of) positive and negative + # examples of this class + if sample_weight is None: + weight = 1 + nsupport = len(y_true) - bool(npad) + else: + weight = sample_weight + nsupport = sample_weight.sum() - bool(npad) + + realpos_total = (y_true * weight).sum() + realneg_total = ((1 - y_true) * weight).sum() + + fps, tps, thresholds = _binary_clf_curve( + y_true, y_score, pos_label=1.0, + sample_weight=sample_weight) + + # Adjust weighted totals to be robust to floating point errors + if np.isclose(realneg_total, fps[-1]): + realneg_total = max(realneg_total, fps[-1]) + if np.isclose(realpos_total, tps[-1]): + realpos_total = max(realpos_total, tps[-1]) + + tns = realneg_total - fps + fns = realpos_total - tps + + info = { + 'fp_count': fps, + 'tp_count': tps, + 'tn_count': 
tns, + 'fn_count': fns, + 'thresholds': thresholds, + 'realpos_total': realpos_total, + 'realneg_total': realneg_total, + 'nsupport': nsupport, + } + if self.cx is not None: + info.update({ + 'cx': self.cx, + 'node': self.classes[self.cx], + }) + return info + class DictProxy(DictLike): """ @@ -942,7 +1140,7 @@ class ROC_Result(ub.NiceRepr, DictProxy): 'catname': self.get('node', None), }, nl=0, precision=4, strvals=True) - def draw(self, **kw): + def draw(self, prefix='', **kw): """ Example: >>> from netharn.metrics.confusion_vectors import * # NOQA @@ -954,7 +1152,7 @@ class ROC_Result(ub.NiceRepr, DictProxy): >>> kwplot.show_if_requested() """ from netharn.metrics import drawing - return drawing.draw_roc(self, **kw) + return drawing.draw_roc(self, prefix=prefix, **kw) class PR_Result(ub.NiceRepr, DictProxy): @@ -985,9 +1183,45 @@ class PR_Result(ub.NiceRepr, DictProxy): 'catname': self.get('node', None), }, nl=0, precision=4, strvals=True) - def draw(self, **kw): + def draw(self, prefix='', **kw): + from netharn.metrics import drawing + return drawing.draw_prcurve(self, prefix=prefix, **kw) + + +class Threshold_Result(ub.NiceRepr, DictProxy): + """ + Example: + >>> from netharn.metrics.confusion_vectors import * # NOQA + >>> binvecs = BinaryConfusionVectors.demo(n=100, p_error=0.5) + >>> self = binvecs.threshold_curves() + >>> print('self = {!r}'.format(self)) + >>> # xdoctest: +REQUIRES(--show) + >>> import kwplot + >>> kwplot.autompl() + >>> self.draw() + >>> kwplot.show_if_requested() + """ + def __init__(self, roc_info): + self.proxy = roc_info + + @property + def catname(self): + return self.get('node', None) + + def __nice__(self): + return ub.repr2({ + 'max_mcc': self['max_mcc'], + 'max_g1': self['max_g1'], + # 'max_f1': self['max_f1'], + 'nsupport': self['nsupport'], + 'realpos_total': self['realpos_total'], + 'realneg_total': self['realneg_total'], + 'catname': self.get('node', None), + }, nl=0, precision=4, strvals=True) + + def draw(self, prefix='', 
**kw): from netharn.metrics import drawing - return drawing.draw_peritem_prcurve(self, **kw) + return drawing.draw_threshold_curves(self, prefix=prefix, **kw) class PerClass_ROC_Result(ub.NiceRepr, DictProxy): @@ -999,9 +1233,9 @@ class PerClass_ROC_Result(ub.NiceRepr, DictProxy): def __nice__(self): return ub.repr2(self.proxy, nl=2, strvals=True) - def draw(self, **kw): + def draw(self, prefix='', **kw): from netharn.metrics import drawing - return drawing.draw_perclass_roc(self, **kw) + return drawing.draw_perclass_roc(self, prefix=prefix, **kw) class PerClass_PR_Result(ub.NiceRepr, DictProxy): @@ -1013,13 +1247,38 @@ class PerClass_PR_Result(ub.NiceRepr, DictProxy): def __nice__(self): return ub.repr2(self.proxy, nl=2, strvals=True) - def draw(self, **kw): + def draw(self, prefix='', **kw): from netharn.metrics import drawing - return drawing.draw_perclass_prcurve(self, **kw) + return drawing.draw_perclass_prcurve(self, prefix=prefix, **kw) + + +class PerClass_Threshold_Result(ub.NiceRepr, DictProxy): + """ + """ + def __init__(self, cx_to_info): + self.proxy = cx_to_info + + def __nice__(self): + return ub.repr2(self.proxy, nl=2, strvals=True) + + def draw(self, prefix='', **kw): + """ + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> cfsn_vecs = ConfusionVectors.demo() + >>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name') + >>> self = ovr_cfsn.threshold_curves()['perclass'] + >>> self.draw() + """ + from netharn.metrics import drawing + return drawing.draw_perclass_thresholds(self, prefix=prefix, **kw) def _stabalilze_data(y_true, y_score, sample_weight, npad=7): - npad = 7 + """ + Adds ideally calibrated dummy values to curves with few positive examples. + This acts somewhat like a Baysian prior and smooths out the curve. 
+ """ min_score = y_score.min() max_score = y_score.max() @@ -1039,3 +1298,11 @@ def _stabalilze_data(y_true, y_score, sample_weight, npad=7): y_score = np.hstack([y_score, pad_score]) sample_weight = np.hstack([sample_weight, pad_weight]) return y_true, y_score, sample_weight + +if __name__ == '__main__': + """ + CommandLine: + python ~/code/netharn/netharn/metrics/confusion_vectors.py all + """ + import xdoctest + xdoctest.doctest_module(__file__) diff --git a/netharn/metrics/detect_metrics.py b/netharn/metrics/detect_metrics.py index cde905beabfc7ef22498f219e3583716be576d3e..70db812f784798618acfc4d481be2d7e5a3d5661 100644 --- a/netharn/metrics/detect_metrics.py +++ b/netharn/metrics/detect_metrics.py @@ -133,7 +133,8 @@ class DetectionMetrics(ub.NiceRepr): return dmet.gid_to_pred_dets[gid] def confusion_vectors(dmet, ovthresh=0.5, bias=0, gids=None, compat='all', - prioritize='iou', ignore_class='ignore'): + prioritize='iou', ignore_classes='ignore', + background_class=ub.NoParam, verbose='auto', workers=0): """ Assigns predicted boxes to the true boxes so we can transform the detection problem into a classification problem for scoring. @@ -168,8 +169,19 @@ class DetectionMetrics(ub.NiceRepr): preferred over descendents of the true class, over unreleated classes. - ignore_class (str, default='ignore'): - class name indicating ignore regions + ignore_classes (set, default={'ignore'}): + class names indicating ignore regions + + background_class (str, default=ub.NoParam): + Name of the background class. If unspecified we try to + determine it with heuristics. A value of None means there is no + background class. + + verbose (int, default='auto'): verbosity flag. In auto mode, + verbose=1 if len(gids) > 1000. 
+ + workers (int, default=0): + number of parallel assignment processes Ignore: globals().update(xdev.get_func_kwargs(dmet.confusion_vectors)) @@ -183,29 +195,62 @@ class DetectionMetrics(ub.NiceRepr): if gids is None: gids = sorted(dmet._imgname_to_gid.values()) - for gid in gids: + + if verbose == 'auto': + verbose = 1 if len(gids) > 10 else 0 + + if background_class is ub.NoParam: + # Try to autodetermine background class name, + # otherwise fallback to None + background_class = None + if dmet.classes is not None: + lower_classes = [c.lower() for c in dmet.classes] + try: + idx = lower_classes.index('background') + background_class = dmet.classes[idx] + # TODO: if we know the background class name should we + # change bg_cidx in assignment? + except ValueError: + pass + + from ndsampler.utils import util_futures + workers = 0 + jobs = util_futures.JobPool(mode='process', max_workers=workers) + + for gid in ub.ProgIter(gids, desc='submit assign jobs', + verbose=verbose): true_dets = dmet.true_detections(gid) pred_dets = dmet.pred_detections(gid) - - y = _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1, - ovthresh=ovthresh, bg_cidx=-1, - bias=bias, classes=dmet.classes, - compat=compat, prioritize=prioritize, - ignore_class=ignore_class) + job = jobs.submit( + _assign_confusion_vectors, true_dets, pred_dets, + bg_weight=1, ovthresh=ovthresh, bg_cidx=-1, bias=bias, + classes=dmet.classes, compat=compat, prioritize=prioritize, + ignore_classes=ignore_classes) + job.gid = gid + + for job in ub.ProgIter(jobs.jobs, desc='assign detections', + verbose=verbose): + y = job.result() + gid = job.gid if TRACK_PROBS: # Keep track of per-class probs + pred_dets = dmet.pred_detections(gid) try: pred_probs = pred_dets.probs except KeyError: TRACK_PROBS = False else: pxs = np.array(y['pxs'], dtype=np.int) + + # For unassigned truths, we need to create dummy probs + # where a background class has probability 1. 
flags = pxs > -1 probs = np.zeros((len(pxs), pred_probs.shape[1]), dtype=np.float32) - bg_idx = dmet.classes.node_to_idx['background'] - probs[:, bg_idx] = 1 + if background_class is not None: + bg_idx = dmet.classes.index(background_class) + probs[:, bg_idx] = 1 probs[flags] = pred_probs[pxs[flags]] prob_accum.append(probs) @@ -213,8 +258,60 @@ class DetectionMetrics(ub.NiceRepr): for k, v in y.items(): y_accum[k].extend(v) + # else: + # for gid in ub.ProgIter(gids, desc='assign detections', verbose=verbose): + # true_dets = dmet.true_detections(gid) + # pred_dets = dmet.pred_detections(gid) + + # y = _assign_confusion_vectors(true_dets, pred_dets, bg_weight=1, + # ovthresh=ovthresh, bg_cidx=-1, + # bias=bias, classes=dmet.classes, + # compat=compat, prioritize=prioritize, + # ignore_classes=ignore_classes) + + # if TRACK_PROBS: + # # Keep track of per-class probs + # try: + # pred_probs = pred_dets.probs + # except KeyError: + # TRACK_PROBS = False + # else: + # pxs = np.array(y['pxs'], dtype=np.int) + # flags = pxs > -1 + # probs = np.zeros((len(pxs), pred_probs.shape[1]), + # dtype=np.float32) + # bg_idx = dmet.classes.node_to_idx['background'] + # probs[:, bg_idx] = 1 + # probs[flags] = pred_probs[pxs[flags]] + # prob_accum.append(probs) + + # y['gid'] = [gid] * len(y['pred']) + # for k, v in y.items(): + # y_accum[k].extend(v) + + _data = {} + for k, v in ub.ProgIter(list(y_accum.items()), desc='ndarray convert', verbose=verbose): + # Try to use 32 bit types for large evaluation problems + kw = dict() + if k in {'iou', 'score', 'weight'}: + kw['dtype'] = np.float32 + if k in {'pxs', 'txs', 'gid', 'pred', 'true', 'pred_raw'}: + kw['dtype'] = np.int32 + try: + _data[k] = np.asarray(v, **kw) + except TypeError: + _data[k] = np.asarray(v) + # Avoid pandas when possible - cfsn_data = kwarray.DataFrameArray(ub.map_vals(np.array, y_accum)) + cfsn_data = kwarray.DataFrameArray(_data) + + if 0: + import xdev + nbytes = 0 + for k, v in _data.items(): + nbytes += v.size 
* v.dtype.itemsize + print(xdev.byte_str(nbytes)) + if TRACK_PROBS: y_prob = np.vstack(prob_accum) else: @@ -334,14 +431,15 @@ class DetectionMetrics(ub.NiceRepr): return info def score_voc(dmet, ovthresh=0.5, bias=1, method='voc2012', gids=None, - ignore_class='ignore'): + ignore_classes='ignore'): """ score using voc method Example: >>> # xdoctest: +REQUIRES(module:ndsampler) >>> dmet = DetectionMetrics.demo( - >>> nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8, score_noise=.5) + >>> nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8, + >>> score_noise=.5) >>> print(dmet.score_voc()['mAP']) 0.9399... """ @@ -356,10 +454,10 @@ class DetectionMetrics(ub.NiceRepr): true_dets = dmet.true_detections(gid) pred_dets = dmet.pred_detections(gid) - if ignore_class is not None: + if ignore_classes is not None: true_ignore_flags, pred_ignore_flags = _filter_ignore_regions( true_dets, pred_dets, ovthresh=ovthresh, - ignore_class=ignore_class) + ignore_classes=ignore_classes) true_dets = true_dets.compress(~true_ignore_flags) pred_dets = pred_dets.compress(~pred_ignore_flags) @@ -482,6 +580,11 @@ class DetectionMetrics(ub.NiceRepr): cls_noise (float, default=0): probability that a class label will change. Must be within 0 and 1. anchors (ndarray, default=None): used to create random boxes + null_pred (bool, default=0): + if True, predicted classes are returned as null, which means + only localization scoring is suitable. 
+ with_probs (bool, default=1): + if True, includes per-class probabilities with predictions Example: >>> # xdoctest: +REQUIRES(module:ndsampler) @@ -504,6 +607,27 @@ class DetectionMetrics(ub.NiceRepr): >>> print(dmet.pred_detections(gid=0)) + + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> # Test case with null predicted categories + >>> dmet = DetectionMetrics.demo(nimgs=30, null_pred=1, nclasses=3, + >>> nboxes=10, n_fp=10, box_noise=0.3, + >>> with_probs=False) + >>> dmet.gid_to_pred_dets[0].data + >>> dmet.gid_to_true_dets[0].data + >>> cfsn_vecs = dmet.confusion_vectors() + >>> binvecs_ovr = cfsn_vecs.binarize_ovr() + >>> binvecs_per = cfsn_vecs.binarize_peritem() + >>> pr_per = binvecs_per.precision_recall() + >>> pr_ovr = binvecs_ovr.precision_recall() + >>> print('pr_per = {!r}'.format(pr_per)) + >>> print('pr_ovr = {!r}'.format(pr_ovr)) + >>> # xdoctest: +REQUIRES(--show) + >>> import kwplot + >>> kwplot.autompl() + >>> pr_per.draw(fnum=1) + >>> pr_ovr['perclass'].draw(fnum=2) """ import kwimage import kwarray @@ -515,6 +639,9 @@ class DetectionMetrics(ub.NiceRepr): box_noise = kwargs.get('box_noise', 0) cls_noise = kwargs.get('cls_noise', 0) + null_pred = kwargs.get('null_pred', False) + with_probs = kwargs.get('with_probs', True) + # specify an amount of overlap between true and false scores score_noise = kwargs.get('score_noise', 0.2) @@ -550,8 +677,10 @@ class DetectionMetrics(ub.NiceRepr): true_mean = _interp(0.5, .8, score_noise) false_mean = _interp(0.5, .2, score_noise) - true_score_RV = distributions.TruncNormal(mean=true_mean, std=.5, low=true_low, high=true_high, rng=rng) - false_score_RV = distributions.TruncNormal(mean=false_mean, std=.5, low=0, high=false_high, rng=rng) + true_score_RV = distributions.TruncNormal( + mean=true_mean, std=.5, low=true_low, high=true_high, rng=rng) + false_score_RV = distributions.TruncNormal( + mean=false_mean, std=.5, low=0, high=false_high, rng=rng) frgnd_cx_RV = distributions.DiscreteUniform( 
1, nclasses + 1, rng=rng) @@ -640,7 +769,12 @@ class DetectionMetrics(ub.NiceRepr): scores=pred_scores) # Hack in the probs - pred_dets.data['probs'] = class_probs + if with_probs: + pred_dets.data['probs'] = class_probs + + if null_pred: + pred_dets.data['class_idxs'] = np.array( + [None] * len(pred_dets), dtype=object) dmet.add_truth(true_dets, imgname=imgname) dmet.add_predictions(pred_dets, imgname=imgname) diff --git a/netharn/metrics/drawing.py b/netharn/metrics/drawing.py index 5875280cdad6fff1d39d67a50f0ccf96464bec28..ffa27733bde5a22a251d7c95a740a2bbc125f704 100644 --- a/netharn/metrics/drawing.py +++ b/netharn/metrics/drawing.py @@ -80,13 +80,9 @@ def draw_perclass_roc(cx_to_rocinfo, classes=None, prefix='', fnum=1, for cx in cxs: peritem = cx_to_rocinfo[cx] - if isinstance(cx, int): - catname = classes[cx] - else: - catname = cx + catname = classes[cx] if isinstance(cx, int) else cx auc = peritem['auc'] - tpr = peritem['tpr'] nsupport = int(peritem['nsupport']) @@ -95,7 +91,7 @@ def draw_perclass_roc(cx_to_rocinfo, classes=None, prefix='', fnum=1, if abs(z - int(z)) < 1e-8: label = 'auc={:0.2f}: {} ({:d}/{:d})'.format(auc, catname, int(peritem['realpos_total']), round(nsupport, 2)) else: - label = 'auc={:0.2f}: {} ({}/{:d})'.format(auc, catname, round(peritem['realpos_total'], 2), round(nsupport, 2)) + label = 'auc={:0.2f}: {} ({:.2f}/{:d})'.format(auc, catname, round(peritem['realpos_total'], 2), round(nsupport, 2)) else: label = 'auc={:0.2f}: {} ({:d})'.format(auc, catname, round(nsupport, 2)) @@ -144,10 +140,7 @@ def draw_perclass_prcurve(cx_to_peritem, classes=None, prefix='', fnum=1, **kw): xydata = ub.odict() for cx in cxs: peritem = cx_to_peritem[cx] - if isinstance(cx, int): - catname = classes[cx] - else: - catname = cx + catname = classes[cx] if isinstance(cx, int) else cx ap = peritem['ap'] if 'pr' in peritem: pr = peritem['pr'] @@ -173,11 +166,11 @@ def draw_perclass_prcurve(cx_to_peritem, classes=None, prefix='', fnum=1, **kw): if 
'realpos_total' in peritem: z = peritem['realpos_total'] if abs(z - int(z)) < 1e-8: - label = 'ap={:0.2f}: {} ({:d}/{:d})'.format(ap, catname, int(peritem['realpos_total']), nsupport) + label = 'ap={:0.2f}: {} ({:d}/{:d})'.format(ap, catname, int(peritem['realpos_total']), round(nsupport, 2)) else: - label = 'ap={:0.2f}: {} ({}/{:d})'.format(ap, catname, round(peritem['realpos_total'], 2), nsupport) + label = 'ap={:0.2f}: {} ({:.2f}/{:d})'.format(ap, catname, round(peritem['realpos_total'], 2), round(nsupport, 2)) else: - label = 'ap={:0.2f}: {} ({:d})'.format(ap, catname, nsupport) + label = 'ap={:0.2f}: {} ({:d})'.format(ap, catname, round(nsupport, 2)) xydata[label] = (recall, precision) with warnings.catch_warnings(): @@ -195,7 +188,80 @@ def draw_perclass_prcurve(cx_to_peritem, classes=None, prefix='', fnum=1, **kw): return ax -def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw): +def draw_perclass_thresholds(cx_to_peritem, key='mcc', classes=None, prefix='', fnum=1, **kw): + """ + Notes: + Each category is inspected independently of one another, there is no + notion of confusion. 
+ + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> # xdoctest: +REQUIRES(module:kwplot) + >>> from netharn.metrics.drawing import * # NOQA + >>> from netharn.metrics import ConfusionVectors + >>> cfsn_vecs = ConfusionVectors.demo() + >>> classes = cfsn_vecs.classes + >>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name') + >>> cx_to_peritem = ovr_cfsn.threshold_curves()['perclass'] + >>> import kwplot + >>> kwplot.autompl() + >>> key = 'mcc' + >>> draw_perclass_thresholds(cx_to_peritem, key, classes) + >>> # xdoctest: +REQUIRES(--show) + >>> kwplot.show_if_requested() + """ + import kwplot + # Sort by descending "best value" + cxs = list(cx_to_peritem.keys()) + + try: + priority = np.array([item['_max_' + key][0] for item in cx_to_peritem.values()]) + priority[np.isnan(priority)] = -np.inf + cxs = list(ub.take(cxs, np.argsort(priority)))[::-1] + except KeyError: + pass + + xydata = ub.odict() + for cx in cxs: + peritem = cx_to_peritem[cx] + catname = classes[cx] if isinstance(cx, int) else cx + + thresholds = peritem['thresholds'] + measure = peritem[key] + try: + best_label = peritem['max_{}'.format(key)] + except KeyError: + max_idx = measure.argmax() + best_thresh = thresholds[max_idx] + best_measure = measure[max_idx] + best_label = '{}={:0.2f}@{:0.2f}'.format(key, best_measure, best_thresh) + + nsupport = int(peritem['nsupport']) + if 'realpos_total' in peritem: + z = peritem['realpos_total'] + if abs(z - int(z)) < 1e-8: + label = '{}: {} ({:d}/{:d})'.format(best_label, catname, int(peritem['realpos_total']), round(nsupport, 2)) + else: + label = '{}: {} ({:.2f}/{:d})'.format(best_label, catname, round(peritem['realpos_total'], 2), round(nsupport, 2)) + else: + label = '{}: {} ({:d})'.format(best_label, catname, round(nsupport, 2)) + xydata[label] = (thresholds, measure) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Mean of empty slice', RuntimeWarning) + + ax = kwplot.multi_plot( + xydata=xydata, fnum=fnum, + xlim=(0, 1), 
ylim=(0, 1), xpad=0.01, ypad=0.01, + xlabel='threshold', ylabel=key, + title=prefix + 'perclass {}'.format(key), + legend_loc='lower right', + color='distinct', linestyle='cycle', marker='cycle', **kw + ) + return ax + + +def draw_prcurve(peritem, prefix='', fnum=1, **kw): """ TODO: rename to draw prcurve. Just draws a single pr curve. @@ -211,7 +277,7 @@ def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw): >>> peritem = cfsn_vecs.binarize_peritem().precision_recall() >>> import kwplot >>> kwplot.autompl() - >>> draw_peritem_prcurve(peritem) + >>> draw_prcurve(peritem) >>> # xdoctest: +REQUIRES(--show) >>> kwplot.show_if_requested() """ @@ -239,9 +305,9 @@ def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw): if 'realpos_total' in peritem: z = peritem['realpos_total'] if abs(z - int(z)) < 1e-8: - label = 'ap={:0.2f}: ({:d}/{:d})'.format(ap, int(peritem['realpos_total']), nsupport) + label = 'ap={:0.2f}: ({:d}/{:d})'.format(ap, int(peritem['realpos_total']), round(nsupport, 2)) else: - label = 'ap={:0.2f}: ({}/{:d})'.format(ap, peritem['realpos_total'], nsupport) + label = 'ap={:0.2f}: ({:.2f}/{:d})'.format(ap, round(peritem['realpos_total'], 2), round(nsupport, 2)) else: label = 'ap={:0.2f}: ({:d})'.format(ap, nsupport) @@ -254,3 +320,80 @@ def draw_peritem_prcurve(peritem, prefix='', fnum=1, **kw): color='distinct', linestyle='cycle', marker='cycle', **kw ) return ax + + +def draw_threshold_curves(info, keys=None, prefix='', fnum=1, **kw): + """ + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> # xdoctest: +REQUIRES(module:kwplot) + >>> import sys, ubelt + >>> sys.path.append(ubelt.expandpath('~/code/netharn')) + >>> from netharn.metrics.drawing import * # NOQA + >>> from netharn.metrics import DetectionMetrics + >>> dmet = DetectionMetrics.demo( + >>> nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3) + >>> cfsn_vecs = dmet.confusion_vectors() + >>> info = cfsn_vecs.binarize_peritem().threshold_curves() + >>> keys = None + >>> import 
kwplot + >>> kwplot.autompl() + >>> draw_threshold_curves(info, keys) + >>> # xdoctest: +REQUIRES(--show) + >>> kwplot.show_if_requested() + """ + import kwplot + import kwimage + thresh = info['thresholds'] + + if keys is None: + keys = {'g1', 'f1', 'acc', 'mcc'} + + idx_to_colors = kwimage.Color.distinct(len(keys), space='rgba') + idx_to_best_pt = {} + + xydata = {} + colors = {} + for idx, key in enumerate(keys): + color = idx_to_colors[idx] + measure = info[key] + max_idx = measure.argmax() + best_thresh = thresh[max_idx] + best_measure = measure[max_idx] + best_label = '{}={:0.2f}@{:0.2f}'.format(key, best_measure, best_thresh) + + nsupport = int(info['nsupport']) + if 'realpos_total' in info: + z = info['realpos_total'] + if abs(z - int(z)) < 1e-8: + label = '{}: ({:d}/{:d})'.format(best_label, int(info['realpos_total']), round(nsupport, 2)) + else: + label = '{}: ({:.2f}/{:d})'.format(best_label, round(info['realpos_total'], 2), round(nsupport, 2)) + else: + label = '{}: ({:d})'.format(best_label, nsupport) + xydata[label] = (thresh, measure) + colors[label] = color + idx_to_best_pt[idx] = (best_thresh, best_measure) + + ax = kwplot.multi_plot( + xydata=xydata, fnum=fnum, + xlim=(0, 1), ylim=(0, 1), xpad=0.01, ypad=0.01, + xlabel='threshold', ylabel=key, + title=prefix + 'threshold curves', + legend_loc='lower right', + color=colors, + linestyle='cycle', marker='cycle', **kw + ) + for idx, best_pt in idx_to_best_pt.items(): + best_thresh, best_measure = best_pt + color = idx_to_colors[idx] + ax.plot(best_thresh, best_measure, '*', color=color) + return ax + +if __name__ == '__main__': + """ + CommandLine: + python ~/code/netharn/netharn/metrics/drawing.py + """ + import xdoctest + xdoctest.doctest_module(__file__) diff --git a/netharn/mixins.py b/netharn/mixins.py index 51ab82afc51f833b8ef4fd83118d83f7dfae40ba..1b4b8fd244f0db82ab69004fa6491c524ca52cef 100644 --- a/netharn/mixins.py +++ b/netharn/mixins.py @@ -130,7 +130,7 @@ def _redump_measures(dpath): from 
os.path import join import kwplot - kwplot.set_mpl_backend('agg') + kwplot.autompl(force='agg') try: import seaborn as sns @@ -172,14 +172,15 @@ def _dump_measures(tb_data, out_dpath, mode=None, smoothing=0.0, >>> mode = ['epoch', 'iter'] >>> fpath = join(out_dpath, 'tb_data.json') >>> tb_data = json.load(open(fpath, 'r')) + >>> import kwplot + >>> kwplot.autompl() >>> _dump_measures(tb_data, out_dpath, smoothing=0) """ import ubelt as ub from os.path import join import numpy as np - import kwplot - kwplot.autompl() + # kwplot.autompl() # TODO: Is it possible to get htop to show this process with some name that # distinguishes it from the dataloader workers? diff --git a/netharn/models/resnet.py b/netharn/models/resnet.py index 0e2b654cf15a2b84fd8d7fec63e55b182169e4f3..99abc31171c848960014897606739f6925b5452f 100644 --- a/netharn/models/resnet.py +++ b/netharn/models/resnet.py @@ -84,7 +84,8 @@ class ResNet(nn.Module): >>> print(tuple(y.size())) (1, 10) """ - def __init__(self, num_blocks, num_classes=10, block='Bottleneck'): + def __init__(self, num_blocks=[3, 4, 6, 3], num_classes=10, + in_channels=3, block='Bottleneck'): super(ResNet, self).__init__() self.in_planes = 64 @@ -95,7 +96,7 @@ class ResNet(nn.Module): else: raise KeyError('Unknown block={}'.format(block)) - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, + self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(64) self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) diff --git a/netharn/models/yolo2/light_yolo.py b/netharn/models/yolo2/light_yolo.py index c8402eb6c39bdad61a09aff4d359aba83e092b73..9eb6cc3da7eb9a201129812f4ab98a03cf18315d 100644 --- a/netharn/models/yolo2/light_yolo.py +++ b/netharn/models/yolo2/light_yolo.py @@ -251,18 +251,19 @@ class Yolo(nn.Module): >>> dets = batch_dets[0] >>> # xdoc: +REQUIRES(--show) >>> import netharn as nh - >>> nh.util.autompl() # xdoc: +SKIP + >>> import kwplot + >>> kwplot.autompl() # 
xdoc: +SKIP >>> dets.meta['classes'] = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', >>> 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', >>> 'dog', 'horse', 'motorbike', 'person', >>> 'pottedplant', 'sheep', 'sofa', 'train', >>> 'tvmonitor') - >>> nh.util.figure(fnum=1, doclf=True) + >>> kwplot.figure(fnum=1, doclf=True) >>> sf = orig_sizes[0] >>> dets.boxes.scale(sf, inplace=True) - >>> nh.util.imshow(rgb255, colorspace='rgb') + >>> kwplot.imshow(rgb255, colorspace='rgb') >>> dets.draw() - >>> nh.util.show_if_requested() + >>> kwplot.show_if_requested() """ outputs = [] @@ -292,10 +293,10 @@ def find_anchors(dset): >>> xy = -anchors / 2 >>> wh = anchors >>> show_boxes = np.hstack([xy, wh]) - >>> import netharn as nh - >>> nh.util.figure(doclf=True, fnum=1) - >>> nh.util.autompl() # xdoc: +SKIP - >>> nh.util.draw_boxes(show_boxes, box_format='tlwh') + >>> import kwplot + >>> kwplot.figure(doclf=True, fnum=1) + >>> kwplot.autompl() # xdoc: +SKIP + >>> kwplot.draw_boxes(show_boxes, box_format='tlwh') >>> from matplotlib import pyplot as plt >>> plt.gca().set_xlim(xy.min() - 1, wh.max() / 2 + 1) >>> plt.gca().set_ylim(xy.min() - 1, wh.max() / 2 + 1) diff --git a/netharn/plots/__init__.py b/netharn/plots/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/netharn/plots/weight_scatter.py b/netharn/plots/weight_scatter.py new file mode 100644 index 0000000000000000000000000000000000000000..79bcf37d71f77caf0d9a7822db555854669b3ce3 --- /dev/null +++ b/netharn/plots/weight_scatter.py @@ -0,0 +1,77 @@ +import numpy as np +from os.path import join + + +def plot_weight_scatter(harn): + """ + Draw a scatter plot of the initial weights versus the final weights of a + network. 
+ + Example: + >>> import netharn as nh + >>> harn = nh.FitHarn.demo() + >>> harn.run() + + Ignore: + >>> from netharn.plots.weight_scatter import * # NOQA + >>> from netharn.examples import mnist + >>> import kwplot + >>> harn = mnist.setup_harn() + >>> harn.preferences['timeout'] = 60 * 1 + >>> kwplot.autompl(force='agg') + >>> harn.run() + >>> kwplot.autompl(force='auto') + >>> plot_weight_scatter(harn) + """ + import netharn as nh + cpu = nh.XPU.coerce('cpu') + + path1 = join(harn.train_dpath, 'initial_state', 'initial_state.pt') + state1 = cpu.load(path1) + weights1 = state1['model_state_dict'] + + path2 = harn.best_snapshot() + state2 = cpu.load(path2) + weights2 = state2['model_state_dict'] + + keys1 = set(weights1.keys()) + keys2 = set(weights2.keys()) + keys = keys1 & keys2 + + assert keys == keys2 + + accum1 = [] + accum2 = [] + + for key in keys: + w1 = weights1[key] + w2 = weights2[key] + accum1.append(w1.numpy().ravel()) + accum2.append(w2.numpy().ravel()) + + points1 = np.hstack(accum1) + points2 = np.hstack(accum2) + + # Find cosine of angle between the vectors + import scipy + cosangle = scipy.spatial.distance.cosine(points1, points2) + print('cosangle = {!r}'.format(cosangle)) + + import kwplot + import seaborn + seaborn.set() + plt = kwplot.autoplt() + plt.clf() + + x = points1[::1] + y = points2[::1] + + ax = plt.gca() + ax.figure.clf() + + # seaborn.kdeplot(x, y, shade=True, gridsize=50) + + ax = plt.gca() + ax.scatter(x, y, s=1, alpha=0.1, c='blue') + ax.set_xlabel('initial weights') + ax.set_ylabel('trained weights') diff --git a/netharn/schedulers/core.py b/netharn/schedulers/core.py index af0b43f29ce19caed95a0187d22f6bcd12a23405..2bb2e02a2036205a436faf0c9e0a6be3b2a267b4 100644 --- a/netharn/schedulers/core.py +++ b/netharn/schedulers/core.py @@ -132,11 +132,12 @@ class YOLOScheduler(NetharnScheduler): >>> self.step_batch() >>> #print('ydata = {}'.format(ub.repr2(ydata, precision=5, nl=0))) >>> # xdoc: +REQUIRES(--show) - >>> 
nh.util.autompl() + >>> import kwplot + >>> kwplot.autompl() >>> xticklabels = sorted({1, 20} | set(points.keys())) - >>> nh.util.multi_plot(xdata=xdata['epoch'], ydata=ydata, xlabel='epoch', fnum=1, + >>> kwplot.multi_plot(xdata=xdata['epoch'], ydata=ydata, xlabel='epoch', fnum=1, >>> ylabel='lr', xticklabels=xticklabels, xticks=xticklabels) - >>> nh.util.show_if_requested() + >>> kwplot.show_if_requested() """ __batchaware__ = True diff --git a/super_setup.py b/super_setup.py index 1bda283b3de8db6e7aafdc2b479ca32fc52e0539..1823ed81bdad9a1e31207e3121dbe74c1c17f126 100755 --- a/super_setup.py +++ b/super_setup.py @@ -686,11 +686,11 @@ def make_netharn_registry(): # The util libs CommonRepo( - name='kwarray', branch='dev/0.5.7', remote='public', + name='kwarray', branch='dev/0.5.9', remote='public', remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwarray.git'}, ), CommonRepo( - name='kwimage', branch='dev/0.6.2', remote='public', + name='kwimage', branch='dev/0.6.3', remote='public', remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwimage.git'}, ), # CommonRepo( # TODO @@ -698,7 +698,7 @@ def make_netharn_registry(): # remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwannot.git'}, # ), CommonRepo( - name='kwcoco', branch='dev/0.1.1', remote='public', + name='kwcoco', branch='dev/0.1.2', remote='public', remotes={'public': 'git@gitlab.kitware.com:computer-vision/kwcoco.git'}, ), CommonRepo( @@ -719,17 +719,17 @@ def make_netharn_registry(): # For example data and CLI CommonRepo( - name='scriptconfig', branch='dev/0.5.6', remote='public', + name='scriptconfig', branch='dev/0.5.7', remote='public', remotes={'public': 'git@gitlab.kitware.com:utils/scriptconfig.git'}, ), CommonRepo( - name='ndsampler', branch='dev/0.5.8', remote='public', + name='ndsampler', branch='dev/0.5.10', remote='public', remotes={'public': 'git@gitlab.kitware.com:computer-vision/ndsampler.git'}, ), # netharn - training harness CommonRepo( - name='netharn', 
branch='dev/0.5.6', remote='public', + name='netharn', branch='dev/0.5.7', remote='public', remotes={'public': 'git@gitlab.kitware.com:computer-vision/netharn.git'}, ), ]