diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a543da17211b1021752da5d058bca1e000db69c..a525651a82b52a33aad8f301e608a771377101e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,17 @@ This changelog follows the specifications detailed in: [Keep a Changelog](https: This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release. -## Version 0.5.15 - Unreleased +## Version 0.5.16 - Unreleased + +### Added +* decollate batch utility + +### Fixed +* Fixed segmentation, and object detection example +* Fixed issue with container_abcs + + +## Version 0.5.15 - Released 2021-01-26 ## Version 0.5.14 - Released 2021-01-21 diff --git a/dev/debug_memory.py b/dev/debug_memory.py index cb17ed0c830873b5453fad22cefce384f652b595..c2662487f6424e9756150321a21f1effb1aef2ab 100644 --- a/dev/debug_memory.py +++ b/dev/debug_memory.py @@ -8,6 +8,43 @@ References: Potential Solutions: https://stackoverflow.com/questions/6832554/multiprocessing-how-do-i-share-a-dict-among-multiple-processes + +Notes: + The issue does not stem from any quirk in the multiprocessing library. It + is a fundamental consequence of Python reference counting and the + operating-system level fork operation. When the OS forks the base Python + process it creates a new nearly identical process (Python variables even + have the same id). This new process is very lightweight because it does not + copy over all the memory from the original program. Instead, it will only + copy bits of memory as they are changed, i.e. diverge from the base + process. This is the copy-on-write behavior. When an item of a Python list + is accessed by the forked process, it must increment the reference count of + whatever it accessed, and thus the OS perceives a write and triggers the + copy on write. But the OS doesn't just copy the small bit of memory that + was touched. It copies the entire memory page that the reference count for + the variable existed on. 
That's why the problem is so much worse when you + do random access (in sequential access the memory page that is copied + likely has the next reference count you were going to increment anyway, but + in random access discontiguous blocks of memory are copied,... well... + randomly). The one part I don't have a firm grasp on is why the problem + doesn't plateau as you start to randomly access information in pages you + already copied. Perhaps the information is stale somehow? I'm not sure. + But that is my best understanding of the issue. + + Using a pointer to a database like SQLite completely side-steps this + problem, because the only information that is forked is a string that + points to the database URI. New connections are opened up in each of the + forked processes. The only issue I've had is accessing a row is now + O(log(N)) instead of O(1). This can be mitigated with memoized caching, which, + again for a reason I don't entirely understand, uses less memory than + fork's copy-on-write behavior. However, I see speed benefits of SQL when I + scale from 10,000 to 100,000 images. The SQL+memoized cache backend was + running consistently at 45Hz as I scaled up (theoretically there should be + a logarithmic slowdown, but it appears to be a small enough effect that I + didn't see it), whereas the in-memory json data structure starts at over + 100Hz, but slows down to 1.1Hz at scale (which theoretically should have + been constant at scale, but that copy-on-write appears to add a lot of + overhead). 
""" from torch.utils.data import Dataset, DataLoader import numpy as np @@ -28,36 +65,32 @@ class CustomDataset(Dataset): self.data = np.array([x for x in range(int(total))]) elif storage_mode == 'python': self.data = [x for x in range(int(total))] + elif storage_mode == 'ndsampler-sql': + import ndsampler + import kwcoco + from kwcoco.coco_sql_dataset import ensure_sql_coco_view + dset = kwcoco.CocoDataset.demo( + 'vidshapes', num_videos=1, num_frames=total, + gsize=(64, 64) + ) + dset = ensure_sql_coco_view(dset) + print('dset.uri = {!r}'.format(dset.uri)) + dset.hashid = 'fake-hashid' + sampler = ndsampler.CocoSampler(dset, backend=None) + self.data = sampler + # sampler.load_item(0) + # tr = sampler.regions.get_item(0) + # sampler.load_sample(tr) + # assert total <= 1000 + # sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total)) + # sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total)) elif storage_mode == 'ndsampler': import ndsampler - assert total <= 1000 - sampler = ndsampler.CocoSampler.demo('shapes{}'.format(total)) - - TRY_TWEAKS = 1 - - if 1 and TRY_TWEAKS: - # Tweaks to try and prevent the sampler from leaking - sampler.frames._lru = None - - if 1 and TRY_TWEAKS: - import multiprocessing - dset = sampler.dset - manager = multiprocessing.Manager() - dset.index.cats = manager.dict(dset.index.cats) - dset.index.anns = manager.dict(dset.index.anns) - dset.index.imgs = manager.dict(dset.index.imgs) - dset.index.gid_to_aids = manager.dict(dset.index.gid_to_aids) - dset.index.cid_to_aids = manager.dict(dset.index.cid_to_aids) - dset.index.vidid_to_gids = manager.dict(dset.index.vidid_to_gids) - dset.index.file_name_to_img = manager.dict(dset.index.file_name_to_img) - dset.index.name_to_cat = manager.dict(dset.index.name_to_cat) - - dset.dataset = manager.dict(dset.dataset) - - # sampler.frames.dset - # sampler.dset - # sampler.regions.dset - + # assert total <= 10000 + sampler = ndsampler.CocoSampler.demo( + 'vidshapes', num_videos=1, 
num_frames=total, + gsize=(64, 64) + ) self.data = sampler else: raise KeyError(storage_mode) @@ -65,9 +98,23 @@ class CustomDataset(Dataset): def __len__(self): return len(self.data) + # def __getstate__(self): + # print('\n\nGETTING CUSTOM DATASET STATE') + # return super().__getstate__() + + # def __setstate__(self, val): + # print('\n\nSETTING CUSTOM DATASET STATE') + # return super().__setstate__(val) + def __getitem__(self, idx): - if self.storage_mode == 'ndsampler': - data = self.data.load_item(idx)['im'].ravel()[0:1].astype(np.float32) + if 0: + import multiprocessing + print('\n\nidx = {!r}'.format(idx)) + print('self = {!r}'.format(self)) + print(multiprocessing.current_process()) + if self.storage_mode == 'ndsampler' or self.storage_mode == 'ndsampler-sql': + sample = self.data.load_item(idx) + data = sample['im'].ravel()[0:1].astype(np.float32) data_pt = torch.from_numpy(data) else: data = self.data[idx] @@ -180,6 +227,17 @@ def byte_str(num, unit='auto', precision=2): return ub.repr2(num_unit, precision=precision) + ' ' + unit +def worker_init_fn(worker_id): + worker_info = torch.utils.data.get_worker_info() + dataset = worker_info.dataset + print('WORKER INIT FOR dataset') + if hasattr(dataset.data, 'dset'): + dset = dataset.data.dset + if hasattr(dset, 'connect'): + dset.connect(readonly=True) + print('WORKER INIT FOR dset = {!r}'.format(dset)) + + def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=True, workers=2): """ Args: @@ -190,6 +248,11 @@ def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=True, w total : size of backend storage """ + + if 0: + # torch_multiprocessing.get_context() + torch.multiprocessing.set_start_method('spawn') + mem = psutil.virtual_memory() start_mem = mem.used mem_str = byte_str(start_mem) @@ -214,11 +277,12 @@ def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=True, w print('shuffle = {!r}'.format(shuffle)) num_workers = workers - train_loader = 
DataLoader(train_data, batch_size=300, - shuffle=shuffle, - drop_last=True, - pin_memory=False, - num_workers=num_workers) + batch_size = 32 + # batch_size = 300 + train_loader = DataLoader(train_data, batch_size=batch_size, + shuffle=shuffle, drop_last=True, + pin_memory=False, num_workers=num_workers, + worker_init_fn=worker_init_fn) used_nbytes = psutil.virtual_memory().used - start_mem print('After init DataLoader memory = {!r}'.format(byte_str(used_nbytes))) @@ -252,7 +316,7 @@ def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=True, w print('measured final usage: {}'.format(byte_str(used_bytes))) print('measured peak usage: {}'.format(byte_str(max_bytes))) - if hasattr(train_data.data, 'frames'): + if 0 and hasattr(train_data.data, 'frames'): sampler = train_data.data print('sampler.regions.__dict__ = {}'.format( ub.repr2(sampler.regions.__dict__, nl=1))) @@ -263,6 +327,7 @@ def main(storage_mode='numpy', return_mode='tensor', total=24e5, shuffle=True, w if __name__ == '__main__': """ + CommandLine: python debug_memory.py numpy tensor --total=24e5 --shuffle=True @@ -273,10 +338,24 @@ if __name__ == '__main__': python debug_memory.py --storage_mode=python --total=24e5 --shuffle=True python debug_memory.py --storage_mode=python --total=24e5 --shuffle=False - python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=True + python debug_memory.py --storage_mode=ndsampler --total=100000 --shuffle=True --workers=4 + python debug_memory.py --storage_mode=ndsampler-sql --total=100000 --shuffle=True --workers=4 + + python debug_memory.py --storage_mode=ndsampler --total=10000 --shuffle=True --workers=4 + python debug_memory.py --storage_mode=ndsampler-sql --total=10000 --shuffle=True --workers=4 + + python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=True --workers=0 --profile + python debug_memory.py --storage_mode=ndsampler-sql --total=1000 --shuffle=True --workers=0 --profile + + python debug_memory.py 
--storage_mode=ndsampler --total=1000 --shuffle=False --workers=0 + + python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=False --workers=8 + python debug_memory.py --storage_mode=ndsampler-sql --total=1000 --shuffle=False --workers=8 + python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=True --workers=0 python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=False --workers=0 + python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=False --workers=4 python debug_memory.py --storage_mode=ndsampler --total=1000 --shuffle=True --workers=4 diff --git a/netharn/__init__.py b/netharn/__init__.py index cc7c5ef59400dc0e462a2dc9aeeed07acc9c60f3..0a7448f5ead88ed072a5fc3d0f5925ff83eeaafe 100644 --- a/netharn/__init__.py +++ b/netharn/__init__.py @@ -4,7 +4,7 @@ mkinit netharn --noattrs --dry mkinit netharn --noattrs """ -__version__ = '0.5.15' +__version__ = '0.5.16' try: # PIL 7.0.0 removed PIL_VERSION, which breaks torchvision, monkey patch it diff --git a/netharn/api.py b/netharn/api.py index ac2f40c5a65859a26f8cfdd0c26267aa9ba4dfab..6a3eb7a3f76ac223740c77172639d5d2e375dfa7 100644 --- a/netharn/api.py +++ b/netharn/api.py @@ -37,7 +37,6 @@ class Datasets(object): >>> print(ub.repr2(nh.api.Datasets.coerce(config, **kw))) """ from ndsampler import coerce_data - config = _update_defaults(config, kw) torch_datasets = coerce_data.coerce_datasets(config) return torch_datasets @@ -836,6 +835,15 @@ def configure_hacks(config={}, **kw): raise KeyError('start={} is not in valid_strats={}'.format(strat, valid_strats)) torch.multiprocessing.set_sharing_strategy(strat) + if 0: + """ + References: + https://britishgeologicalsurvey.github.io/science/python-forking-vs-spawn/ + """ + import torch + # torch_multiprocessing.get_context() + torch.multiprocessing.set_start_method('spawn') + def configure_workdir(config={}, **kw): config = _update_defaults(config, kw) diff --git a/netharn/cli/manage_runs.py 
b/netharn/cli/manage_runs.py index a4296d2074da0fe2a623f7266820f378dd9934ba..190e05471285dd27cf49a700c2f198637b4b7584 100755 --- a/netharn/cli/manage_runs.py +++ b/netharn/cli/manage_runs.py @@ -98,6 +98,9 @@ def is_symlink_broken(path): https://stackoverflow.com/questions/20794/find-broken-symlinks-with-python Example: + >>> import pytest + >>> if ub.WIN32: + >>> pytest.skip('symlink checks on windows dont always work') >>> test_dpath = ub.ensure_app_cache_dir('test') >>> real_fpath = ub.touch(join(test_dpath, 'real')) >>> link_fpath = ub.symlink(real_fpath, join(test_dpath, 'link')) @@ -327,7 +330,8 @@ def _devcheck_remove_dead_runs(workdir, dry=True, dead_num_snap_thresh=10, all_info = [s.info for s in all_sessions] - nice_groups = ub.group_items(all_info, lambda x: x['name']) + nice_groups = ub.group_items(all_info, lambda x: x.get('name', x.get('nice', None))) + nice_groups.pop(None, None) for name, group in nice_groups.items(): print(' --- {} --- '.format(name)) group = sorted(group, key=lambda x: x['size']) @@ -627,6 +631,8 @@ if __name__ == '__main__': python -m netharn.cli.manage_runs --mode=clean_checkpoints --workdir=~/work/voc_yolo2/ --recent 2 --factor 40 python -m netharn.cli.manage_runs --mode=clean_monitor --workdir=~/work/voc_yolo2/ + + python -m netharn.cli.manage_runs --mode=summarize --workdir=. python -m netharn.cli.manage_runs --mode=clean_monitor --workdir=. -f python -m netharn.cli.manage_runs --mode=clean_runs --workdir=. python -m netharn.cli.manage_runs --mode=clean_checkpoints --workdir=. --recent 2 --factor 40 -f diff --git a/netharn/data/channel_spec.py b/netharn/data/channel_spec.py index 2d4bd6bee23ec8d7dc394ac97dbd408ef23cdd22..227979ee39d99751f44282461fa470dc23835d76 100644 --- a/netharn/data/channel_spec.py +++ b/netharn/data/channel_spec.py @@ -1,3 +1,6 @@ +""" +Deprecated. 
Ported to kwcoco +""" import ubelt as ub import six import functools diff --git a/netharn/data/coerce_data.py b/netharn/data/coerce_data.py new file mode 100644 index 0000000000000000000000000000000000000000..a5b327ec2b6f8df8c530fa177203cbaf8f73f0d4 --- /dev/null +++ b/netharn/data/coerce_data.py @@ -0,0 +1,223 @@ +""" +Copied from ndsampler +""" + +import ubelt as ub + + +def coerce_datasets(config, build_hashid=False, verbose=1): + """ + Coerce train / val / test datasets from standard netharn config keys + + TODO: + * Does this belong in netharn? + + This only looks at the following keys in config: + * datasets + * train_dataset + * vali_dataset + * test_dataset + + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> from netharn.data.coerce_data import coerce_datasets + >>> import kwcoco + >>> config = {'datasets': 'special:shapes'} + >>> print('config = {!r}'.format(config)) + >>> dsets = coerce_datasets(config) + >>> print('dsets = {!r}'.format(dsets)) + + >>> config = {'datasets': 'special:shapes256'} + >>> coerce_datasets(config) + + >>> config = { + >>> 'datasets': kwcoco.CocoDataset.demo('shapes'), + >>> } + >>> coerce_datasets(config) + >>> coerce_datasets({ + >>> 'datasets': kwcoco.CocoDataset.demo('shapes'), + >>> 'test_dataset': kwcoco.CocoDataset.demo('photos'), + >>> }) + >>> coerce_datasets({ + >>> 'datasets': kwcoco.CocoDataset.demo('shapes'), + >>> 'test_dataset': kwcoco.CocoDataset.demo('photos'), + >>> }) + """ + # Ideally the user specifies a standard train/vali/test split + def _rectify_fpath(key): + fpath = key + fpath = fpath.lstrip('path:').lstrip('PATH:') + fpath = ub.expandpath(fpath) + return fpath + + def _ensure_coco(coco): + # Map a file path or an in-memory dataset to a CocoDataset + import kwcoco + import six + from os.path import exists + if coco is None: + return None + elif isinstance(coco, six.string_types): + fpath = _rectify_fpath(coco) + if exists(fpath): + with ub.Timer('read kwcoco dataset: fpath = 
{!r}'.format(fpath)): + coco = kwcoco.CocoDataset(fpath, autobuild=False) + print('building kwcoco index') + coco._build_index() + else: + if not coco.lower().startswith('special:'): + import warnings + warnings.warn('warning start dataset codes with special:') + code = coco + else: + code = coco.lower()[len('special:'):] + coco = kwcoco.CocoDataset.demo(code) + else: + # print('live dataset') + assert isinstance(coco, kwcoco.CocoDataset) + return coco + + config = config.copy() + + subsets = { + 'train': config.get('train_dataset', None), + 'vali': config.get('vali_dataset', None), + 'test': config.get('test_dataset', None), + } + + # specifying any train / vali / test disables datasets + if any(d is not None for d in subsets.values()): + config['datasets'] = None + + if verbose: + print('[netharn.data.coerce_data] Checking for explicit subsets') + subsets = ub.map_vals(_ensure_coco, subsets) + + # However, sometimes they just specify a single dataset, and we need to + # make a split for it. + # print('config = {!r}'.format(config)) + base = _ensure_coco(config.get('datasets', None)) + print('[netharn.data.coerce_data] base = {!r}'.format(base)) + if base is not None: + if verbose: + print('Splitting base into train/vali') + # TODO: the actual split may need to be cached. 
+ factor = config.get('split_factor', 3) + split_gids = _split_train_vali_test(base, factor=factor) + if config.get('no_test', False): + split_gids['train'] += split_gids.pop('test') + for tag in split_gids.keys(): + gids = split_gids[tag] + subset = base.subset(sorted(gids), copy=True) + subset.tag = base.tag + '-' + tag + subsets[tag] = subset + + subsets = {k: v for k, v in subsets.items() if v is not None} + if build_hashid: + print('Building subset hashids') + for tag, subset in subsets.items(): + print('Build index for {}'.format(subset.tag)) + subset._build_index() + print('Build hashid for {}'.format(subset.tag)) + subset._build_hashid(hash_pixels=False, verbose=10) + + # if verbose: + # print(_catfreq_columns_str(subsets)) + return subsets + + +def _print_catfreq_columns(subsets): + print('Category Split Frequency:') + print(_catfreq_columns_str(subsets)) + + +def _catfreq_columns_str(subsets): + import pandas as pd + split_freq = {} + for tag, subset in subsets.items(): + freq = subset.category_annotation_frequency() + split_freq[tag] = freq + + df_ = pd.DataFrame.from_dict(split_freq) + df_['sum'] = df_.sum(axis=1) + df_ = df_.sort_values('sum') + + with pd.option_context('display.max_rows', 1000): + text = df_.to_string() + return text + + +def _split_train_vali_test(coco_dset, factor=3): + """ + Args: + factor (int): number of pieces to divide images into + + Example: + >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> from netharn.data.coerce_data import _split_train_vali_test + >>> import kwcoco + >>> coco_dset = kwcoco.CocoDataset.demo('shapes8') + >>> split_gids = _split_train_vali_test(coco_dset) + >>> print('split_gids = {}'.format(ub.repr2(split_gids, nl=1))) + """ + import kwarray + images = coco_dset.images() + + def _stratified_split(gids, cids, n_splits=2, rng=None): + """ helper to split while trying to maintain class balance within images """ + rng = kwarray.ensure_rng(rng) + from ndsampler.utils import util_sklearn + selector = 
util_sklearn.StratifiedGroupKFold( + n_splits=n_splits, random_state=rng, shuffle=True) + + # from sklearn import model_selection + # selector = model_selection.StratifiedKFold( + # n_splits=n_splits, random_state=rng, shuffle=True) + skf_list = list(selector.split(X=gids, y=cids, groups=gids)) + trainx, testx = skf_list[0] + + if 0: + _train_gids = set(ub.take(gids, trainx)) + _test_gids = set(ub.take(gids, testx)) + print('_train_gids = {!r}'.format(_train_gids)) + print('_test_gids = {!r}'.format(_test_gids)) + return trainx, testx + + # Create flat table of image-ids and category-ids + gids, cids = [], [] + for gid_, cids_ in zip(images, images.annots.cids): + cids.extend(cids_) + gids.extend([gid_] * len(cids_)) + + # Split into learn/test then split learn into train/vali + learnx, testx = _stratified_split(gids, cids, rng=2997217409, + n_splits=factor) + learn_gids = list(ub.take(gids, learnx)) + learn_cids = list(ub.take(cids, learnx)) + _trainx, _valix = _stratified_split(learn_gids, learn_cids, rng=140860164, + n_splits=factor) + trainx = learnx[_trainx] + valix = learnx[_valix] + + split_gids = { + 'train': sorted(ub.unique(ub.take(gids, trainx))), + 'vali': sorted(ub.unique(ub.take(gids, valix))), + 'test': sorted(ub.unique(ub.take(gids, testx))), + } + + if True: + # Hack to favor training a good model over testing it properly The only + # real fix to this is to add more data, otherwise its simply a systemic + # issue. 
+ split_gids['vali'] = sorted(set(split_gids['vali']) - set(split_gids['train'])) + split_gids['test'] = sorted(set(split_gids['test']) - set(split_gids['train'])) + split_gids['test'] = sorted(set(split_gids['test']) - set(split_gids['vali'])) + + if __debug__: + import itertools as it + for a, b in it.combinations(split_gids.values(), 2): + if (set(a) & set(b)): + print('split_gids = {!r}'.format(split_gids)) + assert False + + return split_gids diff --git a/netharn/data/collate.py b/netharn/data/collate.py index 6cbaed4fbf666b5cb57c490527993259a39c2ebb..08c1beb3efe52d730ad487fa77a75d3f26238f75 100644 --- a/netharn/data/collate.py +++ b/netharn/data/collate.py @@ -13,10 +13,14 @@ import re # elif six.PY3: # import collections.abc # container_abcs = collections.abc -# string_classes = six.string_types -# int_classes = six.integer_types -from torch._six import container_abcs -from torch._six import string_classes, int_classes +try: + import collections.abc as container_abcs + from six import string_types as string_classes + from six import integer_types as int_classes +except Exception: + from torch._six import container_abcs + from torch._six import string_classes, int_classes + default_collate = torch_data.dataloader.default_collate diff --git a/netharn/data/data_containers.py b/netharn/data/data_containers.py index 3a733a7ac7fc93b89ab73b91f7d4fb1def2026e7..e9c6305a65a9d27b052f65b0912fb64aa96d5838 100644 --- a/netharn/data/data_containers.py +++ b/netharn/data/data_containers.py @@ -23,8 +23,13 @@ from netharn.device import DataParallel, DataSerial, XPU from torch.nn.parallel._functions import _get_stream from torch.nn.parallel._functions import Scatter as OrigScatter from torch.nn.parallel._functions import Gather as OrigGather -from torch._six import container_abcs -from torch._six import int_classes, string_classes +try: + import collections.abc as container_abcs + from six import string_types as string_classes + from six import integer_types as int_classes 
+except Exception: + from torch._six import container_abcs + from torch._six import string_classes, int_classes default_collate = torch_data.dataloader.default_collate @@ -112,7 +117,7 @@ class BatchContainer(ub.NiceRepr): shape_repr = ub.repr2(self.nestshape, nl=-2) return 'nestshape(data)={}'.format(shape_repr) except Exception: - return super().__repr__() + return object.__repr__(self) def __getitem__(self, index): cls = self.__class__ @@ -228,7 +233,8 @@ class ItemContainer(ub.NiceRepr): shape_repr = ub.repr2(self.nestshape, nl=-2) return 'nestshape(data)={}'.format(shape_repr) except Exception: - return super().__repr__() + return object.__repr__(self) + # return super().__repr__() @classmethod def demo(cls, key='img', rng=None, **kwargs): @@ -399,6 +405,38 @@ class ItemContainer(ub.NiceRepr): return result +def decollate_batch(batch): + """ + Breakup a collated batch of BatchContainers back into ItemContainers + + Example: + >>> bsize = 5 + >>> batch_items = [ + >>> { + >>> 'im': ItemContainer.demo('img'), + >>> 'label': ItemContainer.demo('labels'), + >>> 'box': ItemContainer.demo('box'), + >>> } + >>> for _ in range(bsize) + >>> ] + >>> batch = container_collate(batch_items, num_devices=2) + >>> decollated = decollate_batch(batch) + >>> assert len(decollated) == len(batch_items) + >>> assert (decollated[0]['im'].data == batch_items[0]['im'].data).all() + """ + import ubelt as ub + from kwcoco.util.util_json import IndexableWalker + walker = IndexableWalker(batch) + decollated_dict = ub.AutoDict() + decollated_walker = IndexableWalker(decollated_dict) + for path, batch_val in walker: + if isinstance(batch_val, BatchContainer): + for bx, item_val in enumerate(ub.flatten(batch_val.data)): + decollated_walker[[bx] + path] = ItemContainer(item_val) + decollated = list(decollated_dict.to_dict().values()) + return decollated + + def container_collate(inbatch, num_devices=None): """Puts each data field into a tensor/DataContainer with outer dimension batch size. 
@@ -914,6 +952,32 @@ def nestshape(data): >>> data = [np.arange(10), np.arange(13)] >>> nestshape(data) [(10,), (13,)] + + Ignore: + >>> # xdoctest: +REQUIRES(module:mmdet) + >>> from netharn.data.data_containers import * # NOQA + + >>> from mmdet.core.mask.structures import * # NOQA + >>> masks = [ + >>> [ np.array([0, 0, 10, 0, 10, 10., 0, 10, 0, 0]) ], + >>> [ np.array([0, 0, 10, 0, 10, 10., 0, 10, 5., 5., 0, 0]) ] + >>> ] + >>> height, width = 16, 16 + >>> polys = PolygonMasks(masks, height, width) + >>> nestshape(polys) + + >>> dc = BatchContainer([polys], stack=False) + >>> print('dc = {}'.format(ub.repr2(dc, nl=1))) + + >>> num_masks, H, W = 3, 32, 32 + >>> rng = np.random.RandomState(0) + >>> masks = (rng.rand(num_masks, H, W) > 0.1).astype(np.int) + >>> bitmasks = BitmapMasks(masks, height=H, width=W) + >>> nestshape(bitmasks) + + >>> dc = BatchContainer([bitmasks], stack=False) + >>> print('dc = {}'.format(ub.repr2(dc, nl=1))) + """ import ubelt as ub @@ -922,7 +986,9 @@ def nestshape(data): import numpy as np if isinstance(d, dict): return ub.odict(sorted([(k, _recurse(v)) for k, v in d.items()])) - elif 'Container' in type(d).__name__: + + clsname = type(d).__name__ + if 'Container' in clsname: meta = ub.odict(sorted([ ('stack', d.stack), # ('padding_value', d.padding_value), @@ -946,6 +1012,17 @@ def nestshape(data): return d elif isinstance(d, slice): return d + elif 'PolygonMasks' == clsname: + # hack for mmdet + return repr(d) + elif 'BitmapMasks' == clsname: + # hack for mmdet + return repr(d) + elif hasattr(d, 'shape'): + return d.shape + elif hasattr(d, 'items'): + # hack for dict-like objects + return ub.odict(sorted([(k, _recurse(v)) for k, v in d.items()])) else: raise TypeError(type(d)) diff --git a/netharn/data/grab_camvid.py b/netharn/data/grab_camvid.py index 430b6450718c67318295a0c13803edb695b1e4b2..79c4fa90caa058cdec9744a0ab872e5fb3fab8b9 100644 --- a/netharn/data/grab_camvid.py +++ b/netharn/data/grab_camvid.py @@ -1,591 +1,6 @@ -from 
os.path import relpath -from os.path import exists -from os.path import join -import ubelt as ub - - -def _devcheck_sample_full_image(): - """ - """ - import kwimage - import numpy as np - - sampler = grab_camvid_sampler() - - cid_to_cidx = sampler.catgraph.id_to_idx - classes = sampler.catgraph - - # Try loading an entire image - img, annots = sampler.load_image_with_annots(1) - - file = img['imdata'] - imdata = file[:] - - aids = [ann['id'] for ann in annots] - _annots = sampler.dset.annots(aids) - - sseg_list = [] - for s in _annots.lookup('segmentation'): - m = kwimage.MultiPolygon.coerce(s) - sseg_list.append(m) - - aids = _annots.aids - cids = _annots.cids - boxes = _annots.boxes - segmentations = kwimage.PolygonList(sseg_list) - class_idxs = np.array([cid_to_cidx[cid] for cid in cids]) - - dets = kwimage.Detections( - aids=aids, - boxes=boxes, - class_idxs=class_idxs, - segmentations=segmentations, - classes=classes, - datakeys=['aids'], - ) - - if 1: - print('dets = {!r}'.format(dets)) - print('dets.data = {!r}'.format(dets.data)) - print('dets.meta = {!r}'.format(dets.meta)) - - if ub.argflag('--show'): - import kwplot - - with ub.Timer('dets.draw_on'): - canvas = imdata.copy() - canvas = dets.draw_on(canvas) - kwplot.imshow(canvas, pnum=(1, 2, 1), title='dets.draw_on') - - with ub.Timer('dets.draw'): - kwplot.imshow(imdata, pnum=(1, 2, 2), docla=True, title='dets.draw') - dets.draw() - - -def _devcheck_load_sub_image(): - import kwimage - import numpy as np - - sampler = grab_camvid_sampler() - - cid_to_cidx = sampler.catgraph.id_to_idx - classes = sampler.catgraph - - # Try loading a subregion of an image - sample = sampler.load_positive(2) - imdata = sample['im'] - annots = sample['annots'] - aids = annots['aids'] - cids = annots['cids'] - boxes = annots['rel_boxes'] - class_idxs = np.array([cid_to_cidx[cid] for cid in cids]) - segmentations = annots['rel_ssegs'] - - raw_dets = kwimage.Detections( - aids=aids, - boxes=boxes, - class_idxs=class_idxs, - 
segmentations=segmentations, - classes=classes, - datakeys=['aids'], - ) - - # Clip boxes to the image boundary - input_dims = imdata.shape[0:2] - raw_dets.data['boxes'] = raw_dets.boxes.clip(0, 0, input_dims[1], input_dims[0]) - - keep = [] - for i, s in enumerate(raw_dets.data['segmentations']): - # TODO: clip polygons - m = s.to_mask(input_dims) - if m.area > 0: - keep.append(i) - dets = raw_dets.take(keep) - - heatmap = dets.rasterize(bg_size=(1, 1), input_dims=input_dims) - - if 1: - print('dets = {!r}'.format(dets)) - print('dets.data = {!r}'.format(dets.data)) - print('dets.meta = {!r}'.format(dets.meta)) - - if ub.argflag('--show'): - import kwplot - - kwplot.autompl() - heatmap.draw() - - draw_boxes = 1 - - kwplot.figure(doclf=True) - with ub.Timer('dets.draw_on'): - canvas = imdata.copy() - # TODO: add logic to color by class - canvas = dets.draw_on(canvas, boxes=draw_boxes, color='random') - kwplot.imshow(canvas, pnum=(1, 2, 1), title='dets.draw_on') - - with ub.Timer('dets.draw'): - kwplot.imshow(imdata, pnum=(1, 2, 2), docla=True, title='dets.draw') - dets.draw(boxes=draw_boxes, color='random') - - -def grab_camvid_train_test_val_splits(coco_dset, mode='segnet'): - # Use the split from SegNet: https://github.com/alexgkendall/SegNet-Tutorial - split_files = { - 'train': ub.grabdata('https://raw.githubusercontent.com/alexgkendall/SegNet-Tutorial/master/CamVid/train.txt'), - 'vali': ub.grabdata('https://raw.githubusercontent.com/alexgkendall/SegNet-Tutorial/master/CamVid/val.txt'), - 'test': ub.grabdata('https://raw.githubusercontent.com/alexgkendall/SegNet-Tutorial/master/CamVid/test.txt'), - } - gid_subsets = {} - for tag, fpath in split_files.items(): - text = open(fpath, 'r').read() - parts = text.replace('\n', ' ').split(' ') - parts = [p for p in parts if p] - from os.path import basename - names = sorted(set(basename(p) for p in parts)) - gids = [coco_dset.index.file_name_to_img['701_StillsRaw_full/' + name]['id'] for name in names] - 
gid_subsets[tag] = gids - return gid_subsets - - -def grab_camvid_sampler(): - """ - Grab a ndsampler.CocoSampler object for the CamVid dataset. - - Returns: - ndsampler.CocoSampler: sampler - - Example: - >>> # xdoctest: +REQUIRES(--download) - >>> sampler = grab_camvid_sampler() - >>> print('sampler = {!r}'.format(sampler)) - >>> # sampler.load_sample() - >>> for gid in ub.ProgIter(sampler.image_ids, desc='load image'): - >>> img = sampler.load_image(gid) - """ - import ndsampler - dset = grab_coco_camvid() - workdir = ub.ensure_app_cache_dir('camvid') - sampler = ndsampler.CocoSampler(dset, workdir=workdir) - return sampler - - -def grab_coco_camvid(): - """ - Example: - >>> # xdoctest: +REQUIRES(--download) - >>> dset = grab_coco_camvid() - >>> print('dset = {!r}'.format(dset)) - >>> # xdoctest: +REQUIRES(--show) - >>> import kwplot - >>> plt = kwplot.autoplt() - >>> plt.clf() - >>> dset.show_image(gid=1) - - Ignore: - import xdev - gid_list = list(dset.imgs) - for gid in xdev.InteractiveIter(gid_list): - dset.show_image(gid) - xdev.InteractiveIter.draw() - """ - import ndsampler - cache_dpath = ub.ensure_app_cache_dir('netharn', 'camvid') - coco_fpath = join(cache_dpath, 'camvid.mscoco.json') - - # Need to manually bump this if you make a change to loading - SCRIPT_VERSION = 'v4' - - # Ubelt's stamp-based caches are super cheap and let you take control of - # the data format. - stamp = ub.CacheStamp('camvid_coco', cfgstr=SCRIPT_VERSION, - dpath=cache_dpath, product=coco_fpath, hasher='sha1', - verbose=3) - if stamp.expired(): - camvid_raw_info = grab_raw_camvid() - dset = convert_camvid_raw_to_coco(camvid_raw_info) - with ub.Timer('dumping MS-COCO dset to: {}'.format(coco_fpath)): - dset.dump(coco_fpath) - # Mark this process as completed by saving a small file containing the - # hash of the "product" you are stamping. - stamp.renew() - - # We can also cache the index build step independently. 
This uses - # ubelt.Cacher, which is pickle based, and writes the actual object to - # disk. Each type of caching has its own uses and tradeoffs. - cacher = ub.Cacher('prebuilt-coco', cfgstr=SCRIPT_VERSION, - dpath=cache_dpath, verbose=3) - dset = cacher.tryload() - if dset is None: - print('Reading coco_fpath = {!r}'.format(coco_fpath)) - dset = ndsampler.CocoDataset(coco_fpath, tag='camvid') - # Directly save the file to disk. - dset._build_index() - dset._build_hashid() - cacher.save(dset) - - camvid_dset = dset - print('Loaded camvid_dset = {!r}'.format(camvid_dset)) - return camvid_dset - - -def grab_raw_camvid(): - """ - Grab the raw camvid data. - """ - import zipfile - dpath = ub.get_app_cache_dir('netharn', 'camvid') - - # url = 'http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/data/LabeledApproved_full.zip' - # url = 'https://github.com/mostafaizz/camvid/archive/master.zip' - url = 'https://data.kitware.com/api/v1/item/5cc0adce8d777f072b643503/download' - zip_fpath = ub.grabdata(url, fname='camvid-master.zip', dpath=dpath) - - dset_root = join(dpath, 'camvid-master') - image_dpath = join(dset_root, '701_StillsRaw_full') - mask_dpath = join(dset_root, 'LabeledApproved_full') - label_path = join(dset_root, 'label_colors.txt') - - if not exists(image_dpath): - zip_ref = zipfile.ZipFile(zip_fpath, 'r') - zip_ref.extractall(dpath) - zip_ref.close() - - import glob - img_paths = sorted([relpath(fpath, dset_root) - for fpath in glob.glob(join(image_dpath, '*.png'))]) - mask_paths = sorted([relpath(fpath, dset_root) - for fpath in glob.glob(join(mask_dpath, '*.png'))]) - - camvid_raw_info = { - 'img_paths': img_paths, - 'mask_paths': mask_paths, - 'dset_root': dset_root, - 'label_path': label_path, - } - return camvid_raw_info - - -def rgb_to_cid(r, g, b): - cid = (r << 16) + (g << 8) + (b << 0) - return cid - - -def cid_to_rgb(cid): - mask_b = (int(2 ** 8) - 1) << 0 - mask_g = (int(2 ** 8) - 1) << 8 - mask_r = (int(2 ** 8) - 1) << 16 - - r = (cid & 
mask_b) >> 0 - g = (cid & mask_g) >> 8 - b = (cid & mask_r) >> 16 - rgb = (r, g, b) - return rgb - - -def convert_camvid_raw_to_coco(camvid_raw_info): - """ - Converts the raw camvid format to an MSCOCO based format, ( which lets use - use ndsampler's COCO backend). - - Example: - >>> # xdoctest: +REQUIRES(--download) - >>> camvid_raw_info = grab_raw_camvid() - >>> # test with a reduced set of data - >>> del camvid_raw_info['img_paths'][2:] - >>> del camvid_raw_info['mask_paths'][2:] - >>> dset = convert_camvid_raw_to_coco(camvid_raw_info) - >>> # xdoctest: +REQUIRES(--show) - >>> import kwplot - >>> plt = kwplot.autoplt() - >>> kwplot.figure(fnum=1, pnum=(1, 2, 1)) - >>> dset.show_image(gid=1) - >>> kwplot.figure(fnum=1, pnum=(1, 2, 2)) - >>> dset.show_image(gid=2) - """ - import re - import kwimage - import ndsampler - print('Converting CamVid to MS-COCO format') - - dset_root, img_paths, label_path, mask_paths = ub.take( - camvid_raw_info, 'dset_root, img_paths, label_path, mask_paths'.split(', ')) - - img_infos = { - 'img_fname': img_paths, - 'mask_fname': mask_paths, - } - keys = list(img_infos.keys()) - next_vals = list(zip(*img_infos.values())) - image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals] - - dataset = { - 'img_root': dset_root, - 'images': [], - 'categories': [], - 'annotations': [], - } - - lines = ub.readfrom(label_path).split('\n') - lines = [line for line in lines if line] - for line in lines: - color_text, name = re.split('\t+', line) - r, g, b = map(int, color_text.split(' ')) - color = (r, g, b) - - # Parse the special camvid format - cid = (r << 16) + (g << 8) + (b << 0) - cat = { - 'id': cid, - 'name': name, - 'color': color, - } - dataset['categories'].append(cat) - - for gid, img_item in enumerate(image_items, start=1): - img = { - 'id': gid, - 'file_name': img_item['img_fname'], - # nonstandard image field - 'segmentation': img_item['mask_fname'], - } - dataset['images'].append(img) - - dset = 
ndsampler.CocoDataset(dataset) - dset.rename_categories({'Void': 'background'}) - - assert dset.name_to_cat['background']['id'] == 0 - dset.name_to_cat['background'].setdefault('alias', []).append('Void') - - if False: - _define_camvid_class_hierarcy(dset) - - if 1: - # TODO: Binarize CCs (and efficiently encode if possible) - import numpy as np - - bad_info = [] - once = False - - # Add images - dset.remove_all_annotations() - for gid, img in ub.ProgIter(dset.imgs.items(), desc='parse label masks'): - mask_fpath = join(dset_root, img['segmentation']) - - rgb_mask = kwimage.imread(mask_fpath, space='rgb') - r, g, b = rgb_mask.T.astype(np.int64) - cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T) - - cids = set(np.unique(cid_mask)) - {0} - - for cid in cids: - if cid not in dset.cats: - if gid == 618: - # Handle a known issue with image 618 - c_mask = (cid == cid_mask).astype(np.uint8) - total_bad = c_mask.sum() - if total_bad < 32: - if not once: - print('gid 618 has a few known bad pixels, ignoring them') - once = True - continue - else: - raise Exception('more bad pixels than expected') - else: - raise Exception('UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid)) - - # bad_rgb = cid_to_rgb(cid) - # print('bad_rgb = {!r}'.format(bad_rgb)) - # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid)) - # bad_info.append({ - # 'gid': gid, - # 'cid': cid, - # }) - else: - ann = { - 'category_id': cid, - 'image_id': gid - # 'segmentation': mask.to_coco() - } - assert cid in dset.cats - c_mask = (cid == cid_mask).astype(np.uint8) - mask = kwimage.Mask(c_mask, 'c_mask') - - box = kwimage.Boxes([mask.get_xywh()], 'xywh') - # box = mask.to_boxes() - - ann['bbox'] = ub.peek(box.to_coco()) - ann['segmentation'] = mask.to_coco() - dset.add_annotation(**ann) - - if 0: - bad_cids = [i['cid'] for i in bad_info] - print(sorted([c['color'] for c in dataset['categories']])) - print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info]))) - - gid = 618 - img = 
dset.imgs[gid] - mask_fpath = join(dset_root, img['segmentation']) - rgb_mask = kwimage.imread(mask_fpath, space='rgb') - r, g, b = rgb_mask.T.astype(np.int64) - cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T) - cid_hist = ub.dict_hist(cid_mask.ravel()) - - bad_cid_hist = {} - for cid in bad_cids: - bad_cid_hist[cid] = cid_hist.pop(cid) - - import kwplot - kwplot.autompl() - kwplot.imshow(rgb_mask) - - if 0: - import kwplot - plt = kwplot.autoplt() - plt.clf() - dset.show_image(1) - - import xdev - gid_list = list(dset.imgs) - for gid in xdev.InteractiveIter(gid_list): - dset.show_image(gid) - xdev.InteractiveIter.draw() - - dset._build_index() - dset._build_hashid() - return dset - - -def _define_camvid_class_hierarcy(dset): - # add extra supercategories - # NOTE: life-conscious, and life-inanimate are disjoint in this - # forumlation because we are restricted to a tree structure. If - # this changse, then we can try rencoding with multiple parents. - extra_structure = { - # Break down the image into things that are part of the system, and - # things that aren't - 'background': 'root', - 'system': 'root', - - # The system is made up of environmental components and actor - # components. 
- 'environment': 'system', - 'actor': 'system', - - # Break actors (things with complex movement) into subtypes - 'life-conscious': 'actor', - 'vehicle-land': 'actor', - 'actor-other': 'actor', - - # Break the environment (things with simple movement) info subtypes - 'life-inanimate': 'environment', - 'civil-structure': 'environment', - 'civil-notice': 'environment', - 'transport-way': 'environment', - - # Subclassify transport mediums - 'drive-way': 'transport-way', - 'walk-way': 'transport-way', - } - - for child, parent in extra_structure.items(): - if child in dset.name_to_cat: - dset.name_to_cat[child]['supercategory'] = parent - else: - dset.add_category(name=child, supercategory=parent) - - dset.name_to_cat['background']['supercategory'] = 'root' - - dset.name_to_cat['Sky']['supercategory'] = 'environment' - - dset.name_to_cat['Animal']['supercategory'] = 'life-conscious' - dset.name_to_cat['Bicyclist']['supercategory'] = 'life-conscious' - dset.name_to_cat['Pedestrian']['supercategory'] = 'life-conscious' - dset.name_to_cat['Child']['supercategory'] = 'life-conscious' - - dset.name_to_cat['OtherMoving']['supercategory'] = 'actor-other' - dset.name_to_cat['CartLuggagePram']['supercategory'] = 'actor-other' - - dset.name_to_cat['Car']['supercategory'] = 'vehicle-land' - dset.name_to_cat['Train']['supercategory'] = 'vehicle-land' - dset.name_to_cat['Truck_Bus']['supercategory'] = 'vehicle-land' - dset.name_to_cat['SUVPickupTruck']['supercategory'] = 'vehicle-land' - dset.name_to_cat['MotorcycleScooter']['supercategory'] = 'vehicle-land' - - dset.name_to_cat['VegetationMisc']['supercategory'] = 'life-inanimate' - dset.name_to_cat['Tree']['supercategory'] = 'life-inanimate' - - dset.name_to_cat['Column_Pole']['supercategory'] = 'civil-structure' - dset.name_to_cat['Fence']['supercategory'] = 'civil-structure' - dset.name_to_cat['Wall']['supercategory'] = 'civil-structure' - dset.name_to_cat['Building']['supercategory'] = 'civil-structure' - 
dset.name_to_cat['Archway']['supercategory'] = 'civil-structure' - dset.name_to_cat['Bridge']['supercategory'] = 'civil-structure' - dset.name_to_cat['Tunnel']['supercategory'] = 'civil-structure' - - dset.name_to_cat['TrafficCone']['supercategory'] = 'civil-notice' - dset.name_to_cat['TrafficLight']['supercategory'] = 'civil-notice' - dset.name_to_cat['LaneMkgsDriv']['supercategory'] = 'civil-notice' - dset.name_to_cat['LaneMkgsNonDriv']['supercategory'] = 'civil-notice' - dset.name_to_cat['SignSymbol']['supercategory'] = 'civil-notice' - dset.name_to_cat['ParkingBlock']['supercategory'] = 'civil-notice' - dset.name_to_cat['Misc_Text']['supercategory'] = 'civil-notice' - - dset.name_to_cat['Road']['supercategory'] = 'drive-way' - dset.name_to_cat['RoadShoulder']['supercategory'] = 'drive-way' - dset.name_to_cat['Sidewalk']['supercategory'] = 'walk-way' - - for cat in list(dset.cats.values()): - parent = cat.get('supercategory', None) - if parent is not None: - if parent not in dset.name_to_cat: - print('Missing parent = {!r}'.format(parent)) - dset.add_category(name=parent, supercategory=parent) - - if 0: - graph = dset.category_graph() - import graphid - graphid.util.show_nx(graph) - - # Add in some hierarcy information - if 0: - for x in dset.name_to_cat: - print("dset.name_to_cat[{!r}]['supercategory'] = 'object'".format(x)) - - if 0: - example_cat_aids = [] - for cat in dset.cats.values(): - cname = cat['name'] - aids = dset.index.cid_to_aids[dset.name_to_cat[cname]['id']] - if len(aids): - aid = ub.peek(aids) - example_cat_aids.append(aid) - else: - print('No examples of cat = {!r}'.format(cat)) - - import xdev - import kwplot - kwplot.autompl() - for aid in xdev.InteractiveIter(example_cat_aids): - print('aid = {!r}'.format(aid)) - ann = dset.anns[aid] - cat = dset.cats[ann['category_id']] - print('cat = {!r}'.format(cat)) - dset.show_image(aid=aid) - xdev.InteractiveIter.draw() - - if 0: - cname = 'CartLuggagePram' - cname = 'ParkingBlock' - cname = 
'LaneMkgsDriv' - aids = dset.index.cid_to_aids[dset.name_to_cat[cname]['id']] - if len(aids): - aid = ub.peek(aids) - print('aid = {!r}'.format(aid)) - ann = dset.anns[aid] - cat = dset.cats[ann['category_id']] - print('cat = {!r}'.format(cat)) - dset.show_image(aid=aid) +""" +DEPRECATED. Moved to kwcoco.data +""" def main(): @@ -596,28 +11,13 @@ def main(): ~/.cache/netharn/camvid/camvid-master The four files will be: - ~/.cache/netharn/camvid/camvid-master/camvid-full.mscoco.json - ~/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json - ~/.cache/netharn/camvid/camvid-master/camvid-vali.mscoco.json - ~/.cache/netharn/camvid/camvid-master/camvid-test.mscoco.json + ~/.cache/kwcoco/camvid/camvid-master/camvid-full.mscoco.json + ~/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json + ~/.cache/kwcoco/camvid/camvid-master/camvid-vali.mscoco.json + ~/.cache/kwcoco/camvid/camvid-master/camvid-test.mscoco.json """ - coco_dset = grab_coco_camvid() - - # Use the same train/test/vali splits used in segnet - gid_subsets = grab_camvid_train_test_val_splits(coco_dset, mode='segnet') - dpath = coco_dset.dataset['img_root'] - - # Dump the full dataset - fpath = join(dpath, 'camvid-full.mscoco.json') - print(fpath) - coco_dset.dump(open(fpath, 'w')) - - # Dump the train/vali/test splits - for tag, gids in gid_subsets.items(): - subset = coco_dset.subset(gids) - fpath = join(dpath, 'camvid-{}.mscoco.json'.format(tag)) - print(fpath) - subset.dump(open(fpath, 'w')) + from kwcoco.data import grab_camvid + return grab_camvid.main() if __name__ == '__main__': """ diff --git a/netharn/device.py b/netharn/device.py index 8cc92558d8ae9184420f56b5ad831cf4f749137e..0bcbd3dae1a4f4426d51040fcd074ae415a3b415 100644 --- a/netharn/device.py +++ b/netharn/device.py @@ -11,8 +11,10 @@ import torch import six import os from netharn import util -from torch._six import container_abcs - +try: + import collections.abc as container_abcs +except Exception: + from torch._six import 
container_abcs __all__ = ['XPU'] diff --git a/netharn/examples/classification.py b/netharn/examples/classification.py index ab40beb678a868c566fbe1f98ff426f96ac4e083..df8f752cf915ee858d1062227c7c6b7451bec307 100644 --- a/netharn/examples/classification.py +++ b/netharn/examples/classification.py @@ -31,6 +31,7 @@ yourself. --train_dataset=./toydata_train.json \ --vali_dataset=./toydata_vali.json \ --test_dataset=./toydata_test.json \ + --workdir=$HOME/work/netharn \ --input_dims=224,244 \ --batch_size=32 \ --max_epoch=100 \ @@ -40,6 +41,31 @@ yourself. --augmenter=medium \ --lr=1e-3 + +Equivalently you could call this via python + +.. code-block:: python + + from netharn.examples.classification import setup_harn + + kwargs = { + 'name': 'My Classification Example', + 'train_dataset': './toydata_train.json', + 'vali_dataset': './toydata_vali.json', + 'workdir': '$HOME/work/netharn', + 'input_dims': (224, 244), + 'batch_size': 32, + 'max_epoch': 100, + 'patience': 40, + 'xpu': 'auto', + 'schedule': 'ReduceLROnPlateau-p10-c10', + 'augmenter': 'medium', + 'lr': 1e-3, + } + + harn = setup_harn(**kwargs) + harn.run() + # TODO: describe what the output of this should look like. """ @@ -283,6 +309,7 @@ class ClfDataset(torch.utils.data.Dataset): DataLoader. There is little netharn-specific about this class. 
Example: + >>> from netharn.examples.classification import * # NOQA >>> import ndsampler >>> sampler = ndsampler.CocoSampler.demo() >>> self = ClfDataset(sampler) @@ -343,7 +370,10 @@ class ClfDataset(torch.utils.data.Dataset): import kwimage # Load sample image and category - sample = self.sampler.load_positive(index, with_annots=False) + # sample = self.sampler.load_positive(index, with_annots=False) + tr = self.sampler.regions.get_positive(index) + sample = self.sampler.load_sample(tr, with_annots=False) + image = kwimage.atleast_3channels(sample['im'])[:, :, 0:3] target = sample['tr'] @@ -403,15 +433,6 @@ class ClfDataset(torch.utils.data.Dataset): if len(self) == 0: raise Exception('must have some data') - def worker_init_fn(worker_id): - for i in range(worker_id + 1): - seed = np.random.randint(0, int(2 ** 32) - 1) - seed = seed + worker_id - kwarray.seed_global(seed) - if self.augmenter: - rng = kwarray.ensure_rng(None) - self.augmenter.seed_(rng) - loaderkw = { 'num_workers': num_workers, 'pin_memory': pin_memory, @@ -437,6 +458,20 @@ class ClfDataset(torch.utils.data.Dataset): return loader +def worker_init_fn(worker_id, augmenter=None): + for i in range(worker_id + 1): + seed = np.random.randint(0, int(2 ** 31) - 1) + seed = seed + worker_id + kwarray.seed_global(seed) + + worker_info = torch.utils.data.get_worker_info() + self = worker_info.dataset + + if self.augmenter: + rng = kwarray.ensure_rng(None) + self.augmenter.seed_(rng) + + class ClfHarn(nh.FitHarn): """ The Classification Harness diff --git a/netharn/examples/object_detection.py b/netharn/examples/object_detection.py index a6cc9103ab407972d239aba47dd170a76f8c6da0..82280ced354e9e590a6dac522921a0265e626700 100644 --- a/netharn/examples/object_detection.py +++ b/netharn/examples/object_detection.py @@ -151,12 +151,13 @@ class DetectDataset(torch.utils.data.Dataset): Example: >>> # DISABLE_DOCTSET - >>> self = DetectDataset.demo(backend='npy') + >>> from netharn.examples.object_detection import * 
# NOQA + >>> self = DetectDataset.demo(backend=None) >>> index = 0 >>> item = self[index] >>> hwc01 = item['im'].numpy().transpose(1, 2, 0) >>> print(hwc01.shape) - >>> norm_boxes = item['label']['targets'].numpy().reshape(-1, 5)[:, 1:5] + >>> norm_boxes = item['label']['cxywh'].numpy().reshape(-1, 4)[:, 0:4] >>> inp_size = hwc01.shape[-2::-1] >>> # xdoc: +REQUIRES(--show) >>> import kwplot @@ -483,7 +484,7 @@ class DetectHarn(nh.FitHarn): """ Convert batch groundtruth to coco-style annotations for scoring """ indices = labels['indices'] orig_sizes = labels['orig_sizes'] - targets = labels['targets'] + targets = labels['cxywh'] gt_weights = labels['gt_weights'] letterbox = harn.datasets[harn.current_tag].letterbox diff --git a/netharn/examples/segmentation.py b/netharn/examples/segmentation.py index 0f297871ccf19389b4e87fef2bd44307c7486053..d72249b497cd37df5778b2fe9714cef7441ee673 100644 --- a/netharn/examples/segmentation.py +++ b/netharn/examples/segmentation.py @@ -19,7 +19,7 @@ class SegmentationConfig(scfg.Config): Default configuration for setting up a training session """ default = { - 'nice': scfg.Value('untitled', help='A human readable tag that is "nice" for humans'), + 'name': scfg.Value('untitled', help='A human readable tag that is "nice" for humans'), 'workdir': scfg.Path('~/work/sseg', help='Dump all results in your workdir'), 'workers': scfg.Value(0, help='number of parallel dataloading jobs'), @@ -75,11 +75,12 @@ class SegmentationDataset(torch.utils.data.Dataset): >>> # DISABLE_DOCTEST >>> #input_dims = (224, 224) >>> # xdoctest: +REQUIRES(module:ndsampler) + >>> from netharn.examples.segmentation import * # NOQA >>> import ndsampler >>> sampler = ndsampler.CocoSampler.demo('shapes') >>> input_dims = (512, 512) >>> self = dset = SegmentationDataset(sampler, input_dims) - >>> output = self[10] + >>> output = self[1] >>> # xdoctest: +REQUIRES(--show) >>> import kwplot >>> plt = kwplot.autoplt() @@ -177,7 +178,7 @@ class 
SegmentationDataset(torch.utils.data.Dataset): """ Example: >>> # DISABLE_DOCTEST - >>> self = SegmentationDataset.demo(augment=True) + >>> self = SegmentationDataset.demo(augmenter=True) >>> output = self[10] >>> # xdoctest: +REQUIRES(--show) >>> import kwplot @@ -282,10 +283,17 @@ class SegmentationDataset(torch.utils.data.Dataset): return heatmap def _colorized_labels(self, cidxs): - self.cx_to_color = np.array([ - self.sampler.dset.name_to_cat[self.classes[cx]]['color'] + dset = self.sampler.dset + cx_to_color = [ + dset.name_to_cat.get(self.classes[cx], {}).get('color', None) for cx in range(len(self.cid_to_cidx)) - ]) + ] + if any(color is None for color in cx_to_color): + defaults = kwimage.Color.distinct(len(cx_to_color)) + for cx, color in enumerate(cx_to_color): + if color is None: + cx_to_color[cx] = defaults[cx] + self.cx_to_color = np.array(cx_to_color) colorized = self.cx_to_color[cidxs] return colorized @@ -294,7 +302,8 @@ class SegmentationDataset(torch.utils.data.Dataset): # from grab_camvid import grab_coco_camvid # dset = grab_coco_camvid() import ndsampler - sampler = ndsampler.CocoSampler.demo('shapes', workdir=None, backend='npy') + sampler = ndsampler.CocoSampler.demo( + 'shapes', workdir=None, backend=None) self = cls(sampler, **kwargs) return self @@ -397,6 +406,7 @@ class SegmentationHarn(nh.FitHarn): >>> kwplot.autompl() >>> kwplot.imshow(toshow) """ + import cv2 im = batch['im'].data.cpu().numpy() class_true = batch['class_idxs'].data.cpu().numpy() class_pred = outputs['class_probs'].data.cpu().numpy().argmax(axis=1) @@ -404,10 +414,8 @@ class SegmentationHarn(nh.FitHarn): batch_imgs = [] for bx in range(min(len(class_true), lim)): - orig_img = im[bx].transpose(1, 2, 0) - import cv2 out_size = class_pred[bx].shape[::-1] orig_img = cv2.resize(orig_img, tuple(map(int, out_size))) @@ -713,7 +721,7 @@ def setup_harn(cmdline=True, **kw): # Create hyperparameters hyper = nh.HyperParams( - nice=config['nice'], + nice=config['name'], 
workdir=config['workdir'], xpu=nh.XPU.coerce(config['xpu']), @@ -771,35 +779,53 @@ def main(): if __name__ == '__main__': - """ + r""" CommandLine: conda install gdal + # Use the kwcoco-coercable toydata dataset names python -m netharn.examples.segmentation \ - --nice=shapes_demo --datasets=shapes32 \ + --name=shapes_demo --datasets=shapes32 \ --workers=0 --xpu=cpu + # Or write the toy data explicitly using the kwcoco CLI + kwcoco toydata --key shapes32 --dst toy_train.kwcoco.json + kwcoco toydata --key shapes8 --dst toy_vali.kwcoco.json + + # Run on the explicit kwcoco files + python -m netharn.examples.segmentation \ + --name=shapes_segmentation_demo \ + --train_dataset=./toy_train.kwcoco.json \ + --vali_dataset=./toy_vali.kwcoco.json \ + --workers=0 --xpu=cpu + + # You can use MS-COCO files to learn to segment your own data To # demonstrate grab the CamVid dataset (the following script also # transforms camvid into the MS-COCO format) - python -m netharn.data.grab_camvid # Download MS-COCO files + python -m kwcoco.data.grab_camvid # Download MS-COCO files + + python -m netharn.examples.segmentation --workers=4 --xpu=cpu --name=camvid_deeplab \ + --train_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ + --vali_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ + --schedule=step-90-120 --arch=deeplab_v3 --batch_size=8 --lr=1e-5 --input_dims=224,224 --optim=sgd --bstep=8 - python -m netharn.examples.segmentation --workers=4 --xpu=0 --nice=camvid_deeplab \ - --train_dataset=$HOME/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json \ - --vali_dataset=$HOME/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json \ - --schedule=step-90-120 --arch=deeplab --batch_size=8 --lr=1e-5 --input_dims=224,224 --optim=sgd --bstep=8 + python -m netharn.examples.segmentation --workers=4 --xpu=0 --name=camvid_deeplab \ + --train_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ + 
--vali_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ + --schedule=step-90-120 --arch=deeplab_v3 --batch_size=8 --lr=1e-5 --input_dims=224,224 --optim=sgd --bstep=8 - python -m netharn.examples.segmentation --workers=4 --xpu=auto --nice=camvid_psp_wip \ - --train_dataset=$HOME/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json \ - --vali_dataset=$HOME/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json \ + python -m netharn.examples.segmentation --workers=4 --xpu=auto --name=camvid_psp_wip \ + --train_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ + --vali_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ --schedule=step-90-120 --arch=psp --batch_size=6 --lr=1e-3 --input_dims=512,512 --optim=sgd --bstep=1 # Note you would need to change the path to a pretrained network - python -m netharn.examples.segmentation --workers=4 --xpu=auto --nice=camvid_psp_wip_fine \ - --train_dataset=$HOME/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json \ - --vali_dataset=$HOME/.cache/netharn/camvid/camvid-master/camvid-train.mscoco.json \ + python -m netharn.examples.segmentation --workers=4 --xpu=auto --name=camvid_psp_wip_fine \ + --train_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ + --vali_dataset=$HOME/.cache/kwcoco/camvid/camvid-master/camvid-train.mscoco.json \ --pretrained=$HOME/work/sseg/fit/runs/camvid_psp_wip/fowjplca/deploy_SegmentationModel_fowjplca_134_CZARGB.zip \ --schedule=step-90-120 --arch=psp --batch_size=6 --lr=1e-2 --input_dims=512,512 --optim=sgd --bstep=8 """ diff --git a/netharn/examples/sseg_camvid.py b/netharn/examples/sseg_camvid.py index c89829759136cea366a36391d41cbe1080d2fd4c..fdf397244e465275cb32f32e6da410baab4247d0 100644 --- a/netharn/examples/sseg_camvid.py +++ b/netharn/examples/sseg_camvid.py @@ -4,6 +4,8 @@ An train an example semenatic segmenation model on the CamVid dataset. 
For a more general segmentation example that works with any (ndsampler-style) MS-COCO dataset see segmentation.py. +NOTE: This will eventually be deprecated and repalced by "segmentation.py" + CommandLine: python ~/code/netharn/examples/sseg_camvid.py --workers=4 --xpu=0 --batch_size=2 --nice=expt1 """ @@ -161,7 +163,10 @@ class SegmentationDataset(torch.utils.data.Dataset): """ import netharn as nh gid_to_slider = {} + self.sampler.dset._ensure_imgsize(fail=True) + for img in self.sampler.dset.imgs.values(): + print('img = {!r}'.format(img)) full_dims = [img['height'], img['width']] slider = nh.util.SlidingWindow(full_dims, input_dims, overlap=input_overlap, @@ -277,7 +282,7 @@ class SegmentationDataset(torch.utils.data.Dataset): @classmethod def demo(cls, **kwargs): - from netharn.data.grab_camvid import grab_coco_camvid + from kwcoco.data.grab_camvid import grab_coco_camvid import ndsampler dset = grab_coco_camvid() sampler = ndsampler.CocoSampler(dset, workdir=None, backend='npy') @@ -790,7 +795,7 @@ def setup_coco_datasets(): - [ ] Do proper train / validation split - [ ] Allow custom train / validation split """ - from netharn.data.grab_camvid import grab_coco_camvid, grab_camvid_train_test_val_splits + from kwcoco.data.grab_camvid import grab_coco_camvid, grab_camvid_train_test_val_splits coco_dset = grab_coco_camvid() # Use the same train/test/vali splits used in segnet @@ -1018,7 +1023,7 @@ def setup_harn(cmdline=True, **kw): else: raise KeyError(config['arch']) - if config['init'] == 'cls': + if config['init'] == 'cls' and hasattr(model_[0], '_initializer_cls'): initializer_ = model_[0]._initializer_cls() # Create hyperparameters diff --git a/netharn/fit_harn.py b/netharn/fit_harn.py index f30bc6e4e51cef03a64229f1b8e0619d2ac4a9a8..54c048ab0315b1021e9751d17f06a0b70e719f72 100644 --- a/netharn/fit_harn.py +++ b/netharn/fit_harn.py @@ -444,6 +444,35 @@ class InitializeMixin(object): else: harn.warn('harn.train_dpath is None, all computation is in memory') + 
if isinstance(harn.preferences['timeout'], str): + import datetime + import parse + text = harn.preferences['timeout'] + + def parse_timedelta_text(text): + candidate_formats = [ + '{microseconds:d}us', + '{milliseconds}ms', + '{minutes}m', + '{seconds}s', + '{hours:d}h', + '{days:d}d', + '{weeks:d}w', + ] + text_ = text.lower() + for fmt in candidate_formats: + result = parse.parse(fmt, text_) + if result is not None: + delta = datetime.timedelta(**result.named) + break + if delta is None: + raise Exception('Unknown time format {}'.format(text)) + return delta + + delta = parse_timedelta_text(text) + print('delta = {!r}'.format(delta)) + harn.preferences['timeout'] = delta.total_seconds() + harn._initialized = True harn.after_initialize() return harn @@ -1498,19 +1527,18 @@ class CoreMixin(object): action = 'resume' if harn.epoch > 0 else 'begin' if harn.preferences['prog_backend'] == 'progiter': - harn.info(ub.color_text('=== {} training {!r} / {!r} : {} ==='.format( + text = '=== {} training {!r} / {!r} : {} ==='.format( action, harn.epoch + 1, harn.monitor.max_epoch, - harn.hyper.name), 'white')) + harn.hyper.name) + harn.info(ub.color_text(text, 'white')) else: harn.info(ub.color_text('=== {} training : {} ==='.format( action, harn.hyper.name), 'white')) - harn.main_prog = harn._make_prog(desc='epoch', - total=harn.monitor.max_epoch, - disable=not harn.preferences['show_prog'], - leave=True, dynamic_ncols=True, - show_wall=True, position=0, - initial=harn.epoch) + harn.main_prog = harn._make_prog( + desc='epoch', total=harn.monitor.max_epoch, disable=not + harn.preferences['show_prog'], leave=True, dynamic_ncols=True, + show_wall=True, position=0, initial=harn.epoch) harn._update_main_prog_desc() # Loader dict should be ordered @@ -1537,9 +1565,17 @@ class CoreMixin(object): if harn.scheduler: if harn.scheduler.__class__.__name__ == 'ReduceLROnPlateau': if vali_loader is None: - raise ValueError('A validation dataset is required to use ReduceLROnPlateau, but 
None was given') + raise ValueError(ub.paragraph( + ''' + A validation dataset is required to use + ReduceLROnPlateau, but None was given + ''')) else: - raise ValueError('A non-empty validation dataset is required to use ReduceLROnPlateau') + raise ValueError(ub.paragraph( + ''' + A non-empty validation dataset is required to + use ReduceLROnPlateau + ''')) ############################# ### THIS IS THE MAIN LOOP ### diff --git a/netharn/initializers/functional.py b/netharn/initializers/functional.py index 126a67866139edbc3620fa26d24ef66372ce3475..b3066dab0d5e6ba0a7466523e9418a82578699b6 100644 --- a/netharn/initializers/functional.py +++ b/netharn/initializers/functional.py @@ -823,6 +823,7 @@ def maximum_common_ordered_subpaths(paths1, paths2, sep='.', mode='embedding'): else: break return score + # return tok1[-1] == tok2[-1] node_affinity = _affinity # import operator diff --git a/netharn/metrics/detect_metrics.py b/netharn/metrics/detect_metrics.py index 2eb84eb87e162bfdc9bac16efb45f0bda271ca1c..4dc738bb211724a298b8228b95d850dc04444639 100644 --- a/netharn/metrics/detect_metrics.py +++ b/netharn/metrics/detect_metrics.py @@ -57,13 +57,13 @@ class DetectionMetrics(ub.NiceRepr): predictions. 
Args: - true_coco (ndsampler.CocoDataset): - pred_coco (ndsampler.CocoDataset): + true_coco (kwcoco.CocoDataset): + pred_coco (kwcoco.CocoDataset): Example: >>> # xdoctest: +REQUIRES(module:ndsampler) - >>> import ndsampler - >>> true_coco = ndsampler.CocoDataset.demo('shapes') + >>> import kwcoco + >>> true_coco = kwcoco.CocoDataset.demo('shapes') >>> pred_coco = true_coco >>> self = DetectionMetrics.from_coco(true_coco, pred_coco) >>> self.score_voc() @@ -480,9 +480,9 @@ class DetectionMetrics(ub.NiceRepr): """ Convert to a coco representation of truth and predictions """ - import ndsampler - true = ndsampler.CocoDataset() - pred = ndsampler.CocoDataset() + import kwcoco + true = kwcoco.CocoDataset() + pred = kwcoco.CocoDataset() for node in dmet.classes: # cid = dmet.classes.graph.node[node]['id'] @@ -875,7 +875,7 @@ def eval_detections_cli(**kw): xdoctest -m ~/code/netharn/netharn/metrics/detect_metrics.py eval_detections_cli """ import scriptconfig as scfg - import ndsampler + import kwcoco class EvalDetectionCLI(scfg.Config): default = { @@ -889,8 +889,8 @@ def eval_detections_cli(**kw): cmdline = kw.pop('cmdline', True) config.load(kw, cmdline=cmdline) - true_coco = ndsampler.CocoDataset(config['true']) - pred_coco = ndsampler.CocoDataset(config['pred']) + true_coco = kwcoco.CocoDataset(config['true']) + pred_coco = kwcoco.CocoDataset(config['pred']) from netharn.metrics.detect_metrics import DetectionMetrics dmet = DetectionMetrics.from_coco(true_coco, pred_coco) diff --git a/netharn/metrics/functional.py b/netharn/metrics/functional.py index d8617fd521918a7104fca90f7bc0bb4968a1c517..c699dc9fb22664a7aa23437c1ae4a7d2df0f3715 100644 --- a/netharn/metrics/functional.py +++ b/netharn/metrics/functional.py @@ -29,9 +29,9 @@ def fast_confusion_matrix(y_true, y_pred, n_labels, sample_weight=None): >>> y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1]) >>> fast_confusion_matrix(y_true, y_pred, 2) array([[4, 2], - [3, 1]]) + [3, 1]]...) 
>>> fast_confusion_matrix(y_true, y_pred, 2).ravel() - array([4, 2, 3, 1]) + array([4, 2, 3, 1]...) """ if sample_weight is None: sample_weight = np.ones(len(y_true), dtype=np.uint8) diff --git a/netharn/metrics/sklearn_alts.py b/netharn/metrics/sklearn_alts.py index b17965ed806cd704329a5f110ba7117b9dd4cc69..f06a789cf8a206e35ea237dd787f6954db8e3bb2 100644 --- a/netharn/metrics/sklearn_alts.py +++ b/netharn/metrics/sklearn_alts.py @@ -27,9 +27,9 @@ def confusion_matrix(y_true, y_pred, n_labels=None, labels=None, >>> y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1]) >>> confusion_matrix(y_true, y_pred, 2) array([[4, 2], - [3, 1]]) + [3, 1]]...) >>> confusion_matrix(y_true, y_pred, 2).ravel() - array([4, 2, 3, 1]) + array([4, 2, 3, 1]...) Benchmarks: import ubelt as ub diff --git a/netharn/util/imutil.py b/netharn/util/imutil.py index 4e183d5baadf842149994100d0c0d2ebac921744..bdd87c3b82f45e703f02e491925a5f1b867c78b7 100644 --- a/netharn/util/imutil.py +++ b/netharn/util/imutil.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +""" +MOSTLY DEPRECATE FOR KWIMAGE +""" from __future__ import absolute_import, division, print_function, unicode_literals import glob from os.path import expanduser, exists, join, basename diff --git a/netharn/util/util_slider.py b/netharn/util/util_slider.py index ef594a4982a21b58931b7e270ba9b38d2aa48b79..21133c2eb90c27a61dcca780824e0d53f762dcc7 100644 --- a/netharn/util/util_slider.py +++ b/netharn/util/util_slider.py @@ -56,10 +56,10 @@ class SlidingWindow(ub.NiceRepr): slide over that basis. 
Example: - >>> import netharn as nh + >>> from netharn.util.util_slider import * # NOQA >>> shape = (10, 10) >>> window = (5, 5) - >>> self = nh.util.SlidingWindow(shape, window) + >>> self = SlidingWindow(shape, window) >>> for i, index in enumerate(self): >>> print('i={}, index={}'.format(i, index)) i=0, index=(slice(0, 5, None), slice(0, 5, None)) @@ -68,6 +68,7 @@ class SlidingWindow(ub.NiceRepr): i=3, index=(slice(5, 10, None), slice(5, 10, None)) Example: + >>> from netharn.util.util_slider import * # NOQA >>> shape = (16, 16) >>> window = (4, 4) >>> self = SlidingWindow(shape, window, overlap=(.5, .25)) @@ -84,8 +85,6 @@ class SlidingWindow(ub.NiceRepr): """ def __init__(self, shape, window, overlap=None, stride=None, keepbound=False, allow_overshoot=False): - import netharn as nh - if overlap is None and stride is None: overlap = 0 @@ -136,7 +135,7 @@ class SlidingWindow(ub.NiceRepr): # NOTE: if we have overshot, then basis shape will not perfectly # align to the original image. This shape will be a bit bigger. 
- self.basis_slices = [list(nh.util.wide_strides_1d(**kw)) + self.basis_slices = [list(_wide_strides_1d(**kw)) for kw in stide_kw] self.basis_shape = [len(b) for b in self.basis_slices] self.n_total = np.prod(self.basis_shape) @@ -196,10 +195,10 @@ class SlidingWindow(ub.NiceRepr): Get a specific item by its flat (raveled) index Example: - >>> import netharn as nh - >>> shape = (220, 220) + >>> from netharn.util.util_slider import * # NOQA >>> window = (10, 10) - >>> self = nh.util.SlidingWindow(shape, window, stride=5) + >>> shape = (20, 20) + >>> self = SlidingWindow(shape, window, stride=5) >>> itered_items = list(self) >>> assert len(itered_items) == len(self) >>> indexed_items = [self[i] for i in range(len(self))] @@ -374,6 +373,7 @@ class Stitcher(ub.NiceRepr): Example: >>> import sys + >>> from netharn.util.util_slider import * # NOQA >>> import netharn as nh >>> # Build a high resolution image and slice it into chips >>> frames = np.random.rand(1, 200, 100, 100).astype(np.float32) @@ -563,6 +563,84 @@ class Stitcher(ub.NiceRepr): return final +def _wide_strides_1d(margin, stop, step=None, start=0, keepbound=False, + check=True): + """ + Helper to generate slices in a single dimension. + + Args: + start (int): starting point (in most cases set this to 0) + + margin (int): the length of the slice (window) + + stop (int): the length of the image dimension + + step (int): the length of each step / distance between slices + + keepbound (bool): if True, a non-uniform step will be taken to ensure + that the right / bottom of the image is returned as a slice if + needed. Such a slice will not obey the overlap constraints. + (Defaults to False) + + check (bool): if True an error will be raised if the window does not + cover the entire extent from start to stop, even if keepbound is + True. 
+ + Yields: + slice : slice in one dimension of size (margin) + + Example: + >>> stop, margin, step = 2000, 360, 360 + >>> keepbound = True + >>> strides = list(_wide_strides_1d(margin, stop, step, keepbound=keepbound, check=False)) + >>> assert all([(s.stop - s.start) == margin for s in strides]) + + Example: + >>> stop, margin, step = 200, 46, 7 + >>> keepbound = True + >>> strides = list(_wide_strides_1d(margin, stop, step, keepbound=False, check=True)) + >>> starts = np.array([s.start for s in strides]) + >>> stops = np.array([s.stop for s in strides]) + >>> widths = stops - starts + >>> assert np.all(np.diff(starts) == step) + >>> assert np.all(widths == margin) + + Example: + >>> import pytest + >>> stop, margin, step = 200, 36, 7 + >>> with pytest.raises(ValueError): + ... list(_wide_strides_1d(margin, stop, step)) + """ + if step is None: + step = margin + + if check: + # see how far off the end we would fall if we didn't check bounds + perfect_final_pos = (stop - start - margin) + overshoot = perfect_final_pos % step + if overshoot > 0: + raise ValueError( + ('margin={} and step={} overshoot endpoint={} ' + 'by {} units when starting from={}').format( + margin, step, stop, overshoot, start)) + pos = start + # probably could be more efficient with numpy here + while True: + endpos = pos + margin + yield slice(pos, endpos) + # Stop once we reached the end + if endpos == stop: + break + pos += step + if pos + margin > stop: + if keepbound: + # Ensure the boundary is always used even if steps + # would overshoot. Could do some other strategy here + pos = stop - margin + else: + break + + from .util_slider_dep import SlidingIndexDataset, SlidingSlices # NOQA diff --git a/netharn/util/util_slider_dep.py b/netharn/util/util_slider_dep.py index c93b8684bc6e6296fc3307c3e40d987c8a351d83..ae86f8e98205dc12dfe03c267d80b68478afa954 100644 --- a/netharn/util/util_slider_dep.py +++ b/netharn/util/util_slider_dep.py @@ -232,7 +232,7 @@ class SlidingSlices(ub.NiceRepr): >>> resized = 
np.pad(resized, padding, mode='constant') >>> # FIXME: Following scale doesnt work right >>> import kwimage - >>> kwimage.imscale(pred.astype(np.uint8), (xscale, yscale))[0].shape + >>> kwimage.imresize(pred.astype(np.uint8), (xscale, yscale))[0].shape """ def slcenter(sl): """ center of the window defined by a slice """ diff --git a/super_setup.py b/super_setup.py index cd8aa644fad94f22909c66a134451a7c746f66a5..ec3b118b50a3f79445f4b9e287572e6d08d04c16 100755 --- a/super_setup.py +++ b/super_setup.py @@ -71,7 +71,7 @@ def parse_version(package): Statically parse the version number from __init__.py CommandLine: - python -c "import setup; print(setup.parse_version('ovharn'))" + python -c "import setup; print(setup.parse_version('netharn'))" """ from os.path import dirname, join import ast @@ -379,16 +379,35 @@ class Repo(ub.NiceRepr): return repo._pygit def develop(repo): + """ + Install this repo in development mode. + """ + # NOTE: We need to ensure build requirements are satisfied! + build_req_fpath = join(repo.dpath, 'requirements/build.txt') + if exists(build_req_fpath): + repo._cmd('pip install -r {}'.format(build_req_fpath), cwd=repo.dpath) + if ub.WIN32: # We can't run a shell file on win32, so lets hope this works import warnings warnings.warn('super_setup develop may not work on win32') repo._cmd('pip install -e .', cwd=repo.dpath) else: - devsetup_script_fpath = join(repo.dpath, 'run_developer_setup.sh') - if not exists(devsetup_script_fpath): - raise AssertionError('Assume we always have run_developer_setup.sh: repo={!r}'.format(repo)) - repo._cmd(devsetup_script_fpath, cwd=repo.dpath) + repo._cmd('pip install -e .', cwd=repo.dpath) + # devsetup_script_fpath = join(repo.dpath, 'run_developer_setup.sh') + # if not exists(devsetup_script_fpath): + # raise AssertionError('Assume we always have run_developer_setup.sh: repo={!r}'.format(repo)) + # repo._cmd(devsetup_script_fpath, cwd=repo.dpath) + + @classmethod + def demo(Repo, ensure=True): + repo = Repo( + 
remote='https://github.com/Erotemic/ubelt.git', + code_dpath=ub.ensuredir(ub.expandpath('~/tmp/demo-repos')), + ) + if ensure: + repo.ensure() + return repo def doctest(repo): if ub.WIN32: @@ -448,7 +467,17 @@ class Repo(ub.NiceRepr): repo.debug('Clone non-existing repo={}'.format(repo)) repo.clone() - def update_to_latest_dev_branch(repo, dry=False): + def upgrade(repo, dry=False): + """ + Look for a "dev" branch with a higher version number and switch to that. + + Example: + >>> from super_setup import * + >>> import ubelt as ub + >>> repo = Repo.demo() + >>> print('repo = {}'.format(repo)) + >>> repo.upgrade() + """ remote = repo._registered_remote() repo._cmd('git fetch {}'.format(remote.name)) repo.info('Fetch was successful') @@ -456,13 +485,19 @@ class Repo(ub.NiceRepr): print('remote_branchnames = {!r}'.format(remote_branchnames)) # Find all the dev branches - dev_branches = [ref for ref in remote.refs - if ref.remote_head.startswith('dev/')] + dev_branches_ = [ref for ref in remote.refs + if ref.remote_head.startswith('dev/')] + + dev_branches = [] + version_tuples = [] + for ref in dev_branches_: + try: + tup = tuple(map(int, ref.remote_head.split('dev/')[1].split('.'))) + dev_branches.append(ref) + version_tuples.append(tup) + except Exception: + pass - version_tuples = [ - tuple(map(int, ref.remote_head.split('dev/')[1].split('.'))) - for ref in dev_branches - ] latest_ref = dev_branches[ub.argmax(version_tuples)] latest_branch = latest_ref.remote_head @@ -483,9 +518,11 @@ class Repo(ub.NiceRepr): try: remote = repo.pygit.remotes[repo.remote] except IndexError: - if not dry: - raise AssertionError('Something went wrong') - else: + repo._ensure_remotes(dry=dry) + try: + remote = repo.pygit.remotes[repo.remote] + except IndexError: + repo.debug('Something went wrong, cannot find remote in git') remote = None if remote is not None: @@ -501,28 +538,10 @@ class Repo(ub.NiceRepr): repo.debug('Requested remote does NOT exist') return remote - def ensure(repo, 
dry=False): + def _ensure_remotes(repo, dry=True): """ - Ensure that the repo is checked out on your local machine, that the - correct branch is checked out, and the upstreams are targeting the - correct remotes. + Ensure that the registered remotes exist in the git repo. """ - if repo.verbose > 0: - if dry: - repo.debug(ub.color_text('Checking {}'.format(repo), 'blue')) - else: - repo.debug(ub.color_text('Ensuring {}'.format(repo), 'blue')) - - if not exists(repo.dpath): - repo.debug('NEED TO CLONE {}: {}'.format(repo, repo.url)) - if dry: - return - - repo.ensure_clone() - - repo._assert_clean() - - # Ensure all registered remotes exist for remote_name, remote_url in repo.remotes.items(): try: remote = repo.pygit.remotes[remote_name] @@ -538,13 +557,36 @@ class Repo(ub.NiceRepr): remote_name, remote_url, repo)) if not dry: repo._cmd('git remote add {} {}'.format(remote_name, remote_url)) + else: + raise AssertionError('In dry mode, cannot ensure remotes') except ShellException: if remote_name == repo.remote: # Only error if the main remote is not available raise + def ensure(repo, dry=False): + """ + Ensure that the repo is checked out on your local machine, that the + correct branch is checked out, and the upstreams are targeting the + correct remotes. 
+ """ + if repo.verbose > 0: + if dry: + repo.debug(ub.color_text('Checking {}'.format(repo), 'blue')) + else: + repo.debug(ub.color_text('Ensuring {}'.format(repo), 'blue')) + + if not exists(repo.dpath): + repo.debug('NEED TO CLONE {}: {}'.format(repo, repo.url)) + if dry: + return + + repo.ensure_clone() + + repo._assert_clean() + # Ensure we have the right remote - remote = repo._registered_remote() + remote = repo._registered_remote(dry=dry) if remote is not None: try: @@ -570,7 +612,11 @@ class Repo(ub.NiceRepr): # Ensure the remote points to the right place if repo.url not in list(remote.urls): - repo.debug('WARNING: The requested url={} disagrees with remote urls={}'.format(repo.url, list(remote.urls))) + repo.debug(ub.paragraph( + ''' + 'WARNING: The requested url={} disagrees with remote + urls={} + ''').format(repo.url, list(remote.urls))) if dry: repo.info('Dry run, not updating remote url') @@ -579,7 +625,27 @@ class Repo(ub.NiceRepr): repo._cmd('git remote set-url {} {}'.format(repo.remote, repo.url)) # Ensure we are on the right branch - if repo.branch != repo.pygit.active_branch.name: + try: + active_branch_name = repo.pygit.active_branch.name + except TypeError: + # We may be on a tag, not a branch + candidates = [tag for tag in repo.pygit.tags if tag.name == repo.branch] + if len(candidates) != 1: + raise + else: + # branch is actually a tag + assert len(candidates) == 1 + want_tag = candidates[0] + is_on_correct_commit = ( + repo.pygit.head.commit.hexsha == want_tag.commit.hexsha + ) + ref_is_tag = True + else: + ref_is_tag = False + tracking_branch = repo.pygit.active_branch.tracking_branch() + is_on_correct_commit = repo.branch == active_branch_name + + if not is_on_correct_commit: repo.debug('NEED TO SET BRANCH TO {} for {}'.format(repo.branch, repo)) if dry: repo.info('Dry run, not setting branch') @@ -594,42 +660,54 @@ class Repo(ub.NiceRepr): except ShellException: raise Exception('does the branch exist on the remote?') - tracking_branch = 
repo.pygit.active_branch.tracking_branch() - if tracking_branch is None or tracking_branch.remote_name != repo.remote: - repo.debug('NEED TO SET UPSTREAM FOR FOR {}'.format(repo)) + if not ref_is_tag: + if tracking_branch is None or tracking_branch.remote_name != repo.remote: + repo.debug('NEED TO SET UPSTREAM FOR FOR {}'.format(repo)) - try: - remote = repo.pygit.remotes[repo.remote] - if not remote.exists(): - raise IndexError - except IndexError: - repo.debug('WARNING: remote={} does not exist'.format(remote)) - else: - if remote.exists(): - remote_branchnames = [ref.remote_head for ref in remote.refs] - if repo.branch not in remote_branchnames: - if dry: - repo.info('Branch name not found in local remote. Dry run, use ensure to attempt to fetch') + try: + remote = repo.pygit.remotes[repo.remote] + if not remote.exists(): + raise IndexError + except IndexError: + repo.debug('WARNING: remote={} does not exist'.format(remote)) + else: + if remote.exists(): + remote_branchnames = [ref.remote_head for ref in remote.refs] + if repo.branch not in remote_branchnames: + if dry: + repo.info('Branch name not found in local remote. Dry run, use ensure to attempt to fetch') + else: + repo.info('Branch name not found in local remote. Attempting to fetch') + repo._cmd('git fetch {}'.format(repo.remote)) + + remote_branchnames = [ref.remote_head for ref in remote.refs] + if repo.branch not in remote_branchnames: + raise Exception('Branch name still does not exist') + + if not dry: + repo._cmd('git branch --set-upstream-to={remote}/{branch} {branch}'.format( + remote=repo.remote, branch=repo.branch + )) else: - repo.info('Branch name not found in local remote. 
Attempting to fetch') - repo._cmd('git fetch {}'.format(repo.remote)) + repo.info('Would attempt to set upstream') - remote_branchnames = [ref.remote_head for ref in remote.refs] - if repo.branch not in remote_branchnames: - raise Exception('Branch name still does not exist') - - if not dry: - repo._cmd('git branch --set-upstream-to={remote}/{branch} {branch}'.format( - remote=repo.remote, branch=repo.branch - )) - else: - repo.info('Would attempt to set upstream') + # Check if the current head is tagged + head_tags = [ + tag for tag in repo.pygit.tags + if tag.commit.hexsha == repo.pygit.head.commit.hexsha + ] # Print some status - repo.debug(' * branch = {} -> {}'.format( - repo.pygit.active_branch.name, - repo.pygit.active_branch.tracking_branch(), - )) + try: + repo.debug(' * branch = {} -> {}'.format( + repo.pygit.active_branch.name, + repo.pygit.active_branch.tracking_branch(), + )) + except Exception: + pass + + if head_tags: + repo.debug(' * head_tags = {}'.format(head_tags)) def pull(repo): repo._assert_clean() @@ -768,23 +846,24 @@ def determine_code_dpath(): DEVEL_REPOS = [ # The util libs { - 'name': 'kwarray', 'branch': 'dev/0.5.15', 'remote': 'public', + 'name': 'kwarray', 'branch': 'dev/0.5.20', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/kwarray.git'}, }, { - 'name': 'kwimage', 'branch': 'dev/0.6.11', 'remote': 'public', + 'name': 'kwimage', 'branch': 'dev/0.7.8', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/kwimage.git'}, }, + # TODO: + # { + # 'name': 'kwannot', 'branch': 'dev/0.1.0', 'remote': 'public', + # 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/kwannot.git'}, + # }, { - 'name': 'kwannot', 'branch': 'dev/0.1.0', 'remote': 'public', - 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/kwannot.git'}, - }, - { - 'name': 'kwcoco', 'branch': 'dev/0.1.10', 'remote': 'public', + 'name': 'kwcoco', 'branch': 'dev/0.2.6', 'remote': 'public', 
'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/kwcoco.git'}, }, { - 'name': 'kwplot', 'branch': 'dev/0.4.8', 'remote': 'public', + 'name': 'kwplot', 'branch': 'dev/0.4.9', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/kwplot.git'}, }, @@ -794,23 +873,23 @@ DEVEL_REPOS = [ 'remotes': {'public': 'git@gitlab.kitware.com:python/liberator.git'}, }, { - 'name': 'torch_liberator', 'branch': 'dev/0.0.5', 'remote': 'public', + 'name': 'torch_liberator', 'branch': 'dev/0.1.1', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/torch_liberator.git'}, }, # For example data and CLI { - 'name': 'scriptconfig', 'branch': 'dev/0.5.8', 'remote': 'public', + 'name': 'scriptconfig', 'branch': 'dev/0.5.9', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:utils/scriptconfig.git'}, }, { - 'name': 'ndsampler', 'branch': 'dev/0.5.14', 'remote': 'public', + 'name': 'ndsampler', 'branch': 'dev/0.6.4', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/ndsampler.git'}, }, # netharn - training harness { - 'name': 'netharn', 'branch': 'dev/0.5.15', 'remote': 'public', + 'name': 'netharn', 'branch': 'dev/0.5.16', 'remote': 'public', 'remotes': {'public': 'git@gitlab.kitware.com:computer-vision/netharn.git'}, }, ] @@ -851,7 +930,6 @@ def main(): if repo.name == MAIN_REPO_NAME: main_repo = repo break - assert main_repo is not None HACK_PROTOCOL = True if HACK_PROTOCOL: @@ -933,7 +1011,8 @@ def main(): @cli_group.add_command @click.command('upgrade', context_settings=default_context_settings) def upgrade(): - main_repo.update_to_latest_dev_branch() + assert main_repo is not None + main_repo.upgrade() cli_group() @@ -944,12 +1023,16 @@ docker run -v $PWD:/io --rm -it $DOCKER_IMAGE bash mkdir -p $HOME/code cd $HOME/code -git clone -b dev/0.5.5 https://gitlab.kitware.com/computer-vision/netharn.git +git clone https://gitlab.kitware.com/computer-vision/netharn.git cd 
$HOME/code/netharn pip install -r requirements/super_setup.txt +python super_setup.py upgrade --serial python super_setup.py ensure --serial +# Seems like sudo is necessary for permission issues in docker +sudo python super_setup.py develop --serial + """ diff --git a/tests/test_yolo_lr.py b/tests/test_yolo_lr.py index 6019812ca81570ad390a9aeca75de15c8d60e1c7..d3f2210256afa33524bee481b99ddd1cf830efb0 100644 --- a/tests/test_yolo_lr.py +++ b/tests/test_yolo_lr.py @@ -85,7 +85,7 @@ def test_yolo_lr(): hyper = { # --- data first 'datasets' : datasets, - 'nice' : 'restart_lr', + 'name' : 'restart_lr', 'workdir' : ub.ensure_app_cache_dir('netharn/test/restart_lr'), 'loaders' : {'batch_size': bsize}, 'xpu' : nh.XPU.coerce('cpu'),