From 813d76da803f37c365d8eb135807340c29ce0b95 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 13 May 2021 09:48:13 -0400
Subject: [PATCH 1/3] Start branch for 0.6.2

---
 CHANGELOG.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d6c3870..2f9233c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,10 @@
 This changelog follows the specifications detailed in: [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release.
 
-## Version 0.6.1 - Unreleased
+## Version 0.6.2 - Unreleased
+
+
+## Version 0.6.1 - Released 2021-05-13
 
 ### Added
 * The target dictionary in `CocoSampler.load_sample` can now accept a `vidid`
-- 
GitLab


From 4af6dc2a9caf8410e36f43f0a5622a4cf0dc68e6 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Tue, 18 May 2021 22:05:51 -0400
Subject: [PATCH 2/3] allow specification of space_slice and time_slice

---
 CHANGELOG.md              |   3 +
 ndsampler/coco_sampler.py | 271 +++++++++++++++++++++++---------------
 2 files changed, 169 insertions(+), 105 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f9233c..112eaa6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 ## Version 0.6.2 - Unreleased
 
+### Added
+* The target dictionary can now contain `space_slice`, `time_slice` or `gids` 
+
 
 ## Version 0.6.1 - Released 2021-05-13
 
diff --git a/ndsampler/coco_sampler.py b/ndsampler/coco_sampler.py
index 2b7e015..5ff82ad 100644
--- a/ndsampler/coco_sampler.py
+++ b/ndsampler/coco_sampler.py
@@ -463,13 +463,18 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
                 infer the key `gid (int)`, to specify an image id.
 
                 For 3D video source objects, tr must contain the key
-                `vidid (int)`, to specify a video id. (NEW in 0.6.1)
+                `vidid (int)`, to specify a video id (NEW in 0.6.1) or
+                `gids List[int]`, as a list of images in a video (NEW in 0.6.2)
 
                 In general, coordinate regions can specified by the key
                 `slices`, a numpy-like "fancy index" over each of the n
                 dimensions. Usually this is a tuple of slices, e.g.
                 (y1:y2, x1:x2) for images and (t1:t2, y1:y2, x1:x2) for videos.
 
+                You may also specify:
+                `space_slice` as (y1:y2, x1:x2) for both 2D images and 3D
+                videos and `time_slice` as t1:t2 for 3D videos.
+
                 Spatial regions can be specified with keys:
                     * 'cx' and 'cy' as the center of the region in pixels.
                     * 'width' and 'height' are in pixels.
@@ -477,17 +482,19 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
                     special string key 'square', which overrides width and
                     height to both be the maximum of the two.
 
-                Temporal regions are currently only specifiable by slices. This
-                will change in the future.
+                Temporal regions are specifiable by `slices`, `time_slice` or
+                an explicit list of `gids`.
 
                 The `aid` key can be specified to indicate a specific
                 annotation to load. This uses the annotation information to
                 infer 'gid', 'cx', 'cy', 'width', and 'height' if they are not
                 present. (NEW in 0.5.10)
 
-                The `channels` key can be specified as a channel code or list
-                of channel codes indicating a subset of channels to load.
-                (NEW in 0.6.1)
+                The `channels` key can be specified as a channel code or
+                    :class:`kwcoco.ChannelSpec` object.  (NEW in 0.6.1)
+
+                as_xarray (bool, default=False):
+                    if True, return the image data as an xarray object
 
             pad (tuple): (height, width) extra context to add to window dims.
                 This helps prevent augmentation from producing boundary effects
@@ -504,16 +511,9 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
                 should be extracted. Valid strings in this list are: boxes,
                 keypoints, and segmentation.
 
-            window_dims (tuple | str): (height, width) overrides the height/width
-                in tr to determine the extracted window size. Can also be
-                'extent' or 'square', which determines the final size using
-                target information.
-
-                DEPRECATED. IF DESIRED SPECIFY IN THE TARGET DICTIONARY
-
         Returns:
             Dict: sample: dict containing keys
-                im (ndarray): image data
+                im (ndarray | DataArray): image / video data
                 tr (dict): contains the same input items as tr but additionally
                     specifies rel_cx and rel_cy, which gives the center
                     of the target w.r.t the returned **padded** sample.
@@ -628,7 +628,10 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             >>> sample_grid = self.new_sample_grid('video_detection', (3, 128, 128))
             >>> tr = sample_grid['positives'][0]
             >>> tr['channels'] = 'B1|B8'
+            >>> tr['as_xarray'] = False
             >>> sample = self.load_sample(tr)
+            >>> print(ub.repr2(sample['tr'], nl=1))
+            >>> print(sample['im'].shape)
             >>> assert sample['im'].shape == (3, 128, 128, 2)
             >>> tr['channels'] = '<all>'
             >>> sample = self.load_sample(tr)
@@ -657,12 +660,27 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             >>> # sample using only an annotation id
             >>> from ndsampler.coco_sampler import *
             >>> self = CocoSampler.demo()
-            >>> tr = {'aid': 1}
+            >>> tr = {'aid': 1, 'as_xarray': True}
+            >>> tr_ = self._infer_target_attributes(tr)
+            >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
+            >>> assert tr_['gid'] == 1
+            >>> assert all(k in tr_ for k in ['cx', 'cy', 'width', 'height'])
+
+            >>> self = CocoSampler.demo('vidshapes8-multispectral')
+            >>> tr = {'aid': 1, 'as_xarray': True}
             >>> tr_ = self._infer_target_attributes(tr)
             >>> assert tr_['gid'] == 1
             >>> assert all(k in tr_ for k in ['cx', 'cy', 'width', 'height'])
+
+            >>> tr = {'vidid': 1, 'as_xarray': True}
+            >>> tr_ = self._infer_target_attributes(tr)
+            >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
+            >>> assert 'gids' in tr_
+
+            >>> tr = {'gids': [1, 2], 'as_xarray': True}
+            >>> tr_ = self._infer_target_attributes(tr)
+            >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
         """
-        window_dims = None
         # we might modify the target
         tr_ = tr.copy()
         if 'aid' in tr_:
@@ -689,71 +707,104 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
                 if 'category_id' not in tr_:
                     tr_['category_id'] = ann['category_id']
 
-        ndims = 3 if 'vidid' in tr_ else 2
+        gid = tr_.get('gid', None)
+        vidid = tr_.get('vidid', None)
+        gids = tr_.get('gids', None)
+        slices = tr_.get('slices', None)
+        time_slice = tr_.get('time_slice', None)
+        space_slice = tr_.get('space_slice', None)
+        window_dims = tr_.get('window_dims', None)
+        vid_gids = None
+        ndim = None
+
+        if vidid is not None or gids is not None:
+            # Video sample
+            if vidid is None:
+                if gids is None:
+                    raise ValueError('ambiguous image or video object id(s)')
+                _vidids = self.dset.images(gids).lookup('video_id')
+                if __debug__:
+                    if not ub.allsame(_vidids):
+                        warnings.warn('sampled gids from different videos')
+                vidid = ub.peek(_vidids)
+                tr_['vidid'] = vidid
+            assert vidid == tr_['vidid']
+            ndim = 3
+        elif gid is not None:
+            # Image sample
+            ndim = 2
+        else:
+            raise ValueError('no source object id(s)')
+
+        # Fix non-determined bounds
+        if ndim == 2:
+            img = self.dset.index.imgs[gid]
+            space_dims = (img['height'], img['width'])
+            data_dims = space_dims
+        elif ndim == 3:
+            video = self.dset.index.videos[vidid]
+            space_dims = (video['height'], video['width'])
+            vid_gids = self.dset.index.vidid_to_gids[vidid]
+            data_dims = (len(vid_gids),) + space_dims
+        else:
+            raise NotImplementedError
+
+        tr_['space_dims'] = space_dims
+        tr_['data_dims'] = data_dims
 
         # other spatial specifiers allowed if slices is not given
         alternate_keys = {'cx', 'cy', 'height', 'width'}
         has_alternate = bool(set(tr_) & alternate_keys)
 
-        if 'slices' in tr_:
-            # Slice was explicitly specified
-            if has_alternate or window_dims:
-                warnings.warn(ub.paragraph(
-                    '''
-                    data_slice was specified, but ignored keys are present
-                    '''))
-        elif not has_alternate:
-            # No region specified. load everything.
-            if ndims == 3:
-                tr_['slices'] = (slice(0, None), slice(0, None), slice(0, None))
-            else:
-                tr_['slices'] = (slice(0, None), slice(0, None))
-        else:
-            if ndims == 3:
-                raise NotImplementedError
-            # A center / width / height was specified
-            center = (tr_['cy'], tr_['cx'])
-            # Determine the requested window size
-            window_dims = tr_.get('window_dims', window_dims)
-            if window_dims is None:
-                window_dims = 'extent'
-
-            if isinstance(window_dims, six.string_types):
-                if window_dims == 'extent':
-                    window_dims = (tr_['height'], tr_['width'])
-                    window_dims = np.ceil(np.array(window_dims)).astype(np.int)
-                    window_dims = tuple(window_dims.tolist())
-                elif window_dims == 'square':
-                    window_dims = (tr_['height'], tr_['width'])
-                    window_dims = np.ceil(np.array(window_dims)).astype(np.int)
-                    window_dims = tuple(window_dims.tolist())
-                    maxdim = max(window_dims)
-                    window_dims = (maxdim, maxdim)
+        if slices is not None:
+            if space_slice is None:
+                if ndim == 3:
+                    space_slice = tr_['space_slice'] = slices[1:3]
+                elif ndim == 2:
+                    space_slice = tr_['space_slice'] = slices[0:2]
                 else:
-                    raise KeyError(window_dims)
-            tr_['window_dims'] = window_dims
-            tr_['slices'] = _center_extent_to_slice(center, window_dims)
-
-        if ndims == 3:
-            vidid = tr['vidid']
-            vid_gids = self.dset.index.vidid_to_gids[vidid]
-
-        if any(sl.stop is None for sl in tr_['slices']):
-            # Fix non-determined bounds
-            if ndims == 2:
-                gid = tr['gid']
-                img = self.dset.index.imgs[gid]
-                data_dims = img['height'], img['width']
+                    raise NotImplementedError
+            if ndim == 3 and gids is None and time_slice is None:
+                time_slice = tr_['time_slice'] = slices[0]
+
+        if space_slice is None:
+            if has_alternate:
+                # A center / width / height was specified
+                center = (tr_['cy'], tr_['cx'])
+                # Determine the requested window size
+                if window_dims is None:
+                    window_dims = 'extent'
+
+                if isinstance(window_dims, six.string_types):
+                    if window_dims == 'extent':
+                        window_dims = (tr_['height'], tr_['width'])
+                        window_dims = np.ceil(np.array(window_dims)).astype(np.int)
+                        window_dims = tuple(window_dims.tolist())
+                    elif window_dims == 'square':
+                        window_dims = (tr_['height'], tr_['width'])
+                        window_dims = np.ceil(np.array(window_dims)).astype(np.int)
+                        window_dims = tuple(window_dims.tolist())
+                        maxdim = max(window_dims)
+                        window_dims = (maxdim, maxdim)
+                    else:
+                        raise KeyError(window_dims)
+                tr_['window_dims'] = window_dims
+                space_slice = _center_extent_to_slice(center, window_dims)
             else:
-                video = self.dset.index.videos[vidid]
-                data_dims = len(vid_gids), video['height'], video['width']
-
-            fixed = []
-            for sl, D in zip(tr_['slices'], data_dims):
-                stop = D if sl.stop is None else sl.stop
-                fixed.append(slice(sl.start, stop, sl.step))
-            tr_['slices'] = tuple(fixed)
-
+                height, width = space_dims
+                space_slice = (slice(0, height), slice(0, width))
+            tr_['space_slice'] = space_slice
+
+        if ndim == 2:
+            tr_['slices'] = slices = space_slice
+        elif ndim == 3:
+            if time_slice is None:
+                time_slice = tr['time_slice'] = slice(0, len(vid_gids))
+            if gids is None:
+                gids = tr_['gids'] = vid_gids[time_slice]
+            tr_['slices'] = slices = (time_slice,) + space_slice
+        else:
+            raise NotImplementedError(ndim)
         return tr_
 
     @profile
@@ -764,6 +815,7 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             >>> from ndsampler.coco_sampler import *
             >>> self = CocoSampler.demo()
             >>> tr = self.regions.get_positive(0)
+            >>> tr['as_xarray'] = True
             >>> sample = self._load_slice(tr)
             >>> print('sample = {!r}'.format(ub.map_vals(type, sample)))
 
@@ -771,6 +823,12 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             >>> from ndsampler.coco_sampler import *
             >>> self = CocoSampler.demo('vidshapes2')
             >>> tr = self._infer_target_attributes({'vidid': 1})
+            >>> tr['as_xarray'] = True
+            >>> sample = self._load_slice(tr)
+            >>> print('sample = {!r}'.format(ub.map_vals(type, sample)))
+
+            >>> tr = self._infer_target_attributes({'gids': [1, 2, 3]})
+            >>> tr['as_xarray'] = True
             >>> sample = self._load_slice(tr)
             >>> print('sample = {!r}'.format(ub.map_vals(type, sample)))
         """
@@ -783,8 +841,11 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             pad = 0
 
         tr_ = self._infer_target_attributes(tr)
-        assert 'slices' in tr_
+        assert 'space_slice' in tr_
+        data_dims = tr_['data_dims']
+
         requested_slice = tr_['slices']
+
         channels = tr_.get('channels', ub.NoParam)
 
         if channels == '<all>' or channels is ub.NoParam:
@@ -801,19 +862,7 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             pad = tuple(_ensure_iterablen(pad, ndim))
 
             # As of kwcoco 0.2.1 gids are ordered by frame index
-            vid_gids = self.dset.index.vidid_to_gids[vidid]
-            video = self.dset.index.videos[vidid]
-            vid_width = video.get('width', None)
-            vid_height = video.get('height', None)
-            num_frames = len(vid_gids)
-            if vid_height is None or vid_width is None:
-                # Fallback on the first image
-                img = self.dset.imgs[vid_gids[0]]
-                vid_width = img['width']
-                vid_height = img['height']
-
-            vid_dsize = (vid_width, vid_height)
-            data_dims = (num_frames, vid_height, vid_width)
+            vid_dsize = (data_dims[2], data_dims[1])
 
             data_slice, extra_padding = kwarray.embed_slice(
                 requested_slice, data_dims, pad)
@@ -822,7 +871,8 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
             # just load the 2d data for each image
             time_slice, *space_slice = data_slice
             space_slice = tuple(space_slice)
-            time_gids = vid_gids[time_slice]
+
+            time_gids = tr_['gids']
             space_frames = []
 
             slice_height = space_slice[0].stop - space_slice[0].start
@@ -920,31 +970,42 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
 
             # Hack to return some info about dims and not returning the xarray
             # itself. In the future we will likely return the xarray itself.
-            tr_['_coords'] = _data_clipped.coords
-            tr_['_dims'] = _data_clipped.dims
-
-            data_clipped = _data_clipped.values
+            if not tr_.get('as_xarray', False):
+                data_clipped = _data_clipped.values
+                tr_['_coords'] = _data_clipped.coords
+                tr_['_dims'] = _data_clipped.dims
+            else:
+                data_clipped = _data_clipped
 
             # TODO: gids should be padded if it goes oob.
-            tr_['_data_gids'] = time_gids
+            # tr_['_data_gids'] = time_gids
         else:
+            gid = tr_['gid']
             ndim = 2  # number of space-time dimensions (ignore channel)
             pad = tuple(_ensure_iterablen(pad, ndim))
-            gid = tr_['gid']
-            # Determine the image extent
-            img = self.dset.imgs[gid]
-            data_dims = (img['height'], img['width'])
-
             data_slice, extra_padding = kwarray.embed_slice(
                 requested_slice, data_dims, pad)
 
-            # Load the image data
-            # frame = self.frames.load_image(gid)  # TODO: lazy load on slice
-            # im = frame[data_slice]
-
             data_clipped = self.frames.load_region(
                 image_id=gid, region=data_slice, channels=channels)
 
+            if tr_.get('as_xarray', False):
+                # TODO: respect the channels arg in tr_
+                if len(data_clipped.shape) == 1:
+                    num_bands = 1
+                else:
+                    num_bands = data_clipped.shape[2]
+
+                xrkw = {}
+                if num_bands == 1:
+                    xrkw['c'] = ['gray']
+                elif num_bands == 3:
+                    xrkw['c'] = ['r', 'g', 'b']
+
+                # hack to respect xarray
+                data_clipped = xr.DataArray(
+                    data_clipped, dims=('y', 'x', 'c'), coords=xrkw)
+
         # Apply the padding
         if sum(map(sum, extra_padding)) == 0:
             # No padding was requested
@@ -1013,8 +1074,8 @@ class CocoSampler(abstract_sampler.AbstractSampler, util_misc.HashIdentifiable,
 
         tr = sample['tr']
 
-        if '_data_gids' in tr:
-            gids = tr['_data_gids']
+        if 'gids' in tr:
+            gids = tr['gids']
         else:
             gids = [tr['gid']]
 
-- 
GitLab


From 47c6c8c0ac7a1048bf27cddb86f5082ea1d2c485 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Tue, 18 May 2021 22:12:38 -0400
Subject: [PATCH 3/3] test video grid

---
 ndsampler/coco_regions.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/ndsampler/coco_regions.py b/ndsampler/coco_regions.py
index dd9e065..de962ff 100644
--- a/ndsampler/coco_regions.py
+++ b/ndsampler/coco_regions.py
@@ -863,6 +863,14 @@ def new_video_sample_grid(dset, window_dims, window_overlap=0.0,
         >>> window_dims = (2, 224, 224)
         >>> sample_grid = new_video_sample_grid(dset, window_dims)
         >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
+        >>> # Now try to load a sample
+        >>> tr = sample_grid['positives'][0]
+        >>> import ndsampler
+        >>> sampler = ndsampler.CocoSampler(dset)
+        >>> tr_ = sampler._infer_target_attributes(tr)
+        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
+        >>> sample = sampler.load_sample(tr)
+        >>> assert sample['im'].shape == (2, 224, 224, 5)
 
     Ignore:
         import xdev
@@ -915,10 +923,14 @@ def new_video_sample_grid(dset, window_dims, window_overlap=0.0,
                 region_aids.append(aids)
 
             pos_aids = sorted(ub.flatten(region_aids))
+            space_slice = region[1:3]
+            time_slice = region[0]
 
             tr = {
                 'vidid': vidid,
-                'slices': region,
+                'time_slice': time_slice,
+                'space_slice': space_slice,
+                # 'slices': region,
                 'gids': region_gids,
                 'aids': pos_aids,
             }
@@ -950,6 +962,15 @@ def new_image_sample_grid(dset, window_dims, window_overlap=0.0,
         >>> window_dims = (224, 224)
         >>> sample_grid = new_image_sample_grid(dset, window_dims)
         >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
+        >>> # Now try to load a sample
+        >>> tr = sample_grid['positives'][0]
+        >>> import ndsampler
+        >>> sampler = ndsampler.CocoSampler(dset)
+        >>> tr['channels'] = '<all>'
+        >>> tr_ = sampler._infer_target_attributes(tr)
+        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
+        >>> sample = sampler.load_sample(tr)
+        >>> assert sample['im'].shape == (224, 224, 5)
 
     Ignore:
         import xdev
-- 
GitLab