diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b1192fbf013307cc5ed9d77e6b8f7d75f1997b25..663175ab1078f7192335e497530bc47bc10e884b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,661 +1,423 @@ -# Abuse YAML notation to make a heredoc. This will be ignored by the CI. -.__heredoc__: &__heredoc__ - - | - - NOTE: INSTRUCTION HAVE BEEN MOVED TO ./dev/setup_secrets.sh - This file should need minimal modification. - - Template for this files is from - ~/code/xcookie/.gitlab-ci.yml - - Templates used in: - - ~/code/kwplot/.gitlab-ci.yml - ~/code/kwimage/.gitlab-ci.yml - ~/code/kwarray/.gitlab-ci.yml - ~/code/kwcoco/.gitlab-ci.yml - - ~/code/ndsampler/.gitlab-ci.yml - - - Enable the opencv-hack if needed, and turn on/off the desired versions - of Python. - +# Autogenerated by ~/code/xcookie/xcookie/builders/gitlab_ci.py stages: - - build - - test - - gpgsign - - deploy - - -### TEMPLATES ### -# Define common templates using YAML anchors - +# TEMPLATES +- build +- test +- gpgsign +- deploy .common_template: &common_template - tags: - # Tags define which runners will accept which jobs - - docker - - linux - - build - - variables: - # Change pip's cache directory to be inside the project directory since we can - # only cache local items. - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" - - except: - # Don't run the pipeline for new tags - - tags - - cache: - paths: - - .cache/pip - - -.build_template: &build_template + tags: # Tags define which runners will accept which jobs - <<: - - *common_template - - stage: - build - - before_script: - - python -V # Print out python version for debugging - - script: - #- python setup.py bdist_wheel --universal - - python setup.py bdist_wheel - - artifacts: - paths: - - dist/*.whl - - -.common_test_template: &common_test_template - # Tags define which runners will accept which jobs - <<: - - *common_template - - variables: - # Change pip's cache directory to be inside the project directory since we can - # only cache local items. - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" - - stage: - test - - script: - - ./run_tests.py - - # Coverage is a regex that will parse the coverage from the test stdout - coverage: '/TOTAL.+ ([0-9]{1,3}%)/' - - #except: - # refs: - # - release - # - tags - # changes: - # - README.rst - # - CHANGELOG.md - + - docker + - linux-x86_64 + - build -# Define anchors to be used in "before_script" parts -._setup_virtualenv_template: &_setup_virtualenv_template |- - python -V # Print out python version for debugging + variables: + # Change pip's cache directory to be inside the project directory + # since we can only cache local items. 
+ PIP_CACHE_DIR: $CI_PROJECT_DIR/.cache/pip + except: + # Don't run the pipeline for new tags + - tags + + cache: + paths: + - .cache/pip +.build_template: &build_template + <<: *common_template + stage: build + before_script: + - 'python -V # Print out python version for debugging' + script: + - python -m pip install pip -U + - python -m pip install setuptools>=0.8 wheel build + - python -m build --wheel --outdir wheelhouse + artifacts: + paths: + - ./wheelhouse/netharn*.whl +.test_template: &test_template + <<: *common_template + stage: test + coverage: /TOTAL.+ ([0-9]{1,3}%)/ +.test_minimal-loose_template: &test_minimal-loose_template + <<: *test_template + before_script: + - |- + # Setup the correct version of python (which should be the same as this instance) + python --version # Print out python version for debugging export PYVER=$(python -c "import sys; print('{}{}'.format(*sys.version_info[0:2]))") - pip install virtualenv - virtualenv venv$PYVER + python -m pip install virtualenv + python -m virtualenv venv$PYVER source venv$PYVER/bin/activate pip install pip -U pip install pip setuptools -U - python -V # Print out python version for debugging - - -.test_minimal_strict_template: &test_minimal_strict_template - # Tags define which runners will accept which jobs - <<: - - *common_test_template - - before_script: - - *_setup_virtualenv_template - # - pip install .[tests-strict] # xcookie: +COMMENT_IF(cv2) - - pip install .[tests-strict,headless-strict] # xcookie: +UNCOMMENT_IF(cv2) - - -.test_full_strict_template: &test_full_strict_template - # Tags define which runners will accept which jobs - <<: - - *common_test_template - - before_script: - - *_setup_virtualenv_template - # - pip install .[all-strict] # xcookie: +COMMENT_IF(cv2) - - pip install .[all-strict,headless-strict] # xcookie: +UNCOMMENT_IF(cv2) - #- pip install GDAL==3.3.3 --find-links https://girder.github.io/large_image_wheels -U # xcookie: +UNCOMMENT_IF(gdal) - -.test_minimal_loose_template: &test_minimal_loose_template - # Tags define which runners will accept which jobs - <<: - - *common_test_template - - before_script: - - *_setup_virtualenv_template - # - pip install .[tests] # xcookie: +COMMENT_IF(cv2) - - pip install .[tests,headless] # xcookie: +UNCOMMENT_IF(cv2) - - -.test_full_loose_template: &test_full_loose_template - # Tags define which runners will accept which jobs - <<: - - *common_test_template - - before_script: - - *_setup_virtualenv_template - # - pip install .[all] # xcookie: +COMMENT_IF(cv2) - - pip install .[all,headless] # xcookie: +UNCOMMENT_IF(cv2) - #- pip install GDAL>=3.3.3 --find-links https://girder.github.io/large_image_wheels -U # xcookie: +UNCOMMENT_IF(gdal) - - - -.gpgsign_template: &gpgsign_template - <<: - - *common_template - - stage: - gpgsign - - script: - - | - export GNUPGHOME=$(mktemp -d) - export GPG_EXECUTABLE=gpg - export GPG_KEYID=$(cat dev/public_gpg_key) - echo "GPG_KEYID = $GPG_KEYID" - source dev/secrets_configuration.sh - # note the variable pointed to by VARNAME_CI_SECRET is a protected variables only available on main and release branch - CI_SECRET=${!VARNAME_CI_SECRET} - $GPG_EXECUTABLE --version - openssl version - $GPG_EXECUTABLE --list-keys - $GPG_EXECUTABLE --list-keys - # Decrypt and import GPG Keys / trust - GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import - GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in 
dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust - GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import - $GPG_EXECUTABLE --list-keys || echo "first one fails for some reason" - $GPG_EXECUTABLE --list-keys - # The publish script only builds wheels and does gpg signing if DO_UPLOAD is no - pip install requests[security] twine - DO_GPG=True GPG_KEYID=$GPG_KEYID TWINE_PASSWORD=$TWINE_PASSWORD TWINE_USERNAME=$TWINE_USERNAME GPG_EXECUTABLE=$GPG_EXECUTABLE DEPLOY_BRANCH=release DO_TAG=False DO_UPLOAD=False ./publish.sh - - artifacts: - paths: - - dist/*.asc - - dist/*.tar.gz - - dist/*.whl - - only: - refs: - # Gitlab will only expose protected variables on protected branches - # (which I've set to be main and release), so only run this stage - # there. - - main - - master - - release - - -.deploy_template: &deploy_template - <<: - - *common_template - - stage: - deploy - - script: - - | - export GNUPGHOME=$(mktemp -d) - export GPG_EXECUTABLE=gpg - export GPG_KEYID=$(cat dev/public_gpg_key) - echo "GPG_KEYID = $GPG_KEYID" - # VARNAME_CI_SECRET points to a protected variable only available on main and release branch - source dev/secrets_configuration.sh - CI_SECRET=${!VARNAME_CI_SECRET} - PUSH_TOKEN=${!VARNAME_PUSH_TOKEN} - TWINE_PASSWORD=${!VARNAME_TWINE_PASSWORD} - TWINE_USERNAME=${!VARNAME_TWINE_USERNAME} - $GPG_EXECUTABLE --version - openssl version - $GPG_EXECUTABLE --list-keys - $GPG_EXECUTABLE --list-keys - # Decrypt and import GPG Keys / trust - GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import - GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust - GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import - $GPG_EXECUTABLE --list-keys || echo "first one fails for some reason" - $GPG_EXECUTABLE --list-keys - # Install twine - pip install six pyopenssl ndg-httpsclient pyasn1 -U - pip install requests[security] twine - # Execute the publish script for real this time - TWINE_REPOSITORY_URL=https://upload.pypi.org/legacy/ DO_GPG=True GPG_KEYID=$GPG_KEYID TWINE_PASSWORD=$TWINE_PASSWORD TWINE_USERNAME=$TWINE_USERNAME GPG_EXECUTABLE=$GPG_EXECUTABLE CURRENT_BRANCH=release DEPLOY_BRANCH=release DO_TAG=True DO_UPLOAD=True ./publish.sh || echo "upload already exists" - # Have the server git-tag the release and push the tags - VERSION=$(python -c "import setup; print(setup.VERSION)") - # do sed twice to handle the case of https clone with and without a read token - - | - URL_HOST=$(git remote get-url origin | sed -e 's|https\?://.*@||g' | sed -e 's|https\?://||g' | sed -e 's|git@||g' | sed -e 's|:|/|g') - echo "URL_HOST = $URL_HOST" - # A git config user name and email is required. Set if needed. 
- if [[ "$(git config user.email)" == "" ]]; then - git config user.email "ci@gitlab.org.com" - git config user.name "Gitlab-CI" - fi - TAG_NAME="v${VERSION}" - echo "TAG_NAME = $TAG_NAME" - if [ $(git tag -l "$TAG_NAME") ]; then - echo "Tag already exists" - else - # if we messed up we can delete the tag - # git push origin :refs/tags/$TAG_NAME - # and then tag with -f - git tag $TAG_NAME -m "tarball tag $VERSION" - git push --tags "https://git-push-token:${PUSH_TOKEN}@${URL_HOST}" - fi - - only: - refs: - - release - - -# Aliases for the images that run the tests -.image_python3_10: &image_python3_10 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.10 - #python:3.10 -.image_python39: &image_python39 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.9 - #python:3.9 -.image_python38: &image_python38 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.8 - #python:3.8 -.image_python37: &image_python37 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.7 - #python:3.7 -.image_python36: &image_python36 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.6 - #python:3.6 -.image_python35: &image_python35 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.5 - #python:3.5 -.image_python27: &image_python27 - gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:2.7 - #python:2.7 - - - -### JOBS ### -# Define the actual jobs - - -# --------------- -# Python 3.10 Jobs - - -build/cp3_10-cp3_10-linux: - <<: - - *build_template - image: - *image_python3_10 - -test_full_loose/cp3_10-cp3_10-linux: - <<: - - *test_full_loose_template - image: - *image_python3_10 - needs: - - build/cp3_10-cp3_10-linux - -test_minimal_loose/cp3_10-cp3_10-linux: - <<: - - *test_minimal_loose_template - image: - *image_python3_10 - needs: - - build/cp3_10-cp3_10-linux - -test_full_strict/cp3_10-cp3_10-linux: - <<: - - *test_full_strict_template - image: - *image_python3_10 - needs: - - build/cp3_10-cp3_10-linux - -test_minimal_strict/cp3_10-cp3_10-linux: - <<: - - *test_minimal_strict_template - image: - *image_python3_10 - needs: - - build/cp3_10-cp3_10-linux - -# --------------- -# Python 3.9 Jobs - - -build/cp39-cp39-linux: - <<: - - *build_template - image: - *image_python39 - -test_full_loose/cp39-cp39-linux: - <<: - - *test_full_loose_template - image: - *image_python39 - needs: - - build/cp39-cp39-linux - -test_minimal_loose/cp39-cp39-linux: - <<: - - *test_minimal_loose_template - image: - *image_python39 - needs: - - build/cp39-cp39-linux - -test_full_strict/cp39-cp39-linux: - <<: - - *test_full_strict_template - image: - *image_python39 - needs: - - build/cp39-cp39-linux - -test_minimal_strict/cp39-cp39-linux: - <<: - - *test_minimal_strict_template - image: - *image_python39 - needs: - - build/cp39-cp39-linux - -# --------------- -# Python 3.8 Jobs - -build/cp38-cp38-linux: - <<: - - *build_template - image: - *image_python38 - -test_full_loose/cp38-cp38-linux: - <<: - - *test_full_loose_template - image: - *image_python38 - needs: - - build/cp38-cp38-linux - -test_minimal_loose/cp38-cp38-linux: - <<: - - *test_minimal_loose_template - image: - *image_python38 - needs: - - build/cp38-cp38-linux - -test_full_strict/cp38-cp38-linux: - <<: - - *test_full_strict_template - image: - *image_python38 - needs: - - build/cp38-cp38-linux - -test_minimal_strict/cp38-cp38-linux: - <<: - - *test_minimal_strict_template - image: - *image_python38 - needs: - - build/cp38-cp38-linux - -# for universal builds we only need to gpg sign once 
-gpgsign/cp38-cp38-linux: - <<: - - *gpgsign_template - image: - *image_python38 - -deploy/cp38-cp38-linux: - <<: - - *deploy_template - image: - *image_python38 - - -# --------------- -# Python 3.7 Jobs - - -build/cp37-cp37-linux: - <<: - - *build_template - image: - *image_python37 - -test_full_loose/cp37-cp37-linux: - <<: - - *test_full_loose_template - image: - *image_python37 - needs: - - build/cp37-cp37-linux - -test_minimal_loose/cp37-cp37-linux: - <<: - - *test_minimal_loose_template - image: - *image_python37 - needs: - - build/cp37-cp37-linux - -test_full_strict/cp37-cp37-linux: - <<: - - *test_full_strict_template - image: - *image_python37 - needs: - - build/cp37-cp37-linux - -test_minimal_strict/cp37-cp37-linux: - <<: - - *test_minimal_strict_template - image: - *image_python37 - needs: - - build/cp37-cp37-linux - - -# --------------- -# Python 3.6 Jobs - - -build/cp36-cp36m-linux: - <<: - - *build_template - image: - *image_python36 - -test_full_loose/cp36-cp36m-linux: - <<: - - *test_full_loose_template - image: - *image_python36 - needs: - - build/cp36-cp36m-linux - -test_minimal_loose/cp36-cp36m-linux: - <<: - - *test_minimal_loose_template - image: - *image_python36 - needs: - - build/cp36-cp36m-linux - -test_full_strict/cp36-cp36m-linux: - <<: - - *test_full_strict_template - image: - *image_python36 - needs: - - build/cp36-cp36m-linux - -test_minimal_strict/cp36-cp36m-linux: - <<: - - *test_minimal_strict_template - image: - *image_python36 - needs: - - build/cp36-cp36m-linux - - - -# --------------- -# Python 3.5 Jobs - - -#build/cp35-cp35m-linux: -# <<: -# - *build_template -# image: -# *image_python35 - -#test_full_loose/cp35-cp35m-linux: -# <<: -# - *test_full_loose_template -# image: -# *image_python35 -# needs: -# - build/cp35-cp35m-linux - -#test_minimal_loose/cp35-cp35m-linux: -# <<: -# - *test_minimal_loose_template -# image: -# *image_python35 -# needs: -# - build/cp35-cp35m-linux - -#test_full_strict/cp35-cp35m-linux: -# <<: -# - *test_full_strict_template -# image: -# *image_python35 -# needs: -# - build/cp35-cp35m-linux - -#test_minimal_strict/cp35-cp35m-linux: -# <<: -# - *test_minimal_strict_template -# image: -# *image_python35 -# needs: -# - build/cp35-cp35m-linux - - - -# --------------- -# Python 2.7 Jobs - - -#build/cp27-cp27m-linux: -# <<: -# - *build_template -# image: -# *image_python27 - -#test_full_loose/cp27-cp27m-linux: -# <<: -# - *test_full_loose_template -# image: -# *image_python27 -# needs: -# - build/cp27-cp27m-linux - -#test_minimal_loose/cp27-cp27m-linux: -# <<: -# - *test_minimal_loose_template -# image: -# *image_python27 -# needs: -# - build/cp27-cp27m-linux - -#test_full_strict/cp27-cp27m-linux: -# <<: -# - *test_full_strict_template -# image: -# *image_python27 -# needs: -# - build/cp27-cp27m-linux - -#test_minimal_strict/cp27-cp27m-linux: -# <<: -# - *test_minimal_strict_template -# image: -# *image_python27 -# needs: -# - build/cp27-cp27m-linux - - - -.__local_docker_heredoc__: - - | - # Commands to help developers debug pipelines on their local machine - # Grab the base docker image, (forwarding your ssh credentials), clone - # the watch repo, create the environment, and run the tests. 
- #docker login gitlab.kitware.com:4567 - #IMAGE_NAME=gitlab.kitware.com:4567/computer-vision/ci-docker/gl-python:3.8 - - # Use whatever image is defined for Python39 in this file and start a docker session - IMAGE_NAME=$(cat .gitlab-ci.yml | yq -r '.".image_python3_10"') - IMAGE_NAME=$(cat .gitlab-ci.yml | yq -r '.".image_python37"') - docker run -v $PWD:/io:ro -v $HOME/.cache/pip:/pip_cache -it $IMAGE_NAME bash - - # Will need to chmod things afterwords - export PIP_CACHE_DIR=/pip_cache - echo $PIP_CACHE_DIR - chmod -R o+rw $PIP_CACHE_DIR - chmod -R o+rw $PIP_CACHE_DIR - chmod -R g+rw $PIP_CACHE_DIR - USER=$(whoami) - chown -R $USER $PIP_CACHE_DIR - cd $HOME - git clone /io ./repo - - cd $HOME/repo - - # Make a virtualenv - export PYVER=$(python -c "import sys; print('{}{}'.format(*sys.version_info[0:2]))") - pip install virtualenv - virtualenv venv$PYVER - source venv$PYVER/bin/activate - #pip install pip -U - #pip install pip setuptools -U - - # FULL STRICT VARIANT - pip install -e .[all-strict,headless-strict] - ./run_tests.py - - # FULL LOOSE VARIANT - pip install -e .[all,headless] - ./run_tests.py - - # MINIMAL STRICT VARIANT - pip install -e .[runtime-strict,tests-strict] - ./run_tests.py - - # MINIMAL LOOSE VARIANT - pip install -e .[tests] - ./run_tests.py + pip install pygments + python --version # Print out python version for debugging + script: + - export INSTALL_EXTRAS=tests,headless + - echo "Finding the path to the wheel" + - ls wheelhouse || echo "wheelhouse does not exist" + - echo "Installing helpers" + - pip install pip setuptools>=0.8 setuptools_scm wheel build -U + - pip install tomli pkginfo + - export MOD_NAME=netharn + - echo "MOD_NAME=$MOD_NAME" + - export WHEEL_FPATH=$(python -c "import pathlib; print(str(sorted(pathlib.Path('wheelhouse').glob('$MOD_NAME*.whl'))[-1]).replace(chr(92), + chr(47)))") + - echo "WHEEL_FPATH=$WHEEL_FPATH" + - export MOD_VERSION=$(python -c "from pkginfo import Wheel; print(Wheel('$WHEEL_FPATH').version)") + - echo "MOD_VERSION=$MOD_VERSION" + - pip install --prefer-binary "$MOD_NAME[$INSTALL_EXTRAS]==$MOD_VERSION" -f wheelhouse + - echo "Install finished." 
+ - echo "Creating test sandbox directory" + - WORKSPACE_DNAME="sandbox" + - echo "WORKSPACE_DNAME=$WORKSPACE_DNAME" + - mkdir -p $WORKSPACE_DNAME + - echo "cd-ing into the workspace" + - cd $WORKSPACE_DNAME + - pwd + - ls -al + - '# Get the path to the installed package and run the tests' + - MOD_DPATH=$(python -c "import netharn, os; print(os.path.dirname(netharn.__file__))") + - echo "MOD_DPATH = $MOD_DPATH" + - echo "running the pytest command inside the workspace" + - |- + python -m pytest -p pytester -p no:doctest --xdoctest --cov-config ../pyproject.toml --cov-report term --cov="$MOD_NAME" "$MOD_DPATH" ../tests + - echo "pytest command finished, moving the coverage file to the repo root" +.test_full-loose_template: &test_full-loose_template + <<: *test_template + before_script: + - |- + # Setup the correct version of python (which should be the same as this instance) + python --version # Print out python version for debugging + export PYVER=$(python -c "import sys; print('{}{}'.format(*sys.version_info[0:2]))") + python -m pip install virtualenv + python -m virtualenv venv$PYVER + source venv$PYVER/bin/activate + pip install pip -U + pip install pip setuptools -U + pip install pygments + python --version # Print out python version for debugging + script: + - export INSTALL_EXTRAS=tests,optional,headless + - echo "Finding the path to the wheel" + - ls wheelhouse || echo "wheelhouse does not exist" + - echo "Installing helpers" + - pip install pip setuptools>=0.8 setuptools_scm wheel build -U + - pip install tomli pkginfo + - export MOD_NAME=netharn + - echo "MOD_NAME=$MOD_NAME" + - export WHEEL_FPATH=$(python -c "import pathlib; print(str(sorted(pathlib.Path('wheelhouse').glob('$MOD_NAME*.whl'))[-1]).replace(chr(92), + chr(47)))") + - echo "WHEEL_FPATH=$WHEEL_FPATH" + - export MOD_VERSION=$(python -c "from pkginfo import Wheel; print(Wheel('$WHEEL_FPATH').version)") + - echo "MOD_VERSION=$MOD_VERSION" + - pip install --prefer-binary "$MOD_NAME[$INSTALL_EXTRAS]==$MOD_VERSION" -f wheelhouse + - echo "Install finished." 
+ - echo "Creating test sandbox directory" + - WORKSPACE_DNAME="sandbox" + - echo "WORKSPACE_DNAME=$WORKSPACE_DNAME" + - mkdir -p $WORKSPACE_DNAME + - echo "cd-ing into the workspace" + - cd $WORKSPACE_DNAME + - pwd + - ls -al + - '# Get the path to the installed package and run the tests' + - MOD_DPATH=$(python -c "import netharn, os; print(os.path.dirname(netharn.__file__))") + - echo "MOD_DPATH = $MOD_DPATH" + - echo "running the pytest command inside the workspace" + - |- + python -m pytest -p pytester -p no:doctest --xdoctest --cov-config ../pyproject.toml --cov-report term --cov="$MOD_NAME" "$MOD_DPATH" ../tests + - echo "pytest command finished, moving the coverage file to the repo root" +.test_minimal-strict_template: &test_minimal-strict_template + <<: *test_template + before_script: + - |- + # Setup the correct version of python (which should be the same as this instance) + python --version # Print out python version for debugging + export PYVER=$(python -c "import sys; print('{}{}'.format(*sys.version_info[0:2]))") + python -m pip install virtualenv + python -m virtualenv venv$PYVER + source venv$PYVER/bin/activate + pip install pip -U + pip install pip setuptools -U + pip install pygments + python --version # Print out python version for debugging + script: + - export INSTALL_EXTRAS=tests-strict,runtime-strict,headless-strict + - echo "Finding the path to the wheel" + - ls wheelhouse || echo "wheelhouse does not exist" + - echo "Installing helpers" + - pip install pip setuptools>=0.8 setuptools_scm wheel build -U + - pip install tomli pkginfo + - export MOD_NAME=netharn + - echo "MOD_NAME=$MOD_NAME" + - export WHEEL_FPATH=$(python -c "import pathlib; print(str(sorted(pathlib.Path('wheelhouse').glob('$MOD_NAME*.whl'))[-1]).replace(chr(92), + chr(47)))") + - echo "WHEEL_FPATH=$WHEEL_FPATH" + - export MOD_VERSION=$(python -c "from pkginfo import Wheel; print(Wheel('$WHEEL_FPATH').version)") + - echo "MOD_VERSION=$MOD_VERSION" + - pip install --prefer-binary "$MOD_NAME[$INSTALL_EXTRAS]==$MOD_VERSION" -f wheelhouse + - echo "Install finished." 
+ - echo "Creating test sandbox directory" + - WORKSPACE_DNAME="sandbox" + - echo "WORKSPACE_DNAME=$WORKSPACE_DNAME" + - mkdir -p $WORKSPACE_DNAME + - echo "cd-ing into the workspace" + - cd $WORKSPACE_DNAME + - pwd + - ls -al + - '# Get the path to the installed package and run the tests' + - MOD_DPATH=$(python -c "import netharn, os; print(os.path.dirname(netharn.__file__))") + - echo "MOD_DPATH = $MOD_DPATH" + - echo "running the pytest command inside the workspace" + - |- + python -m pytest -p pytester -p no:doctest --xdoctest --cov-config ../pyproject.toml --cov-report term --cov="$MOD_NAME" "$MOD_DPATH" ../tests + - echo "pytest command finished, moving the coverage file to the repo root" +.test_full-strict_template: &test_full-strict_template + <<: *test_template + before_script: + - |- + # Setup the correct version of python (which should be the same as this instance) + python --version # Print out python version for debugging + export PYVER=$(python -c "import sys; print('{}{}'.format(*sys.version_info[0:2]))") + python -m pip install virtualenv + python -m virtualenv venv$PYVER + source venv$PYVER/bin/activate + pip install pip -U + pip install pip setuptools -U + pip install pygments + python --version # Print out python version for debugging + script: + - export INSTALL_EXTRAS=tests-strict,runtime-strict,optional-strict,headless-strict + - echo "Finding the path to the wheel" + - ls wheelhouse || echo "wheelhouse does not exist" + - echo "Installing helpers" + - pip install pip setuptools>=0.8 setuptools_scm wheel build -U + - pip install tomli pkginfo + - export MOD_NAME=netharn + - echo "MOD_NAME=$MOD_NAME" + - export WHEEL_FPATH=$(python -c "import pathlib; print(str(sorted(pathlib.Path('wheelhouse').glob('$MOD_NAME*.whl'))[-1]).replace(chr(92), + chr(47)))") + - echo "WHEEL_FPATH=$WHEEL_FPATH" + - export MOD_VERSION=$(python -c "from pkginfo import Wheel; print(Wheel('$WHEEL_FPATH').version)") + - echo "MOD_VERSION=$MOD_VERSION" + - pip install --prefer-binary "$MOD_NAME[$INSTALL_EXTRAS]==$MOD_VERSION" -f wheelhouse + - echo "Install finished." 
+ - echo "Creating test sandbox directory" + - WORKSPACE_DNAME="sandbox" + - echo "WORKSPACE_DNAME=$WORKSPACE_DNAME" + - mkdir -p $WORKSPACE_DNAME + - echo "cd-ing into the workspace" + - cd $WORKSPACE_DNAME + - pwd + - ls -al + - '# Get the path to the installed package and run the tests' + - MOD_DPATH=$(python -c "import netharn, os; print(os.path.dirname(netharn.__file__))") + - echo "MOD_DPATH = $MOD_DPATH" + - echo "running the pytest command inside the workspace" + - |- + python -m pytest -p pytester -p no:doctest --xdoctest --cov-config ../pyproject.toml --cov-report term --cov="$MOD_NAME" "$MOD_DPATH" ../tests + - echo "pytest command finished, moving the coverage file to the repo root" +build/cp37-linux-x86_64: + <<: *build_template + image: python:3.7 +test/minimal-loose/cp37-linux-x86_64: + <<: *test_minimal-loose_template + image: python:3.7 + needs: + - build/cp37-linux-x86_64 +test/full-loose/cp37-linux-x86_64: + <<: *test_full-loose_template + image: python:3.7 + needs: + - build/cp37-linux-x86_64 +test/minimal-strict/cp37-linux-x86_64: + <<: *test_minimal-strict_template + image: python:3.7 + needs: + - build/cp37-linux-x86_64 +test/full-strict/cp37-linux-x86_64: + <<: *test_full-strict_template + image: python:3.7 + needs: + - build/cp37-linux-x86_64 +build/cp38-linux-x86_64: + <<: *build_template + image: python:3.8 +test/minimal-loose/cp38-linux-x86_64: + <<: *test_minimal-loose_template + image: python:3.8 + needs: + - build/cp38-linux-x86_64 +test/full-loose/cp38-linux-x86_64: + <<: *test_full-loose_template + image: python:3.8 + needs: + - build/cp38-linux-x86_64 +test/minimal-strict/cp38-linux-x86_64: + <<: *test_minimal-strict_template + image: python:3.8 + needs: + - build/cp38-linux-x86_64 +test/full-strict/cp38-linux-x86_64: + <<: *test_full-strict_template + image: python:3.8 + needs: + - build/cp38-linux-x86_64 +build/cp39-linux-x86_64: + <<: *build_template + image: python:3.9 +test/minimal-loose/cp39-linux-x86_64: + <<: *test_minimal-loose_template + image: python:3.9 + needs: + - build/cp39-linux-x86_64 +test/full-loose/cp39-linux-x86_64: + <<: *test_full-loose_template + image: python:3.9 + needs: + - build/cp39-linux-x86_64 +test/minimal-strict/cp39-linux-x86_64: + <<: *test_minimal-strict_template + image: python:3.9 + needs: + - build/cp39-linux-x86_64 +test/full-strict/cp39-linux-x86_64: + <<: *test_full-strict_template + image: python:3.9 + needs: + - build/cp39-linux-x86_64 +build/cp310-linux-x86_64: + <<: *build_template + image: python:3.10 +test/minimal-loose/cp310-linux-x86_64: + <<: *test_minimal-loose_template + image: python:3.10 + needs: + - build/cp310-linux-x86_64 +test/full-loose/cp310-linux-x86_64: + <<: *test_full-loose_template + image: python:3.10 + needs: + - build/cp310-linux-x86_64 +test/minimal-strict/cp310-linux-x86_64: + <<: *test_minimal-strict_template + image: python:3.10 + needs: + - build/cp310-linux-x86_64 +test/full-strict/cp310-linux-x86_64: + <<: *test_full-strict_template + image: python:3.10 + needs: + - build/cp310-linux-x86_64 +gpgsign/wheels: + <<: *common_template + image: python:3.8 + stage: gpgsign + artifacts: + paths: + - wheelhouse/*.asc + + only: + refs: + # Gitlab will only expose protected variables on protected branches + # (which I've set to be main and release), so only run this stage + # there. 
+      - master
+      - main
+      - release
+  needs:
+    - job: build/cp37-linux-x86_64
+      artifacts: true
+    - job: build/cp38-linux-x86_64
+      artifacts: true
+    - job: build/cp39-linux-x86_64
+      artifacts: true
+    - job: build/cp310-linux-x86_64
+      artifacts: true
+  script:
+    - ls wheelhouse
+    - export GPG_EXECUTABLE=gpg
+    - export GPG_KEYID=$(cat dev/public_gpg_key)
+    - echo "GPG_KEYID = $GPG_KEYID"
+    # Decrypt and import GPG Keys / trust
+    # note the variable pointed to by VARNAME_CI_SECRET is a protected variable only available on the main and release branches
+    - source dev/secrets_configuration.sh
+    - CI_SECRET=${!VARNAME_CI_SECRET}
+    - $GPG_EXECUTABLE --version
+    - openssl version
+    - $GPG_EXECUTABLE --list-keys
+    # note CI_KITWARE_SECRET is a protected variable only available on the main and release branches
+    - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a
+      -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import
+    - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a
+      -in dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust
+    - CIS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CIS -d -a
+      -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import
+    - GPG_SIGN_CMD="$GPG_EXECUTABLE --batch --yes --detach-sign --armor --local-user
+      $GPG_KEYID"
+    - |-
+      WHEEL_PATHS=(wheelhouse/*.whl)
+      WHEEL_PATHS_STR=$(printf '"%s" ' "${WHEEL_PATHS[@]}")
+      echo "$WHEEL_PATHS_STR"
+      for WHEEL_PATH in "${WHEEL_PATHS[@]}"
+      do
+      echo "------"
+      echo "WHEEL_PATH = $WHEEL_PATH"
+      $GPG_SIGN_CMD --output $WHEEL_PATH.asc $WHEEL_PATH
+      $GPG_EXECUTABLE --verify $WHEEL_PATH.asc $WHEEL_PATH || echo "hack, the first run of gpg --verify fails"
+      $GPG_EXECUTABLE --verify $WHEEL_PATH.asc $WHEEL_PATH
+      done
+    - ls wheelhouse
+deploy/wheels:
+  <<: *common_template
+  image: python:3.8
+  stage: deploy
+  only:
+    refs:
+      - release
+  script:
+    - pip install pyopenssl ndg-httpsclient pyasn1 requests[security] twine -U
+    - ls wheelhouse
+    - |-
+      WHEEL_PATHS=(wheelhouse/*.whl)
+      WHEEL_PATHS_STR=$(printf '"%s" ' "${WHEEL_PATHS[@]}")
+      source dev/secrets_configuration.sh
+      TWINE_PASSWORD=${!VARNAME_TWINE_PASSWORD}
+      TWINE_USERNAME=${!VARNAME_TWINE_USERNAME}
+      echo "$WHEEL_PATHS_STR"
+      for WHEEL_PATH in "${WHEEL_PATHS[@]}"
+      do
+      twine check $WHEEL_PATH.asc $WHEEL_PATH
+      twine upload --username $TWINE_USERNAME --password $TWINE_PASSWORD $WHEEL_PATH.asc $WHEEL_PATH || echo "upload already exists"
+      done
+    - |-
+      # Have the server git-tag the release and push the tags
+      export VERSION=$(python -c "import setup; print(setup.VERSION)")
+      # do sed twice to handle the case of an https clone with and without a read token
+      URL_HOST=$(git remote get-url origin | sed -e 's|https\?://.*@||g' | sed -e 's|https\?://||g' | sed -e 's|git@||g' | sed -e 's|:|/|g')
+      source dev/secrets_configuration.sh
+      CI_SECRET=${!VARNAME_CI_SECRET}
+      PUSH_TOKEN=${!VARNAME_PUSH_TOKEN}
+      echo "URL_HOST = $URL_HOST"
+      # A git config user name and email is required. Set if needed.
+ if [[ "$(git config user.email)" == "" ]]; then + git config user.email "ci@gitlab.org.com" + git config user.name "Gitlab-CI" + fi + TAG_NAME="v${VERSION}" + echo "TAG_NAME = $TAG_NAME" + if [ $(git tag -l "$TAG_NAME") ]; then + echo "Tag already exists" + else + # if we messed up we can delete the tag + # git push origin :refs/tags/$TAG_NAME + # and then tag with -f + git tag $TAG_NAME -m "tarball tag $VERSION" + git push --tags "https://git-push-token:${PUSH_TOKEN}@${URL_HOST}" + fi + + +# end \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml index e6a5fd4721cc3af97786e68233977dc6d97f030e..0447e511a539293e8234f8824f55c1ea59467d43 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,33 +1,19 @@ # .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details -# +# # See Also: # https://readthedocs.org/dashboard/netharn/advanced/ # Required version: 2 - -# Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py - -# Build documentation with MkDocs -#mkdocs: -# configuration: mkdocs.yml - -# Optionally build your docs in additional formats such as PDF and ePub formats: all - -# Optionally set the version of Python and requirements required to build your docs python: version: 3.7 install: - - requirements: requirements/docs.txt - - method: pip - path: . - #extra_requirements: - # - docs - -#conda: -# environment: environment.yml + - requirements: requirements/headless.txt + - requirements: requirements/docs.txt + - method: pip + path: . diff --git a/.rules.yml b/.rules.yml new file mode 100644 index 0000000000000000000000000000000000000000..9a82b632cb86ebf4b8c5fe9a956c577bca8671e5 --- /dev/null +++ b/.rules.yml @@ -0,0 +1,35 @@ +# Rules for where jobs can run +# Derived from: https://gitlab.kitware.com/cmake/cmake/-/blob/v3.25.1/.gitlab/rules.yml +# For an overview of gitlab rules see: +# https://docs.gitlab.com/ee/ci/yaml/#workflowrules + +.run_manually: + rules: + - if: '$CI_MERGE_REQUEST_ID' + when: manual + - if: '$CI_COMMIT_REF_PROTECTED == true' + when: on_success + - if: '$CI_PROJECT_PATH == "computer-vision/netharn" && $CI_PIPELINE_SOURCE == "schedule"' + when: on_success + - if: '$CI_PROJECT_PATH == "computer-vision/netharn"' + when: manual + - when: never + +.run_automatically: + rules: + - if: '$CI_MERGE_REQUEST_ID' + when: on_success + - if: '$CI_PROJECT_PATH == "computer-vision/netharn" && $CI_PIPELINE_SOURCE == "schedule"' + when: on_success + - if: '$CI_PROJECT_PATH == "computer-vision/netharn"' + when: delayed + start_in: 5 minutes + - when: never + +.run_dependent: + rules: + - if: '$CI_MERGE_REQUEST_ID' + when: on_success + - if: '$CI_PROJECT_PATH == "computer-vision/netharn"' + when: on_success + - when: never \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 68c14980e4e1a522b7f5fa128bf9b8f995d2f83f..1d1659aa554625867435dc778c789de1e3469c77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,17 @@ This changelog follows the specifications detailed in: [Keep a Changelog](https: This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release. 
-## Version 0.5.19 - Unreleased +## Version 0.6.1 - Unreleased + +### Fixed +* Hotfix so the training loop doesnt crash when `ignore_first_epochs>0` +* np.float np.int np.bool issue + +### Changed +* Reworked dependencies +* tensorboard is now an optional dependency + +## Version 0.5.19 - Released 2022-07-06 ### Added * Ability to ignore the first N epochs when choosing the best model via the `ignore_first` config. diff --git a/LICENSE b/LICENSE index 261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64..daad4f4278a81c13ff693db84bcdb47a28d80360 100644 --- a/LICENSE +++ b/LICENSE @@ -178,7 +178,7 @@ APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" + boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2022 "Kitware Inc" Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/dev/_devcheck_detmetrics.py b/dev/_devcheck_detmetrics.py index 3d8e91f3c9e373eae4e900acbf2887695f94f41e..4e5685be2261f5c80c57f505226128dd34ac6c2f 100644 --- a/dev/_devcheck_detmetrics.py +++ b/dev/_devcheck_detmetrics.py @@ -74,7 +74,7 @@ def voc_eval(lines, recs, classname, ovthresh=0.5, method=False, bias=1): for imagename in imagenames: R = [obj for obj in recs2[imagename] if obj['name'] == classname] bbox = np.array([x['bbox'] for x in R]) - difficult = np.array([x['difficult'] for x in R]).astype(np.bool) + difficult = np.array([x['difficult'] for x in R]).astype(bool) det = [False] * len(R) npos = npos + sum(~difficult) class_recs[imagename] = {'bbox': bbox, @@ -313,7 +313,7 @@ def _devcheck_voc_consistency(): import kwimage true_boxes = kwimage.Boxes.random(num=nboxes, scale=100., rng=rng, format='cxywh') pred_boxes = true_boxes.copy() - pred_boxes.data = pred_boxes.data.astype(np.float) + (rng.rand() * noise) + pred_boxes.data = pred_boxes.data.astype(float) + (rng.rand() * noise) if nbad: pred_boxes.data = np.vstack([ pred_boxes.data, diff --git a/dev/ci_public_gpg_key.pgp.enc b/dev/ci_public_gpg_key.pgp.enc index e338fa207554dad8cf7aa74de623f92e028f7b1e..090c66749139dca6344e776cf6bb01d9d910efb1 100644 --- a/dev/ci_public_gpg_key.pgp.enc +++ b/dev/ci_public_gpg_key.pgp.enc @@ -1,35 +1,49 @@ -U2FsdGVkX195/LDODgm0n4ZK+F/wOH5xAa9XARpCbPD3Xi0NSk0ReJYvHVYru+sy -AgPg4WUiiC2YcOxZLFjev5rDKxiHAWye8bs99JnvAOCPSqaBul4A5jrJwi6dm5VB -imgmGPdK/chWrUAp6+80obMyuiwIBsPbgjuAliE2LvYfevraikRkB4oYIykMrysh -s0blX8J3fQfUKk3myT3KwVs9hpLRcTGUbyiph8rFsj2d2/hI4BojmmTepFtXds2X -4Xw8tk2fpcMBpoR+zVLW+bNR2z9jI1H+Iv6yg2/J6KDbJL3mpK8kuh/BMA3OcBxn -NqK5dYpiXaBf3B8PNsk5K9GlXDmcxfbb+YVa4xAWI3FHDLoOWPWKbfKRZRxYBHXJ -bzP1lkGUMIb0oXVW/k4SPnmZydrQg6lqrWjMSEAwxdyN70vobVLZ3a9s+61cpdWW -rLwuWJIoQo1Pd1JZw1Y8BOlUSIKImX1YGtz0d207OyrM2SMTH2Z6DTrnGhnu2EAJ -OQ44+zkgRXxlb43dK4CRd+SVqR+/Z92HI6yks1LDEPP6LfKDi/0AbHNUiJcUMFjR -dM3q0G+NKVIsRJCAU/bPmWB9FV6cxr1V56BQKAO9TSn+67Uv2d80vw6E91XgDMDM -J+XyBwiA6B2BgRzVbIKssBPX1nrzDyVT3WKXNKSubJDjwe7iDhPLwTwGc66DQ/gn -K75pYQ6vQbnIgNZipcl/ZQxCXSVtuCtSc+qdj0GXIOk4IUDO0qE+7TR99FUPLT5L -Jv+lJ1TUrf+OmvRF59lTdnCx863UYRNajmSWwOdkglJu/CACmv7VfI7884vta7o4 
-Yab2wQe2WhFUmKIGRc18usPh7LmC2k5lsmzRnC0xlacfyOdOyJOXn28VGSaEW1J9 -q31UG9dID1Q8eo2KesVdyyzgDbXUqIMznc9hEpsiGKcUB+YZtoLGPvkQbBIkq2Yw -yVJeBH1IJwfZ91TVDOElA9RHMvfpnyx6TFvlWJ+cuvNxrX2DpCovMDy3opJzv2vN -GJdfVPG+na0NjaD5oHoPDaQDCBDzsd3keh8wJKaatiaRtsfmcYrznZrsdzacQkaN -YmTntMN9Xg35ASQ2VqCYKrU9NKqeHhM+8Sxe4a8/onEZKSbTvHkJnlfzs9oeS8Zk -kNJ+vos0SY6FCtoTgywPSmzDq5G8ijPuaZnRcqcrecHYIBz4S5M1YTbEskVzJY7D -IXiL0LAVj+ftMaRuTronJzCXkK43Zc1wRBI+4D7CylVDE4ZZ3iGp/587Is5UOeGd -GEWM0OeMWalXWUu3wXFyCU/P+tRKUOiV2etrogYfJQ0jjD2aCbxCXXTqbeTpW7UM -o8QUj9vJW3b8M2fDa7nRBYwgz0150GVnOO1kgtUHR7HYghg3lGN1uAJkjT+xBpxr -mY0Lp/nZhs732rHYq347sEz7FEAuZrRdeoR1vU4BEagiOVQdYN2ByHn/6frpUphM -odSNcPowi28N1lmVpEkSX69l2d2K4lKI1ki1ofTvMEf8awmU352OcHVb3/ld/pGI -/uqMFwVannZUPEUByh9iSJV7eZA21YO2bQGRgXoQ4SCkG+2sAYyXlOdUesstN0hH -Uzx3kTSk8XGBhVpQAm2ldp9qnojZbr69vVHLmr+nxtT9g0Qspn73uD7cGjKPrIMu -gzDZMSouPfIKzJy23OJmzTIqqLtEgH3U5BbYx0IknaNG7/dAV/3ik6mjmUVFGBRv -rAPnog6MNW4nvmmS449D3rCjoT8mPHMm3mVMDetaJbOMDP7KMWHIVR/FfWUeQHDX -YkbFxaaGL4kpcQqNMSnb9jitAHbecprYpnYVtS49EG8tE/TktuHpaukkPFPKgiwZ -OhJLfUpOqIwa8CAjoR4lIxBuzYdnz5f9Og+c0nWMW6DVFP6yRz1gPf/RveTbuo4o -liLWfjVBqZaXm+YrE99q2lReLRcLEaqiA5ulyvr84v+2PeP12hUeus8J5OVmVQrw -1j5xTjVTW0Of0ve7JjjQwwZ80V8GIusRBjd9o9cRb5G2q6pt95N8g6+jsUHRqx4U -c7LuMEYT8eTo7/31omk0THS4CP4qMU7yFX251cEtUOjzhlNphiex+nOLbWHgyEqv -ZW51NS+7MwrOlmOg866RRIPL5r9h7i9yjEBi43bwRK38BnJSgsHpgK+pLxA/CE5x -ZMRsU1Vc9e87ncuck3J8rA== +U2FsdGVkX18MudpU8EynNKx1/XQi4RI4+A0yLhmW2RkD/H4ePWG7h3Y6hs7eK5Bk +CFMRoVUFWDw7mTyAS62y47DqIuoxv8YA+TDP7ThF+Wud7wYHgX2E3sDiWW3NSElv +N+84pMHKJ3kpv1uUuqOT9s78/O2NQV4gsGaLKyNvn9BNyOx+P+kJORLo4dJrTw1Q +Wc+D1a3lRloFUb71OM7+36VzG3PrDp/ZIwmy9mmAzG6x2dHRD10QeQ747lLh16pU +WJm+aviHMsmbDBTkZHVJ+YDPiDrarQ48kwIUo0r5WAs2sFkvz9nINl0mvwO+dw30 +/x9JKbE0SEIlCERUTh9Tqowjpi7HtdiPmgXazQCs4klQXFmwD5UM12p62gSGzMEY +lD7hXrPLdqmjvs1R09Nwi8pc88DvWo9+PhdwlcPo9yBVyE71kHRhNL/sHOeoUjDg +7yriXph9TQe3T9+oWWJG/CtMgRUDS+Bmhcr/9VQw4mYbNNZEMR2P7Yo1iFXp1y+m +fU+5NCa/r98+RRKWKu+CUEmO21xD+UZxAfdewRl0iNwv/5SOLk0TjvBvEyT0w3sI +kjFFEJpXn0iH1/rCerV+T8vT+WoD803eWTdDAxYZX8oPR4slDsfT97KX7mlklmX7 +0QITLo8X7/rcZykd9zNQmbAQPuVFDyF2CUBaSkK+VMVmsi5yI1sZkWzj/pILH1kU +PsVeBBRtbXnWJ56XsEx2iFfbVEWoMPzeLByI+7mttPWV1z4n0V+f/hj5BstOrhIy +bFkSa93YiXUTZRdydeGedfoh4tOqA8NO0wN9OpH5QYMx40Y9rIMxOnvfHntaMZQG +rgox/jSMhL3zyiIWDEg1lCDtPfRK8JrRjrs4DtHhptnEqaRnZ22Ogd2LR1ESYs1b +VXppW1Uw0skWkudbTp2Ipm76fd5dYKK7t7kt39hDiQsw4zS8MUN/GZ4q2vTSEWYK +9CCAKhMocNl1efPDcyh6NKQYfNRfD0yP58XkkoB4PORd6zopNSQteqPVBX/cJUoS ++DHYKaOvF4agICMIhsRUB+FhoG3ZenOeRw1pre9AAXKqGWuP2dfRdqK3xHVQzsZc +8LXYkFjIcgxvX8bXU6egcN26k0yzYg4CVzbfYYlgd+l+pWH/Q47KeYFcOQTdReJj +Tv9gyoC9hlm33/q0tyt5BnS5aoLtlasdUGkkLPz8oHaLy/9CMi/Ux2fCldZ3Z481 +cSrFUq/ekjeoNgF3FIZk/J7z0K8NqP6TMMQEiOvMLwazsAB2mEcFMRSB4EoUstSO +2s2H2N7zJEOUfaGHyxi5ZCw1psLw3z8P3sf0ak9nLmM6qI2C1kWAnbV3fdCahnmH +VslfYdFgLV5+i099TCjwdVyZCZ80cDZuSEIrQcoY38jVU9yOrMn8ig+xjCSVxXz7 +0Wr0VFi9GFjjCbuJ3jB7achrQY3HWIXrJcgGO99w43/MjsvDy1vNmrlm2Fa/sDxv +65pMadFk1+lSO11EqI5+npu++EKkecUcVkPlhCQC83ojN1gH2VoDPwYb3r6aulNf +cuF1d45EVnxLzIqEq1KblqbQf+i0G9Av84L19Q58zjeVppk6SDxJYu/sgbpa9D7j +n5RHwEOAbgIBfdClvh2787SR0envTHxrvLROsS5WCxOeFLR9xNEfPyz2bv3OUe9t +4C6E3O1VDsF3adO0DExhe1CM9dI7MsUTTWERJS4PZsgfEVsXDnSjhe/4QOKy1Nya +f2B8qQ/L5zAeSd4nNQn5DlE5CEOuoscfuyyexR5EylFg0qK3Ay5GlJUbeFdL+yO0 +4IAMZ3SHEYe5l71Fio2GaXUswzLXNsVwtt4iFm6BchRq9xaPxArLNlkgql9aI7ev +lwQ0ihZ1Tpx3jXLAgx7voK8FASEgZIg6m992ac7Dkq0phO3iz6XR9CjVrXxvWKg6 +NzE8zrv0KYHnkuI6JC+85eLX/BJOnnQt3iJ23UY7p8t9d4pLUW7bUVZE254Ilmqe +TAojcS144kYWB8JgZ3k69UjNs9GXg2FCJLEJPPl/xnj6vg0c6xdhHQvzMvEhprnw 
+MG2ipK9W797casI1bR4zO/TnMjidqYu1WvUGVRq9ooYYi9J1pisTdTtBdJdokqYz +yeXHpdZyUQDqqEaXOTOagrVpenyCyoU6PaBqZqSR40kbH6meQ41TexR0KGUDxds0 ++WgS0i/6tn2FfXxSQdaXpzDIUp2LdO2FMnrFOeoLeMlcE60aAIo2c5vxniuKQCXv +2VA2wj4yKuQKmKloMcwvLDQ4L2ftGq0dIWxWHNT1CCBJe21mhLcAdmxqaAgDXcnC +UoLaKrqeb1fBD23MfRiF1KsjJ1+BswuWnDBfgm14WtQok3Cp6hu6gwjuSk/aBCUm +hGfqRBvdDcBByXIKi5DQJGh2XgwxQiRMMn8l7AUxR5dZYB86u3I/81cdliaCUzsE +xA5Ez2zaL1vCO0w+8AZJvIFnAcnaZQ5Hjg57+aXiN0k9+0ap1V2ZA4VwV2ZqdzvV +XPzjsaztemBH0467LWRT1RZvZSVmzJHXu7JYu1EFJoUnuikWNGpeMejc4tHovakI +koj5q7DvgFUEqVY1aMldnlDw52lnaUfFMrCaGH1TACIeFPtrklkgWw3WUWrXl6jr +TNTGpAHJHyVvu+T/T1QRtCfy4T33HeaQhcY4bx0osWShG3eGXLI4jC4DGYSnBMwo +vtInTsS0+LpJHOkrIfiPK+TEOtevYtUEFAT8YOL51VcbM+RryTIs3D3sABzbmE93 +WAKKeYUHm8w2jpT8M7o021iVI6eveRaVkPWygpJhIyQJML9gJ0Wbb3qeS1N/wmcQ +U0XLm4TI2BeaoReuAdOBhNifmkpwJP2kxZitSbbvNODBPMGm34soTXC+NX4VdLX4 ++0q0DgEYYiMZOzXsafDpbZm7mBA9qEimjMCCgC0CRZEsXSrAOMIZnwjM5vC05rl6 +IQkxtazPptreBY8RkwMyeWrFxJY4pnyr2VWnZ5bjKhDiOi5e87BxtqP8W8rwxWtm +Cq16w02O6fjkIQQISH/QrPmu7zRE21USo99TFZ2IYx/yxifZrv1pLMZIbtOWUh/5 +r41styPg9lZBqZHZOh8Iaw== diff --git a/dev/ci_secret_gpg_subkeys.pgp.enc b/dev/ci_secret_gpg_subkeys.pgp.enc index 4acec40faec3668ea4ee3112f219a2082f3281f4..b5aa165c4d8056d4e7631c93517a0fc8b03c65da 100644 --- a/dev/ci_secret_gpg_subkeys.pgp.enc +++ b/dev/ci_secret_gpg_subkeys.pgp.enc @@ -1,27 +1,34 @@ -U2FsdGVkX192wtoDMNw97I2z1ltAliLgkHEly1jCJaBuhPqySwrFKmk0At+X7aNW -LGygUWA0Dz7WAKRq1Fe7i4XRy9G9vHTKp9GJC+UV7bXpZhm7Qakx70Car3V2Xt7M -B3Q65TFiJuF/M1JNmYOj8APLv8un/euUsjPEwXtn7V9+O0izySpPbr/RmiFTu6L5 -28LLMiA0m/4ZDIqpFrAht0F/KWrUbn9TmdO+VPBAxXD6ETbuR7dO72vOXVELZr6m -IIq3rNKN3RuVFUQId7KcbDQdeKUmlXI/oeROND1PCq9n04vIhNJ5V+iPFa3uyYc/ -f4uzMdojv40aoRLsmS2YPdK4StkMn2q/yOlnw2KIC7ic+V5hFx3PCTvQPhHikyaZ -y1C8XNpXl8NdOrn1lQlh4yTNIxgD05V8R8eytdj9xD1KruEh6e7yFfLQJcH5j5Qv -EYeYdj153+RSZdkPwzE3mCdX8i1Zfl2sil3iGia4m262n8/+lbHQtFK2c8oj5S33 -UlYxpgT6JVuEYue0Jr/IEgJDBGq1r2+OMlSRbfYdJXYgxUJGAm2fBT261/1dc1YE -sEtuo/CWlinQS3vcsOhUpLYknlDNmeSx/mHdYJ9M/aOKdm4CMHqWi8WM6m7fz+CP -kvFw7SvAz8tER166sO1CN5T1eQ3U/Bo6BVU/9GEibGkNIq1MB5IHlPZxwrnbv5Ij -FeyLmhRgEaKfHXzDSi13DfJ5IqO/n/VFL1Cgng3/qMw6nvtOQv0+Xi8wXbhURGE6 -2RqkpasmCi/Ro0qTUZimH5qH2GbVHebbLo7Amn7E51CX9eHSRZMo9tW5kVXjMG+g -Va/ckpAZXrH6bnCXlHLVD0K//TXtycyIy9g8pqYecJR6PF3ziQQROhc2URQmJXlw -F4cUcRj3MemrLk8LSO6yntgd1K510x3jKI0T+gOZKcY2ucNHUL7WW+QGbS4EW+wr -9HJhkCgNKGqLMKxbJOvpWoQ9SNt1vR51boMmPbC7f+FG00Evg/sCJr3Nm1Y5VrY+ -2I9XkNnHpI4c3jAT1yU4hBpzGXTiPW5tOq+sY8bP24D4O/vdQJiaSmfiX95jW4uR -xh4QQzfi8bz5o6HJWKTxrcRjpihWUgIzjBPoVvjXUQG68MfumgmJcVALuIU7nhay -EMNNDR07Rqeeixoe2zKnvZJkkzjNDDqK3nQ3s9S+SAkiJiy/m+Yq2Z872e3nTop7 -HkIR4dt7NBCLiabVTQcTfSkjmobcA7edEJRjabRDV5lduxXFE9yM7v4ULeMhY3uT -NTEailyttfUcGuLoFQ/1DMQoPKD4Rs72mLOn5hu+nuFyoitooUZXh30dSk0OASdR -jB3el2JK20nk2TLgGy8P+xcmHlTedF3/R3UikNNTHtOyztPvSiCWTWiC6pptajSa -pFQ6GUbryR6PcFz4VMT94rxCw74ujB6Ti6uSyHexoyAnZecOkWwlvv0VatomUPLm -Lo8FUvIddaDCWdOr8ARgghSBLKnz/viLaKquKsgOO77VtRzBgeflTZXlC9Icj78X -QUlXb6pOy1eaEN8HsAY72iPX514IUvszNwsFcB1vtMhrEgwIbBoDoClBeePzo5sl -UZeIZ034QAMWJfoid132wZTZbs/x3kXIlV9uqpiBopeC5oRGA3U/kuMB8z+yGau3 -G6fslHDIItnnJ70wyfyxV/tSae4ysbFjeEZIcg1KeCjYXrXmgD7MYCmVzIPJu2eV +U2FsdGVkX1+t29eIVevLkakbK4zlTsJB8G+uvb+Qiepx9dIerardb7bjhSsEHRt+ +/mpRC9PruQlp7cSbt0/11A69HBjU5qwVDbC/QKBpMKcNxvP5Z2W5/HZOZ7dqXpAA +ttEy/wMhn0va5MLGnX8b0UdT2OQOkLR0IRNLaW/VAMIPYBkC18on5bfz33mpYZJz +am5h7T7yuiAx6zmh4zV9UvZiHzGMx910ecv081tfdXXJFdHWU3JibmmMP+CJtbB3 +eGAAGOSVCfozmU/2wes25dCOQKxH5xmrSJIkrvu0G6ktoE8Mrgd3VTwZWXSMzTOt +pUN0Zq/5D7NzZL7VV+VNJGpaPyO6Bzhxjy1p+Re+t2GRf2SWqrmSL7WQo3SpL6Fa 
+6Bgr1mnGpVUUsUhikTZeM9Ej4EwWJaPpRieEMglYrrSFG4xg++IkEvFsIusydXvK +JDe24RB+FytqMlox4t+Gn/NagyRe8HNTNKeMbGBk6t8rh+CjrillVwMS36UnZMGg +VEadEIhQN1RkypdEh4BzgQyNGp0shWKv5f3mVHOMVG6OPFsNJmz26FfKlrZiyvsE +bniTS/v8WjMwzAQYuyDy5ZmE8mSgwP7NcKSZMIVeHOM4FrkKlxG7SuXMD0nuaZ8Y +PFuOm3S1eaXGE5Y75nFeqvUwae40dZGXR1vfaKusXFzQcK8y9Z0Ezp0vQ7n1Ilga +ZGuQ9w+tIobNzigGKuv9GjWZv9LOgPfHqqocAcgq6V0Cx/vN5zWZp7/7QeVnJ3V4 ++XEY7tcZKuYcEIGqMaSsgvrPkkOxR/zPgzDKUX5NpES0+DmA5kOfluCPa0wzchmF +4tDAUyvVa2OR5FQ+1XTg02yl3Gft039jVWppArUAGbFNZ4+8/8i9ZdezVq0jRRo3 +9yujYxVTgAks48IjVRX9hzqvS3VVs+Jj1JdAtZ6w+k2kDhKAbydVSL9i4aIefqN7 +XgTfMNaYMCkDdPwecCszGPbBH/jCaCJMtbjMS+KKVT6YCZGG/ulsjGKVaBueNvUo +9zvSmLogA5CLRRjFFtedKc38IkmpY8/shxKfxLWIKCdJrS5MXeR3xrXVms34j5PP +CCai85vlXcTH8JJJCxZSU1qbe5Wf1HmE0Z5/7c3bIOreeonBHxuj680e/UfSQe5i +Uk00Wva3Cx4cPRHgfNCcEpJ/yoCKtuDcRGyZ98FHQ3biTWRusM4jwMuS1zWSkxKl +y19HNmfXXC6xgKokdBV+vqIFREtglQMi1HYOkbBiE9Nbn+LSgS7+i9lBVLVWm2vj ++NwUISelcUl1uyCjzgg/TD7USqEimUQq3l9WS0tEl5rX18ENK2lyr1E1gFXzaeO9 +ZfseYLJLOKkQTn0u1ciDzjyYk3bM7m69NaIRkkjkRAFCAlQa5lEBxhDCtEVOju/E +RsE57idgO0HZCmQEemJLPaX/afpJrb3WeUVDpEMlGknsXJ06VYI02jMsoxdqzzr1 +xEHjpEYqymU2MBxmqEkyPisxscjh8XapZBKtwMXqYEwD4hSGnfwI7ByqXJiuIlic +fIWkLwLpKqaA23stg5Y+GqpyLzHwFJr+6+Lro1yPUsIgM2lWeny0PA9a6Sdz+CJ/ +XYjWxVCVFHL50E4Wjw6x9oNFkHoOjhn0ATBw1u+PYQTfigK+IBz3rA1F0LCq+AKR +UKzT3xcDLs4umnJefVslb2fyVdotnslyEl08R+lPGgwhQQjq3k8xopSbOys7HNjO +0k8o90tIBskASAl4PI7KFQ46g8MeDbq1puvvENLfv6KxIZFStAlakpr9ZVSAXLbA +XVV5+VE2oUL0X9CufAvo8wuD0eo+J2fHiP3KyuutJYyi/9zgH1nMZ0dXmZE7v7Uh +jPWx5/AGR0MeiRQySOUL6XtC2p/vvJL69SewXsc4fuFTxJU/xU0qFwkY9YMr75zK +2Te+cEQUReDQnvjtp0ULZcUYxrJXKY3V26EWLzTqg6gIftQABg8MNCCFMAmCACWx +Lg5vSbeAiakelCYNXvuBWrKBWER6qTHb+rsXQh8bwK3pRiUps5iAyCcjasMa2uGx +qlsAqVeccZ90v5XM3e0zUZRj9/6fCcCXUJmI8mtvdZie4yPFHFQlODKaMZ0Jtt/t +bD+9k1zKDfYP/4OaFkuawO3TxsLGdmC7Gvz2RuS3010RJHUq6I1jeY7sXt1zO2Kt diff --git a/dev/gpg_owner_trust.enc b/dev/gpg_owner_trust.enc index 452aa9475a992c78e601f62c8fad8181ec7aa4b4..b5775131c07d1a71266541d27edd3b83dad1ae36 100644 --- a/dev/gpg_owner_trust.enc +++ b/dev/gpg_owner_trust.enc @@ -1,10 +1,11 @@ -U2FsdGVkX1+fix/zD7WLjEq5xzyU5A4FKBnPmyfHSd5mpLszwFKm7ElieCFvknUf -TYx1vg7q+bZmFCIT+NsMpeVzOqof84dPFUGeYmRHoWO1RX/4hC86yI+yWQzKdPaD -aY5ZiR8xf3ZVP2UTFC5xOpcI54YHcVh30FsVtn6edXA/iXv2q+rnDC/5W229TEAj -huhBn+Km3JuhFXzlQjSUcfwHrFhb8LxM5T4ARhthGtG6UQs6gyAcAgFl2ucHdek7 -Ti4HeQEUmIjgdXA9GJP6H4GHYhQZO3Pk7i0m9YXnmjpJLd3uxAmcJ0nh4BJTTLBS -v715I1tHRK+SChdZP1IWXjRRq2WFP758YVYdhSiozFBtPOcFkmtOzYJwxwbvcIFM -CgXkbJzwgOue2Rb/x0380UT/X4NlmQh6tGTVqhZy4JKKoXdOqDUem2JxohZ8fS4o -zYaBTSVooOmsAKnxXTdCm0u3bpt0kETDWtzqrzmJuRizq9ptJ5xdiky2DEQSXEC1 -tkYwx0jE033OYCJK2QRAr8AGi1jcsc95gFvxquqQHyByPNkNgIjqwk0p0W8Cgzf8 -TomviPy9/TVhEM5ttvshZw== +U2FsdGVkX1+h+fPJ5xi4CBTM7xRxGjbvKmg6GRkxDr5GtQwdLuiTjhV8PJMEBn2Y +9u44vtyl+1kWg5rUtKDfww4xBoCKpQg0o8mD80KaYrjIWlH8aRpxm9nx3q5oIBPN +pK9jxJ4vAokxPz2rjuGAQM/qx1CnQ/Nw4qjC+6S1KWbmaMOAV4fOWCZVCiTj70PK +mECaMiTbv2VMvWNkS3SvZizBxRgGjbmENmzYilbvP6iPRsWxVLjKlzXn/wlWRazG +6DLcud8amSo6CRNmlhPbX2dZVaCYLH6IO0MvnCeq3ojEqFUZN8M/3P5HJ3dJXEXI +nYqDC38UpurFdQg9LoAzZtXweyRhfs8DABFg+cv05QUbC9jBId/QY7zdttdyaJud +L7Qp0aYMlSpouDJmqD2jHnNRsFZFaKv1SrMELkGntq55/nogH6oo1+ngU1odLrYn +3wZHXGAiCdSgUNKmYU4DrSVYhwsFYp6nJASYrit08q+2S95L5MGM9vmhUibzYBM8 +ueZomzTduPaeU8tzAGWqgmwhrrw8n6N5xYVfiANEndM9WsO2zhH30hB+qM6ia9er +vsP6vCv4fSHjO3ie+BQfN07m1YDhRd03EsfbVhpClVrM82oRgkbHkWGPal2Oesx4 +cSjgP5ZvarFHOLRo2c7Utw== diff --git a/dev/secrets_configuration.sh b/dev/secrets_configuration.sh index 9110a5193e811455aa9b2bc42836539b436249ac..340e2357cd0843123a853c59e1f750903640d1e0 
100644 --- a/dev/secrets_configuration.sh +++ b/dev/secrets_configuration.sh @@ -1,7 +1,7 @@ export VARNAME_CI_SECRET="CI_KITWARE_SECRET" -export VARNAME_TWINE_USERNAME="TWINE_USERNAME" -export VARNAME_TWINE_PASSWORD="TWINE_PASSWORD" -export VARNAME_TEST_TWINE_USERNAME="TEST_TWINE_USERNAME" -export VARNAME_TEST_TWINE_PASSWORD="TEST_TWINE_PASSWORD" +export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" +export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_PUSH_TOKEN="GITLAB_KITWARE_TOKEN" +export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" +export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " diff --git a/dev/setup_secrets.sh b/dev/setup_secrets.sh index 5394075892845a6b5fe320c5089be2feffc86048..1ead971dd4e45c91aefcff92c22f25ed0ec322f5 100644 --- a/dev/setup_secrets.sh +++ b/dev/setup_secrets.sh @@ -123,11 +123,11 @@ setup_package_environs(){ setup_package_environs_gitlab_kitware(){ echo ' export VARNAME_CI_SECRET="CI_KITWARE_SECRET" - export VARNAME_TWINE_USERNAME="TWINE_USERNAME" - export VARNAME_TWINE_PASSWORD="TWINE_PASSWORD" - export VARNAME_TEST_TWINE_USERNAME="TEST_TWINE_USERNAME" - export VARNAME_TEST_TWINE_PASSWORD="TEST_TWINE_PASSWORD" + export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" + export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_PUSH_TOKEN="GITLAB_KITWARE_TOKEN" + export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" + export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh git add dev/secrets_configuration.sh @@ -136,10 +136,10 @@ setup_package_environs_gitlab_kitware(){ setup_package_environs_github_erotemic(){ echo ' export VARNAME_CI_SECRET="EROTEMIC_CI_SECRET" - export VARNAME_TWINE_USERNAME="TWINE_USERNAME" - export VARNAME_TWINE_PASSWORD="TWINE_PASSWORD" - export VARNAME_TEST_TWINE_USERNAME="TEST_TWINE_USERNAME" - export VARNAME_TEST_TWINE_PASSWORD="TEST_TWINE_PASSWORD" + export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" + export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" + export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" + export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh git add dev/secrets_configuration.sh @@ -148,9 +148,11 @@ setup_package_environs_github_erotemic(){ setup_package_environs_github_pyutils(){ echo ' export VARNAME_CI_SECRET="PYUTILS_CI_SECRET" + export VARNAME_TWINE_PASSWORD="PYUTILS_PYPI_MASTER_TOKEN" + export VARNAME_TEST_TWINE_PASSWORD="PYUTILS_TEST_PYPI_MASTER_TOKEN" + export VARNAME_TWINE_USERNAME="PYUTILS_PYPI_MASTER_TOKEN_USERNAME" + export VARNAME_TEST_TWINE_USERNAME="PYUTILS_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=PyUtils-CI " - export VARNAME_TWINE_PASSWORD="PYUTILS_TWINE_PASSWORD" - export VARNAME_TWINE_PASSWORD="PYUTILS_TWINE_PASSWORD" ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh git add dev/secrets_configuration.sh @@ -166,11 +168,37 @@ upload_github_secrets(){ #printf "%s" "$GITHUB_TOKEN" | gh auth login --hostname 
Github.com --with-token gh auth login source dev/secrets_configuration.sh - gh secret set "$VARNAME_CI_SECRET" -b"${!VARNAME_CI_SECRET}" - gh secret set "$VARNAME_TWINE_USERNAME" -b"${!VARNAME_TWINE_USERNAME}" - gh secret set "$VARNAME_TWINE_PASSWORD" -b"${!VARNAME_TWINE_PASSWORD}" - gh secret set "$VARNAME_TEST_TWINE_PASSWORD" -b"${!VARNAME_TEST_TWINE_PASSWORD}" - gh secret set "$VARNAME_TEST_TWINE_USERNAME" -b"${!VARNAME_TEST_TWINE_USERNAME}" + gh secret set "TWINE_USERNAME" -b"${!VARNAME_TWINE_USERNAME}" + gh secret set "TEST_TWINE_USERNAME" -b"${!VARNAME_TEST_TWINE_USERNAME}" + toggle_setx_enter + gh secret set "CI_SECRET" -b"${!VARNAME_CI_SECRET}" + gh secret set "TWINE_PASSWORD" -b"${!VARNAME_TWINE_PASSWORD}" + gh secret set "TEST_TWINE_PASSWORD" -b"${!VARNAME_TEST_TWINE_PASSWORD}" + toggle_setx_exit +} + + +toggle_setx_enter(){ + # Can we do something like a try/finally? + # https://stackoverflow.com/questions/15656492/writing-try-catch-finally-in-shell + echo "Enter sensitive area" + if [[ -n "${-//[^x]/}" ]]; then + __context_1_toggle_setx=1 + else + __context_1_toggle_setx=0 + fi + if [[ "$__context_1_toggle_setx" == "1" ]]; then + echo "Setx was on, disable temporarilly" + set +x + fi +} + +toggle_setx_exit(){ + echo "Exit sensitive area" + # Can we guarentee this will happen? + if [[ "$__context_1_toggle_setx" == "1" ]]; then + set -x + fi } @@ -210,7 +238,7 @@ upload_gitlab_group_secrets(){ fi source dev/secrets_configuration.sh - SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_USERNAME VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) for SECRET_VARNAME_PTR in "${SECRET_VARNAME_ARR[@]}"; do SECRET_VARNAME=${!SECRET_VARNAME_PTR} echo "" @@ -228,6 +256,8 @@ upload_gitlab_group_secrets(){ if [[ "$REMOTE_VALUE" == "" ]]; then # New variable echo "Remove variable does not exist, posting" + + toggle_setx_enter curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" \ --form "key=${SECRET_VARNAME}" \ --form "value=${LOCAL_VALUE}" \ @@ -235,11 +265,14 @@ upload_gitlab_group_secrets(){ --form "masked=true" \ --form "environment_scope=*" \ --form "variable_type=env_var" + toggle_setx_exit elif [[ "$REMOTE_VALUE" != "$LOCAL_VALUE" ]]; then echo "Remove variable does not agree, putting" # Update variable value + toggle_setx_enter curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/$SECRET_VARNAME" \ --form "value=${LOCAL_VALUE}" + toggle_setx_exit else echo "Remote value agrees with local" fi @@ -269,18 +302,25 @@ upload_gitlab_repo_secrets(){ fi TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) + toggle_setx_enter curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" + toggle_setx_exit GROUP_ID=$(cat "$TMP_DIR/all_group_info" | jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" + toggle_setx_enter curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" + toggle_setx_exit + GROUP_ID=$(cat "$TMP_DIR/all_group_info" | jq ". 
| map(select(.path==\"$GROUP_NAME\")) | .[0].id") cat "$TMP_DIR/group_info" | jq PROJECT_ID=$(cat "$TMP_DIR/group_info" | jq ".projects | map(select(.path==\"$PROJECT_NAME\")) | .[0].id") echo "PROJECT_ID = $PROJECT_ID" # Get group-level secret variables + toggle_setx_enter curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" > "$TMP_DIR/project_vars" + toggle_setx_exit cat "$TMP_DIR/project_vars" | jq '.[] | .key' if [[ "$?" != "0" ]]; then echo "Failed to access project level variables. Probably a permission issue" @@ -288,7 +328,7 @@ upload_gitlab_repo_secrets(){ LIVE_MODE=1 source dev/secrets_configuration.sh - SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_USERNAME VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) + SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) for SECRET_VARNAME_PTR in "${SECRET_VARNAME_ARR[@]}"; do SECRET_VARNAME=${!SECRET_VARNAME_PTR} echo "" @@ -353,6 +393,15 @@ export_encrypted_code_signing_keys(){ MAIN_GPG_KEYID=$(gpg --list-keys --keyid-format LONG "$GPG_IDENTIFIER" | head -n 2 | tail -n 1 | awk '{print $1}') GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[S\]" -A 1 | tail -n 1 | awk '{print $1}') + # Careful, if you don't have a subkey, requesting it will export more than you want. + # Export the main key instead (its better to have subkeys, but this is a lesser evil) + if [[ "$GPG_SIGN_SUBKEY" == "" ]]; then + # NOTE: if you get here this probably means your subkeys expired (and + # wont even be visible), so we probably should check for that here and + # thrown an error instead of using this hack, which likely wont work + # anyway. + GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[C\]" -A 1 | tail -n 1 | awk '{print $1}') + fi echo "MAIN_GPG_KEYID = $MAIN_GPG_KEYID" echo "GPG_SIGN_SUBKEY = $GPG_SIGN_SUBKEY" diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d0c3cbf1020d5c292abdedf27627c6abe25e2293 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..6fcf05b4b76f8b9774c317ac8ada402f8a7087de --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/netharn/__init__.py b/netharn/__init__.py index a3fe57f227e6cf9fdde981d2e9a7074d62e38b23..eefb53d490f1720f15794ddc2ae48b0060704261 100644 --- a/netharn/__init__.py +++ b/netharn/__init__.py @@ -4,7 +4,7 @@ mkinit netharn --noattrs --dry mkinit netharn --noattrs """ -__version__ = '0.6.0' +__version__ = '0.6.1' try: # PIL 7.0.0 removed PIL_VERSION, which breaks torchvision, monkey patch it diff --git a/netharn/api.py b/netharn/api.py index a34d7fc456bba5104a572c019c780574b8aaa1cf..fe86ba1d729679dddb304f6a0519bdcb9bb44f70 100644 --- a/netharn/api.py +++ b/netharn/api.py @@ -333,9 +333,9 @@ class Initializer(object): # Allow init to specify a pretrained fpath if isinstance(init, six.string_types) and pretrained_fpath is None: from os.path import exists - pretraind_cand = ub.expandpath(init) - if exists(pretraind_cand): - pretrained_fpath = pretraind_cand + pretrained_cand = ub.expandpath(init) + if exists(pretrained_cand): + pretrained_fpath = pretrained_cand config['init'] = init config['pretrained_fpath'] = pretrained_fpath @@ -501,6 +501,7 @@ class Optimizer(object): cls = nh.optimizers.AdamW kw = { 'lr': lr, + 'weight_decay': decay, # 'betas': (0.9, 0.999), # 'eps': 1e-8, # 'amsgrad': False @@ -545,6 +546,13 @@ class Optimizer(object): defaultkw = util_inspect.default_kwargs(cls) kw = defaultkw.copy() kw.update(ub.dict_isect(config, kw)) + # Hacks for common cases, otherwise if learning_rate is + # given, but only lr exists in the signature, it will be + # incorrectly ignored. 
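# [Editor's sketch, not part of the patch] The hack added below exists because
# a config may spell these options "learning_rate"/"decay" while the optimizer
# signature only exposes "lr"/"weight_decay", so the dict-intersection above
# silently drops the values. A standalone illustration of the failure mode,
# using inspect.signature as a stand-in for util_inspect.default_kwargs and a
# hypothetical config dict:
import inspect
import torch

config = {'learning_rate': 0.01, 'decay': 5e-4}   # hypothetical option names
lr, decay = config['learning_rate'], config['decay']
cls = torch.optim.AdamW
defaultkw = {k: p.default
             for k, p in inspect.signature(cls).parameters.items()
             if p.default is not inspect.Parameter.empty}
kw = defaultkw.copy()
kw.update({k: v for k, v in config.items() if k in kw})  # intersects nothing
if 'lr' in kw:
    kw['lr'] = lr                  # so the canonical names are forced instead
if 'weight_decay' in kw:
    kw['weight_decay'] = decay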
+ if 'lr' in kw: + kw['lr'] = lr + if 'weight_decay' in kw: + kw['weight_decay'] = decay break if cls is None: diff --git a/netharn/criterions/contrastive_loss.py b/netharn/criterions/contrastive_loss.py index 4459901e38d50544ae1e190aaf438c4707671dc2..df8e30b716e30acd3ce0b637aca39ec74e5579a4 100644 --- a/netharn/criterions/contrastive_loss.py +++ b/netharn/criterions/contrastive_loss.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals import torch import torch.nn as nn @@ -41,7 +39,7 @@ class ContrastiveLoss(nn.Module): >>> loss2x, dist = ut.exec_func_src(self.forward, globals(), globals(), keys=['loss2x', 'dist']) >>> ut.quit_if_noshow() >>> loss2x, dist, label = map(np.array, [loss2x, dist, label]) - >>> label = label.astype(np.bool) + >>> label = label.astype(bool) >>> dist0_l2 = dist[~label] >>> dist1_l2 = dist[label] >>> loss0 = loss2x[~label] / 2 diff --git a/netharn/data/collate.py b/netharn/data/collate.py index 08c1beb3efe52d730ad487fa77a75d3f26238f75..e38bdfea0f4bea271ea91dc9ad9aee05e2653ad1 100644 --- a/netharn/data/collate.py +++ b/netharn/data/collate.py @@ -270,11 +270,9 @@ def _debug_inbatch_shapes(inbatch): import ubelt as ub print('len(inbatch) = {}'.format(len(inbatch))) extensions = ub.util_format.FormatterExtensions() - @extensions.register((torch.Tensor, np.ndarray)) def format_shape(data, **kwargs): return ub.repr2(dict(type=str(type(data)), shape=data.shape), nl=1, sv=1) - print('inbatch = ' + ub.repr2(inbatch, extensions=extensions, nl=True)) diff --git a/netharn/data/data_containers.py b/netharn/data/data_containers.py index 49c34256a93e6741c6c3dfcffef06475ef93164f..e93d16d3d20fbdf71234a196a385a34667c4b64c 100644 --- a/netharn/data/data_containers.py +++ b/netharn/data/data_containers.py @@ -969,7 +969,7 @@ def nestshape(data): >>> num_masks, H, W = 3, 32, 32 >>> rng = np.random.RandomState(0) - >>> masks = (rng.rand(num_masks, H, W) > 0.1).astype(np.int) + >>> masks = (rng.rand(num_masks, H, W) > 0.1).astype(int) >>> bitmasks = BitmapMasks(masks, height=H, width=W) >>> nestshape(bitmasks) diff --git a/netharn/data/transforms/augmenters.py b/netharn/data/transforms/augmenters.py index 27f947ec7cd44c5d453febbb888d4db723bc0b29..5ace79bc6b755d2fb7b9827cd278e5344ca0ede6 100644 --- a/netharn/data/transforms/augmenters.py +++ b/netharn/data/transforms/augmenters.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals import numpy as np import six from netharn.data.transforms import augmenter_base @@ -503,11 +501,11 @@ class Resize(augmenter_base.ParamatarizedAugmenter): sf = 1 / fw if fw >= fh else 1 / fh # Whats the closest integer size we can resize to? - embed_size = np.round(orig_size * sf).astype(np.int) + embed_size = np.round(orig_size * sf).astype(int) # Determine how much padding we need for the top/left side # Note: the right/bottom side might need an extra pixel of padding # depending on rounding issues. 
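# [Editor's note, not part of the patch] The astype(np.int) -> astype(int)
# and np.bool -> bool edits in this and the surrounding files track NumPy's
# deprecation of the np.int / np.float / np.bool aliases of the Python
# builtins (deprecated in NumPy 1.20, removed in 1.24). The replacement is
# behaviour-preserving, e.g.:
import numpy as np
embed_size = np.round(np.array([31.7, 63.2])).astype(int)  # same dtype as the old np.int spelling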
- shift = np.round((target_size - embed_size) / 2).astype(np.int) + shift = np.round((target_size - embed_size) / 2).astype(int) scale = embed_size / orig_size return shift, scale, embed_size diff --git a/netharn/examples/object_detection.py b/netharn/examples/object_detection.py index c3c6ca29d760867023612b7d45ca1c7348595ee5..12bba3d5576d01ff91f703370d282ea6885583a9 100644 --- a/netharn/examples/object_detection.py +++ b/netharn/examples/object_detection.py @@ -102,7 +102,7 @@ class DetectDataset(torch.utils.data.Dataset): self.sampler = sampler self.factor = factor # downsample factor of yolo grid - self.input_dims = np.array(input_dims, dtype=np.int) + self.input_dims = np.array(input_dims, dtype=int) assert np.all(self.input_dims % self.factor == 0) self.multi_scale_inp_size = np.array([ diff --git a/netharn/examples/yolo_voc.py b/netharn/examples/yolo_voc.py index 327bdf4b14bd0853bc87f7e8318212193387aaeb..c4e0972a627e99bd50ddc9fe84260272dc1b483f 100644 --- a/netharn/examples/yolo_voc.py +++ b/netharn/examples/yolo_voc.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- """ References: https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/ """ -from __future__ import absolute_import, division, print_function, unicode_literals import os import torch import ubelt as ub @@ -54,7 +52,7 @@ class YoloVOCDataset(nh.data.voc.VOCDataset): self.factor = factor # downsample factor of yolo grid - self.base_wh = np.array(base_wh, dtype=np.int) + self.base_wh = np.array(base_wh, dtype=int) assert np.all(self.base_wh % self.factor == 0) @@ -206,10 +204,10 @@ class YoloVOCDataset(nh.data.voc.VOCDataset): image = self._load_image(index) annot = self._load_annotation(index) # VOC loads annotations in tlbr - tlbr = annot['boxes'].astype(np.float) + tlbr = annot['boxes'].astype(float) gt_classes = annot['gt_classes'] # Weight samples so we dont care about difficult cases - gt_weights = 1.0 - annot['gt_ishard'].astype(np.float) + gt_weights = 1.0 - annot['gt_ishard'].astype(float) return image, tlbr, gt_classes, gt_weights # @ub.memoize_method # remove this if RAM is a problem diff --git a/netharn/fit_harn.py b/netharn/fit_harn.py index 38623de9d8e6124820900d5cf9cb0dcb2d025e82..24c972e57d18a98e140efa32a62d3a75cbdccd45 100644 --- a/netharn/fit_harn.py +++ b/netharn/fit_harn.py @@ -79,6 +79,7 @@ Example: >>> }), >>> 'monitor' : (nh.Monitor, { >>> 'max_epoch': 10, + >>> 'ignore_first_epochs': 2, >>> }), >>> # dynamics are a config option that modify the behavior of the main >>> # training loop. These parameters effect the learned model. diff --git a/netharn/hyperparams.py b/netharn/hyperparams.py index b0b143d476c5d0e0b6dd1eb5a526615cc8d21485..11f0ca5fbeb85737c055b95570613a615b661c1a 100644 --- a/netharn/hyperparams.py +++ b/netharn/hyperparams.py @@ -943,8 +943,8 @@ class HyperParams(object): ```python - from scipy import exp, log - from scipy.special import gammaln + from numpy import exp, log + from scipy.special import loggamma def prob_unique(N, r): return exp( gammaln(N+1) - gammaln(N-r+1) - r*log(N) ) @@ -959,17 +959,13 @@ class HyperParams(object): ``` This is approximately 0.00056 or about 1 in 1784. - When r = 10000, it becomes had to compute the number because of - floating point errors, but the probability is likely astronomically - low. I doubt we will ever run training in the same work directory - (and with the same nice "name") 10,000 different times, so using an - 8 character hash seems safe and user friendly for this purpose. 
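A worked version of the collision estimate discussed in this docstring may be
useful. The sketch below is the editor's, with an assumed id-space size N
(8 hex characters) and the r = 10000 runs mentioned above; it also prints the
closed-form approximation r*(r-1)/(2*N), which sidesteps the floating-point
trouble with the log-gamma route that the docstring notes.

    import numpy as np
    from scipy.special import gammaln

    def prob_unique(N, r):
        # P(all r draws from N equally likely ids are distinct), in log space
        return np.exp(gammaln(N + 1) - gammaln(N - r + 1) - r * np.log(N))

    N = 16 ** 8   # assumption: 8 hex characters of the train hashid
    r = 10000     # number of training runs sharing a work directory and name
    print(1 - prob_unique(N, r))   # collision probability via log-gamma
    print(r * (r - 1) / (2 * N))   # small-probability approximation, ~0.0116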
- Perhaps we may move to 12, 16, or 32+ in the future, but for the - pre 1.0 netharn, 8 seems fine. + + Should probably bump the size in a later version. Note, the above + code does not seem to be producing the correct number, likely due + to floating point errors. References: ..[1] https://www.johndcook.com/blog/2016/01/30/general-birthday-problem/ - """ train_hashid = _hash_data(train_id)[0:8] @@ -1002,7 +998,6 @@ class HyperParams(object): # TODO: software versions - train_info = ub.odict([ ('train_hashid', train_hashid), diff --git a/netharn/initializers/_nx_ext_v2/__init__.py b/netharn/initializers/_nx_ext_v2/__init__.py deleted file mode 100644 index 064aa9924b733dcbad8cd8ab00fa40999997f39b..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -mkinit ~/code/netharn/netharn/initializers/_nx_ext_v2/__init__.py -w -""" -from netharn.initializers._nx_ext_v2 import balanced_embedding -from netharn.initializers._nx_ext_v2 import balanced_isomorphism -from netharn.initializers._nx_ext_v2 import balanced_sequence -from netharn.initializers._nx_ext_v2 import tree_embedding -from netharn.initializers._nx_ext_v2 import tree_isomorphism -from netharn.initializers._nx_ext_v2 import utils - -from netharn.initializers._nx_ext_v2.balanced_embedding import (available_impls_longest_common_balanced_embedding, - longest_common_balanced_embedding,) -from netharn.initializers._nx_ext_v2.balanced_isomorphism import (available_impls_longest_common_balanced_isomorphism, - balanced_decomp_unsafe_nocat, - generate_all_decomp_nocat, - longest_common_balanced_isomorphism,) -from netharn.initializers._nx_ext_v2.balanced_sequence import (random_balanced_sequence,) -from netharn.initializers._nx_ext_v2.tree_embedding import (maximum_common_ordered_subtree_embedding,) -from netharn.initializers._nx_ext_v2.tree_isomorphism import (maximum_common_ordered_subtree_isomorphism,) -from netharn.initializers._nx_ext_v2.utils import (forest_str, - random_ordered_tree, - random_tree,) - -__all__ = ['available_impls_longest_common_balanced_embedding', - 'available_impls_longest_common_balanced_isomorphism', - 'balanced_decomp_unsafe_nocat', 'balanced_embedding', - 'balanced_isomorphism', 'balanced_sequence', 'forest_str', - 'generate_all_decomp_nocat', 'longest_common_balanced_embedding', - 'longest_common_balanced_isomorphism', - 'maximum_common_ordered_subtree_embedding', - 'maximum_common_ordered_subtree_isomorphism', - 'random_balanced_sequence', 'random_ordered_tree', 'random_tree', - 'tree_embedding', 'tree_isomorphism', 'utils'] diff --git a/netharn/initializers/_nx_ext_v2/_autojit.py b/netharn/initializers/_nx_ext_v2/_autojit.py deleted file mode 100644 index d44eb713f6457378a5f5a4943d00d34fffd9283c..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/_autojit.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -Utilities to just-in-time-cythonize a module at runtime. -""" -from collections import defaultdict -from os.path import dirname, join, basename, splitext, exists -import os -import warnings - - -# Track the number of times we've tried to autojit specific pyx files -NUM_AUTOJIT_TRIES = defaultdict(lambda: 0) -MAX_AUTOJIT_TRIES = 1 - - -def import_module_from_pyx(fname, dpath, error="raise", autojit=True, verbose=1): - """ - Attempts to import a module corresponding to a pyx file. - - If the corresponding compiled module is not found, this can attempt to - JIT-cythonize the pyx file. 
- - Parameters - ---------- - fname : str - The basename of the cython pyx file - - dpath : str - The directory containing the cython pyx file - - error : str - Can be "raise" or "ignore" - - autojit : bool - If True, we will cythonize and compile the pyx file if possible. - - verbose : int - verbosity level (higher is more verbose) - - Returns - ------- - ModuleType | None : module - Returns the compiled and imported module if possible, otherwise None - - Ignore - ------ - from netharn.initializers._nx_ext_v2._autojit import * - fname = "balanced_embedding_cython.pyx" - dpath = ub.expandpath('$HOME/code/netharn/netharn/initializers/_nx_ext_v2') - module = import_module_from_pyx(fname, dpath, error="ignore", verbose=1) - print('module = {!r}'.format(module)) - """ - pyx_fpath = join(dpath, fname) - if not exists(pyx_fpath): - raise AssertionError("pyx file {!r} does not exist".format(pyx_fpath)) - - try: - # This functionality depends on ubelt - # TODO: the required functionality could be moved to nx.utils - import ubelt as ub - except Exception: - if verbose: - print("Autojit requires ubelt, which failed to import") - if error == "ignore": - module = None - elif error == "raise": - raise - else: - raise KeyError(error) - else: - - if autojit: - # Try to JIT the cython module if we ship the pyx without the compiled - # library. - NUM_AUTOJIT_TRIES[pyx_fpath] += 1 - if NUM_AUTOJIT_TRIES[pyx_fpath] <= MAX_AUTOJIT_TRIES: - try: - _autojit_cython(pyx_fpath, verbose=verbose) - except Exception as ex: - warnings.warn("Cython autojit failed: ex={!r}".format(ex)) - if error == "raise": - raise - - try: - module = ub.import_module_from_path(pyx_fpath) - except Exception: - if error == "ignore": - module = None - elif error == "raise": - raise - else: - raise KeyError(error) - - return module - - -def _platform_pylib_exts(): # nocover - """ - Returns .so, .pyd, or .dylib depending on linux, win or mac. Returns the - previous with and without abi (e.g. .cpython-35m-x86_64-linux-gnu) flags. - """ - import sysconfig - - valid_exts = [] - # handle PEP 3149 -- ABI version tagged .so files - base_ext = "." + sysconfig.get_config_var("EXT_SUFFIX").split(".")[-1] - # ABI = application binary interface - tags = [ - sysconfig.get_config_var("SOABI"), - "abi3", # not sure why this one is valid, but it is - ] - tags = [t for t in tags if t] - for tag in tags: - valid_exts.append("." + tag + base_ext) - # return with and without API flags - valid_exts.append(base_ext) - valid_exts = tuple(valid_exts) - return valid_exts - - -def _autojit_cython(pyx_fpath, verbose=1): - """ - This idea is that given a pyx file, we try to compile it. We write a stamp - file so subsequent calls should be very fast as long as the source pyx has - not changed. - - Parameters - ---------- - pyx_fpath : str - path to the pyx file - - verbose : int - higher is more verbose. - """ - import shutil - - # TODO: move necessary ubelt utilities to nx.utils? - # Separate this into its own util? - if shutil.which("cythonize"): - pyx_dpath = dirname(pyx_fpath) - - # Check if the compiled library exists - pyx_base = splitext(basename(pyx_fpath))[0] - - SO_EXTS = _platform_pylib_exts() - so_fname = False - for fname in os.listdir(pyx_dpath): - if fname.startswith(pyx_base) and fname.endswith(SO_EXTS): - so_fname = fname - break - - try: - # Currently this functionality depends on ubelt. - # We could replace ub.cmd with subprocess.check_call and ub.augpath - # with os.path operations, but hash_file and CacheStamp are harder - # to replace. 
We can use "liberator" to statically extract these - # and add them to nx.utils though. - import ubelt as ub - except Exception: - return False - else: - if so_fname is False: - # We can compute what the so_fname will be if it doesnt exist - so_fname = pyx_base + SO_EXTS[0] - - so_fpath = join(pyx_dpath, so_fname) - depends = [ub.hash_file(pyx_fpath, hasher="sha1")] - stamp_fname = ub.augpath(so_fname, ext=".jit.stamp") - stamp = ub.CacheStamp( - stamp_fname, - dpath=pyx_dpath, - product=so_fpath, - depends=depends, - verbose=verbose, - ) - if stamp.expired(): - ub.cmd("cythonize -i {}".format(pyx_fpath), verbose=verbose, check=True) - stamp.renew() - return True diff --git a/netharn/initializers/_nx_ext_v2/_liberate.py b/netharn/initializers/_nx_ext_v2/_liberate.py deleted file mode 100644 index 9a7cc4c034fbecc6ea4f312b0e66283db0f84e31..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/_liberate.py +++ /dev/null @@ -1,74 +0,0 @@ -import ubelt as ub -from os.path import dirname -import networkx as nx -import liberator - - -def copy_over_stuff_from_nx_pr(): - """ - Copy from networkx dev/ordered_subtree_isomorphism b08106baae7987af1dc755a6308fcd11bc21cbc8 - """ - dst = ub.expandpath('~/code/netharn/netharn/initializers/_nx_ext_v2') - - from os.path import join - nx_repo = dirname(dirname(nx.__file__)) - - to_copy = [ - join(nx_repo, 'networkx/algorithms/string/_autojit.py'), - join(nx_repo, 'networkx/algorithms/string/balanced_embedding.py'), - join(nx_repo, 'networkx/algorithms/string/balanced_embedding_cython.pyx'), - join(nx_repo, 'networkx/algorithms/string/balanced_isomorphism.py'), - join(nx_repo, 'networkx/algorithms/string/balanced_isomorphism_cython.pyx'), - join(nx_repo, 'networkx/algorithms/string/balanced_sequence.py'), - join(nx_repo, 'networkx/algorithms/minors/tree_embedding.py'), - join(nx_repo, 'networkx/algorithms/minors/tree_isomorphism.py'), - ] - - import shutil - fpath_list = [] - for fpath in to_copy: - fpath2 = ub.augpath(fpath, dpath=dst) - fpath_list.append(fpath2) - shutil.copy2(fpath, fpath2) - - util_fpath = join(dst, 'utils.py') - closer = liberator.Closer() - closer.add_dynamic(nx.forest_str) - closer.add_dynamic(nx.random_ordered_tree) - closer.add_dynamic(nx.random_tree) - with open(util_fpath, 'w') as file: - file.write(closer.current_sourcecode()) - - from rob import rob_nav - force = True - # force = 0 - rob_nav._ut_sed(r'networkx\.algorithms\.string', 'netharn.initializers._nx_ext_v2', fpath_list=fpath_list, force=force) - rob_nav._ut_sed(r'networkx/networkx/algorithms/string', 'netharn/netharn/initializers/_nx_ext_v2', fpath_list=fpath_list, force=force) - rob_nav._ut_sed(r'networkx/algorithms/string', 'netharn/initializers/_nx_ext_v2', fpath_list=fpath_list, force=force) - - rob_nav._ut_sed(r'networkx\.algorithms\.minors', 'netharn.initializers._nx_ext_v2', fpath_list=fpath_list, force=force) - rob_nav._ut_sed(r'networkx/networkx/algorithms/minors', 'netharn/netharn/initializers/_nx_ext_v2', fpath_list=fpath_list, force=force) - rob_nav._ut_sed(r'networkx/algorithms/minors', 'netharn/initializers/_nx_ext_v2', fpath_list=fpath_list, force=force) - - rob_nav._ut_sed(r'networkx\.generators\.random_graphs', 'netharn.initializers._nx_ext_v2.utils', fpath_list=fpath_list, force=force) - rob_nav._ut_sed(r'networkx\.readwrite\.text', 'netharn.initializers._nx_ext_v2.utils', fpath_list=fpath_list, force=force) - - rob_nav._ut_sed(r'nx.random_tree', 'random_tree', fpath_list=[join(dst, 'utils.py')], force=force) - 
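# [Editor's sketch, not part of the original file] rob_nav._ut_sed is an
# internal helper whose implementation is not shown here; from these call
# sites it appears to perform an in-place regex find/replace over the listed
# files, rewriting the copied networkx module paths to their vendored netharn
# locations. A rough stand-in under that assumption:
import re
from pathlib import Path

def sed_replace(pattern, repl, fpath_list):
    # Apply the substitution to every file and write the result back in place.
    for fpath in fpath_list:
        path = Path(fpath)
        path.write_text(re.sub(pattern, repl, path.read_text()))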
rob_nav._ut_sed(r'nx.forest_str', 'forest_str', fpath_list=[join(dst, 'utils.py')], force=force) - rob_nav._ut_sed(r'nx.random_ordered_tree', 'random_ordered_tree', fpath_list=[join(dst, 'utils.py')], force=force) - - # force = 0 - rob_nav._ut_sed(r'nx.forest_str', 'forest_str', fpath_list=fpath_list, force=force) - rob_nav._ut_sed(r'nx.random_ordered_tree', 'random_ordered_tree', fpath_list=fpath_list, force=force) - - with open(join(dst, 'tree_embedding.py'), 'a') as file: - file.write('\n') - file.write('from netharn.initializers._nx_ext_v2.utils import forest_str # NOQA\n') - file.write('from netharn.initializers._nx_ext_v2.utils import random_ordered_tree # NOQA\n') - - # Enable default autojit - rob_nav._ut_sed(r'# NETWORKX_AUTOJIT', 'NETWORKX_AUTOJIT', fpath_list=fpath_list, force=force) - - """ - xdoctest ~/code/netharn/netharn/initializers/_nx_ext_v2 all - """ diff --git a/netharn/initializers/_nx_ext_v2/balanced_embedding.py b/netharn/initializers/_nx_ext_v2/balanced_embedding.py deleted file mode 100644 index 6068519f644efea69688bcd3d3a5175f0e2c7bb5..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/balanced_embedding.py +++ /dev/null @@ -1,512 +0,0 @@ -""" -Core python implementations for the longest common balanced sequence -subproblem, which is used by -:mod:`netharn.initializers._nx_ext_v2.tree_embedding`. -""" -import operator -from .balanced_sequence import ( - IdentityDict, - generate_all_decomp, - balanced_decomp_unsafe, -) - -__all__ = [ - "available_impls_longest_common_balanced_embedding", - "longest_common_balanced_embedding", -] - - -def longest_common_balanced_embedding( - seq1, seq2, open_to_close, open_to_node=None, node_affinity="auto", impl="auto" -): - """ - Finds the longest common balanced sequence embedding between two sequences - - This is used as a subproblem to solve the maximum common embedded subtree - problem described in [1]. - - Parameters - ---------- - seq1, seq2: Sequence[TokT] - two input balanced sequences - - open_to_close : Dict[TokT, TokT] - a mapping from opening to closing tokens in the balanced sequence - - open_to_node : Dict[TokT, Any] | None - If unspecified an identity mapping is assumed. Otherwise this is a - dictionary that maps a sequence token to a value which is used in the - ``node_affinity`` comparison. Typically these are values corresponding - to an original problem (e.g. a tree node). This should only used in - the case where the tokens in each sequence ``seq1`` and ``seq2`` are - unique. NOTE: in the case where sequence tokens are not unique, - sequences can always be re-encoded to differentiate between the same - token at different indices without loss of generality. - - node_affinity : None | str | callable - Function for to determine if two nodes can be matched. The function - should take two arguments `node1` and `node2` and return a non-negative - affinity score that is zero if the nodes are not allowed to match and - some positive value indicating the strength of the match. The return - is interpreted as a weight that is used to break ties. If - ``node_affinity=None`` then any node can match any other node and only - the topology is important. The default is "auto", which will use - ``operator.eq`` to do a simple equality test on the nodes. - - impl : str - Determines the backend implementation. Available choices are given by - :func:`available_impls_longest_common_balanced_embedding`. The default - is "auto", which chooses "iter-cython" if available, otherwise "iter". 
- - Returns - ------- - Tuple[Tuple[Sequence[TokT], Sequence[TokT]], Float] - A tuple indicating the common subsequence embedding of sequence1 and - sequence2 (usually these are the same) and its value. - - See Also - -------- - * This function is used to implement :func:`netharn.initializers._nx_ext_v2.tree_embedding.maximum_common_ordered_subtree_embedding` - - Notes - ----- - - A balanced sequence is a sequence of tokens where there is a valid - "nesting" of tokens given some relationship between opening and closing - tokens (e.g. parenthesis, square brackets, and curly brackets). The - following grammar generates all balanced sequences: - - .. code:: - - t -> any opening token - close(t) -> the closing token for t - seq -> '' - seq -> t + seq + close(t) + seq - - Given a balanced sequence s. - - .. code:: - - s = '([()[]])[{}([[]])]' - - We can use the grammar to decompose it as follows: - - .. code:: - - s.a = '(' - s.head = '[()[]]' - s.b = ')' - s.tail = '[{}([[]])]' - - Where s.a is the first token in s, and s.b is the corresponding closing - token. s.head is everything between s.a and s.b, and s.tail is everything - after s.b. - - Given two balanced sequences s1 and s2, their longest common subsequence - embedding (lcse) is the largest common string you can obtain by deleting - pairs of opening and closing tokens. - - We also define affinity as some degree of agreement between two tokens. - By default we use an equality check: ``affinity(a1, a2) = (a1 == a2)``. - - The recurrence is defined as follows: - - .. code:: - - lcse(s1, '') = 0 - lcse('', s2) = 0 - lcse(s1, s2) = max( - lcse(s1.head, s2.head) + lcse(s1.tail, s2.tail) + affinity(s1.a, s2.a), - lcse(s1.head + s1.tail, s2), - lcse(s1, s2.head + s2.tail), - ) - - For example the longest common subsequence between the following s1 and s2 - are marked: - - .. code:: - - seq1 = '([()[]])[{}([[]])]' - _ = ' x xxxx x' - seq2 = '[[([])]]{[]}' - _ = 'xx xx xx ' - - subseq = '[[[]]]' - - Also notice that '[[]]{}' is another solution to the previous example. - That the longest common subsequence is not always unique. For instance, - consider the following two sequences: - - .. code:: - - s1 = '({}[])' - s2 = '[{}()]' - - They have three distinct longest common subsequences: '{}', '[]', and '()'. - - References - ---------- - .. [1] Lozano, Antoni, and Gabriel Valiente. - "On the maximum common embedded subtree problem for ordered trees." - String Algorithmics (2004): 155-170. - https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf - - Example - ------- - >>> # Given two sequences and a mapping between opening and closing tokens - >>> # we find the longest common subsequence (achievable by repeated - >>> # balanced decomposition) - >>> seq1 = "[][[]][]" - >>> seq2 = "[[]][[]]" - >>> open_to_close = {"[": "]"} - >>> best, value = longest_common_balanced_embedding(seq1, seq2, open_to_close) - ... 
- >>> subseq1, subseq2 = best - >>> print("subseq1 = {!r}".format(subseq1)) - subseq1 = '[][[]]' - - >>> # 1-label case from the paper (see Example 5) - >>> # https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf - >>> seq1 = "0010010010111100001011011011" - >>> seq2 = "001000101101110001000100101110111011" - >>> open_to_close = {"0": "1"} - >>> best, value = longest_common_balanced_embedding(seq1, seq2, open_to_close) - >>> subseq1, subseq2 = best - >>> print("subseq1 = {!r}".format(subseq1)) - subseq1 = '00100101011100001011011011' - >>> assert value == 13 - - >>> # 3-label case - >>> seq1 = "{({})([[]([]){(()(({()[]({}{})}))){}}])}" - >>> seq2 = "{[({{}}{{[][{}]}(()[(({()})){[]()}])})]}" - >>> open_to_close = {"{": "}", "(": ")", "[": "]"} - >>> best, value = longest_common_balanced_embedding(seq1, seq2, open_to_close) - >>> subseq1, subseq2 = best - >>> print("subseq1 = {!r}".format(subseq1)) - subseq1 = '{{}[][]()(({()})){}}' - >>> assert value == 10 - """ - if node_affinity == "auto" or node_affinity == "eq": - node_affinity = operator.eq - if node_affinity is None: - - def _matchany(a, b): - return True - - node_affinity = _matchany - if open_to_node is None: - open_to_node = IdentityDict() - full_seq1 = seq1 - full_seq2 = seq2 - if impl == "auto": - if _cython_lcse_backend(error="ignore"): - impl = "iter-cython" - else: - impl = "iter" - - if impl == "iter": - value, best = _lcse_iter( - full_seq1, full_seq2, open_to_close, node_affinity, open_to_node - ) - elif impl == "iter-cython": - balanced_embedding_cython = _cython_lcse_backend(error="raise") - value, best = balanced_embedding_cython._lcse_iter_cython( - full_seq1, full_seq2, open_to_close, node_affinity, open_to_node - ) - elif impl == "recurse": - _memo = {} - _seq_memo = {} - value, best = _lcse_recurse( - full_seq1, - full_seq2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - else: - raise KeyError(impl) - return best, value - - -def available_impls_longest_common_balanced_embedding(): - """ - Returns all available implementations for - :func:`longest_common_balanced_embedding`. - - Returns - ------- - List[str] - the string code for each available implementation - """ - impls = [] - if _cython_lcse_backend(): - impls += [ - "iter-cython", - ] - - # Pure python backends - impls += [ - "iter", - "recurse", - ] - return impls - - -def _cython_lcse_backend(error="ignore", verbose=0): - """ - Returns the cython backend if available, otherwise None - - CommandLine - ----------- - xdoctest -m netharn.initializers._nx_ext_v2.balanced_embedding _cython_lcse_backend - """ - from netharn.initializers._nx_ext_v2._autojit import import_module_from_pyx - from os.path import dirname - import os - - # Toggle comments depending on the desired autojit default - NETWORKX_AUTOJIT = os.environ.get("NETWORKX_AUTOJIT", "") - NETWORKX_AUTOJIT = not os.environ.get("NETWORKX_NO_AUTOJIT", "") - - module = import_module_from_pyx( - "balanced_embedding_cython.pyx", - dpath=dirname(__file__), - error=error, - autojit=NETWORKX_AUTOJIT, - verbose=verbose, - ) - balanced_embedding_cython = module - return balanced_embedding_cython - - -def _lcse_iter(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node): - """ - Depth first stack trajectory and replace try except statements with ifs - - This is the current best pure-python algorithm candidate - - Converts :func:`_lcse_recurse` into an iterative algorithm using a fairly - straightforward method that effectively simulates callstacks. 
Uses a - breadth-first trajectory and try-except to catch missing memoized results - (which seems to be slightly faster than if statements). - - Example - ------- - >>> full_seq1 = "[][[]][]" - >>> full_seq2 = "[[]][[]]" - >>> open_to_close = {"[": "]"} - >>> import operator as op - >>> node_affinity = op.eq - >>> open_to_node = IdentityDict() - >>> res = _lcse_iter(full_seq1, full_seq2, open_to_close, node_affinity, - ... open_to_node) - >>> val, embeddings = res - >>> print(embeddings[0]) - [][[]] - """ - all_decomp1 = generate_all_decomp(full_seq1, open_to_close, open_to_node) - all_decomp2 = generate_all_decomp(full_seq2, open_to_close, open_to_node) - - key0 = (full_seq1, full_seq2) - frame0 = key0 - stack = [frame0] - - # Memoize mapping (seq1, seq2) -> best size, embeddings - _results = {} - - # Populate base cases - empty1 = type(next(iter(all_decomp1.keys())))() - empty2 = type(next(iter(all_decomp2.keys())))() - best = (empty1, empty2) - base_result = (0, best) - for seq1 in all_decomp1.keys(): - key1 = seq1 - t1, a1, b1, head1, tail1, head_tail1 = all_decomp1[key1] - _results[(seq1, empty2)] = base_result - _results[(head1, empty2)] = base_result - _results[(tail1, empty2)] = base_result - _results[(head_tail1, empty2)] = base_result - - for seq2 in all_decomp2.keys(): - key2 = seq2 - t2, a2, b2, head2, tail2, head_tail2 = all_decomp2[key2] - _results[(empty1, seq2)] = base_result - _results[(empty1, head2)] = base_result - _results[(empty1, tail2)] = base_result - _results[(empty1, head_tail2)] = base_result - - while stack: - key = stack[-1] - if key not in _results: - seq1, seq2 = key - - t1, a1, b1, head1, tail1, head_tail1 = all_decomp1[seq1] - t2, a2, b2, head2, tail2, head_tail2 = all_decomp2[seq2] - - # Case 2: The current edge in sequence1 is deleted - try_key = (head_tail1, seq2) - if try_key in _results: - cand1 = _results[try_key] - else: - stack.append(try_key) - continue - - # Case 3: The current edge in sequence2 is deleted - try_key = (seq1, head_tail2) - if try_key in _results: - cand2 = _results[try_key] - else: - stack.append(try_key) - continue - - # Case 1: The LCSE involves this edge - affinity = node_affinity(t1, t2) - if affinity: - try_key = (head1, head2) - if try_key in _results: - pval_h, new_heads = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (tail1, tail2) - if try_key in _results: - pval_t, new_tails = _results[try_key] - else: - stack.append(try_key) - continue - - new_head1, new_head2 = new_heads - new_tail1, new_tail2 = new_tails - - subseq1 = a1 + new_head1 + b1 + new_tail1 - subseq2 = a2 + new_head2 + b2 + new_tail2 - - res3 = (subseq1, subseq2) - val3 = pval_h + pval_t + affinity - cand3 = (val3, res3) - else: - cand3 = (-1, None) - - # We solved the frame - _results[key] = max(cand1, cand2, cand3) - stack.pop() - - found = _results[key0] - return found - - -def _lcse_recurse( - seq1, seq2, open_to_close, node_affinity, open_to_node, _memo, _seq_memo -): - """ - Surprisingly, this recursive implementation is one of the faster - pure-python methods for certain input types. However, its major drawback is - that it can raise a RecursionError if the inputs are too deep. 
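Both the recursive and iterative backends operate on the a/head/b/tail
decomposition described in the Notes of longest_common_balanced_embedding
above. For reference, the decomposition of the example sequence from those
Notes can be reproduced with a few lines (an editor's helper for
illustration, not the module's balanced_decomp_unsafe):

    def balanced_decomp(seq, open_to_close):
        # a = first token, b = its matching closer, head = tokens strictly
        # between them, tail = tokens after b. Assumes seq is valid/balanced.
        want_close = open_to_close[seq[0]]
        depth = 1
        for idx in range(1, len(seq)):
            depth += 1 if seq[idx] in open_to_close else -1
            if depth == 0 and seq[idx] == want_close:
                break
        return seq[0], seq[idx], seq[1:idx], seq[idx + 1:]

    a, b, head, tail = balanced_decomp('([()[]])[{}([[]])]',
                                       {'(': ')', '[': ']', '{': '}'})
    print(a, head, b, tail)   # ( [()[]] ) [{}([[]])]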
- - See also the iterative version :func:`_lcse_iter` - """ - if not seq1: - return 0, (seq1, seq1) - elif not seq2: - return 0, (seq2, seq2) - else: - key1 = hash(seq1) # using hash(seq) is faster than seq itself - key2 = hash(seq2) - key = hash((key1, key2)) - if key in _memo: - return _memo[key] - - if key1 in _seq_memo: - a1, b1, head1, tail1, head1_tail1 = _seq_memo[key1] - else: - a1, b1, head1, tail1, head1_tail1 = balanced_decomp_unsafe( - seq1, open_to_close - ) - _seq_memo[key1] = a1, b1, head1, tail1, head1_tail1 - - if key2 in _seq_memo: - a2, b2, head2, tail2, head2_tail2 = _seq_memo[key2] - else: - a2, b2, head2, tail2, head2_tail2 = balanced_decomp_unsafe( - seq2, open_to_close - ) - _seq_memo[key2] = a2, b2, head2, tail2, head2_tail2 - - # Case 2: The current edge in sequence1 is deleted - val, best = _lcse_recurse( - head1_tail1, - seq2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - - # Case 3: The current edge in sequence2 is deleted - val_alt, cand = _lcse_recurse( - seq1, - head2_tail2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - if val_alt > val: - best = cand - val = val_alt - - # Case 1: The LCSE involves this edge - t1 = open_to_node[a1[0]] - t2 = open_to_node[a2[0]] - affinity = node_affinity(t1, t2) - if affinity: - pval_h, new_heads = _lcse_recurse( - head1, - head2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - pval_t, new_tails = _lcse_recurse( - tail1, - tail2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - - new_head1, new_head2 = new_heads - new_tail1, new_tail2 = new_tails - - subseq1 = a1 + new_head1 + b1 + new_tail1 - subseq2 = a2 + new_head2 + b2 + new_tail2 - - cand = (subseq1, subseq2) - val_alt = pval_h + pval_t + affinity - if val_alt > val: - best = cand - val = val_alt - - found = (val, best) - _memo[key] = found - return found - - -if __name__ == "__main__": - """ - CommandLine - ------------ - xdoctest -m netharn.initializers._nx_ext_v2.balanced_embedding all - """ - import xdoctest - - xdoctest.doctest_module(__file__) diff --git a/netharn/initializers/_nx_ext_v2/balanced_embedding_cython.pyx b/netharn/initializers/_nx_ext_v2/balanced_embedding_cython.pyx deleted file mode 100644 index b27c9a92a6e0129774f4126be25d3812654bb325..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/balanced_embedding_cython.pyx +++ /dev/null @@ -1,245 +0,0 @@ -# distutils: language = c++ -""" -This module re-implements functions in :module:`balanced_sequence` in cython -and obtains 25-35% speedups in common circumstances. There are likely more -speed improvements that could be made. - - -Issues ------- -- [ ] How to deal with cython + networkx? Do we need to fix that skbuild -with pypy? 
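As a quick runtime check (editor's note, written against the pre-removal
layout of this package that the patch deletes), the pure-python module
reports whether the compiled backend was built and picked up:

    from netharn.initializers._nx_ext_v2 import balanced_embedding as bemb
    print(bemb.available_impls_longest_common_balanced_embedding())
    # ['iter-cython', 'iter', 'recurse'] if the .pyx compiled, else ['iter', 'recurse']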
- - -CommandLine ------------ -# Explicitly build this cython module -# NOTE: cd to networkx repo root before running -cythonize -a -i netharn/initializers/_nx_ext_v2/balanced_embedding_cython.pyx - -# With xdoctest this should work if networkx is installed (and this file is -# distributed with it) -xdoctest -m netharn.initializers._nx_ext_v2.balanced_sequence _cython_lcse_backend - -# Which will then let you run these examples and benchmarks -xdoctest -m netharn.initializers._nx_ext_v2.balanced_embedding_cython list -python -m xdoctest netharn.initializers._nx_ext_v2.balanced_embedding_cython __doc__:0 --bench - - -Example -------- ->>> from netharn.initializers._nx_ext_v2.balanced_embedding_cython import _lcse_iter_cython, IdentityDictCython ->>> from netharn.initializers._nx_ext_v2.balanced_sequence import random_balanced_sequence ->>> from netharn.initializers._nx_ext_v2.balanced_embedding import _lcse_iter ->>> import operator ->>> seq1, open_to_close1 = random_balanced_sequence(300, item_type='paren') ->>> seq2, open_to_close2 = random_balanced_sequence(300, item_type='paren') ->>> open_to_close = {**open_to_close1, **open_to_close2} ->>> full_seq1 = seq1 ->>> full_seq2 = seq2 ->>> node_affinity = operator.eq ->>> open_to_node = IdentityDictCython() ->>> best2, value2 = _lcse_iter_cython(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node) ->>> best1, value1 = _lcse_iter(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node) ->>> assert value1 == value2 - -Benchmark ---------- ->>> # xdoctest: +REQUIRES(--bench) ->>> # xdoctest: +REQUIRES(module:timerit) ->>> print((chr(10) * 3)+ ' --- BEGIN BENCHMARK ---') ->>> import timerit ->>> from netharn.initializers._nx_ext_v2 import balanced_sequence as bseq ->>> from netharn.initializers._nx_ext_v2 import balanced_embedding as bemb ->>> seq_len = 200 ->>> seq1, open_to_close1 = bseq.random_balanced_sequence(seq_len, item_type='paren', container_type='str') ->>> seq2, open_to_close2 = bseq.random_balanced_sequence(seq_len, item_type='paren', container_type='str') ->>> open_to_close = {**open_to_close1, **open_to_close2} ->>> n = 1 ->>> ti = timerit.Timerit(n, bestof=max(2, n), verbose=2) ->>> for timer in ti.reset('impl=iter-cython'): ->>> with timer: ->>> bemb.longest_common_balanced_embedding(seq1, seq2, open_to_close, impl='iter-cython') ->>> for timer in ti.reset('impl=iter'): ->>> with timer: ->>> bemb.longest_common_balanced_embedding(seq1, seq2, open_to_close, impl='iter') - ->>> seq_len = 1000 ->>> seq1, open_to_close1 = bseq.random_balanced_sequence(seq_len, item_type='chr') ->>> seq2, open_to_close2 = bseq.random_balanced_sequence(seq_len, item_type='chr') ->>> open_to_close = {**open_to_close1, **open_to_close2} ->>> n = 1 ->>> ti = timerit.Timerit(n, bestof=max(2, n), verbose=2) ->>> # Following specs are for my machine ->>> for timer in ti.reset('impl=iter-cython'): ->>> with timer: ->>> bemb.longest_common_balanced_embedding(seq1, seq2, open_to_close, impl='iter-cython') ->>> for timer in ti.reset('impl=iter'): ->>> with timer: ->>> bemb.longest_common_balanced_embedding(seq1, seq2, open_to_close, impl='iter') -""" -cimport cython - - -# Template sequence types over strings and tuples -ctypedef fused SeqT: - tuple - str - - -@cython.boundscheck(False) # turn off bounds-checking for entire function -def _lcse_iter_cython(SeqT full_seq1, SeqT full_seq2, dict open_to_close, node_affinity, open_to_node): - """ - Depth first stack trajectory and replace try except statements with ifs - """ - cdef float 
pval_h, pval_t, val3, affinity - cdef SeqT seq1, seq2 - cdef SeqT a1, b1, head1, tail1, head_tail1 - cdef SeqT a2, b2, head2, tail2, head_tail2 - cdef SeqT subseq1, subseq2 - cdef SeqT new_head1, new_head2, new_tail1, new_tail2 - - if open_to_node is None: - open_to_node = IdentityDictCython() - all_decomp1 = generate_all_decomp_cython(full_seq1, open_to_close, open_to_node) - all_decomp2 = generate_all_decomp_cython(full_seq2, open_to_close, open_to_node) - - key0 = (full_seq1, full_seq2) - frame0 = key0 - stack = [frame0] - - _results = {} - # Populate base cases - empty1 = type(next(iter(all_decomp1.keys())))() - empty2 = type(next(iter(all_decomp2.keys())))() - best = (empty1, empty2) - base_result = (0, best) - for seq1 in all_decomp1.keys(): - key1 = seq1 - t1, a1, b1, head1, tail1, head_tail1 = all_decomp1[key1] - _results[(seq1, empty2)] = base_result - _results[(head1, empty2)] = base_result - _results[(tail1, empty2)] = base_result - _results[(head_tail1, empty2)] = base_result - - for seq2 in all_decomp2.keys(): - key2 = seq2 - t2, a2, b2, head2, tail2, head_tail2 = all_decomp2[key2] - _results[(empty1, seq2)] = base_result - _results[(empty1, head2)] = base_result - _results[(empty1, tail2)] = base_result - _results[(empty1, head_tail2)] = base_result - - while stack: - key = stack[-1] - if key not in _results: - seq1, seq2 = key - - t1, a1, b1, head1, tail1, head_tail1 = all_decomp1[seq1] - t2, a2, b2, head2, tail2, head_tail2 = all_decomp2[seq2] - - # Case 2: The current edge in sequence1 is deleted - try_key = (head_tail1, seq2) - if try_key in _results: - cand1 = _results[try_key] - else: - stack.append(try_key) - continue - - # Case 3: The current edge in sequence2 is deleted - try_key = (seq1, head_tail2) - if try_key in _results: - cand2 = _results[try_key] - else: - stack.append(try_key) - continue - - # Case 1: The LCS involves this edge - affinity = float(node_affinity(t1, t2)) - if affinity: - try_key = (head1, head2) - if try_key in _results: - pval_h, new_heads = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (tail1, tail2) - if try_key in _results: - pval_t, new_tails = _results[try_key] - else: - stack.append(try_key) - continue - - new_head1, new_head2 = new_heads - new_tail1, new_tail2 = new_tails - - subseq1 = a1 + new_head1 + b1 + new_tail1 - subseq2 = a2 + new_head2 + b2 + new_tail2 - - res3 = (subseq1, subseq2) - val3 = pval_h + pval_t + affinity - cand3 = (val3, res3) - else: - cand3 = (-1, None) - - # We solved the frame - _results[key] = max(cand1, cand2, cand3) - stack.pop() - - found = _results[key0] - return found - - -@cython.boundscheck(False) # turn off bounds-checking for entire function -@cython.wraparound(False) # turn off negative index wrapping for entire function -cdef tuple balanced_decomp_unsafe_cython(SeqT sequence, dict open_to_close): - """ - Cython version of :func:`balanced_decomp_unsafe`. 
- """ - cdef int stacklen = 1 # always +1 in the first iteration - cdef int head_stop = 1 - cdef SeqT pop_open, pop_close, head, tail, head_tail - - tok_curr = sequence[0] - want_close = open_to_close[tok_curr] - - # for tok_curr in sequence[1:]: - for head_stop in range(1, len(sequence)): - tok_curr = sequence[head_stop] - stacklen += 1 if tok_curr in open_to_close else -1 - if stacklen == 0 and tok_curr == want_close: - pop_close = sequence[head_stop:head_stop + 1] - break - - pop_open = sequence[0:1] - head = sequence[1:head_stop] - tail = sequence[head_stop + 1:] - head_tail = head + tail - return pop_open, pop_close, head, tail, head_tail - - -@cython.boundscheck(False) # turn off bounds-checking for entire function -@cython.wraparound(False) # turn off negative index wrapping for entire function -cdef generate_all_decomp_cython(SeqT seq, dict open_to_close, open_to_node=None): - """ - Cython version of :func:`generate_all_decomp`. - """ - all_decomp = {} - stack = [seq] - while stack: - seq = stack.pop() - if seq not in all_decomp and seq: - pop_open, pop_close, head, tail, head_tail = balanced_decomp_unsafe_cython(seq, open_to_close) - node = open_to_node[pop_open[0]] - all_decomp[seq] = (node, pop_open, pop_close, head, tail, head_tail) - stack.append(head_tail) - stack.append(head) - stack.append(tail) - return all_decomp - - -class IdentityDictCython: - """ Used when ``open_to_node`` is unspecified """ - def __getitem__(self, key): - return key diff --git a/netharn/initializers/_nx_ext_v2/balanced_isomorphism.py b/netharn/initializers/_nx_ext_v2/balanced_isomorphism.py deleted file mode 100644 index 30d76eecae0f11c04f159d801c1098755e3ece33..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/balanced_isomorphism.py +++ /dev/null @@ -1,735 +0,0 @@ -import operator -from netharn.initializers._nx_ext_v2.balanced_sequence import ( - generate_balance_unsafe, - IdentityDict, -) - - -def available_impls_longest_common_balanced_isomorphism(): - """ - Returns all available implementations for - :func:`longest_common_balanced_isomorphism`. - - Returns - ------- - List[str] - the string code for each available implementation - """ - impls = ["recurse", "iter"] - if _cython_lcsi_backend(): - impls += [ - "iter-cython", - ] - return impls - - -def longest_common_balanced_isomorphism( - seq1, seq2, open_to_close, open_to_node=None, node_affinity="auto", impl="auto" -): - r""" - Finds the longest common balanced sequence isomorphism between two - sequences. - - Parameters - ---------- - seq1, seq2: Sequence[TokT] - two input balanced sequences - - open_to_close : Dict[TokT, TokT] - a mapping from opening to closing tokens in the balanced sequence - - open_to_node : Dict[TokT, Any] | None - If unspecified an identity mapping is assumed. Otherwise this is a - dictionary that maps a sequence token to a value which is used in the - ``node_affinity`` comparison. Typically these are values corresponding - to an original problem (e.g. a tree node). This should only used in - the case where the tokens in each sequence ``seq1`` and ``seq2`` are - unique. NOTE: in the case where sequence tokens are not unique, - sequences can always be re-encoded to differentiate between the same - token at different indices without loss of generality. - - node_affinity : None | str | callable - Function for to determine if two nodes can be matched. 
The function - should take two arguments `node1` and `node2` and return a non-negative - affinity score that is zero if the nodes are not allowed to match and - some positive value indicating the strength of the match. The return - is interpreted as a weight that is used to break ties. If - ``node_affinity=None`` then any node can match any other node and only - the topology is important. The default is "auto", which will use - ``operator.eq`` to do a simple equality test on the nodes. - - impl : str - Determines the backend implementation. Available choices are given by - :func:`available_impls_longest_common_balanced_isomorphism`. - The default is "auto", which chooses "iter-cython" if available, - otherwise "iter". - - Returns - ------- - Tuple[Tuple[Sequence[TokT], Sequence[TokT]], Float] - A tuple indicating the common subsequence isomorphism of sequence1 and - sequence2 (usually these are the same) and its value. - - See Also - -------- - * This function is used to implement :func:`netharn.initializers._nx_ext_v2.tree_isomorphism.maximum_common_ordered_subtree_isomorphism` - * A similar function that relaxes isomorphisms to embeddings is :func:`networkx.algorithms.strength.balanced_sequence.longest_common_balanced_sequence` - - Example - ------- - >>> # Given two sequences and a mapping between opening and closing tokens - >>> # we find the longest common subsequence (achievable by repeated - >>> # balanced decomposition) - >>> seq1 = "[][[]][]" - >>> seq2 = "[[]][[]]" - >>> open_to_close = {"[": "]"} - >>> best, value = longest_common_balanced_isomorphism(seq1, seq2, open_to_close) - ... - >>> subseq1, subseq2 = best - >>> print("subseq1 = {!r}".format(subseq1)) - subseq1 = '[[]][]' - - >>> # 1-label case from the paper (see Example 5) - >>> # https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf - >>> seq1 = "0010010010111100001011011011" - >>> seq2 = "001000101101110001000100101110111011" - >>> open_to_close = {"0": "1"} - >>> best, value = longest_common_balanced_isomorphism(seq1, seq2, open_to_close) - >>> subseq1, subseq2 = best - >>> print("subseq1 = {!r}".format(subseq1)) - subseq1 = '001000101111000010111011' - >>> assert value == 12 - - >>> # 3-label case - >>> seq1 = "{({})([[]([]){(()(({()[]({}{})}))){}}])}" - >>> seq2 = "{[({{}}{{[][{}]}(()[(({()}[])){[]()}])})]}" - >>> open_to_close = {"{": "}", "(": ")", "[": "]"} - >>> best, value = longest_common_balanced_isomorphism(seq1, seq2, open_to_close) - >>> subseq1, subseq2 = best - >>> print("subseq1 = {!r}".format(subseq1)) - subseq1 = '{([(){()}])}' - >>> assert value == 6 - """ - if node_affinity == "auto" or node_affinity == "eq": - node_affinity = operator.eq - if node_affinity is None: - - def _matchany(a, b): - return True - - node_affinity = _matchany - if open_to_node is None: - open_to_node = IdentityDict() - full_seq1 = seq1 - full_seq2 = seq2 - if impl == "auto": - if _cython_lcsi_backend(error="ignore"): - impl = "iter-cython" - else: - impl = "iter" - - if impl == "iter": - val_any, best_any, val_lvl, best_lvl = _lcsi_iter( - full_seq1, - full_seq2, - open_to_close, - node_affinity, - open_to_node, - ) - elif impl == "iter-cython": - balanced_isomorphism_cython = _cython_lcsi_backend(error="raise") - ( - val_any, - best_any, - val_lvl, - best_lvl, - ) = balanced_isomorphism_cython._lcsi_iter_cython( - full_seq1, full_seq2, open_to_close, node_affinity, open_to_node - ) - elif impl == "recurse": - _memo = {} - _seq_memo = {} - val_any, best_any, val_lvl, best_lvl = _lcsi_recurse( - 
full_seq1, - full_seq2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - else: - raise KeyError(impl) - - best = best_any - value = val_any - - return best, value - - -def _lcsi_iter(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node): - """ - Converts :func:`_lcsi_recurse` into an iterative algorithm. - - Example - ------- - >>> import operator as op - >>> seq1 = full_seq1 = '[[]][]' - >>> seq2 = full_seq2 = '[]{}[]' - >>> open_to_close = {"{": "}", "(": ")", "[": "]"} - >>> node_affinity = op.eq - >>> _memo, _seq_memo = {}, {} - >>> open_to_node = IdentityDict() - >>> res = _lcsi_iter(full_seq1, full_seq2, open_to_close, node_affinity, - ... open_to_node) - >>> value, best, *_ = _lcsi_iter( - ... full_seq1, full_seq2, open_to_close, node_affinity, open_to_node) - >>> print('value = {!r}, best = {!r}'.format(value, best[0])) - value = 2, best = '[][]' - """ - all_decomp1 = generate_all_decomp_nocat(full_seq1, open_to_close, open_to_node) - all_decomp2 = generate_all_decomp_nocat(full_seq2, open_to_close, open_to_node) - key0 = (full_seq1, full_seq2) - frame0 = key0 - stack = [frame0] - - # Memoize mapping (seq1, seq2) -> best size, embeddings - _results = {} - - # Populate base cases - empty1 = type(next(iter(all_decomp1.keys())))() - empty2 = type(next(iter(all_decomp2.keys())))() - best = (empty1, empty2) - base_result = (0, best, 0, best) - for seq1 in all_decomp1.keys(): - key1 = seq1 - t1, a1, b1, head1, tail1 = all_decomp1[key1] - _results[(seq1, empty2)] = base_result - _results[(head1, empty2)] = base_result - _results[(tail1, empty2)] = base_result - - for seq2 in all_decomp2.keys(): - key2 = seq2 - t2, a2, b2, head2, tail2 = all_decomp2[key2] - _results[(empty1, seq2)] = base_result - _results[(empty1, head2)] = base_result - _results[(empty1, tail2)] = base_result - - while stack: - key = stack[-1] - if key not in _results: - seq1, seq2 = key - - t1, a1, b1, head1, tail1 = all_decomp1[seq1] - t2, a2, b2, head2, tail2 = all_decomp2[seq2] - - best_any = None - best_lvl = None - val_any = 0 - val_lvl = 0 - - # When using the head part of the decomp, we can only update the "low" candidate - try_key = (head1, seq2) - if try_key in _results: - val_any_h1s2, cand_any_h1s2, _, _ = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (tail1, seq2) - if try_key in _results: - val_any_t1s2, cand_any_t1s2, val_lvl_t1s2, cand_lvl_t1s2 = _results[ - try_key - ] - else: - stack.append(try_key) - continue - - try_key = (seq1, head2) - if try_key in _results: - val_any_s1h2, cand_any_s1h2, _, _ = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (seq1, tail2) - if try_key in _results: - val_any_s1t2, cand_any_s1t2, val_lvl_s1t2, cand_lvl_s1t2 = _results[ - try_key - ] - else: - stack.append(try_key) - continue - - if val_any_h1s2 > val_any: - val_any = val_any_h1s2 - best_any = cand_any_h1s2 - - if val_any_t1s2 > val_any: - val_any = val_any_t1s2 - best_any = cand_any_t1s2 - - if val_any_s1h2 > val_any: - val_any = val_any_s1h2 - best_any = cand_any_s1h2 - - if val_any_s1t2 > val_any: - val_any = val_any_s1t2 - best_any = cand_any_s1t2 - - # The "LVL" case should include the case where any match exists on this - # level. That means as long as we don't consider the heads, tail - # matches are fine. 
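# [Editor's note] Throughout this function, val_lvl/best_lvl track the best
# isomorphism anchored at the current outermost nesting level (a parent frame
# can still extend it when its opening tokens match), while val_any/best_any
# track the best isomorphism found at any depth, which can only be taken
# as-is and never grown further; see the recurrence notes in _lcsi_recurse
# below.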
- if val_lvl_s1t2 > val_lvl: - val_lvl = val_lvl_s1t2 - best_lvl = cand_lvl_s1t2 - - if val_lvl_t1s2 > val_lvl: - val_lvl = val_lvl_t1s2 - best_lvl = cand_lvl_t1s2 - - # Case 1: The LCS involves this edge - affinity = node_affinity(t1, t2) - if affinity: - try_key = (head1, head2) - if try_key in _results: - _, _, pval_lvl_h1h2, new_lvl_h1h2 = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (tail1, tail2) - if try_key in _results: - _, _, pval_lvl_t1t2, new_lvl_t1t2 = _results[try_key] - else: - stack.append(try_key) - continue - - # Add to the best solution at the former level - new_val_lvl = pval_lvl_h1h2 + pval_lvl_t1t2 + affinity - if new_val_lvl > val_lvl: - val_lvl = new_val_lvl - new_head1, new_head2 = new_lvl_h1h2 - new_tail1, new_tail2 = new_lvl_t1t2 - subseq1 = a1 + new_head1 + b1 + new_tail1 - subseq2 = a2 + new_head2 + b2 + new_tail2 - best_lvl = (subseq1, subseq2) - - # If the current level is better than any of the nestings forget the - # nestings (we can never improve them) and just use the level - if val_lvl >= val_any: - val_any = val_lvl - best_any = best_lvl - - if best_lvl is None: - best_lvl = (empty1, empty2) - - if best_any is None: - best_any = (empty1, empty2) - - # We solved the frame - found = (val_any, best_any, val_lvl, best_lvl) - _results[key] = found - stack.pop() - - found = _results[key0] - return found - - -def _lcsi_recurse( - seq1, seq2, open_to_close, node_affinity, open_to_node, _memo, _seq_memo -): - """ - Recursive implementation of longest common substring isomorphism. - - Notes - ----- - - Recall a balanced sequence ``s`` can be decomposed as follows: - - .. code:: - # Input balanced sequence s - s = '([()[]])[{}([[]])]' - - # Its decomposition into a head and tail - s.a = '(' - s.head = '[()[]]' - s.b = ')' - s.tail = '[{}([[]])]' - - - # A recursive tail decomposition of a sequence - - - The recurrence returns two values: (0) the best isomorphism that includes - the start of one of the input sequences and (1) the best isomorphism at a - deeper location in the sequence. Is defined as follows: - - .. code:: - - Let lcsi(s1, s2, LVL) be the best isoseq between s1 and s2 that includes - either the token at s1[0] and s2[0] or some token in the recursive - tail decomposition of each sequence. (i.e. the lsci begins at some - node at the current outermost level of nesting) - - Let lcsi(s1, s2, ANY) be the best overall isoseq between s1 and s2 that - might exist in the head of one of the sequences in the recursive - tail decompositions of s1 and s2. (i.e. the lsci might begin at - some deeper nesting level in either sequence). - - lcsi(s1, '', ANY) = 0 - lcsi(s1, '', LVL) = 0 - lcsi('', s2, ANY) = 0 - lcsi('', s2, LVL) = 0 - - # The value of the LCSI including the a first token match is 0 if the - # tokens dont match otherwise it is the affinity plus the LCSI that - # includes the next token in both the head and tail of the balanced - # sequence. - # - # IT CAN ALSO be the case that one string matches the tail of another - lcsi(s1, s2, LVL) = max( - lcsi(s1, s2.tail, LVL) + lcsi(s1.tail, s2, LVL) - lcsi(s1.head, s2.head, LVL) + lcsi(s1.tail, s2.tail, LVL) + affinity(s1.a, s2.a) if affinity(s1.a, s2.a) else 0 - ) - # Note that we cannot consider any exclusion cases because we are not - # allowed to "skip" edges like we are in the "subsequence embedding" - # problem. 
- - # For the LCSI that excludes the current matching token, we peel that - # token off of the first and second sequence and subproblems that - # compare the head or tail of one sequence to the entire other - # sequence. Because the current leading token is discarded in at least - # one of the input sequences we consider the include and exclude case - # for all subproblems here. - lcsi(s1, s2, ANY) = max( - # - lcsi(s1, s2, LVL) - - # The case where we only consider the head/tail of s1 - lcsi(s1.head, s2, ANY), - lcsi(s1.tail, s2, ANY), - - # The case where we only consider the head/tail of s1 - lcsi(s1.head, s2, ANY), - lcsi(s1.tail, s2, ANY), - ) - - # Note that by the way the recurrence is defined, s1.head will be - # compared to s2.head in subsequent subproblems, so explicitly adding - # that decomposition here is not necessary. - - The final lcsi for s1 and s2 is - - lcsi(s1, s2) = lcsi(s1, s2, ANY) - - Example - ------- - >>> import operator as op - >>> node_affinity = op.eq - >>> open_to_close = {"{": "}", "(": ")", "[": "]"} - >>> open_to_node = IdentityDict() - >>> # --- - >>> seq1 = full_seq1 = "[][[]][]" - >>> seq2 = full_seq2 = "[[]][[]]" - >>> _memo, _seq_memo = {}, {} - >>> value, best, *_ = _lcsi_recurse( - ... full_seq1, full_seq2, open_to_close, node_affinity, open_to_node, - ... _memo, _seq_memo) - >>> print('value = {!r}, best = {!r}'.format(value, best[0])) - value = 3, best = '[[]][]' - >>> # --- - >>> seq1 = full_seq1 = "[{[[]]}]" - >>> seq2 = full_seq2 = "[[{[[]]}]]" - >>> _memo, _seq_memo = {}, {} - >>> value, best, *_ = _lcsi_recurse( - ... full_seq1, full_seq2, open_to_close, node_affinity, open_to_node, - ... _memo, _seq_memo) - >>> print('value = {!r}, best = {!r}'.format(value, best[0])) - value = 4, best = '[{[[]]}]' - >>> # --- - >>> seq1 = full_seq1 = '({{{[]}}})' - >>> seq2 = full_seq2 = '[{{([()])}}]' - >>> _memo, _seq_memo = {}, {} - >>> value, best, *_ = _lcsi_recurse( - ... full_seq1, full_seq2, open_to_close, node_affinity, open_to_node, - ... _memo, _seq_memo) - >>> print('value = {!r}, best = {!r}'.format(value, best[0])) - value = 2, best = '{{}}' - >>> # --- - >>> full_seq1 = '[[]][]' - >>> full_seq2 = '[]{}[]' - >>> _memo, _seq_memo = {}, {} - >>> value, best, *_ = _lcsi_recurse( - ... full_seq1, full_seq2, open_to_close, node_affinity, open_to_node, - ... _memo, _seq_memo) - >>> print('value = {!r}, best = {!r}'.format(value, best[0])) - value = 2, best = '[][]' - >>> # --- - >>> full_seq1 = '[[]][]' - >>> full_seq2 = '[]{}{}{}{}[]' - >>> _memo, _seq_memo = {}, {} - >>> value, best, *_ = _lcsi_recurse( - ... full_seq1, full_seq2, open_to_close, node_affinity, open_to_node, - ... _memo, _seq_memo) - >>> print('value = {!r}, best = {!r}'.format(value, best[0])) - value = 2, best = '[][]' - """ - if not seq1: - return 0, (seq1, seq1), 0, (seq1, seq1) - elif not seq2: - return 0, (seq2, seq2), 0, (seq2, seq2) - else: - key1 = hash(seq1) - key2 = hash(seq2) - key = hash((key1, key2)) - if key in _memo: - return _memo[key] - - if key1 in _seq_memo: - a1, b1, head1, tail1 = _seq_memo[key1] - else: - a1, b1, head1, tail1 = balanced_decomp_unsafe_nocat(seq1, open_to_close) - _seq_memo[key1] = a1, b1, head1, tail1 - - if key2 in _seq_memo: - a2, b2, head2, tail2 = _seq_memo[key2] - else: - a2, b2, head2, tail2 = balanced_decomp_unsafe_nocat(seq2, open_to_close) - _seq_memo[key2] = a2, b2, head2, tail2 - - # TODO: IS THIS THE CORRECT MODIFICATION TO THE RECURRANCE TO - # ACHIEVE A SUBTREE ISOMORPHISM INSTEAD OF AN EMBEDDING? 
- r""" - We return two solutions at each step: the solution value at - this level if one exists, and the solution value at any other depth. - We are allowed to add to the first, but can take the second if we want - to. - - This should work because we know a solution that skipped a layer will - never be added to, and we are always keeping track of the solution that - might change. By the time we get to the root level, we have enough info - to know which is better. - """ - - # Consider the case where the best isoseq does not include the leading - # tokens of s1 and s2. - best_any = None - best_lvl = None - val_any = 0 - val_lvl = 0 - - # When using the head part of the decomp, we can only update the "low" candidate - val_any_h1s2, cand_any_h1s2, _, _, = _lcsi_recurse( - head1, seq2, open_to_close, node_affinity, open_to_node, _memo, _seq_memo - ) - val_any_t1s2, cand_any_t1s2, val_lvl_t1s2, cand_lvl_t1s2 = _lcsi_recurse( - tail1, seq2, open_to_close, node_affinity, open_to_node, _memo, _seq_memo - ) - val_any_s1h2, cand_any_s1h2, _, _ = _lcsi_recurse( - seq1, head2, open_to_close, node_affinity, open_to_node, _memo, _seq_memo - ) - val_any_s1t2, cand_any_s1t2, val_lvl_s1t2, cand_lvl_s1t2 = _lcsi_recurse( - seq1, tail2, open_to_close, node_affinity, open_to_node, _memo, _seq_memo - ) - - if val_any_h1s2 > val_any: - val_any = val_any_h1s2 - best_any = cand_any_h1s2 - - if val_any_t1s2 > val_any: - val_any = val_any_t1s2 - best_any = cand_any_t1s2 - - if val_any_s1h2 > val_any: - val_any = val_any_s1h2 - best_any = cand_any_s1h2 - - if val_any_s1t2 > val_any: - val_any = val_any_s1t2 - best_any = cand_any_s1t2 - - # The "LVL" case should include the case where any match exists on this - # level. That means as long as we don't consider the heads, tail - # matches are fine. - if val_lvl_s1t2 > val_lvl: - val_lvl = val_lvl_s1t2 - best_lvl = cand_lvl_s1t2 - - if val_lvl_t1s2 > val_lvl: - val_lvl = val_lvl_t1s2 - best_lvl = cand_lvl_t1s2 - - # Consider the case where the best isoseq does include the leading - # tokens of s1 and s2. - t1 = open_to_node[a1[0]] - t2 = open_to_node[a2[0]] - affinity = node_affinity(t1, t2) - if affinity: - - # Note, the "ex" portions of the LCSI don't matter because val_any - # and best_any will already contain them if they matter We only care - # about extending the current "in" case. 
- # (Actually this is wrong) - _, _, pval_lvl_h1h2, new_lvl_h1h2 = _lcsi_recurse( - head1, - head2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - - _, _, pval_lvl_t1t2, new_lvl_t1t2 = _lcsi_recurse( - tail1, - tail2, - open_to_close, - node_affinity, - open_to_node, - _memo, - _seq_memo, - ) - - # Add to the best solution at the former level - new_val_lvl = pval_lvl_h1h2 + pval_lvl_t1t2 + affinity - if new_val_lvl > val_lvl: - val_lvl = new_val_lvl - new_head1, new_head2 = new_lvl_h1h2 - new_tail1, new_tail2 = new_lvl_t1t2 - subseq1 = a1 + new_head1 + b1 + new_tail1 - subseq2 = a2 + new_head2 + b2 + new_tail2 - best_lvl = (subseq1, subseq2) - - # If the current level is better than any of the nestings forget the - # nestings (we can never improve them) and just use the level - if val_lvl >= val_any: - val_any = val_lvl - best_any = best_lvl - - if best_lvl is None: - best_lvl = (type(seq1)(), type(seq2)()) - - if best_any is None: - best_any = (type(seq1)(), type(seq2)()) - - # We return two solutions: - # the best that includes any token at this level (lvl) - # the best overall that could be at a deeper nesting (nst) - found = (val_any, best_any, val_lvl, best_lvl) - _memo[key] = found - return found - - -def balanced_decomp_unsafe_nocat(sequence, open_to_close): - """ - Similar to :func:`balanced_decomp` but assumes that ``sequence`` is valid - balanced sequence in order to execute faster. Also does not return - the concatenated head_tail as it is unused in the isomorphim problem. - - SeeAlso - ------- - balanced_decomp, balanced_decomp_unsafe - """ - gen = generate_balance_unsafe(sequence, open_to_close) - - bal_curr, tok_curr = next(gen) - pop_open = sequence[0:1] - want_close = open_to_close[tok_curr] - - head_stop = 1 - for head_stop, (bal_curr, tok_curr) in enumerate(gen, start=1): - if bal_curr and tok_curr == want_close: - pop_close = sequence[head_stop : head_stop + 1] - break - head = sequence[1:head_stop] - tail = sequence[head_stop + 1 :] - return pop_open, pop_close, head, tail - - -def generate_all_decomp_nocat(seq, open_to_close, open_to_node=None): - """ - Generates all decompositions of a single balanced sequence by recursive - decomposition of the head, tail. - - Parameters - ---------- - seq : Tuple | str - a tuple of hashable items or a string where each character is an item - - open_to_close : Dict - a dictionary that maps opening tokens to closing tokens in the balanced - sequence problem. - - open_to_node : Dict - a dictionary that maps a sequence token to a token corresponding to an - original problem (e.g. 
a tree node) - - Returns - ------- - Dict : - mapping from a sub-sequence to its decomposition - - SeeAlso - ------- - generate_balance_unsafe, generate_balance - - Example - ------- - >>> import pprint - >>> seq = '{{(){}}}' - >>> open_to_close = {'[': ']', '{': '}', '(': ')'} - >>> all_decomp = generate_all_decomp_nocat(seq, open_to_close) - >>> print('all_decomp = {}'.format(pprint.pformat(all_decomp))) - all_decomp = {'(){}': ('(', '(', ')', '', '{}'), - '{(){}}': ('{', '{', '}', '(){}', ''), - '{{(){}}}': ('{', '{', '}', '{(){}}', ''), - '{}': ('{', '{', '}', '', '')} - - """ - if open_to_node is None: - open_to_node = IdentityDict() - all_decomp = {} - stack = [seq] - while stack: - seq = stack.pop() - if seq not in all_decomp and seq: - (pop_open, pop_close, head, tail) = balanced_decomp_unsafe_nocat( - seq, open_to_close - ) - node = open_to_node[pop_open[0]] - all_decomp[seq] = (node, pop_open, pop_close, head, tail) - if head: - if tail: - stack.append(tail) - stack.append(head) - elif tail: - stack.append(tail) - return all_decomp - - -def _cython_lcsi_backend(error="ignore", verbose=0): - """ - Returns the cython backend if available, otherwise None - - CommandLine - ----------- - xdoctest -m netharn.initializers._nx_ext_v2.balanced_isomorphism _cython_lcsi_backend - """ - from netharn.initializers._nx_ext_v2._autojit import import_module_from_pyx - from os.path import dirname - import os - - # Toggle comments depending on the desired autojit default - NETWORKX_AUTOJIT = os.environ.get("NETWORKX_AUTOJIT", "") - NETWORKX_AUTOJIT = not os.environ.get("NETWORKX_NO_AUTOJIT", "") - - module = import_module_from_pyx( - "balanced_isomorphism_cython.pyx", - dpath=dirname(__file__), - error=error, - autojit=NETWORKX_AUTOJIT, - verbose=verbose, - ) - balanced_embedding_cython = module - return balanced_embedding_cython diff --git a/netharn/initializers/_nx_ext_v2/balanced_isomorphism_cython.pyx b/netharn/initializers/_nx_ext_v2/balanced_isomorphism_cython.pyx deleted file mode 100644 index 995fb9135e814ff4189489953a8e493261e9de13..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/balanced_isomorphism_cython.pyx +++ /dev/null @@ -1,294 +0,0 @@ -# distutils: language = c++ -""" -This module re-implements functions in :module:`balanced_isomorphism` in cython -and obtains 25-35% speedups in common circumstances. There are likely more -speed improvements that could be made. - - -Issues ------- -- [ ] How to deal with cython + networkx? Do we need to fix that skbuild -with pypy? 
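Because ``_cython_lcsi_backend`` above returns None when the compiled extension cannot be imported or built, callers can degrade gracefully to the pure-Python solver. The snippet below is a hypothetical caller-side sketch, not the library's actual dispatch (that happens inside ``longest_common_balanced_isomorphism``, which is not shown in this diff); the import paths are the pre-removal module locations.

from netharn.initializers._nx_ext_v2.balanced_isomorphism import (
    _cython_lcsi_backend, longest_common_balanced_isomorphism,
)

# Prefer the compiled backend only if the autojit import succeeded.
backend = _cython_lcsi_backend(error="ignore", verbose=0)
impl = "iter" if backend is None else "iter-cython"

seq1, seq2 = "[][[]][]", "[[]][[]]"
open_to_close = {"[": "]", "{": "}", "(": ")"}
best, value = longest_common_balanced_isomorphism(
    seq1, seq2, open_to_close, impl=impl)
assert value == 3  # same answer as the _lcsi_recurse doctest earlier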
- - -CommandLine ------------ -# Explicitly build this cython module -# NOTE: cd to networkx repo root before running -cythonize -a -i netharn/initializers/_nx_ext_v2/balanced_isomorphism_cython.pyx - -# With xdoctest this should work if networkx is installed (and this file is -# distributed with it) -xdoctest -m netharn.initializers._nx_ext_v2.balanced_isomorphism _cython_lcs_backend - -# Which will then let you run these examples and benchmarks -xdoctest -m netharn.initializers._nx_ext_v2.balanced_isomorphism_cython list -xdoctest -m netharn.initializers._nx_ext_v2.balanced_isomorphism_cython __doc__:0 --bench - - -Example -------- ->>> from netharn.initializers._nx_ext_v2.balanced_isomorphism_cython import _lcsi_iter_cython, IdentityDictCython ->>> from netharn.initializers._nx_ext_v2.balanced_isomorphism import _lcsi_iter ->>> from netharn.initializers._nx_ext_v2.balanced_sequence import random_balanced_sequence ->>> import operator ->>> seq1, open_to_close1 = random_balanced_sequence(300, item_type='paren') ->>> seq2, open_to_close2 = random_balanced_sequence(300, item_type='paren') ->>> open_to_close = {**open_to_close1, **open_to_close2} ->>> full_seq1 = seq1 ->>> full_seq2 = seq2 ->>> node_affinity = operator.eq ->>> open_to_node = IdentityDictCython() ->>> value2, best2, *_ = _lcsi_iter_cython(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node) ->>> value1, best1, *_ = _lcsi_iter(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node) ->>> assert value1 == value2 - -Benchmark ---------- ->>> # xdoctest: +REQUIRES(--bench) ->>> # xdoctest: +REQUIRES(module:timerit) ->>> print((chr(10) * 3)+ ' --- BEGIN BENCHMARK ---') ->>> import timerit ->>> from netharn.initializers._nx_ext_v2 import balanced_sequence as bseq ->>> from netharn.initializers._nx_ext_v2 import balanced_isomorphism as biso ->>> seq_len = 200 ->>> seq1, open_to_close1 = bseq.random_balanced_sequence(seq_len, item_type='paren', container_type='str') ->>> seq2, open_to_close2 = bseq.random_balanced_sequence(seq_len, item_type='paren', container_type='str') ->>> open_to_close = {**open_to_close1, **open_to_close2} ->>> n = 1 ->>> ti = timerit.Timerit(n, bestof=max(2, n), verbose=2) ->>> for timer in ti.reset('impl=iter-cython'): ->>> with timer: ->>> biso.longest_common_balanced_isomorphism(seq1, seq2, open_to_close, impl='iter-cython') ->>> for timer in ti.reset('impl=iter'): ->>> with timer: ->>> biso.longest_common_balanced_isomorphism(seq1, seq2, open_to_close, impl='iter') ->>> print(ti.summary()) - ->>> seq_len = 1000 ->>> seq1, open_to_close1 = bseq.random_balanced_sequence(seq_len, item_type='chr') ->>> seq2, open_to_close2 = bseq.random_balanced_sequence(seq_len, item_type='chr') ->>> open_to_close = {**open_to_close1, **open_to_close2} ->>> n = 1 ->>> ti = timerit.Timerit(n, bestof=max(2, n), verbose=2) ->>> # Following specs are for my machine ->>> for timer in ti.reset('impl=iter-cython'): ->>> with timer: ->>> biso.longest_common_balanced_isomorphism(seq1, seq2, open_to_close, impl='iter-cython') ->>> for timer in ti.reset('impl=iter'): ->>> with timer: ->>> biso.longest_common_balanced_isomorphism(seq1, seq2, open_to_close, impl='iter') ->>> print(ti.summary()) -""" -cimport cython - - - -@cython.boundscheck(False) # Deactivate bounds checking -def _lcsi_iter_cython(full_seq1, full_seq2, open_to_close, node_affinity, open_to_node): - """ - Cython implementation of :func:`_lcsi_iter`. 
- """ - all_decomp1 = generate_all_decomp_nocat_cython(full_seq1, open_to_close, open_to_node) - all_decomp2 = generate_all_decomp_nocat_cython(full_seq2, open_to_close, open_to_node) - key0 = (full_seq1, full_seq2) - frame0 = key0 - stack = [frame0] - - # Memoize mapping (seq1, seq2) -> best size, embeddings - _results = {} - - # Populate base cases - empty1 = type(next(iter(all_decomp1.keys())))() - empty2 = type(next(iter(all_decomp2.keys())))() - best = (empty1, empty2) - base_result = (0, best, 0, best) - for seq1 in all_decomp1.keys(): - key1 = seq1 - t1, a1, b1, head1, tail1 = all_decomp1[key1] - _results[(seq1, empty2)] = base_result - _results[(head1, empty2)] = base_result - _results[(tail1, empty2)] = base_result - - for seq2 in all_decomp2.keys(): - key2 = seq2 - t2, a2, b2, head2, tail2 = all_decomp2[key2] - _results[(empty1, seq2)] = base_result - _results[(empty1, head2)] = base_result - _results[(empty1, tail2)] = base_result - - cdef double val_any, val_lvl - cdef double val_any_h1s2, val_any_t1s2, val_any_s1h2, val_any_s1t2 - cdef double val_lvl_t1s2, val_lvl_s1t2 - cdef double pval_lvl_t1t2, pval_lvl_h1h2 - cdef double new_val_lvl - cdef double affinity - - while stack: - key = stack[-1] - if key not in _results: - seq1, seq2 = key - - t1, a1, b1, head1, tail1 = all_decomp1[seq1] - t2, a2, b2, head2, tail2 = all_decomp2[seq2] - - best_any = None - best_lvl = None - val_any = 0 - val_lvl = 0 - - # When using the head part of the decomp, we can only update the "low" candidate - try_key = (head1, seq2) - if try_key in _results: - val_any_h1s2, cand_any_h1s2, _, _ = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (tail1, seq2) - if try_key in _results: - val_any_t1s2, cand_any_t1s2, val_lvl_t1s2, cand_lvl_t1s2 = _results[ - try_key - ] - else: - stack.append(try_key) - continue - - try_key = (seq1, head2) - if try_key in _results: - val_any_s1h2, cand_any_s1h2, _, _ = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (seq1, tail2) - if try_key in _results: - val_any_s1t2, cand_any_s1t2, val_lvl_s1t2, cand_lvl_s1t2 = _results[ - try_key - ] - else: - stack.append(try_key) - continue - - if val_any_h1s2 > val_any: - val_any = val_any_h1s2 - best_any = cand_any_h1s2 - - if val_any_t1s2 > val_any: - val_any = val_any_t1s2 - best_any = cand_any_t1s2 - - if val_any_s1h2 > val_any: - val_any = val_any_s1h2 - best_any = cand_any_s1h2 - - if val_any_s1t2 > val_any: - val_any = val_any_s1t2 - best_any = cand_any_s1t2 - - # The "LVL" case should include the case where any match exists on this - # level. That means as long as we don't consider the heads, tail - # matches are fine. 
- if val_lvl_s1t2 > val_lvl: - val_lvl = val_lvl_s1t2 - best_lvl = cand_lvl_s1t2 - - if val_lvl_t1s2 > val_lvl: - val_lvl = val_lvl_t1s2 - best_lvl = cand_lvl_t1s2 - - # Case 1: The LCS involves this edge - affinity = float(node_affinity(t1, t2)) - if affinity: - try_key = (head1, head2) - if try_key in _results: - _, _, pval_lvl_h1h2, new_lvl_h1h2 = _results[try_key] - else: - stack.append(try_key) - continue - - try_key = (tail1, tail2) - if try_key in _results: - _, _, pval_lvl_t1t2, new_lvl_t1t2 = _results[try_key] - else: - stack.append(try_key) - continue - - # Add to the best solution at the former level - new_val_lvl = pval_lvl_h1h2 + pval_lvl_t1t2 + affinity - if new_val_lvl > val_lvl: - val_lvl = new_val_lvl - new_head1, new_head2 = new_lvl_h1h2 - new_tail1, new_tail2 = new_lvl_t1t2 - subseq1 = a1 + new_head1 + b1 + new_tail1 - subseq2 = a2 + new_head2 + b2 + new_tail2 - best_lvl = (subseq1, subseq2) - - # If the current level is better than any of the nestings forget the - # nestings (we can never improve them) and just use the level - if val_lvl >= val_any: - val_any = val_lvl - best_any = best_lvl - - if best_lvl is None: - best_lvl = (empty1, empty2) - - if best_any is None: - best_any = (empty1, empty2) - - # We solved the frame - found = (val_any, best_any, val_lvl, best_lvl) - _results[key] = found - stack.pop() - - found = _results[key0] - return found - - -cdef tuple balanced_decomp_unsafe_nocat_cython(sequence, dict open_to_close): - cdef int stacklen = 1 # always +1 in the first iteration - cdef int head_stop = 1 - - tok_curr = sequence[0] - want_close = open_to_close[tok_curr] - - # for tok_curr in sequence[1:]: - for head_stop in range(1, len(sequence)): - tok_curr = sequence[head_stop] - stacklen += 1 if tok_curr in open_to_close else -1 - if stacklen == 0 and tok_curr == want_close: - pop_close = sequence[head_stop:head_stop + 1] - break - - pop_open = sequence[0:1] - head = sequence[1:head_stop] - tail = sequence[head_stop + 1:] - return pop_open, pop_close, head, tail - - -cdef dict generate_all_decomp_nocat_cython(seq, open_to_close, open_to_node=None): - if open_to_node is None: - open_to_node = IdentityDictCython() - all_decomp = {} - stack = [seq] - while stack: - seq = stack.pop() - if seq not in all_decomp and seq: - (pop_open, pop_close, head, tail) = balanced_decomp_unsafe_nocat_cython( - seq, open_to_close - ) - node = open_to_node[pop_open[0]] - all_decomp[seq] = (node, pop_open, pop_close, head, tail) - if head: - if tail: - stack.append(tail) - stack.append(head) - elif tail: - stack.append(tail) - return all_decomp - - -class IdentityDictCython: - """ Used when ``open_to_node`` is unspecified """ - def __getitem__(self, key): - return key diff --git a/netharn/initializers/_nx_ext_v2/balanced_sequence.py b/netharn/initializers/_nx_ext_v2/balanced_sequence.py deleted file mode 100644 index 64c7e4e78c10a483e30004e05e138581119d1249..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/balanced_sequence.py +++ /dev/null @@ -1,413 +0,0 @@ -""" -Helpers and utilities for balanced sequence problems. 
-""" - -__all__ = [ - "random_balanced_sequence", -] - - -class UnbalancedException(Exception): - """ - Denotes that a sequence was unbalanced - """ - - pass - - -class IdentityDict: - """ - Used when ``open_to_node`` is unspecified - """ - - def __getitem__(self, key): - return key - - -def generate_all_decomp(seq, open_to_close, open_to_node=None): - """ - Generates all decompositions of a single balanced sequence by - recursive decomposition of the head, tail, and head|tail. - - Parameters - ---------- - seq : Tuple | str - a tuple of hashable items or a string where each character is an item - - open_to_close : Dict - a dictionary that maps opening tokens to closing tokens in the balanced - sequence problem. - - open_to_node : Dict - a dictionary that maps a sequence token to a token corresponding to an - original problem (e.g. a tree node) - - Returns - ------- - Dict : - mapping from a sub-sequence to its decomposition - - Notes - ----- - In the paper: See Definition 2, 4, Lemma, 1, 2, 3, 4. - - Example - ------- - >>> # Example 2 in the paper (one from each column) - >>> seq = "00100100101111" - >>> open_to_close = {"0": "1"} - >>> all_decomp = generate_all_decomp(seq, open_to_close) - >>> assert len(all_decomp) == len(seq) // 2 - >>> import pprint - >>> pprint.pprint(all_decomp) - {'00100100101111': ('0', '0', '1', '010010010111', '', '010010010111'), - '0010010111': ('0', '0', '1', '01001011', '', '01001011'), - '001011': ('0', '0', '1', '0101', '', '0101'), - '01': ('0', '0', '1', '', '', ''), - '010010010111': ('0', '0', '1', '', '0010010111', '0010010111'), - '01001011': ('0', '0', '1', '', '001011', '001011'), - '0101': ('0', '0', '1', '', '01', '01')} - - Example - ------- - >>> open_to_close = {"{": "}", "(": ")", "[": "]"} - >>> seq = "({[[]]})[[][]]{{}}" - >>> all_decomp = generate_all_decomp(seq, open_to_close) - >>> node, *decomp = all_decomp[seq] - >>> pop_open, pop_close, head, tail, head_tail = decomp - >>> print("node = {!r}".format(node)) - node = '(' - >>> print("pop_open = {!r}".format(pop_open)) - pop_open = '(' - >>> print("pop_close = {!r}".format(pop_close)) - pop_close = ')' - >>> print("head = {!r}".format(head)) - head = '{[[]]}' - >>> print("tail = {!r}".format(tail)) - tail = '[[][]]{{}}' - >>> print("head_tail = {!r}".format(head_tail)) - head_tail = '{[[]]}[[][]]{{}}' - >>> decomp_alt = balanced_decomp(seq, open_to_close) - >>> assert decomp_alt == tuple(decomp) - - Example - ------- - >>> seq, open_to_close = random_balanced_sequence(10) - >>> all_decomp = generate_all_decomp(seq, open_to_close) - """ - if open_to_node is None: - open_to_node = IdentityDict() - all_decomp = {} - stack = [seq] - while stack: - seq = stack.pop() - if seq not in all_decomp and seq: - (pop_open, pop_close, head, tail, head_tail) = balanced_decomp_unsafe( - seq, open_to_close - ) - node = open_to_node[pop_open[0]] - all_decomp[seq] = (node, pop_open, pop_close, head, tail, head_tail) - if head: - if tail: - stack.append(head_tail) - stack.append(tail) - stack.append(head) - elif tail: - stack.append(tail) - return all_decomp - - -def balanced_decomp(sequence, open_to_close): - """ - Generates a decomposition of a balanced sequence. - - Parameters - ---------- - sequence : str | Tuple - balanced sequence to be decomposed - - open_to_close: dict - a dictionary that maps opening tokens to closing tokens in the balanced - sequence problem. - - Returns - ------- - : tuple[SeqT, SeqT, SeqT, SeqT, SeqT] - where ``SeqT = type(sequence)`` - Contents of this tuple are: - - 0. 
a1 - a sequence of len(1) containing the current opening token - 1. b1 - a sequence of len(1) containing the current closing token - 2. head - head of the sequence - 3. tail - tail of the sequence - 4. head_tail - the concatenated head and tail - - Example - ------- - >>> # Example 3 from the paper - >>> sequence = "001000101101110001000100101110111011" - >>> open_to_close = {"0": "1"} - >>> a1, b1, head, tail, head_tail = balanced_decomp(sequence, open_to_close) - >>> print("head = {!r}".format(head)) - head = '010001011011' - >>> print("tail = {!r}".format(tail)) - tail = '0001000100101110111011' - - Example - ------- - >>> open_to_close = {0: 1} - >>> sequence = [0, 0, 0, 1, 1, 1, 0, 1] - >>> a1, b1, head, tail, head_tail = balanced_decomp(sequence, open_to_close) - >>> print("a1 = {!r}".format(a1)) - a1 = [0] - >>> print("b1 = {!r}".format(b1)) - b1 = [1] - >>> print("head = {!r}".format(head)) - head = [0, 0, 1, 1] - >>> print("tail = {!r}".format(tail)) - tail = [0, 1] - >>> print("head_tail = {!r}".format(head_tail)) - head_tail = [0, 0, 1, 1, 0, 1] - >>> a2, b2, tail1, tail2, head_tail2 = balanced_decomp(tail, open_to_close) - - Example - ------- - >>> open_to_close = {"{": "}", "(": ")", "[": "]"} - >>> sequence = "({[[]]})[[][]]" - >>> a1, b1, head, tail, head_tail = balanced_decomp(sequence, open_to_close) - >>> print("a1 = {!r}".format(a1)) - a1 = '(' - >>> print("b1 = {!r}".format(b1)) - b1 = ')' - >>> print("head = {!r}".format(head)) - head = '{[[]]}' - >>> print("tail = {!r}".format(tail)) - tail = '[[][]]' - >>> print("head_tail = {!r}".format(head_tail)) - head_tail = '{[[]]}[[][]]' - >>> a2, b2, tail1, tail2, head_tail2 = balanced_decomp(tail, open_to_close) - >>> print("a2 = {!r}".format(a2)) - a2 = '[' - >>> print("b2 = {!r}".format(b2)) - b2 = ']' - >>> print("tail1 = {!r}".format(tail1)) - tail1 = '[][]' - >>> print("tail2 = {!r}".format(tail2)) - tail2 = '' - >>> print("head_tail2 = {!r}".format(head_tail2)) - head_tail2 = '[][]' - """ - gen = generate_balance(sequence, open_to_close) - - bal_curr, tok_curr, _ = next(gen) - pop_open = sequence[0:1] - want_close = open_to_close[tok_curr] - - head_stop = 1 - for head_stop, (bal_curr, tok_curr, _) in enumerate(gen, start=1): - if tok_curr is None: - break - elif bal_curr and tok_curr == want_close: - pop_close = sequence[head_stop : head_stop + 1] - break - head = sequence[1:head_stop] - tail = sequence[head_stop + 1 :] - head_tail = head + tail - return pop_open, pop_close, head, tail, head_tail - - -def generate_balance(sequence, open_to_close): - r""" - Iterates through a balanced sequence and reports if the sequence-so-far - is balanced at that position or not. - - Parameters - ---------- - sequence: Iterable[TokT]: - an input balanced sequence - - open_to_close : Dict[TokT, TokT] - a mapping from opening to closing tokens in the balanced sequence - - Yields - ------ - Tuple[bool, TokT, int]: - boolean indicating if the sequence is balanced at this index, and the - current token, and the index of the matching opening token. - - Raises - ------ - UnbalancedException - if the input sequence is not balanced - - Example - ------- - >>> open_to_close = {0: 1} - >>> sequence = [0, 0, 0, 1, 1, 1] - >>> gen = list(generate_balance(sequence, open_to_close)) - >>> for flag, token, idx in gen: - ... 
print("flag={:d}, token={}, prev_idx={}".format(flag, token, idx)) - flag=0, token=0, prev_idx=None - flag=0, token=0, prev_idx=None - flag=0, token=0, prev_idx=None - flag=0, token=1, prev_idx=2 - flag=0, token=1, prev_idx=1 - flag=1, token=1, prev_idx=0 - - Example - ------- - >>> sequence, open_to_close = random_balanced_sequence(4, seed=0) - >>> gen = list(generate_balance(sequence, open_to_close)) - """ - stack = [] - # Traversing the Expression - for idx, token in enumerate(sequence): - - if token in open_to_close: - # Push opening elements onto the stack - stack.append((token, idx)) - prev_idx = None - else: - # Check that closing elements - if not stack: - raise UnbalancedException - prev_open, prev_idx = stack.pop() - want_close = open_to_close[prev_open] - - if token != want_close: - raise UnbalancedException - - # If the stack is empty the sequence is currently balanced - currently_balanced = not bool(stack) - yield currently_balanced, token, prev_idx - - if stack: - raise UnbalancedException - - -def balanced_decomp_unsafe(sequence, open_to_close): - """ - Similar to :func:`balanced_decomp` but assumes that ``sequence`` is valid - balanced sequence in order to execute faster. - """ - gen = generate_balance_unsafe(sequence, open_to_close) - - bal_curr, tok_curr = next(gen) - pop_open = sequence[0:1] - want_close = open_to_close[tok_curr] - - head_stop = 1 - for head_stop, (bal_curr, tok_curr) in enumerate(gen, start=1): - if bal_curr and tok_curr == want_close: - pop_close = sequence[head_stop : head_stop + 1] - break - head = sequence[1:head_stop] - tail = sequence[head_stop + 1 :] - head_tail = head + tail - return pop_open, pop_close, head, tail, head_tail - - -def generate_balance_unsafe(sequence, open_to_close): - """ - Similar to :func:`generate_balance` but assumes that ``sequence`` is valid - balanced sequence in order to execute faster. - """ - stacklen = 0 - for token in sequence: - if token in open_to_close: - stacklen += 1 - else: - stacklen -= 1 - yield stacklen == 0, token - - -def random_balanced_sequence( - n, seed=None, item_type="chr", container_type="auto", open_to_close=None -): - r""" - Creates a random balanced sequence for testing / benchmarks - - Parameters - ---------- - n : int - A positive integer representing the number of nodes in the tree. - - seed : integer, random_state, or None (default) - Indicator of random number generation state. - See :ref:`Randomness`. - - open_to_close : dict | None - if specified, updates existing open_to_close with tokens from this - sequence. - - item_type: str - the type of sequence returned (see `item_type` in :func:`tree_to_seq` - for details) can also be "paren", which is a special case that returns - a nested set of parenthesis. - - container_type : str - Determines the container_type type. Can be "auto", "list", "tuple", or - "str". If "auto" tries to choose the best given the input data. - - Returns - ------- - Tuple[(str | Tuple), Dict[str, str]] - The first item is the sequence itself - the second item is the open_to_close mappings. 
- - Example - ------- - >>> # Demo the various sequence encodings that we might use - >>> seq, open_to_close = random_balanced_sequence(4, seed=1, item_type="chr") - >>> print("seq = {!r}".format(seq)) - seq = '\x00\x02\x04\x05\x03\x06\x07\x01' - >>> seq, open_to_close = random_balanced_sequence(4, seed=1, item_type="number") - >>> print("seq = {!r}".format(seq)) - seq = (1, 2, 3, -3, -2, 4, -4, -1) - >>> seq, open_to_close = random_balanced_sequence(10, seed=1, item_type="paren") - >>> print("seq = {!r}".format(seq)) - seq = '([[{{[]{[]}}{}()}]])' - """ - from netharn.initializers._nx_ext_v2.tree_embedding import tree_to_seq - from netharn.initializers._nx_ext_v2.utils import random_ordered_tree - from networkx.utils import create_py_random_state - - # Create a random otree and then convert it to a balanced sequence - rng = create_py_random_state(seed) - - if open_to_close is None: - open_to_close = {} - - # To create a random balanced sequences we simply create a random ordered - # tree and convert it to a sequence - tree = random_ordered_tree(n, seed=rng, directed=True) - if item_type == "paren": - # special case - pool = "[{(" - for node in tree.nodes: - tree.nodes[node]["label"] = rng.choice(pool) - open_to_close.update({"[": "]", "{": "}", "(": ")"}) - seq, open_to_close, *_ = tree_to_seq( - tree, - open_to_close=open_to_close, - item_type="label", - container_type=container_type, - ) - else: - seq, open_to_close, *_ = tree_to_seq( - tree, - open_to_close=open_to_close, - item_type=item_type, - container_type=container_type, - ) - return seq, open_to_close - - -if __name__ == "__main__": - """ - CommandLine - ------------ - xdoctest -m netharn.initializers._nx_ext_v2.balanced_sequence all - """ - import xdoctest - - xdoctest.doctest_module(__file__) diff --git a/netharn/initializers/_nx_ext_v2/tree_embedding.py b/netharn/initializers/_nx_ext_v2/tree_embedding.py deleted file mode 100644 index 8493ab9c6c58e45733f94c3c3432693eb452c150..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/tree_embedding.py +++ /dev/null @@ -1,488 +0,0 @@ -""" -Algorithm for computing the largest common tree embeddings (also known as a -minor) shared by two trees. See :func:`maximum_common_ordered_subtree_embedding` -for more details. -""" -from netharn.initializers._nx_ext_v2 import balanced_sequence -from netharn.initializers._nx_ext_v2 import balanced_embedding - -__all__ = ["maximum_common_ordered_subtree_embedding"] - - -def maximum_common_ordered_subtree_embedding( - tree1, tree2, node_affinity="auto", impl="auto", item_type="auto" -): - r""" - Finds the maximum common subtree-embedding between two ordered trees. - - A tree S is an embedded subtree (also known as a minor) of T if it can be - obtained from T by a series of edge contractions. - - Subtree embeddings (or minors) are similar to tree isomorphisms --- if T is - a subtree isomorphism then T is a minor. However, if you contract an edge in - T it, then it may no longer be an isomorphism, but it is still a minor. - - This function computes the maximum common embedded subtrees S1 and S2 - between two trees T1 and T2. S1 and S2 are minors of T1 and T2 with maximal - size such that S1 is isomorphic to S2. - - The computational complexity is: ``O(n1 * n2 * min(d1, l1) * min(d2, l2))`` - on ordered trees with n1 and n2 nodes, of depth d1 and d2 and with l1 and - l2 leaves, respectively. 
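For a rough sense of scale, here is an editorial back-of-the-envelope instantiation of that bound, assuming two complete binary trees of 1023 nodes each (roughly depth 10 with 512 leaves, so the min terms pick the depth):

n1 = n2 = 1023   # nodes in each complete binary tree
d1 = d2 = 10     # approximate depth
l1 = l2 = 512    # leaves
bound = n1 * n2 * min(d1, l1) * min(d2, l2)
assert bound == 104_652_900   # on the order of 1e8 elementary steps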
- - This implementation follows the algorithm described in [1]_, which - introduces the problem as follows: - - "An important generalization of tree and subtree isomorphism, known as - minor containment, is the problem of determining whether a tree is - isomorphic to an embedded subtree of another tree, where an embedded - subtree of a tree is obtained by contracting some of the edges in the tree. - A further generalization of minor containment on trees, known as maximum - common embedded subtree, is the problem of finding or determining the size - of a largest common embedded subtree of two trees. The latter also - generalizes the maximum common subtree isomorphism problem, in which a - common subtree of largest size is contained as a subtree, not only - embedded, in the two trees." - - Parameters - ---------- - tree1, tree2 : nx.OrderedDiGraph - Trees to find the maximum embedding between - - node_affinity : None | str | callable - Function for to determine if two nodes can be matched. The return is - interpreted as a weight that is used to break ties. If None then any - node can match any other node and only the topology is important. - The default is "eq", which is the same as ``operator.eq``. - - impl : str - Determines the backend implementation. Defaults to "auto". - See :func:`netharn.initializers._nx_ext_v2.balanced_embedding.longest_common_balanced_embedding` - for details. Other valid options are "iter", "recurse", and - "iter-cython". - - item_type : str - Determines the backend data structure used to encode the tree as a - balanced sequence. Defaults to "auto", other valid options are "chr" - and "number". - - Returns - ------- - S1, S2, value: Tuple[nx.OrderedDiGraph, nx.OrderedDiGraph, float] - The maximum value common embedding for each tree with respect to the - chosen ``node_affinity`` function. The topology of both graphs will - always be the same, the only difference is that the node labels in the - first and second embeddings will correspond to ``tree1`` and ``tree2`` - respectively. When ``node_affinity='eq'`` then embeddings should be - identical. The last return value is the "weight" of the solution with - respect to ``node_affinity``. - - References - ---------- - .. [1] Lozano, Antoni, and Gabriel Valiente. - "On the maximum common embedded subtree problem for ordered trees." - String Algorithmics (2004): 155-170. - https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf - - See Also - -------- - * For example usage see ``examples/applications/filesystem_embedding.py`` - * Core backends are in :mod:`netharn.initializers._nx_ext_v2.balanced_embedding.longest_common_balanced_embedding` - - Example - ------- - >>> from netharn.initializers._nx_ext_v2.tree_embedding import * # NOQA - >>> import networkx as nx - >>> # Create two random trees - >>> tree1 = random_ordered_tree(7, seed=3257073545741117277206611, directed=True) - >>> tree2 = random_ordered_tree(7, seed=123568587133124688238689717, directed=True) - >>> print(forest_str(tree1)) - ╙── 0 - ├─╼ 5 - │   └─╼ 2 - └─╼ 1 - └─╼ 6 - ├─╼ 3 - └─╼ 4 - >>> print(forest_str(tree2)) - ╙── 0 - └─╼ 2 - ├─╼ 1 - │   ├─╼ 4 - │   └─╼ 3 - │   └─╼ 5 - └─╼ 6 - >>> # Compute the maximum common embedding between the two trees - >>> embedding1, embedding2, _ = maximum_common_ordered_subtree_embedding(tree1, tree2) - >>> print(forest_str(embedding1)) - ╙── 0 - └─╼ 1 - └─╼ 4 - >>> assert embedding1.edges == embedding2.edges, ( - ... 
'when node_affinity is "eq" both embeddings will be the same') - - >>> # Demo with a custom node affinity where any node can match unless - >>> # they are the same and we much prefer nodes that are disimilar - >>> def custom_node_affinity(n1, n2): - ... return abs(n1 - n2) ** 2 - >>> embedding1, embedding2, _ = maximum_common_ordered_subtree_embedding( - ... tree1, tree2, node_affinity=custom_node_affinity) - >>> # In this case the embeddings for each tree will be differnt - >>> print(forest_str(embedding1)) - ╙── 0 - ├─╼ 5 - │   └─╼ 2 - └─╼ 1 - >>> print(forest_str(embedding2)) - ╙── 2 - ├─╼ 1 - │   └─╼ 5 - └─╼ 6 - """ - import networkx as nx - - # Note: checks that inputs are forests are handled by tree_to_seq - if not isinstance(tree1, nx.OrderedDiGraph): - raise nx.NetworkXNotImplemented("only implemented for directed ordered trees") - if not isinstance(tree1, nx.OrderedDiGraph): - raise nx.NetworkXNotImplemented("only implemented for directed ordered trees") - - if tree1.number_of_nodes() == 0 or tree2.number_of_nodes() == 0: - raise nx.NetworkXPointlessConcept - - if item_type == "label": - # If we do allow label, I think the algorithm will work, but the - # returned tree embeddings will only be embedding wrt to the label - # structure. - raise AssertionError( - "allowing sequences to be specified by the labels breaks assumptions" - ) - - # Convert the trees to balanced sequences. - # NOTE: each sequence will contain each token at most once, this is an - # important assumption in subsequent steps. - seq1, open_to_close, node_to_open = tree_to_seq( - tree1, - open_to_close=None, - node_to_open=None, - item_type=item_type, - container_type="auto", - ) - seq2, open_to_close, node_to_open = tree_to_seq( - tree2, open_to_close, node_to_open, item_type=item_type, container_type="auto" - ) - - # NOTE: This DOES work in the case where all opening tokens within a single - # sequence are unique. And we CAN enforce that this is the case in our - # reduction because each node in a graph is always unique and we always - # choose a unique token for each unique node in ``tree_to_seq``. - open_to_node = {tok: node for node, tok in node_to_open.items()} - - # Solve the longest common balanced sequence problem - best, value = balanced_embedding.longest_common_balanced_embedding( - seq1, - seq2, - open_to_close, - open_to_node=open_to_node, - node_affinity=node_affinity, - impl=impl, - ) - subseq1, subseq2 = best - - # Convert the subsequence back into a tree. - # Note: we could return the contracted edges as well here, but that can - # always be done as a postprocessing step. See tests for an example of - # this. - embedding1 = seq_to_tree(subseq1, open_to_close, open_to_node) - embedding2 = seq_to_tree(subseq2, open_to_close, open_to_node) - - return embedding1, embedding2, value - - -def tree_to_seq( - tree, open_to_close=None, node_to_open=None, item_type="auto", container_type="auto" -): - r""" - Converts an ordered tree to a balanced sequence --- typically with unique - tokens --- for use in algorithm reductions. - - Used to convert a tree to a sequence before solving - :func:`longest_common_balanced_embedding` in - :func:`maximum_common_ordered_subtree_embedding`. - - Parameters - ---------- - tree: nx.OrderedDiGraph - The forest to encode as a string sequence. - - open_to_close : Dict | None - Dictionary of opening to closing tokens to be updated for problems - where multiple trees are converted to sequences. 
- - node_to_open : Dict | None - Dictionary of nodes mapped to the opening tokens to be updated for - problems where multiple trees are converted to sequences. - - item_type : str - Determines the item type of the sequence. - Can be 'auto', 'number', 'chr', or 'label'. - Default is 'auto', which will choose 'chr' if the graph is small enough - otherwise 'number'. If item_type is 'label', then the label of each - node is used to create the token, and the `open_to_close` dictionary - must be specified. - - container_type : str - Determines the container_type type. Can be "auto", "list", "tuple", or - "str". If "auto" tries to choose the best given the input data. - - Returns: - -------- - Tuple[SeqT, Dict, Dict] - A tuple containing - sequence - the string representation of an ordered tree - open_to_close - a mapping between opening and closing tokens - node_to_open - a mapping between tree nodes and opening tokens - - Examples - -------- - >>> from netharn.initializers._nx_ext_v2.tree_embedding import tree_to_seq # NOQA - >>> import networkx as nx - >>> # This function helps us encode this graph as a balance sequence - >>> tree = nx.path_graph(3, nx.OrderedDiGraph) - >>> print(forest_str(tree)) - ╙── 0 - └─╼ 1 - └─╼ 2 - >>> # The sequence is represented by opening and closing tokens - >>> # These are returned a container, which might be a tuple of numbers - >>> sequence, open_to_close, node_to_open, *_ = tree_to_seq(tree, item_type='number') - >>> print((''' - ... sequence = {sequence} - ... open_to_close = {open_to_close} - ... node_to_open = {node_to_open} - ... ''').format(**locals()).strip()) - sequence = (1, 2, 3, -3, -2, -1) - open_to_close = {1: -1, 2: -2, 3: -3} - node_to_open = {0: 1, 1: 2, 2: 3} - - >>> # But you might also encode as a sequence of utf8-characters - >>> # These can often be quicker to use than number encodings - >>> sequence, open_to_close, node_to_open, *_ = tree_to_seq(tree, item_type='chr') - >>> print((''' - ... sequence = {sequence!r} - ... open_to_close = {open_to_close!r} - ... node_to_open = {node_to_open!r} - ... ''').format(**locals()).strip()) - sequence = '\x00\x02\x04\x05\x03\x01' - open_to_close = {'\x00': '\x01', '\x02': '\x03', '\x04': '\x05'} - node_to_open = {0: '\x00', 1: '\x02', 2: '\x04'} - - >>> # Here is a more complex example - >>> tree = nx.balanced_tree(2, 2, nx.DiGraph) - >>> print(forest_str(tree)) - ╙── 0 - ├─╼ 1 - │   ├─╼ 3 - │   └─╼ 4 - └─╼ 2 - ├─╼ 5 - └─╼ 6 - >>> sequence, *_ = tree_to_seq(tree, item_type='number') - >>> print('sequence = {!r}'.format(sequence)) - sequence = (1, 2, 3, -3, 4, -4, -2, 5, 6, -6, 7, -7, -5, -1) - >>> sequence, *_ = tree_to_seq(tree, item_type='chr') - >>> print('sequence = {!r}'.format(sequence)) - sequence = '\x00\x02\x04\x05\x06\x07\x03\x08\n\x0b\x0c\r\t\x01' - - >>> # Demo custom label encoding: If you have custom labels on your - >>> # tree nodes, those can be used in the encoding. - >>> import random - >>> tree = random_ordered_tree(10, seed=1, directed=True) - >>> rng = random.Random(0) - >>> open_to_close = dict(zip("[{(", "]})")) - >>> for node in tree.nodes: - ... tree.nodes[node]["label"] = rng.choice(list(open_to_close.keys())) - >>> sequence, *_ = tree_to_seq(tree, item_type="label", container_type="str", open_to_close=open_to_close) - >>> print('sequence = {!r}'.format(sequence)) - sequence = '{[{{{{}({})}{}{}}}]}' - """ - import networkx as nx - - # Create a sequence and mapping from each index in the sequence to the - # graph edge is corresponds to. 
- sequence = [] - - # mapping between opening and closing tokens - if open_to_close is None: - open_to_close = {} - if node_to_open is None: - node_to_open = {} - - # utf8 can only encode this many chars - NUM_CHRS = 1112064 - NUM_OPEN_CHRS = NUM_CHRS // 2 - - if item_type == "label": - # Special case, where the user specifies the encoding - all_labels = {n["label"] for n in tree.nodes.values()} - - if container_type in {"auto", "str"}: - # Determine if the container_type can be a string - can_be_str = all(isinstance(x, str) and len(x) == 1 for x in all_labels) - if container_type == "str" and not can_be_str: - raise ValueError("Labels cannot be contained as a string") - if container_type == "auto": - container_type = "str" if can_be_str else "tuple" - - if not open_to_close: - raise ValueError("must specify open_to_close for custom labeling") - else: - # Normal case where we will define the sequence encoding for the tree - if item_type == "auto": - # chr mode is fastest but limited to ~half-a-million nodes - item_type = "chr" if len(tree) < NUM_OPEN_CHRS else "number" - if container_type == "auto": - container_type = "str" if item_type == "chr" else "tuple" - - sources = [n for n in tree.nodes if tree.in_degree[n] == 0] - dfs_forest_edge_gen = ( - (u, v, etype) - for source in sources - for u, v, etype in nx.dfs_labeled_edges(tree, source=source) - ) - for u, v, etype in dfs_forest_edge_gen: - if etype == "forward": - # u has been visited by v has not - if v not in node_to_open: - if item_type == "number": - # Pos nums are open toks. Neg nums are close toks. - open_tok = len(node_to_open) + 1 - close_tok = -open_tok - elif item_type == "chr": - # Even chars are open toks. Odd chars are close toks. - open_tok = chr(len(node_to_open) * 2) - close_tok = chr(len(node_to_open) * 2 + 1) - elif item_type == "label": - # The user must specify the closing token - open_tok = tree.nodes[v]["label"] - close_tok = open_to_close[open_tok] - else: - raise KeyError(item_type) - node_to_open[v] = open_tok - open_to_close[open_tok] = close_tok - open_tok = node_to_open[v] - sequence.append(open_tok) - elif etype == "reverse": - # Both u and v are visited and the edge is in the tree - close_tok = open_to_close[node_to_open[v]] - sequence.append(close_tok) - elif etype == "nontree": - raise TypeError("Input must be a forest") - else: - raise KeyError(etype) - - if item_type == "chr": - assert len(node_to_open) < NUM_OPEN_CHRS, "graph is way too big" - - if container_type == "str": - sequence = "".join(sequence) - elif container_type == "list": - sequence = sequence - elif container_type == "tuple": - sequence = tuple(sequence) - else: - raise KeyError(container_type) - - return sequence, open_to_close, node_to_open - - -def seq_to_tree(subseq, open_to_close, open_to_node): - """ - Converts a balanced sequence to an ordered tree - - Used to convert back to a tree after solving - :func:`longest_common_balanced_embedding` in - :func:`maximum_common_ordered_subtree_embedding`. - - Parameters - ---------- - subseq : Tuple | str - a balanced sequence of hashable items as a string or tuple - - open_to_close : Dict - a dictionary that maps opening tokens to closing tokens in the balanced - sequence problem. - - open_to_node : Dict - a dictionary that maps a sequence token to a node corresponding to an - original problem (e.g. a tree node). Must be unique. If unspecified new - nodes will be generated and the opening sequence token will be used as - a node label. 
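To see both directions of the reduction together, here is an editorial round-trip sketch using ``tree_to_seq`` and ``seq_to_tree`` from this file. The import path is the pre-removal module location, and ``nx.OrderedDiGraph`` assumes the older networkx versions this code targets.

import networkx as nx
from netharn.initializers._nx_ext_v2.tree_embedding import seq_to_tree, tree_to_seq

tree = nx.path_graph(3, create_using=nx.OrderedDiGraph)     # 0 -> 1 -> 2
sequence, open_to_close, node_to_open, *_ = tree_to_seq(tree, item_type="chr")

# Invert the node <-> token mapping so decoding restores the original ids.
open_to_node = {tok: node for node, tok in node_to_open.items()}
recon = seq_to_tree(sequence, open_to_close, open_to_node)
assert list(recon.edges) == list(tree.edges) == [(0, 1), (1, 2)]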
- - Returns - ------- - subtree: nx.OrderedDiGraph - The ordered tree that corresponds to the balanced sequence - - Example - -------- - >>> from netharn.initializers._nx_ext_v2.tree_embedding import seq_to_tree - >>> from netharn.initializers._nx_ext_v2.utils import forest_str - >>> # For a given balanced sequence - >>> open_to_close = {'{': '}', '(': ')', '[': ']'} - >>> open_to_node = None - >>> subseq = '({[[]]})[[][]]{{}}' - >>> # We can convert it into an ordered directed tree - >>> subtree = seq_to_tree(subseq, open_to_close, open_to_node) - >>> print(forest_str(subtree)) - ╟── ( - ╎   └─╼ { - ╎   └─╼ [ - ╎   └─╼ [ - ╟── [ - ╎   ├─╼ [ - ╎   └─╼ [ - ╙── { - └─╼ { - """ - import networkx as nx - - nextnode = 0 # only used if open_to_node is not specified - subtree = nx.OrderedDiGraph() - stack = [] - for token in subseq: - if token in open_to_close: - if open_to_node is None: - node = nextnode - nextnode += 1 - else: - node = open_to_node[token] - if stack: - parent_tok, parent_node = stack[-1] - subtree.add_edge(parent_node, node) - else: - subtree.add_node(node) - if open_to_node is None: - subtree.nodes[node]["label"] = token - stack.append((token, node)) - else: - if not stack: - raise balanced_sequence.UnbalancedException - prev_open, prev_node = stack.pop() - want_close = open_to_close[prev_open] - if token != want_close: - raise balanced_sequence.UnbalancedException - return subtree - - -if __name__ == "__main__": - """ - CommandLine: - xdoctest -m netharn.initializers._nx_ext_v2.tree_embedding all - """ - import xdoctest - - xdoctest.doctest_module(__file__) - -from netharn.initializers._nx_ext_v2.utils import forest_str # NOQA -from netharn.initializers._nx_ext_v2.utils import random_ordered_tree # NOQA diff --git a/netharn/initializers/_nx_ext_v2/tree_isomorphism.py b/netharn/initializers/_nx_ext_v2/tree_isomorphism.py deleted file mode 100644 index fdb5a53f18e686d14f2656636611170be08c1436..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/tree_isomorphism.py +++ /dev/null @@ -1,109 +0,0 @@ -from netharn.initializers._nx_ext_v2.balanced_isomorphism import ( - longest_common_balanced_isomorphism, -) -from netharn.initializers._nx_ext_v2.tree_embedding import tree_to_seq, seq_to_tree - - -def maximum_common_ordered_subtree_isomorphism( - tree1, tree2, node_affinity="auto", impl="auto", item_type="auto" -): - """ - Finds the maximum common subtree-isomorphism between two ordered trees. - - This function computes the maximum-weight common subtrees S1 and S2 between - two trees T1 and T2. S1 and S2 are isomorphic to subgraphs of T1 and T2 - with maximal size such that S1 and S2 are also isomorphic to each other. - - This function is similar to :func:`maximum_common_ordered_subtree_embedding` - with the main difference being that returned solution from this function - will be proper subgraphs (i.e. all edges in the subgraphs will exist in the - original graph), whereas in the subtree embedding problem the returned - solutions are allowed to be minors of the input graphs (i.e. edges are - allowed to be contracted). - - Parameters - ---------- - tree1, tree2 : nx.OrderedDiGraph - Trees to find the maximum subtree isomorphism between - - node_affinity : None | str | callable - Function for to determine if two nodes can be matched. The return is - interpreted as a weight that is used to break ties. If None then any - node can match any other node and only the topology is important. - The default is "eq", which is the same as ``operator.eq``. 
- - impl : str - Determines the backend implementation. Defaults to "auto". - See :func:`netharn.initializers._nx_ext_v2.balanced_sequence.longest_common_balanced_sequence` - for details. Other valid options are "iter", "recurse", and - "iter-cython". - - item_type : str - Determines the backend data structure used to encode the tree as a - balanced sequence. Defaults to "auto", other valid options are "chr" - and "number". - - Returns - ------- - S1, S2, value: Tuple[nx.OrderedDiGraph, nx.OrderedDiGraph, int] - The maximum value common subtree isomorphism for each tree with respect - to the chosen ``node_affinity`` function. The topology of both graphs - will always be the same, the only difference is that the node labels in - the first and second embeddings will correspond to ``tree1`` and - ``tree2`` respectively. When ``node_affinity='eq'`` then embeddings - should be identical. The last return value is the "size" of the - solution with respect to ``node_affinity``. - - See Also - -------- - `maximum_common_ordered_subtree_embedding` - """ - import networkx as nx - - # Note: checks that inputs are forests are handled by tree_to_seq - if not isinstance(tree1, nx.OrderedDiGraph): - raise nx.NetworkXNotImplemented("only implemented for directed ordered trees") - if not isinstance(tree1, nx.OrderedDiGraph): - raise nx.NetworkXNotImplemented("only implemented for directed ordered trees") - - if tree1.number_of_nodes() == 0 or tree2.number_of_nodes() == 0: - raise nx.NetworkXPointlessConcept - - if item_type == "label": - # If we do allow label, I think the algorithm will work, but the - # returned tree embeddings will only be embedding wrt to the label - # structure. - raise AssertionError( - "allowing sequences to be specified by the labels breaks assumptions" - ) - - # Convert the trees to balanced sequences. - # Each sequence will contain each token at most once, this is an important - # assumption in subsequent steps. - seq1, open_to_close, node_to_open = tree_to_seq( - tree1, - open_to_close=None, - node_to_open=None, - item_type=item_type, - container_type="auto", - ) - seq2, open_to_close, node_to_open = tree_to_seq( - tree2, open_to_close, node_to_open, item_type=item_type, container_type="auto" - ) - open_to_node = {tok: node for node, tok in node_to_open.items()} - - # Solve the longest common balanced sequence problem - best, value = longest_common_balanced_isomorphism( - seq1, - seq2, - open_to_close, - open_to_node=open_to_node, - node_affinity=node_affinity, - impl=impl, - ) - subseq1, subseq2 = best - - # Convert the subsequence back into a tree. - subtree1 = seq_to_tree(subseq1, open_to_close, open_to_node) - subtree2 = seq_to_tree(subseq2, open_to_close, open_to_node) - return subtree1, subtree2, value diff --git a/netharn/initializers/_nx_ext_v2/utils.py b/netharn/initializers/_nx_ext_v2/utils.py deleted file mode 100644 index 380ebb26406e1c8af50d4952b511429deabb43db..0000000000000000000000000000000000000000 --- a/netharn/initializers/_nx_ext_v2/utils.py +++ /dev/null @@ -1,298 +0,0 @@ -import networkx as nx -from networkx.utils import py_random_state - - -@py_random_state(1) -def random_tree(n, seed=None, create_using=None): - """Returns a uniformly random tree on `n` nodes. - - Parameters - ---------- - n : int - A positive integer representing the number of nodes in the tree. - seed : integer, random_state, or None (default) - Indicator of random number generation state. - See :ref:`Randomness`. 
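Looking back at ``maximum_common_ordered_subtree_isomorphism`` from the tree_isomorphism.py hunk above, which ships without a usage example, here is a hypothetical sketch. The import path is the pre-removal module location, node ids double as the compared labels under the default equality affinity, and ``nx.OrderedDiGraph`` assumes an older networkx.

import networkx as nx
from netharn.initializers._nx_ext_v2.tree_isomorphism import (
    maximum_common_ordered_subtree_isomorphism,
)

tree1 = nx.path_graph(4, create_using=nx.OrderedDiGraph)        # 0 -> 1 -> 2 -> 3
tree2 = nx.balanced_tree(2, 2, create_using=nx.OrderedDiGraph)  # 0 -> {1, 2}, ...

subtree1, subtree2, value = maximum_common_ordered_subtree_isomorphism(tree1, tree2)
# With the default "eq" affinity the two returned subtrees are identical
# proper subtrees of their respective inputs.
assert list(subtree1.edges) == list(subtree2.edges)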
- - Returns - ------- - NetworkX graph - A tree, given as an undirected graph, whose nodes are numbers in - the set {0, …, *n* - 1}. - - Raises - ------ - NetworkXPointlessConcept - If `n` is zero (because the null graph is not a tree). - - Notes - ----- - The current implementation of this function generates a uniformly - random Prüfer sequence then converts that to a tree via the - :func:`~networkx.from_prufer_sequence` function. Since there is a - bijection between Prüfer sequences of length *n* - 2 and trees on - *n* nodes, the tree is chosen uniformly at random from the set of - all trees on *n* nodes. - - Example - ------- - >>> import networkx as nx - >>> tree = random_tree(n=10, seed=0) - >>> print(forest_str(tree, sources=[0])) - ╙── 0 - ├── 3 - └── 4 - ├── 6 - │   ├── 1 - │   ├── 2 - │   └── 7 - │   └── 8 - │   └── 5 - └── 9 - - >>> import networkx as nx - >>> tree = random_tree(n=10, seed=0, create_using=nx.OrderedDiGraph) - >>> print(forest_str(tree)) - ╙── 0 - ├─╼ 3 - └─╼ 4 - ├─╼ 6 - │   ├─╼ 1 - │   ├─╼ 2 - │   └─╼ 7 - │   └─╼ 8 - │   └─╼ 5 - └─╼ 9 - """ - if n == 0: - raise nx.NetworkXPointlessConcept("the null graph is not a tree") - # Cannot create a Prüfer sequence unless `n` is at least two. - if n == 1: - utree = nx.empty_graph(1) - else: - sequence = [seed.choice(range(n)) for i in range(n - 2)] - utree = nx.from_prufer_sequence(sequence) - - if create_using is None: - tree = utree - else: - # TODO: maybe a tree classmethod like - # Graph.new, Graph.fresh, or something like that - def new(cls_or_self): - if hasattr(cls_or_self, "_adj"): - # create_using is a NetworkX style Graph - cls_or_self.clear() - self = cls_or_self - else: - # try create_using as constructor - self = cls_or_self() - return self - - tree = new(create_using) - if tree.is_directed(): - # Use a arbitrary root node and dfs to define edge directions - edges = nx.dfs_edges(utree, source=0) - else: - edges = utree.edges - - # Populate the specified graph type - tree.add_nodes_from(utree.nodes) - tree.add_edges_from(edges) - - return tree - - -@py_random_state(2) -def random_ordered_tree(n, seed=None, directed=False): - """ - Creates a random ordered tree - - Parameters - ---------- - n : int - A positive integer representing the number of nodes in the tree. - - seed : integer, random_state, or None (default) - Indicator of random number generation state. - See :ref:`Randomness`. 
- - directed : bool - if the edges are one-way - - Returns - ------- - networkx.OrderedDiGraph | networkx.OrderedGraph - - Example - ------- - >>> import networkx as nx - >>> assert len(random_ordered_tree(n=1, seed=0).nodes) == 1 - >>> assert len(random_ordered_tree(n=2, seed=0).nodes) == 2 - >>> assert len(random_ordered_tree(n=3, seed=0).nodes) == 3 - >>> otree = random_ordered_tree(n=5, seed=3, directed=True) - >>> print(forest_str(otree)) - ╙── 0 - └─╼ 1 - └─╼ 4 - ├─╼ 2 - └─╼ 3 - """ - from networkx.utils import create_py_random_state - - rng = create_py_random_state(seed) - # Create a random undirected tree - create_using = nx.OrderedDiGraph if directed else nx.OrderedGraph - otree = random_tree(n, seed=rng, create_using=create_using) - return otree - - -def forest_str(graph, with_labels=True, sources=None, write=None): - """ - Creates a nice utf8 representation of a directed forest - - Parameters - ---------- - graph : nx.DiGraph | nx.Graph - Graph to represent (must be a tree, forest, or the empty graph) - - with_labels : bool - If True will use the "label" attribute of a node to display if it - exists otherwise it will use the node value itself. Defaults to True. - - sources : List - Mainly relevant for undirected forests, specifies which nodes to list - first. If unspecified the root nodes of each tree will be used for - directed forests; for undirected forests this defaults to the nodes - with the smallest degree. - - write : callable - Function to use to write to, if None new lines are appended to - a list and returned. If set to the `print` function, lines will - be written to stdout as they are generated. If specified, - this function will return None. Defaults to None. - - Returns - ------- - str | None : - utf8 representation of the tree / forest - - Example - ------- - >>> import networkx as nx - >>> graph = nx.balanced_tree(r=2, h=3, create_using=nx.DiGraph) - >>> print(forest_str(graph)) - ╙── 0 - ├─╼ 1 - │   ├─╼ 3 - │   │   ├─╼ 7 - │   │   └─╼ 8 - │   └─╼ 4 - │   ├─╼ 9 - │   └─╼ 10 - └─╼ 2 - ├─╼ 5 - │   ├─╼ 11 - │   └─╼ 12 - └─╼ 6 - ├─╼ 13 - └─╼ 14 - - - >>> graph = nx.balanced_tree(r=1, h=2, create_using=nx.Graph) - >>> print(forest_str(graph)) - ╙── 0 - └── 1 - └── 2 - """ - import networkx as nx - - printbuf = [] - if write is None: - _write = printbuf.append - else: - _write = write - - if len(graph.nodes) == 0: - _write("╙") - else: - if not nx.is_forest(graph): - raise nx.NetworkXNotImplemented("input must be a forest or the empty graph") - - is_directed = graph.is_directed() - succ = graph.succ if is_directed else graph.adj - - if sources is None: - if is_directed: - # use real source nodes for directed trees - sources = [n for n in graph.nodes if graph.in_degree[n] == 0] - else: - # use arbitrary sources for undirected trees - sources = [ - min(cc, key=lambda n: graph.degree[n]) - for cc in nx.connected_components(graph) - ] - - # Populate the stack with each source node, empty indentation, and mark - # the final node. Reverse the stack so sources are popped in the - # correct order. - last_idx = len(sources) - 1 - stack = [(node, "", (idx == last_idx)) for idx, node in enumerate(sources)][ - ::-1 - ] - - seen = set() - while stack: - node, indent, islast = stack.pop() - if node in seen: - continue - seen.add(node) - - # Notes on available box and arrow characters - # https://en.wikipedia.org/wiki/Box-drawing_character - # https://stackoverflow.com/questions/2701192/triangle-arrow - if not indent: - # Top level items (i.e. 
trees in the forest) get different - # glyphs to indicate they are not actually connected - if islast: - this_prefix = indent + "╙── " - next_prefix = indent + " " - else: - this_prefix = indent + "╟── " - next_prefix = indent + "╎   " - - else: - # For individual forests distinguish between directed and - # undirected cases - if is_directed: - if islast: - this_prefix = indent + "└─╼ " - next_prefix = indent + " " - else: - this_prefix = indent + "├─╼ " - next_prefix = indent + "│   " - else: - if islast: - this_prefix = indent + "└── " - next_prefix = indent + " " - else: - this_prefix = indent + "├── " - next_prefix = indent + "│   " - - if with_labels: - label = graph.nodes[node].get("label", node) - else: - label = node - - _write(this_prefix + str(label)) - - # Push children on the stack in reverse order so they are popped in - # the original order. - children = [child for child in succ[node] if child not in seen] - for idx, child in enumerate(children[::-1], start=1): - islast_next = idx <= 1 - try_frame = (child, next_prefix, islast_next) - stack.append(try_frame) - - if write is None: - # Only return a string if the custom write function was not specified - return "\n".join(printbuf) diff --git a/netharn/initializers/functional.py b/netharn/initializers/functional.py index b3066dab0d5e6ba0a7466523e9418a82578699b6..2a171a3ffc81ce3e48ebec13ab333a0fac142a2c 100644 --- a/netharn/initializers/functional.py +++ b/netharn/initializers/functional.py @@ -812,54 +812,10 @@ def maximum_common_ordered_subpaths(paths1, paths2, sep='.', mode='embedding'): >>> mapping = ub.dzip(subpaths1, subpaths2) >>> print('mapping = {}'.format(ub.repr2(mapping, nl=1))) """ - import networkx as nx - - # the longest common balanced sequence problem - def _affinity(tok1, tok2): - score = 0 - for t1, t2 in zip(tok1[::-1], tok2[::-1]): - if t1 == t2: - score += 1 - else: - break - return score - - # return tok1[-1] == tok2[-1] - node_affinity = _affinity - # import operator - # eq = operator.eq - - def paths_to_otree(paths): - tree = nx.OrderedDiGraph() - for path in sorted(paths): - parts = tuple(path.split(sep)) - node_path = [] - for i in range(1, len(parts) + 1): - node = parts[0:i] - tree.add_node(node) - tree.nodes[node]['label'] = node[-1] - node_path.append(node) - for u, v in ub.iter_window(node_path, 2): - tree.add_edge(u, v) - return tree - - tree1 = paths_to_otree(paths1) - tree2 = paths_to_otree(paths2) - - # from netharn.initializers._nx_ext_v2.tree_embedding import forest_str - # print(len(tree1.nodes)) - # print(len(tree2.nodes)) - # print(forest_str(tree1)) - # print(forest_str(tree2)) - - from netharn.initializers import _nx_ext_v2 - if mode == 'embedding': - subtree1, subtree2, value = _nx_ext_v2.maximum_common_ordered_subtree_embedding(tree1, tree2, node_affinity=node_affinity) - elif mode == 'isomorphism': - subtree1, subtree2, value = _nx_ext_v2.maximum_common_ordered_subtree_isomorphism(tree1, tree2, node_affinity=node_affinity) - else: - raise KeyError(mode) - - subpaths1 = [sep.join(node) for node in subtree1.nodes if subtree1.out_degree[node] == 0] - subpaths2 = [sep.join(node) for node in subtree2.nodes if subtree2.out_degree[node] == 0] - return subpaths1, subpaths2 + ub.schedule_deprecation( + 'netharn', 'maximum_common_ordered_subpaths', 'function', + migration='use torch_liberator.initializer.maximum_common_ordered_subpaths instead', + deprecate='now', + ) + from torch_liberator.initializer import maximum_common_ordered_subpaths + return maximum_common_ordered_subpaths(paths1, 
paths2, sep) diff --git a/netharn/models/yolo2/light_yolo.py b/netharn/models/yolo2/light_yolo.py index 64969337216ae23dffd6fd841331ae643bb7675c..3583467a4d2edda359c114e02b5e55e45ecf3b04 100644 --- a/netharn/models/yolo2/light_yolo.py +++ b/netharn/models/yolo2/light_yolo.py @@ -158,10 +158,10 @@ class Yolo(nn.Module): if anchors is None: anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), - (11.2364, 10.0071)], dtype=np.float) + (11.2364, 10.0071)], dtype=float) # np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), # (9.42, 5.11), (16.62, 10.52)], - # dtype=np.float) + # dtype=float) # Parameters self.num_classes = num_classes diff --git a/netharn/models/yolo2/yolo2.py b/netharn/models/yolo2/yolo2.py index c5f05d9a575c2c2ce602d9ebc86b5923f28295ff..a1ee811e91aa2c7a950e9fe914b910df776c6c0e 100644 --- a/netharn/models/yolo2/yolo2.py +++ b/netharn/models/yolo2/yolo2.py @@ -178,7 +178,7 @@ class Yolo2(layers.AnalyticModule): if anchors is None: anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), - (11.2364, 10.0071)], dtype=np.float) + (11.2364, 10.0071)], dtype=float) import ndsampler classes = ndsampler.CategoryTree.coerce(classes) diff --git a/netharn/monitor.py b/netharn/monitor.py index 1260e6b953e3dcfae9501bdef02c9739bb4999e8..cb9dca1ab3af19e3e284631476795d0db3588a74 100644 --- a/netharn/monitor.py +++ b/netharn/monitor.py @@ -379,17 +379,43 @@ class Monitor(ub.NiceRepr): >>> monitor.update(0, {'loss': 0.1}) >>> print(monitor.message(ansi=False)) vloss: 0.8800 (n_bad=00, best=0.8800) + + Example: + >>> # Test case for ignore_first_epochs + >>> monitor = Monitor(smoothing=0.6, ignore_first_epochs=2) + >>> monitor.update(0, {'loss': 0.1}) + >>> print(monitor.message(ansi=False)) + >>> monitor.update(1, {'loss': 1.1}) + >>> print(monitor.message(ansi=False)) + >>> monitor.update(2, {'loss': 0.3}) + >>> print(monitor.message(ansi=False)) + >>> monitor.update(3, {'loss': 0.2}) + >>> print(monitor.message(ansi=False)) + vloss: 0.1000 (n_bad=00, best=ignored) + vloss: 0.5000 (n_bad=00, best=ignored) + vloss: 0.4200 (n_bad=00, best=0.4200) + vloss: 0.3320 (n_bad=00, best=0.3320) + """ if not monitor._epochs: message = 'vloss is unevaluated' if ansi: message = ub.color_text(message, 'blue') else: - prev_loss = monitor._smooth_metrics[-1]['loss'] - best_loss = monitor._best_smooth_metrics['loss'] + if monitor._smooth_metrics is None: + prev_loss_str = 'unknown' + else: + prev_loss = monitor._smooth_metrics[-1]['loss'] + prev_loss_str = '{:.4f}'.format(prev_loss) + + if monitor._best_smooth_metrics is None: + best_loss_str = 'ignored' + else: + best_loss = monitor._best_smooth_metrics['loss'] + best_loss_str = '{:.4f}'.format(best_loss) - message = 'vloss: {:.4f} (n_bad={:02d}, best={:.4f})'.format( - prev_loss, monitor._n_bad_epochs, best_loss, + message = 'vloss: {} (n_bad={:02d}, best={})'.format( + prev_loss_str, monitor._n_bad_epochs, best_loss_str, ) if monitor.patience is None: patience = monitor.max_epoch diff --git a/netharn/util/util_json.py b/netharn/util/util_json.py index 821f5f9ac6c095f0aca6f5a3425882444b85ebd1..25f264c8391446df0b33109b94045de706860774 100644 --- a/netharn/util/util_json.py +++ b/netharn/util/util_json.py @@ -144,7 +144,7 @@ def ensure_json_serializable(dict_, normalize_containers=False, verbose=0): >>> data['foo'] = ub.ddict(lambda: int) >>> data['bar'] = np.array([1, 2, 3]) >>> data['foo']['a'] = 1 - >>> data['foo']['b'] = (1, np.array([1, 2, 3]), {3: np.int(3), 4: 
np.float16(1.0)}) + >>> data['foo']['b'] = (1, np.array([1, 2, 3]), {3: int(3), 4: np.float16(1.0)}) >>> dict_ = data >>> print(ub.repr2(data, nl=-1)) >>> result = ensure_json_serializable(data, normalize_containers=True) diff --git a/netharn/util/util_slider.py b/netharn/util/util_slider.py index 21133c2eb90c27a61dcca780824e0d53f762dcc7..ce3f4928bf18b2dbbbf0d2f9578370773c8b0e0b 100644 --- a/netharn/util/util_slider.py +++ b/netharn/util/util_slider.py @@ -498,7 +498,7 @@ class Stitcher(ub.NiceRepr): last = batch_idxs.shape[0] - 1 base_multi_idxs = tuple(batch_idxs[[0, last]].T) # Add extra dimension for output classes - extra_multi_idxs = np.zeros(2, dtype=np.int) + extra_multi_idxs = np.zeros(2, dtype=int) multi_idxs_range = base_multi_idxs + (extra_multi_idxs,) ravel_idxs_range = np.ravel_multi_index(multi_idxs_range, dims=shape) first = ravel_idxs_range[0] @@ -507,7 +507,7 @@ class Stitcher(ub.NiceRepr): ravel_index = ravel_sl else: base_multi_idxs = tuple(batch_idxs.T) - extra_multi_idxs = np.zeros(len(batch_idxs), dtype=np.int) + extra_multi_idxs = np.zeros(len(batch_idxs), dtype=int) # The indices for the 0-th class (which should be the last dimension) multi_idxs_first = base_multi_idxs + (extra_multi_idxs,) ravel_idxs_first = np.ravel_multi_index(multi_idxs_first, dims=shape) diff --git a/netharn/util/util_slider_dep.py b/netharn/util/util_slider_dep.py index ae86f8e98205dc12dfe03c267d80b68478afa954..a58b69437f3e603442e23556db19a1b49dab4ca4 100644 --- a/netharn/util/util_slider_dep.py +++ b/netharn/util/util_slider_dep.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals import itertools as it import numpy as np import torch @@ -313,7 +311,7 @@ class SlidingSlices(ub.NiceRepr): >>> dims = (-2, -1) >>> # Make dummy predicted data >>> pred_shape = list(ub.take(slider.basis_shape, dims)) - >>> pred = np.arange(1, slider.n_total + 1).reshape(pred_shape).astype(np.float) + >>> pred = np.arange(1, slider.n_total + 1).reshape(pred_shape).astype(float) >>> # upscale using computed transforms >>> upscaled = slider.upscale_overlay(pred) @@ -325,7 +323,7 @@ class SlidingSlices(ub.NiceRepr): >>> dims = (-2, -1) >>> # Make dummy predicted data >>> pred_shape = list(ub.take(slider.basis_shape, dims)) - >>> pred = np.arange(1, slider.n_total + 1).reshape(pred_shape).astype(np.float) + >>> pred = np.arange(1, slider.n_total + 1).reshape(pred_shape).astype(float) >>> # upscale using computed transforms >>> upscaled = slider.upscale_overlay(pred) """ diff --git a/netharn/util/util_torch.py b/netharn/util/util_torch.py index e244513f8b44fdd1313cb3aa7e6b3d5ea0011466..04c93dcefbd3ff61501082a677feb2b5db25facc 100644 --- a/netharn/util/util_torch.py +++ b/netharn/util/util_torch.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals import numpy as np import torch import six @@ -408,7 +406,7 @@ def one_hot_embedding(labels, num_classes, dtype=None): >>> assert np.all(t3.cpu().numpy() == t.numpy()) """ if isinstance(labels, np.ndarray): - dtype = dtype or np.float + dtype = dtype or float y = np.eye(num_classes, dtype=dtype) y_onehot = y[labels] else: # if torch.is_tensor(labels): @@ -436,7 +434,7 @@ def one_hot_lookup(probs, labels): >>> one_hot_lookup(probs, labels) array([ 0, 4, 8, 10]) """ - return probs[np.eye(probs.shape[1], dtype=np.bool)[labels]] + return probs[np.eye(probs.shape[1], dtype=bool)[labels]] def torch_ravel_multi_index(multi_index, 
dims=None, device=None, strides_=None): diff --git a/publish.sh b/publish.sh index 97e60bc0ae1eee85f8c2857654364ed30ef6c74e..cee2385a773712f5c93b19e91b46a91642241efc 100755 --- a/publish.sh +++ b/publish.sh @@ -405,13 +405,9 @@ WHEEL_PATHS_STR=$(printf '"%s" ' "${WHEEL_PATHS[@]}") echo "WHEEL_PATHS_STR = $WHEEL_PATHS_STR" echo " - -GLOBED ------- MODE=$MODE VERSION='$VERSION' WHEEL_PATHS='$WHEEL_PATHS_STR' - " diff --git a/pyproject.toml b/pyproject.toml index 8073d696ecdd50c7c7cf2fc51646655b64586029..5325ffcd5903e66fcf57efd58687bcf4bd4c07b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,10 +7,17 @@ ignore_missing_imports = true [tool.xcookie] tags = [ "kitware", "gitlab", "purepy", "cv2",] mod_name = "netharn" +description = "Train and deploy pytorch models" repo_name = "netharn" rel_mod_parent_dpath = "." +ci_cpython_versions = ["3.7", "3.8", "3.9", "3.10"] os = [ "linux", "win", "all", "osx",] min_python = 3.7 +author = 'Jon Crall' +author_email = 'jon.crall@kitware.com' +url = 'https://gitlab.kitware.com/computer-vision/netharn' +dev_status = "beta" +typed = false [tool.pytest.ini_options] addopts = "-p no:doctest --xdoctest --xdoctest-style=google --ignore-glob=setup.py" diff --git a/requirements/optional.txt b/requirements/optional.txt index 8e073b6c2d6b522b3da16e34bc435609208f868f..017d1f023a30af9a8b8f38ab561b1c728a87081b 100644 --- a/requirements/optional.txt +++ b/requirements/optional.txt @@ -1,18 +1,15 @@ -pandas>=1.4.0 ; python_version >= '3.10' # Python 3.10+ -pandas>=1.4.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 -pandas>=1.4.0 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 -pandas>=1.2.0 ; python_version < '3.8' and python_version >= '3.7.1' # Python 3.7.1 -pandas>=1.1.4 ; python_version < '3.7.1' and python_version >= '3.7' # Python 3.7 -pandas>=1.1.4 ; python_version < '3.7' and python_version >= '3.6.1' # Python 3.6.1 -pandas>=1.1.4 ; python_version < '3.6.1' and python_version >= '3.6' # Python 3.6 +pandas>=1.5.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ +pandas>=1.3.5 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +pandas>=1.4.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 +pandas>=1.4.0 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 +pandas>=1.2.0 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7.1 +pandas>=1.0.0 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6.1 tqdm >= 4.23.4 -Pillow>=9.1.0 ; python_version >= '3.10' # Python 3.10+ -Pillow>=9.1.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 -Pillow>=8.0.1 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 -Pillow>=8.0.0 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 -Pillow>=8.0.0 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 +Pillow>=9.2.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ +Pillow>=9.1.0 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +Pillow>=8.0.0 ; python_version < '3.10' and python_version >= '3.6' # Python 3.6-3.9 # opencv-python >= 3.4.1 @@ -27,30 +24,29 @@ seaborn>=0.10.0 ; python_version >= '3.6' # Pyt seaborn>=0.9.1 ; python_version < '3.6' and python_version >= '2.7' # Python 2.7 # h5py >= 2.8.0 -protobuf >= 3.6.0 # scikit-learn >= 0.19.1 -scikit-learn>=1.0.2 ; python_version >= '3.10' # Python 3.10+ +scikit-learn>=1.1.0 ; python_version >= '3.10' # Python 3.10+ scikit-learn>=1.0.2 ; python_version < 
'3.10' and python_version >= '3.9' # Python 3.9 scikit-learn>=1.0.2 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 scikit-learn>=0.24.1 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 scikit-learn>=0.24.1 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 - -scipy>=1.8.0 ; python_version >= '3.10' # Python 3.10+ +scipy>=1.9.2 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ +scipy>=1.8.0 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 scipy>=1.8.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 scipy>=1.8.0 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 scipy>=1.6.0 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 scipy>=1.5.4 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 -psutil >= 5.4.7 +psutil>=5.9.1 ; python_version >= '3.11' # Python 3.11+ +psutil>=5.9.1 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +psutil>=5.8.0 ; python_version < '3.10' # Python 3.8-3.9 Pygments >= 2.2.0 -tensorboard_logger >= 0.1.0 -tensorboard >= 1.8.0 sympy >= 1.3 -ndsampler >= 0.6.7 -kwcoco >= 0.2.31 +ndsampler >= 0.7.3 +kwcoco >= 0.5.6 # pyqt5>= 5.11.2;python_version>'2.7' # diff --git a/requirements/problematic.txt b/requirements/problematic.txt index 22e6010ccc16d9ea8035d71b666c2fe246adb200..1e1563cfe34db0ce56a9b4835e921ea0712eb64d 100644 --- a/requirements/problematic.txt +++ b/requirements/problematic.txt @@ -1,4 +1,6 @@ # These are optional requirements that are problematic when installing via pip pycocotools - imgaug >= 0.2.6 +tensorboard_logger >= 0.1.0 +tensorboard >= 1.8.0 +protobuf >= 3.6.0 diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 8fd24af528dc68bf48f08296b9724aa2526fcba6..880f514c84024122b056718d1327a9d7a50c056f 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -7,31 +7,30 @@ # python ~/local/tools/supported_python_versions_pip.py numpy -torch>=1.11.0 ; python_version >= '3.10' # Python 3.10+ -torch>=1.11.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 -torch>=1.7.0 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 -torch>=1.7.0 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 -torch>=1.7.0 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 +torch>=1.13.0 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ +torch>=1.11.0 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +torch>=1.9.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 +torch>=1.9.0 ; python_version < '3.9.0' and python_version >= '3.6.0' # Python 3.6-3.8 + # torchvision req table +# xdev availpkg torchvision # https://github.com/pytorch/vision -torchvision>=0.12.0 ; python_version >= '3.10' # Python 3.10+ -torchvision>=0.12.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 -torchvision>=0.8.1 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 -torchvision>=0.8.1 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 -torchvision>=0.8.1 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 +torchvision>=0.12.0 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10+ +torchvision>=0.10.0 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 +torchvision>=0.10.0 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 +torchvision>=0.10.0 ; python_version < '3.8' and python_version >= '3.7' 
# Python 3.7 +torchvision>=0.10.0 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 six >= 1.11.0 -numpy>=1.21.6 ; python_version >= '3.10' # Python 3.10+ -numpy>=1.21.4 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 -numpy>=1.19.2 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 -numpy>=1.19.2 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 -numpy>=1.19.2 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 +numpy>=1.23.2 ; python_version < '4.0' and python_version >= '3.11' # Python 3.11+ +numpy>=1.21.6 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 +numpy>=1.19.3 ; python_version < '3.10' and python_version >= '3.6' # Python 3.6 - 3.9 -ubelt >= 1.1.2 +ubelt>=1.2.3 -parse >= 1.8.4 +parse >= 1.12.0 pyflakes >= 2.4.0 astunparse >= 1.6.1 pygtrie >= 2.3.3 @@ -42,11 +41,11 @@ imageio < 2.8.0;python_version < '3.0' # imgaug >= 0.2.6 # imgaug < 0.3.0;python_version < '3.0' -# NOTE: in the future kwimage and kwplot may become optional -scriptconfig >= 0.5.8 -kwarray >= 0.6.0 -kwimage >= 0.9.2 -kwplot >= 0.4.12 +# NOTE: in the future kwplot may become optional +scriptconfig >= 0.7.0 +kwarray>=0.6.7 +kwimage >= 0.9.7 +# kwplot >= 0.4.12 qualname>=0.1.0;python_version < '3.0' -torch_liberator >= 0.0.4 +torch_liberator >= 0.2.1 diff --git a/run_doctests.sh b/run_doctests.sh index 5126b094076785f1abfe623b6e9b67792a7b40e6..f5cf5d8f98a7d88a51b4e604c85d0709de6807b2 100755 --- a/run_doctests.sh +++ b/run_doctests.sh @@ -1,2 +1,2 @@ -#!/bin/bash -xdoctest netharn --style=google all +#!/usr/bin/env bash +xdoctest netharn --style=google all "$@" \ No newline at end of file diff --git a/run_tests.py b/run_tests.py index babad7151f5965db869b19111b16360ff01f66fd..3bfd26ee188d6deb44fd3ad2b56a00b751e4255e 100755 --- a/run_tests.py +++ b/run_tests.py @@ -3,10 +3,10 @@ if __name__ == '__main__': import pytest import sys package_name = 'netharn' - mod_dpath = package_name + mod_dpath = 'netharn' test_dpath = 'tests' pytest_args = [ - '--cov-config', '.coveragerc', + '--cov-config', 'pyproject.toml', '--cov-report', 'html', '--cov-report', 'term', '--xdoctest', diff --git a/setup.py b/setup.py index a4ecb94bd78c44503d3466bd531a061ef25ac74f..b321f1a057e04f7e4424d71670366bfa90e838f4 100755 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -# NOTE: pip install -U --pre h5py -from __future__ import absolute_import, division, print_function +# Generated by ~/code/xcookie/xcookie/builders/setup.py +# based on part ~/code/xcookie/xcookie/rc/setup.py.in import sys +import re +from os.path import exists, dirname, join from setuptools import find_packages -from os.path import exists from setuptools import setup @@ -12,7 +12,7 @@ def parse_version(fpath): """ Statically parse the version number from a python file """ - value = static_parse('__version__', fpath) + value = static_parse("__version__", fpath) return value @@ -21,23 +21,27 @@ def static_parse(varname, fpath): Statically parse the a constant variable from a python file """ import ast + if not exists(fpath): - raise ValueError('fpath={!r} does not exist'.format(fpath)) - with open(fpath, 'r') as file_: + raise ValueError("fpath={!r} does not exist".format(fpath)) + with open(fpath, "r") as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) + class StaticVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: - if getattr(target, 'id', None) == varname: + if getattr(target, "id", 
None) == varname: self.static_value = node.value.s + visitor = StaticVisitor() visitor.visit(pt) try: value = visitor.static_value except AttributeError: import warnings - value = 'Unknown {}'.format(varname) + + value = "Unknown {}".format(varname) warnings.warn(value) return value @@ -50,17 +54,16 @@ def parse_description(): pandoc --from=markdown --to=rst --output=README.rst README.md python -c "import setup; print(setup.parse_description())" """ - from os.path import dirname, join, exists - readme_fpath = join(dirname(__file__), 'README.rst') + readme_fpath = join(dirname(__file__), "README.rst") # This breaks on pip install, so check that it exists. if exists(readme_fpath): - with open(readme_fpath, 'r') as f: + with open(readme_fpath, "r") as f: text = f.read() return text - return '' + return "" -def parse_requirements(fname='requirements.txt', versions=False): +def parse_requirements(fname="requirements.txt", versions=False): """ Parse the package dependencies listed in a requirements file but strips specific versioning information. @@ -73,12 +76,13 @@ def parse_requirements(fname='requirements.txt', versions=False): Returns: List[str]: list of requirements items + + CommandLine: + python -c "import setup, ubelt; print(ubelt.urepr(setup.parse_requirements()))" """ - from os.path import exists, dirname, join - import re require_fpath = fname - def parse_line(line, dpath=''): + def parse_line(line, dpath=""): """ Parse information from a line in a requirements text file @@ -86,91 +90,127 @@ def parse_requirements(fname='requirements.txt', versions=False): line = '-e git+https://a.com/somedep@sometag#egg=SomeDep' """ # Remove inline comments - comment_pos = line.find(' #') + comment_pos = line.find(" #") if comment_pos > -1: line = line[:comment_pos] - if line.startswith('-r '): + if line.startswith("-r "): # Allow specifying requirements in other files - target = join(dpath, line.split(' ')[1]) + target = join(dpath, line.split(" ")[1]) for info in parse_require_file(target): yield info else: # See: https://www.python.org/dev/peps/pep-0508/ - info = {'line': line} - if line.startswith('-e '): - info['package'] = line.split('#egg=')[1] + info = {"line": line} + if line.startswith("-e "): + info["package"] = line.split("#egg=")[1] else: - if ';' in line: - pkgpart, platpart = line.split(';') + if "--find-links" in line: + # setuptools doesnt seem to handle find links + line = line.split("--find-links")[0] + if ";" in line: + pkgpart, platpart = line.split(";") # Handle platform specific dependencies # setuptools.readthedocs.io/en/latest/setuptools.html # #declaring-platform-specific-dependencies plat_deps = platpart.strip() - info['platform_deps'] = plat_deps + info["platform_deps"] = plat_deps else: pkgpart = line platpart = None # Remove versioning from the package - pat = '(' + '|'.join(['>=', '==', '>']) + ')' + pat = "(" + "|".join([">=", "==", ">"]) + ")" parts = re.split(pat, pkgpart, maxsplit=1) parts = [p.strip() for p in parts] - info['package'] = parts[0] + info["package"] = parts[0] if len(parts) > 1: op, rest = parts[1:] version = rest # NOQA - info['version'] = (op, version) + info["version"] = (op, version) yield info def parse_require_file(fpath): dpath = dirname(fpath) - with open(fpath, 'r') as f: + with open(fpath, "r") as f: for line in f.readlines(): line = line.strip() - if line and not line.startswith('#'): + if line and not line.startswith("#"): for info in parse_line(line, dpath=dpath): yield info def gen_packages_items(): if exists(require_fpath): for info in 
parse_require_file(require_fpath): - parts = [info['package']] - if versions and 'version' in info: - if versions == 'strict': + parts = [info["package"]] + if versions and "version" in info: + if versions == "strict": # In strict mode, we pin to the minimum version - if info['version']: + if info["version"]: # Only replace the first >= instance - verstr = ''.join(info['version']).replace('>=', '==', 1) + verstr = "".join(info["version"]).replace(">=", "==", 1) parts.append(verstr) else: - parts.extend(info['version']) - if not sys.version.startswith('3.4'): + parts.extend(info["version"]) + if not sys.version.startswith("3.4"): # apparently package_deps are broken in 3.4 - plat_deps = info.get('platform_deps') + plat_deps = info.get("platform_deps") if plat_deps is not None: - parts.append(';' + plat_deps) - item = ''.join(parts) + parts.append(";" + plat_deps) + item = "".join(parts) yield item packages = list(gen_packages_items()) return packages -VERSION = version = parse_version('netharn/__init__.py') # needs to be a global var for git tags -NAME = 'netharn' - -if __name__ == '__main__': - +# # Maybe use in the future? But has private deps +# def parse_requirements_alt(fpath='requirements.txt', versions='loose'): +# """ +# Args: +# versions (str): can be +# False or "free" - remove all constraints +# True or "loose" - use the greater or equal (>=) in the req file +# strict - replace all greater equal with equals +# """ +# # Note: different versions of pip might have different internals. +# # This may need to be fixed. +# from pip._internal.req import parse_requirements +# from pip._internal.network.session import PipSession +# requirements = [] +# for req in parse_requirements(fpath, session=PipSession()): +# if not versions or versions == 'free': +# req_name = req.requirement.split(' ')[0] +# requirements.append(req_name) +# elif versions == 'loose' or versions is True: +# requirements.append(req.requirement) +# elif versions == 'strict': +# part1, *rest = req.requirement.split(';') +# strict_req = ';'.join([part1.replace('>=', '==')] + rest) +# requirements.append(strict_req) +# else: +# raise KeyError(versions) +# requirements = [r.replace(' ', '') for r in requirements] +# return requirements + + +NAME = "netharn" +INIT_PATH = "netharn/__init__.py" +VERSION = parse_version(INIT_PATH) + +if __name__ == "__main__": setupkw = {} - setupkw["install_requires"] = parse_requirements("requirements/runtime.txt") + + setupkw["install_requires"] = parse_requirements( + "requirements/runtime.txt", versions="loose" + ) setupkw["extras_require"] = { - "all": parse_requirements("requirements.txt"), - "tests": parse_requirements("requirements/tests.txt"), - "optional": parse_requirements("requirements/optional.txt"), - "headless": parse_requirements("requirements/headless.txt"), - "graphics": parse_requirements("requirements/graphics.txt"), + "all": parse_requirements("requirements.txt", versions="loose"), + "tests": parse_requirements("requirements/tests.txt", versions="loose"), + "optional": parse_requirements("requirements/optional.txt", versions="loose"), + "headless": parse_requirements("requirements/headless.txt", versions="loose"), + "graphics": parse_requirements("requirements/graphics.txt", versions="loose"), # Strict versions "headless-strict": parse_requirements( "requirements/headless.txt", versions="strict" @@ -188,40 +228,27 @@ if __name__ == '__main__': ), } - setup( - name=NAME, - version=VERSION, - author='Jon Crall', - author_email='jon.crall@kitware.com', - 
url='https://gitlab.kitware.com/computer-vision/netharn', - description='Train and deploy pytorch models', - long_description=parse_description(), - long_description_content_type='text/x-rst', - packages=find_packages(include='netharn.*'), - package_data={ - 'netharn.initializers._nx_ext_v2': ['*.pyx'], - }, - python_requires='>=3.6', - license='Apache 2', - classifiers=[ - # List of classifiers available at: - # https://pypi.python.org/pypi?%3Aaction=list_classifiers - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Topic :: Utilities', - # This should be interpreted as Apache License v2.0 - 'License :: OSI Approved :: Apache Software License', - # Supported Python versions - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - ], - **setupkw, - ) + setupkw["name"] = NAME + setupkw["version"] = VERSION + setupkw["author"] = "Jon Crall" + setupkw["author_email"] = "jon.crall@kitware.com" + setupkw["url"] = "https://gitlab.kitware.com/computer-vision/netharn" + setupkw["description"] = "Train and deploy pytorch models" + setupkw["long_description"] = parse_description() + setupkw["long_description_content_type"] = "text/x-rst" + setupkw["license"] = "Apache 2" + setupkw["packages"] = find_packages(".") + setupkw["python_requires"] = ">=3.7" + setupkw["classifiers"] = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Utilities", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + ] + setup(**setupkw) diff --git a/super_setup.py b/super_setup.py index 90bd84ae6f76e0e4f2405ed8da38e251e0cd5ebb..6bccb6b1460e6b6b78bea02f451b74b2af341e84 100755 --- a/super_setup.py +++ b/super_setup.py @@ -866,13 +866,13 @@ DEVEL_REPOS = [ 'remotes': {'origin': 'git@gitlab.kitware.com:computer-vision/kwcoco.git'}, }, { - 'name': 'kwplot', 'branch': 'dev/0.4.13', 'remote': 'origin', + 'name': 'kwplot', 'branch': 'dev/0.4.14', 'remote': 'origin', 'remotes': {'origin': 'git@gitlab.kitware.com:computer-vision/kwplot.git'}, }, # Pytorch deployer / exporter { - 'name': 'liberator', 'branch': 'dev/0.0.2', 'remote': 'origin', + 'name': 'liberator', 'branch': 'dev/0.0.3', 'remote': 'origin', 'remotes': {'origin': 'git@gitlab.kitware.com:python/liberator.git'}, }, { @@ -886,7 +886,7 @@ DEVEL_REPOS = [ 'remotes': {'origin': 'git@gitlab.kitware.com:utils/scriptconfig.git'}, }, { - 'name': 'ndsampler', 'branch': 'dev/0.6.8', 'remote': 'origin', + 'name': 'ndsampler', 'branch': 'dev/0.6.11', 'remote': 'origin', 'remotes': {'origin': 'git@gitlab.kitware.com:computer-vision/ndsampler.git'}, }, diff --git a/tests/test_import.py b/tests/test_import.py new file mode 100644 index 0000000000000000000000000000000000000000..1ed23a038a7ddb31d08bff748297861e01d13edf --- /dev/null +++ b/tests/test_import.py @@ -0,0 +1,2 @@ +def test_import(): + import netharn \ No newline at end of file
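
For reference, the deprecation shim added to netharn/initializers/functional.py in this patch forwards maximum_common_ordered_subpaths to torch_liberator. The doctest-style sketch below shows the suggested migration call made directly against torch_liberator. It is only a sketch: it assumes torch_liberator >= 0.2.1 (the floor pinned in requirements/runtime.txt), that the ported function keeps the (paths1, paths2, sep) calling convention used by the shim and the (subpaths1, subpaths2) return of the original netharn implementation, and it uses made-up parameter-path strings purely for illustration.

>>> # Call the torch_liberator implementation directly instead of the
>>> # deprecated netharn wrapper (hypothetical example paths).
>>> from torch_liberator.initializer import maximum_common_ordered_subpaths
>>> paths1 = ['stem.conv1.weight', 'stem.bn1.bias', 'head.fc.weight']
>>> paths2 = ['backbone.stem.conv1.weight', 'backbone.stem.bn1.bias', 'head.fc.weight']
>>> # The separator splits each path into the nodes of an ordered tree;
>>> # the result pairs up the corresponding common sub-paths of both inputs.
>>> subpaths1, subpaths2 = maximum_common_ordered_subpaths(paths1, paths2, '.')
>>> mapping = dict(zip(subpaths1, subpaths2))

Note that the replacement body forwards only paths1, paths2, and sep, so the previous mode='isomorphism' option of the netharn wrapper no longer changes the result.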