Commit 43f0b283 authored by Brad King

Add ExternalData infrastructure

Port the ExternalData configuration from ITK 'master' as of 2013-05-06.

Add a CMake/vtkExternalData.cmake module to include and configure the
ExternalData module for VTK.  List MIDAS and vtk.org URLs.

Include vtkExternalData from the top-level CMakeLists.txt file and from
Testing/External/CMakeLists.txt so it works for both the main build and
when building tests externally.

Teach the VTK pre-commit hook to move staged .ExternalData_MD5_* files
left by the ExternalData module into the store at the top of the source
tree as .ExternalData/MD5/* and import them into Git as refs/data/MD5/*.
Teach the VTK git-gerrit-push alias to push content from refs/data/MD5/*
when commits referencing it are pushed to Gerrit.

Add a .gitignore to tell Git to ignore .ExternalData* names.

Change-Id: I8c0a91cb3ce350450e378e4f16a23c62e7f2de6f
parent a0172acd
# Do not add ExternalData module staging files
.ExternalData*
# Configure the ExternalData module for this project: load the module,
# choose the local object stores, list the download URL templates, and set
# the content-link and file-series options.

# ExternalData.cmake is loaded from the directory containing this file.
get_filename_component(_VTKExternalData_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
include(${_VTKExternalData_DIR}/ExternalData.cmake)

# When no object store is configured yet, compute the default for the cache
# entry from the ExternalData_OBJECT_STORES environment variable (converted
# to CMake path form), or leave the default empty if the variable is unset.
if(NOT ExternalData_OBJECT_STORES)
  if(DEFINED "ENV{ExternalData_OBJECT_STORES}")
    file(TO_CMAKE_PATH "$ENV{ExternalData_OBJECT_STORES}" ExternalData_OBJECT_STORES_DEFAULT)
  else()
    set(ExternalData_OBJECT_STORES_DEFAULT "")
  endif()
endif()

set(ExternalData_OBJECT_STORES "${ExternalData_OBJECT_STORES_DEFAULT}" CACHE STRING
  "Semicolon-separated list of local directories holding data objects in the layout %(algo)/%(hash).")
mark_as_advanced(ExternalData_OBJECT_STORES)

# Still nothing configured: fall back to a store inside the build tree and
# make sure that directory exists.
if(NOT ExternalData_OBJECT_STORES)
  set(ExternalData_OBJECT_STORES "${CMAKE_BINARY_DIR}/ExternalData/Objects")
  file(MAKE_DIRECTORY "${ExternalData_OBJECT_STORES}")
endif()

# Always consult the local data store populated by the VTK pre-commit hook.
# NOTE(review): CMAKE_SOURCE_DIR is the top of whichever build includes this
# file -- confirm that is the intended location when tests are built
# externally.
list(APPEND ExternalData_OBJECT_STORES
  "${CMAKE_SOURCE_DIR}/.ExternalData"
  )

set(ExternalData_BINARY_ROOT ${CMAKE_BINARY_DIR}/ExternalData)

# User-supplied URL templates come first; the project defaults are appended.
set(ExternalData_URL_TEMPLATES "" CACHE STRING
  "Additional URL templates for the ExternalData CMake script to look for testing data. E.g.
file:///var/bigharddrive/%(algo)/%(hash)")
mark_as_advanced(ExternalData_URL_TEMPLATES)
list(APPEND ExternalData_URL_TEMPLATES
  # Data published by MIDAS
  "http://midas3.kitware.com/midas/api/rest?method=midas.bitstream.download&checksum=%(hash)&algorithm=%(algo)"

  # Data published by developers using git-gerrit-push.
  "http://www.vtk.org/files/ExternalData/%(algo)/%(hash)"
  )

# Have ExternalData commands turn raw data files into MD5 content links.
# TODO: Condition this feature on presence of our pre-commit hook.
set(ExternalData_LINK_CONTENT MD5)

# File series have the form <base>.<ext>, <base>_<n>.<ext>, where <base> may
# itself end in a (test) number that is not part of the series numbering.
set(ExternalData_SERIES_PARSE "()(\\.[^./]*)$")
set(ExternalData_SERIES_MATCH "(_[0-9]+)?")
......@@ -16,6 +16,7 @@ set(VTK_CMAKE_DIR "${VTK_SOURCE_DIR}/CMake")
set(CMAKE_MODULE_PATH ${VTK_CMAKE_DIR} ${CMAKE_MODULE_PATH})
include(vtkModuleMacros)
include(vtkExternalData)
# Set a default build type if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
......@@ -412,3 +413,7 @@ endif()
unset(_vtk_targets)
unset(_vtk_compiletools_targets)
unset(_vtk_all_targets)
# Create target to download data from the VTKData group. This must come after
# all tests have been added that reference the group, so we put it last.
ExternalData_Add_Target(VTKData)
......@@ -14,6 +14,7 @@ get_filename_component(VTK_TOP_DIR ${VTKTestExternal_SOURCE_DIR}/../.. ABSOLUTE)
# Load module infrastructure macros.
list(APPEND CMAKE_MODULE_PATH ${VTK_TOP_DIR}/CMake)
include(vtkModuleMacros)
include(vtkExternalData)
include(vtkTestingMacros)
# Find the VTK build or install tree. Assume the version matches exactly.
......
#!/usr/bin/env bash
USAGE="[<remote>] [--no-topic] [--dry-run] [--]"
USAGE="[<remote>] [--no-topic] [--no-data] [--keep-data] [--dry-run] [--]"
OPTIONS_SPEC=
SUBDIRECTORY_OK=Yes
. "$(git --exec-path)/git-sh-setup"
# Quiet extended-regex match.
#
# Forwards all arguments to `grep -E` and discards both stdout and stderr, so
# only the exit status is observable: 0 when a line matched, non-zero
# otherwise.  `grep -E` is used instead of the obsolescent `egrep` wrapper,
# which newer grep releases warn about and may not install.
egrep-q() {
  grep -E "$@" >/dev/null 2>&1
}
# Create a commit object whose tree contains the blobs named by data refs.
#
# Arguments: ref names/patterns forwarded to `git for-each-ref`.
# Output:    the id of the new commit on stdout.  Prints nothing and returns 0
#            when no arguments are given, no refs match, or no ref is a blob
#            with a name of the form <algo>/<hex-hash> under refs/data/.
# Uses $GIT_DIR for the temporary index (presumably set by the git-sh-setup
# script sourced at the top of this file -- confirm).
data_commit() {
  # Get data refs.  Skip if none.
  test $# != 0 || return 0
  state=$(git for-each-ref "$@") || return
  test -n "$state" || return 0

  # Convert each data ref to an index entry.
  index=$(
    echo "$state" |
    while read -r obj type refname; do
      # Take blobs with valid ref names.
      name="${refname#refs/data/}"
      if echo "$type,$name" | egrep-q '^blob,[A-Za-z0-9-]+/[0-9A-Fa-f]+$'; then
        # Place the blob at the path named by the ref.  The --index-info
        # record format is "<mode> SP <object> SP <stage> TAB <path>"; the
        # TAB is mandatory, so spell it out with printf rather than relying
        # on a literal tab character surviving in the script text.
        printf '100644 %s 0\t%s\n' "$obj" "$name"
      else
        # Warn about unprocessed refs.
        echo "unknown $refname" 1>&2
      fi
    done
  ) || return
  test -n "$index" || return 0

  # Convert the index into a tree.  A private, throw-away index file is used
  # so the user's real index is never touched; the trap removes it when the
  # command-substitution subshell exits.
  tree=$(
    GIT_INDEX_FILE="$GIT_DIR/tmp-index.$$.$RANDOM" &&
    export GIT_INDEX_FILE &&
    trap "rm -f '$GIT_INDEX_FILE'" EXIT &&
    rm -f "$GIT_INDEX_FILE" &&
    echo "$index" | git update-index --index-info &&
    git write-tree
  ) &&

  # Store the tree in a commit object.
  echo 'data' | git commit-tree "$tree"
}
# Delete the local refs/data/* refs for every entry in the tree of commit $1.
# Does nothing when $dry_run is non-empty.
data_remove() {
  if test -n "$dry_run"; then
    return 0
  fi

  git ls-tree -r "$1" |
  while read entry_mode entry_type entry_obj entry_path; do
    # Passing the expected object id makes update-ref refuse to delete a ref
    # that no longer holds this data; such failures are ignored.
    git update-ref -d "refs/data/$entry_path" "$entry_obj" 2>/dev/null || true
  done
}
# Report the data objects contained in commit $1 and, unless $keep_data is
# set, remove the corresponding local refs via data_remove (best effort).
data_report_and_remove() {
  case "$keep_data" in
    '')
      action="removed"
      data_remove "$1" || true
      ;;
    *)
      action="kept"
      ;;
  esac

  # Summarize what happened, then list the pushed paths.
  echo "Pushed refs/data and $action local copy:" &&
  git ls-tree --name-only -r "$1" | sed "s/^/ /"
}
# Print the refspec that pushes data commit $1 to refs/data/commits/$1.
data_push_refspec() {
  printf '%s\n' "$1:refs/data/commits/$1"
}
# List the data refs referenced by content links in a range of commits.
#
# Arguments: revision arguments forwarded to `git rev-list`.
# Output:    one "refs/data/<algo>/<hash>" line for every added or modified
#            *.md5 file whose blob content is a valid 32-digit hex MD5 hash.
#
# `grep -E` replaces the obsolescent `egrep` wrapper; behavior is the same.
# NOTE(review): with -c, raw lines for merge commits carry one extra mode and
# object column per parent, so the fixed field positions read below may not
# select the result blob for merges -- confirm whether that matters here.
data_refs() {
  git rev-list "$@" |
  git diff-tree --no-commit-id --root -c -r --diff-filter=AM --stdin |
  grep -E '\.(md5)$' |
  # read :srcmode dstmode srcobj dstobj status file
  while read -r _ _ _ obj _ file; do
    # Identify the hash algorithm used.
    case "$file" in
      *.md5) algo=MD5 ; validate="^[0-9a-fA-F]{32}$" ;;
      *) continue ;;
    esac

    # Load and validate the hash.  Blobs that cannot be read or do not look
    # like a hash are skipped silently.
    if hash=$(git cat-file blob "$obj" 2>/dev/null) &&
       echo "$hash" | egrep-q "$validate"; then
      echo "refs/data/$algo/$hash"
    fi
  done
}
#-----------------------------------------------------------------------------
remote=''
refspecs=''
keep_data=''
no_topic=''
no_data=''
dry_run=''
# Parse the command line options.
# Parse command line options.
while test $# != 0; do
case "$1" in
--no-topic) no_topic=1 ;;
--dry-run) dry_run=--dry-run ;;
--keep-data) keep_data=1 ;;
--no-topic) no_topic=1 ;;
--no-data) no_data=1 ;;
--dry-run) dry_run=--dry-run ;;
--) shift; break ;;
-*) usage ;;
*) test -z "$remote" || usage ; remote="$1" ;;
......@@ -30,29 +119,48 @@ test -n "$remote" || remote="gerrit"
if test -z "$no_topic"; then
# Identify and validate the topic branch name.
topic="$(git symbolic-ref HEAD | sed -e 's|^refs/heads/||')"
if test "$topic" = "master"; then
head="$(git symbolic-ref HEAD)" && topic="${head#refs/heads/}" || topic=''
if test -z "$topic" -o "$topic" = "master"; then
die 'Please name your topic:
git checkout -b descriptive-name'
git checkout -b descriptive-name'
fi
refspecs="HEAD:refs/for/master/$topic"
# The topic branch will be pushed by name.
refspecs="HEAD:refs/for/master/$topic $refspecs"
fi
# Fetch the current upstream master branch head.
# This helps computation of a minimal pack to push.
echo "Fetching $remote master"
fetch_out=$(git fetch "$remote" master 2>&1) || die "$fetch_out"
master=$(git rev-parse FETCH_HEAD) || exit
if test -z "$no_data"; then
# Create a commit containing the data to push.
data_refs=$(data_refs $master..) &&
data=$(data_commit $data_refs) || die 'Failed to create data commit'
if test -n "$data"; then
refspecs="$(data_push_refspec "$data") $refspecs"
fi
else
data=''
fi
# Exit early if we have nothing to push.
if test -z "$refspecs"; then
echo "Nothing to push!"
echo 'Nothing to push!'
exit 0
fi
# Fetch the current upstream master branch head.
# This helps the computation of a minimal pack to push.
echo "Fetching $remote master"
fetch_out=$(git fetch "$remote" master 2>&1) || die "$fetch_out"
# Push. Save output and exit code.
echo "Pushing to $remote"
push_stdout=$(git push --porcelain $dry_run "$remote" $refspecs); push_exit=$?
echo "$push_stdout"
# Check if data were pushed successfully.
if test -n "$data" &&
echo "$push_stdout" | egrep-q "^[*=+] $data"; then
data_report_and_remove "$data"
fi
# Reproduce the push exit code.
exit $push_exit
......@@ -12,6 +12,62 @@ die() {
exit 1
}
# Process one staged content link (a *.md5 file).
#
# Inputs (variables set by the caller's `read`):
#   file     path of the staged content link
#   dst_obj  id of the staged blob holding the hash
# Also reads ExternalData_STORE, the root of the local object store.
#
# Aborts via die when the link has an unrecognized extension, holds an
# invalid hash, or coexists with a raw file of the same base name.  If the
# ExternalData module left a staged ".ExternalData_<algo>_<hash>" file next
# to the link, its content is imported into Git as refs/data/<algo>/<hash>
# and then moved into the local store.
ExternalData_stage_linked_content() {
  # Identify the hash algorithm used.  The extension is stripped as a suffix
  # (%) so that a ".md5" occurring earlier in the path is left alone.
  case "$file" in
    *.md5) algo=MD5 ; base="${file%.md5}" ; validate="^[0-9a-fA-F]{32}$" ;;
    *) die "$file: invalid content link (unrecognized extension)" ;;
  esac

  # Load and validate the hash stored in the staged blob.
  hash=$(git cat-file blob "$dst_obj") || hash=""
  echo "$hash" | egrep-q "$validate" ||
    die "$file: invalid content link (does not match '$validate')"

  # Reject simultaneous raw file and content link.
  files=$(git ls-files -- "$base")
  if test -n "$files"; then
    die "$file: content link may not coexist with $files"
  fi

  # Find the content referenced by the link.
  staged="$(dirname "$file")/.ExternalData_${algo}_${hash}"
  stored="${ExternalData_STORE}/$algo/$hash"
  ref="refs/data/$algo/$hash"
  obj=$(git rev-parse --verify -q "$ref") || obj=""
  if test -z "$obj" && test -f "$staged"; then
    # Content is staged by the ExternalData module.  Store it in Git.
    obj=$(git hash-object -w -- "$staged") ||
      die "$file: git hash-object failed to load $staged"
    # The empty old-value argument makes update-ref create the ref only if
    # it does not exist yet.
    git update-ref "$ref" "$obj" "" ||
      die "$file: git update-ref failed to create $ref = $obj"
    echo "$file: Added content to Git at $ref"
  fi

  # Move staged object to local store if it is in Git.  `mv -n` will not
  # overwrite an object already in the store; the following rm then drops
  # the staged copy if it is still there.
  if test -f "$staged" && test -n "$obj"; then
    mkdir -p "${stored%/*}" &&
    mv -n "$staged" "$stored" &&
    rm -f "$staged" &&
    echo "$file: Added content to local store at $stored"
  fi

  # Report destination of content link.
  if test -f "$stored"; then
    echo "Content link $file -> $stored"
  else
    echo "Content link $file -> (object not in local store)"
  fi
}
# Check a staged file that is not a content link: abort via die if the index
# also contains a content link "<file>.md5" for it.  Reads the path from the
# caller's $file variable.
ExternalData_non_content_link() {
  link_files=$(git ls-files -- "$file.md5")
  test -z "$link_files" ||
    die "$file: file may not coexist with $link_files"
}
#-----------------------------------------------------------------------------
# Check that developer setup is up-to-date.
......@@ -24,3 +80,18 @@ if test $lastSetupForDevelopment -lt $SetupForDevelopment_VERSION; then
Utilities/SetupForDevelopment.sh
'
fi
#-----------------------------------------------------------------------------
# Root of the local ExternalData object repository.
ExternalData_STORE=".ExternalData"

# Walk every staged addition or modification.  Regular files (mode 100644)
# named *.md5 are handled as content links for the CMake ExternalData module;
# everything else is checked for a conflicting content link.  The handlers
# read the dst_obj and file variables set here.  The loop runs in a pipeline
# subshell, so a die inside it is turned into an exit of the hook by the
# trailing `|| exit 1`.
git diff-index --cached HEAD --diff-filter=AM |
while read src_mode dst_mode src_obj dst_obj status file; do
  case "$dst_mode $file" in
    "100644 "*.md5) ExternalData_stage_linked_content ;;
    *) ExternalData_non_content_link ;;
  esac
done || exit 1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment