SDL-pipelinecommands-execution.txt
December 18, 2020
Overview:
This document describes the variables and commands used in the steps
of the execution (evaluation) pipeline. This pipeline is meant to
use the OpenStack image produced in the Validation Pipeline to run
the system on a portion of the sequestered dataset called a 'part'.
The parts are run in parallel on separate nodes and merged at the
end of this pipeline after all parts have been run.
The SDL execution pipeline is a virtualized server environment run
using an OpenStack cluster that consists of a main server that starts
an external 4-GPU node to do the processing. The description below
covers the programs and commands executed on the external node, not
the commands run on the main server. Thus, any node manipulations
(e.g., starting/stopping a node, restricting the network) and
processing performed on the main server (e.g., moving outputs to the
reports, merging part outputs) are out of the scope of this document,
but their descriptions are included to document the steps on the SDL.
Variables:
run_id -unique identifier for the run (e.g. run-a48e2a06644344e592f98942b04d514a)
part_id -the evaluation dataset is split into multiple parts to allow parallel
processing; this represents the current part
evaluation_dataset -sequestered dataset used for evaluating the system
vids_per_chunk -selected number of videos to be processed in each designed chunk
current_date -the current date in the format yyyy-mm-dd
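For illustration, these variables might be set in the shell as
follows; all values below are hypothetical examples (the run_id reuses
the example above), not actual run data:
run_id=run-a48e2a06644344e592f98942b04d514a   # example from above
part_id=part-01                               # hypothetical part name
evaluation_dataset=ActEV-SDL-Eval             # hypothetical dataset name
vids_per_chunk=4                              # hypothetical chunk size
current_date=$(date +%Y-%m-%d)                # yyyy-mm-dd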
Steps and Commands:
STEP: setup
purpose: the main server creates the necessary config files to run the system
commands:
None
STEP: create instance
purpose: the main server deploys the base image to use for testing the submitted system
commands:
None
STEP: actev nist update
purpose: update any required packages
commands:
find ~/tmpnist | grep validate_execution | grep /bin/activate | head -n 1
rm -rf $(realpath /home/ubuntu/tmpnist/diva_evaluation_cli/diva_evaluation_cli/bin/private_src/implementation/validate_execution/python_env/bin/..)
cd ~/tmpnist
cd diva_evaluation_cli
git config core.filemode false
git pull -f --ff
git config core.filemode false
git submodule update --recursive --remote -f
while [ $(pgrep apt-* | wc -l) -ne 0 ]; do sleep 5; echo 'Installation pending, dpkg already running...'; done;
sudo apt-get update
diva_evaluation_cli/bin/install.sh --all
STEP: sequester network
purpose: modifies the firewall to shut off external access
commands:
host=$(hostname)
sudo sed -i -e "s/127.0.0.1 localhost/127.0.0.1 localhost $host/" /etc/hosts
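For illustration, assuming a stock Ubuntu /etc/hosts, the sed
substitution appends the node's hostname to the loopback entry so the
hostname still resolves locally once external access is cut off (the
hostname below is hypothetical):
before: 127.0.0.1 localhost
after:  127.0.0.1 localhost node-gpu-01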
STEP: attach volumes
purpose: mounts the datasets and outputs drives; each share is first mounted at its root export so the per-run subdirectory can be created, then remounted at that subdirectory
commands:
sudo mkdir -p -m 777 /datasets
sudo mount 10.0.0.5:/datasets2 /datasets
sudo mkdir -m 777 -p /datasets/${part_id}
sudo umount /datasets
sudo chmod 777 -R /datasets
sudo mount 10.0.0.5:/datasets2/${part_id} /datasets
sudo mkdir -p -m 777 /outputs
sudo mount 10.0.0.5:/outputs /outputs
sudo mkdir -m 777 -p /outputs/${current_date}/${run_id}
sudo umount /outputs
sudo chmod 777 -R /outputs
sudo mount 10.0.0.5:/outputs/${current_date}/${run_id} /outputs
STEP: prepare system execution
purpose: removes any old resource monitoring reports and creates the necessary output dir for processing data
commands:
mkdir -p /outputs/exec_results/${evaluation_dataset}
sudo chown ubuntu:ubuntu /mnt && sudo chmod -R 777 /mnt
cd ~/tmpnist; python3 -c "import diva_evaluation_cli; print(diva_evaluation_cli.__file__)"
find /home/ubuntu/tmpnist/diva_evaluation_cli | grep resources_monitoring.json | xargs -r rm
STEP: actev design chunks
purpose: designs the data chunks that will be processed based on the input number of videos per chunk
commands:
actev design-chunks -f /datasets/indexes/file-index.json -a /datasets/indexes/activity-index.json -o /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_designed_chunks.json -n $vids_per_chunk
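The exact schema of the designed-chunks file is defined by the CLI;
as a rough sketch, it is a JSON object keyed by chunk ID (the chunk
contents are elided here), e.g.:
{
  "Chunk1": { ... },
  "Chunk2": { ... }
}
The keys of this object are the chunk IDs iterated over in the
process-chunks step below.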
STEP: actev experiment init
purpose: start any servers or clusters needed
commands:
actev experiment-init -f /datasets/indexes/file-index.json -a /datasets/indexes/activity-index.json -c /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_designed_chunks.json -v /datasets/video -s /mnt
STEP: actev process chunks
purpose: begins processing the videos of the evaluation dataset; the for loop is run on the main instance, while the "actev" commands are run on the node
commands:
for chunk in `jq -r 'keys[]' /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_designed_chunks.json`; do
actev pre-process-chunk -i $chunk -s /mnt
actev process-chunk -i $chunk -s /mnt
actev post-process-chunk -i $chunk -s /mnt
done
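With a designed-chunks file like the sketch above, the jq invocation
in the loop would list the chunk IDs one per line (IDs are
hypothetical):
jq -r 'keys[]' /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_designed_chunks.json
Chunk1
Chunk2
Each ID is then passed in turn to pre-process-chunk, process-chunk,
and post-process-chunk.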
STEP: actev merge chunks
purpose: merges the outputs from the different chunks into one output and validates it
commands:
actev merge-chunks -c /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_chunk_result_summary.json -r /mnt -o /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_output.json
actev validate-execution -f /datasets/indexes/file-index.json -a /datasets/indexes/activity-index.json -o /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_output.json
STEP: collect results
purpose: retrieves the system output and resource monitoring report
commands:
cat /outputs/exec_results/${evaluation_dataset}/${evaluation_dataset}_output.json
cat /datasets/indexes/file-index.json
find ~ -name resources_monitoring.json 2>/dev/null | head -n 1 | xargs cat;
STEP: update names
purpose: a main server command that updates names from previously used activity names to the ActEV20 names
commands:
None
STEP: create report
purpose: a main server script that creates the runtime report
commands:
None
STEP: delete system
purpose: the main server deletes the running virtual system
commands:
None
STEP: aggregate output
purpose: if the system has completed all parts, this main server script merges the system output
for each part into one file.
commands:
None
STEP: submit aggregated results
purpose: if the system has completed all parts, this main server script pushes the results to the scoring server
commands:
None
STEP: submit scoring run request
purpose: if the system has completed all parts, this main server script creates a request for the
submitted system output to be scored
commands:
None
--------------------------------------------------------------
Created by: Andrew Delgado
History:
-12/18/2020 created
SDL-pipelinecommands-validation.txt
December 18, 2020
Overview:
This document describes the variables and commands used
in the steps of the validation pipeline. This pipeline is
meant to test the system on a known dataset and verify the
outputs match the desired syntax before the system processes
sequestered data.
The SDL validation pipeline is a virtualized server environment run
using an OpenStack cluster that consists of a main server that starts
an external 4-GPU node to do the processing. The description below
covers the programs and commands executed on the external node, not
the commands run on the main server. Thus, any node manipulations
(e.g., starting/stopping a node, restricting the network) and
processing performed on the main server (e.g., moving outputs to the
reports) are out of the scope of this document, but their
descriptions are included to document the steps on the SDL.
Variables:
validation_dataset -selected validation dataset for testing (e.g. ActEV-Eval-CLI-Validation-Set5)
scoring_run -unique identifier for the run (e.g. run-a48e2a06644344e592f98942b04d514a)
vids_per_chunk -selected number of videos to be processed in each designed chunk
current_date -the current date in the format yyyy-mm-dd
system_git_repo -if the system is submitted via a git repo, this is the url
system_git_checkout -if the system is submitted via a git repo, this is the branch or commit to checkout
system_tar -if the system is submitted via tar, this is the url to download from
evaluation_dataset -sequestered dataset used for evaluation
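For illustration, these variables might be set in the shell as
follows; the first two values reuse the examples above, and the rest
are hypothetical:
validation_dataset=ActEV-Eval-CLI-Validation-Set5
scoring_run=run-a48e2a06644344e592f98942b04d514a
vids_per_chunk=4                                       # hypothetical
current_date=$(date +%Y-%m-%d)
system_git_repo=https://example.com/team/system.git    # hypothetical URL
system_git_checkout=master                             # hypothetical ref
system_tar=https://example.com/team/system.tar.gz      # hypothetical URL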
Steps and Commands:
STEP: setup
purpose: the main server creates the necessary config files to run the system
commands:
None
STEP: create instance
purpose: the main server deploys the base image to use for testing the submitted system
commands:
None
STEP: actev nist install
purpose: retrieves the base CLI and installs the required packages
commands:
sed -ie "5 i PATH=$PATH:/home/ubuntu/.local/bin" /home/ubuntu/.bashrc
rm -rf ~/tmpnist;
mkdir -p ~/tmpnist
cd ~/tmpnist
git clone https://oauth2:a38YdwbfNrksDQxo5kff@gitlab.kitware.com/actev/diva_evaluation_cli.git --recursive
cd diva_evaluation_cli
sudo apt-get update
diva_evaluation_cli/bin/install.sh --all
STEP: actev get system
purpose: retrieves the submitted system via git or tar download and installs any extra required packages
commands:
# IF THE SYSTEM IS A TAR FILE
actev get-system archive -u ${system_tar} -l ~ -n diva_evaluation_cli
#IF THE SYSTEM IS A GIT REPO
actev get-system git -u ${system_git_repo} -l ~ -n diva_evaluation_cli -s ${system_git_checkout}
local_diva_src=$(find ~ | grep -v ~/tmpnist | grep -v "__MACOSX" | grep diva_evaluation_cli/src | head -n 1)
rm -rf ~/tmpnist/diva_evaluation_cli/diva_evaluation_cli/src
ln -s $local_diva_src ~/tmpnist/diva_evaluation_cli/diva_evaluation_cli/
rm -rf ~/tmpnist/diva_evaluation_cli/diva_evaluation_cli/container_output
cp -R ${local_diva_src}/../*_output ~/tmpnist/diva_evaluation_cli/diva_evaluation_cli/container_output
python3 -m pip install -r ${local_diva_src}/../../requirements.txt --upgrade
sudo apt remove python3-apt --yes;
chmod -R 777 ~/tmpnist $local_diva_src
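As a rough sketch of the resulting layout (based only on the commands
above), the NIST CLI checkout ends up pointing at the submitted
system's code:
~/tmpnist/diva_evaluation_cli/diva_evaluation_cli/
    src -> $local_diva_src            (symlink to the submitted src)
    container_output/                 (copied from the system's *_output)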
STEP: actev test cmd
purpose: tests the 'actev' command to verify it's installed properly
commands:
actev -h
STEP: actev validate system
purpose: validates that the user-implemented functions are properly created
commands:
actev validate-system --strict
STEP: actev system setup
purpose: runs any compilation and setup steps required for the system
commands:
actev system-setup &> ~/setup.log
actev status system-query
cat ~/setup.log
STEP: install nfs
purpose: installs the nfs package for future drive mounting
commands:
sudo apt-get update && sudo apt install nfs-common -y
STEP: sequester network
purpose: modifies the firewall to shut off external access
commands:
host=$(hostname)
sudo sed -i -e "s/127.0.0.1 localhost/127.0.0.1 localhost $host/" /etc/hosts
STEP: mount nfs server
purpose: mounts the datasets and outputs drives
commands:
sudo mkdir -p -m 777 /datasets
sudo mount 10.0.0.5:/datasets2 /datasets
sudo mkdir -m 777 -p /datasets/$validation_dataset
sudo umount /datasets && sudo chmod 777 -R /datasets
sudo mount 10.0.0.5:/datasets2/$validation_dataset /datasets
sudo mkdir -p -m 777 /outputs
sudo mount 10.0.0.5:/outputs /outputs
sudo mkdir -m 777 -p /outputs/${current_date}/$scoring_run
sudo umount /outputs
sudo chmod 777 -R /outputs
sudo mount 10.0.0.5:/outputs/${current_date}/$scoring_run /outputs
STEP: prepare system execution
purpose: removes any old resource monitoring reports and creates the necessary output dir for processing data
commands:
mkdir -p /outputs/exec_results/$validation_dataset
sudo chown ubuntu:ubuntu /mnt && sudo chmod -R 777 /mnt
cd ~/tmpnist; python3 -c "import diva_evaluation_cli; print(diva_evaluation_cli.__file__)"
find /home/ubuntu/tmpnist/diva_evaluation_cli | grep resources_monitoring.json | xargs -r rm
STEP: actev train system for validation
purpose: trains the system for the validation dataset using non-sequestered data
commands:
actev train-system -a /datasets/training_${validation_dataset}/activity-index.json -t /datasets/training_${validation_dataset}
STEP: actev design chunks
purpose: designs the data chunks that will be processed based on the input number of videos per chunk
commands:
actev design-chunks -f /datasets/indexes/file-index.json -a /datasets/indexes/activity-index.json -o /outputs/exec_results/${validation_dataset}/${validation_dataset}_designed_chunks.json -n $vids_per_chunk
STEP: actev experiment init
purpose: start any servers or clusters needed
commands:
actev experiment-init -f /datasets/indexes/file-index.json -a /datasets/indexes/activity-index.json -c /outputs/exec_results/${validation_dataset}/${validation_dataset}_designed_chunks.json -v /datasets/video -s /mnt
STEP: actev process chunks
purpose: begins processing the videos of the validation dataset; the for loop is run on the main instance, while the "actev" commands are run on the node
commands:
for chunk in `jq -r 'keys[]' /outputs/exec_results/${validation_dataset}/${validation_dataset}_designed_chunks.json`; do
actev pre-process-chunk -i $chunk -s /mnt
actev process-chunk -i $chunk -s /mnt
actev post-process-chunk -i $chunk -s /mnt
done
STEP: actev merge chunks
purpose: merges the outputs from the different chunks into one output
commands:
actev merge-chunks -c /outputs/exec_results/${validation_dataset}/${validation_dataset}_chunk_result_summary.json -r /mnt -o /outputs/exec_results/${validation_dataset}/${validation_dataset}_output.json
STEP: actev validate execution
purpose: validates the merged output file
commands:
actev validate-execution -f /datasets/indexes/file-index.json -a /datasets/indexes/activity-index.json -o /outputs/exec_results/${validation_dataset}/${validation_dataset}_output.json
STEP: actev collect results
purpose: retrieves the system output and resource monitoring report
commands:
cat /outputs/exec_results/${validation_dataset}/${validation_dataset}_output.json
cat /datasets/indexes/file-index.json
find ~ -name resources_monitoring.json 2>/dev/null | head -n 1 | xargs cat;
STEP: update names
purpose: main server script to update names from previously used activity names to the ActEV20 names.
commands:
None
STEP: score results
purpose: main server script that scores the system output
commands:
None
STEP: package results
purpose: main server script that creates a tar of the scoring results directory
commands:
None
STEP: actev experiment cleanup
purpose: performs any necessary system cleanup before using the image in execution (e.g. removing temp files)
commands:
actev experiment-cleanup -s /mnt
STEP: actev train system for execution
purpose: trains the system using sequestered data
commands:
actev train-system -a /datasets/training_${evaluation_dataset}/activity-index.json -t /datasets/training_${evaluation_dataset}
STEP: snapshot instance
purpose: main server script that saves an image of the system to use for evaluation; also clears the logs on the node via the CLI
commands:
actev clear-logs
STEP: delete system
purpose: main server script that deletes the running virtual system
commands:
None
STEP: post scores
purpose: main server script that pushes the scoring results tar to the frontend for participants to view
commands:
None
STEP: submit reports validation
purpose: main server script that pushes the resource monitoring reports to the frontend for participants to view
commands:
None
STEP: start execution
purpose: main server script that creates the partition processing runs to evaluate the system on sequestered data
commands:
None
------------------------------------------------------
Created by: Andrew Delgado
History:
-12/18/2020 created