Commit bb884787 authored by John Tourtellott
Browse files

Add code to check md5 sums on SALI queries

parent 0683399a
......@@ -70,7 +70,20 @@ class SimulationAssetLocationIndex():
self._sali_folder_id = sali_folder['_id']
def query(self, local_location, remote_machine='cori', verify=True, return_all_metadata=False):
"""Checks for item in the SALI folder with the given model name."""
"""Checks for item in the SALI folder with the given model name.
Return value is one of the following:
* None if the asset is not found in the index.
* None if the asset is found, and the "verify" argument is True, and either the local
or remote asset fails verification.
* The remote location of the asset if it is found and the "verify" argument is False.
* The remote location of that asset if it is found, and the "verify" argument is True,
and both local and remote assets pass verification.
* The internal SALI meta data for the assets if the "return_all_metadata" argument is set
and either the "verify" argument is False or the "verify" argument is True and both
local and remote assets pass verification. The "return_all_metadata" argument is for
test and debug.
"""
filename = os.path.basename(local_location)
# print('query name:', filename)
gen = self._girder_client.listItem(self._sali_folder_id, name=filename)
......@@ -79,11 +92,11 @@ class SimulationAssetLocationIndex():
return None
if verify:
# TODO Verify local location md5sum
# TODO Verify remote location exists
# TODO Verify remote location md5sum
remote_location = item.get('meta').get(remote_machine, {}).get('path')
print('Warning: simulation asset was NOT verified:', remote_location)
if not self._verify_local_asset(item, local_location):
return None
if not self._verify_remote_asset(item):
return None
if return_all_metadata:
return item.get('meta')
......@@ -102,20 +115,21 @@ class SimulationAssetLocationIndex():
return False
filename = os.path.basename(local_location)
# Check if item for this filename is already in girder
gen = self._girder_client.listItem(self._sali_folder_id, name=filename)
item = self._next_item(gen)
if item is not None and not can_replace:
print('Item already exits for {}. Use can_replace flag to overwrite'.format(filename))
return False
# Get local md5 and hostname
local_md5 = None
with open(local_location, 'rb') as fp:
local_md5 = hashlib.md5(fp.read()).hexdigest()
local_md5 = self._get_local_md5(local_location)
if local_md5 is None:
print('Error getting model file md5')
return False
print('local md5:', local_md5)
hostname = socket.gethostname()
print('local hostname:', hostname)
# Check remote file
# Check for remote file
url = '{}/command/{}'.format(NEWT_URL, remote_machine)
data = {
'executable': '/usr/bin/ls {}'.format(remote_location),
......@@ -129,26 +143,11 @@ class SimulationAssetLocationIndex():
return False
# Get md5 for remote file
data['executable'] = '/usr/bin/md5sum {}'.format(remote_location)
r = self._newt_requests.post(url, data=data)
print('md5sum command returned', r.json())
md5_result = r.json()
if md5_result['error']:
print(md5_result['error'])
remote_md5 = self._get_remote_md5(remote_location, remote_machine)
if remote_md5 is None:
return False
# Valid output is of the form "<md5sum> <path>"
output = md5_result['output']
remote_md5 = output.split(' ')[0]
# Check if item for this filename is already in girder
gen = self._girder_client.listItem(self._sali_folder_id, name=filename)
item = self._next_item(gen)
if item is not None and not can_replace:
print('Item already exits for {}. Use can_replace flag to overwrite'.format(filename))
return False
# Build metadata object
source = dict(hostname=hostname, path=local_location, md5=local_md5)
remote = dict(path=remote_location, md5=remote_md5)
metadata = dict(sources=[source])
......@@ -159,12 +158,34 @@ class SimulationAssetLocationIndex():
print('createItem returned', item)
return True
def _check_remote_file(self, remote_path, md5=None):
"""Verifies that remote_path exists, and checkes md5 if specified.
def _get_local_md5(self, local_location):
""""""
local_md5 = None
with open(local_location, 'rb') as fp:
local_md5 = hashlib.md5(fp.read()).hexdigest()
if local_md5 is None:
print('Error getting model file md5')
# print('local md5:', local_md5)
return local_md5
Returns boolean indicating success.
"""
return True
def _get_remote_md5(self, remote_location, remote_machine='cori'):
    """Runs the md5sum command on a remote machine and returns the checksum.

    The command is posted to the "command" url built from NEWT_URL and
    remote_machine.

    Args:
        remote_location: path of the file on the remote machine.
        remote_machine: name of the remote machine, used to build the url.

    Returns:
        The md5 hex string reported for the file, or None if the command
        reported an error or produced no usable output.
    """
    url = '{}/command/{}'.format(NEWT_URL, remote_machine)
    data = {
        'executable': '/usr/bin/md5sum {}'.format(remote_location),
        'loginenv': 'true'
    }
    r = self._newt_requests.post(url, data=data)
    # Decode the response body once and reuse it.
    md5_result = r.json()
    print('md5sum command returned', md5_result)

    error = md5_result.get('error')
    if error:
        print(error)
        return None

    # Valid output is of the form "<md5sum> <path>". Empty or missing
    # output must not be passed on as if it were a checksum.
    fields = (md5_result.get('output') or '').split()
    if not fields:
        print('md5sum command returned no output for', remote_location)
        return None
    return fields[0]
def _next_item(self, gen):
"""Returns next item from generator, or None if empty"""
......@@ -173,3 +194,40 @@ class SimulationAssetLocationIndex():
except StopIteration:
return None
return entry
def _verify_local_asset(self, sali_item, local_location):
"""Checks that the file at local_location matches the source information in the SALI item"""
sources_meta = sali_item.get('meta', {}).get('sources')
if sources_meta is None:
print('Internal error: Invalid SALI item {} - no meta.sources list'.format(sali_item[_id]))
return False
expected_md5 = sources_meta[0].get('md5')
if expected_md5 is None:
print('Internal error: Missing sources[0].md5 in SALI item {}'.format(sali_item[_id]))
return False
local_md5 = self._get_local_md5(local_location)
if local_md5 is None:
return False
# Verify if the 2 md5's match, of course
return local_md5 == expected_md5
def _verify_remote_asset(self, sali_item):
"""Checks that the file at local_location matches the source information in the SALI item"""
sources_meta = sali_item.get('meta', {}).get('sources')
if sources_meta is None:
print('Internal error: Invalid SALI item {} - no meta.sources list'.format(sali_item[_id]))
return False
expected_md5 = sources_meta[0].get('md5')
if expected_md5 is None:
print('Internal error: Missing sources[0].md5 in SALI item {}'.format(sali_item[_id]))
return False
remote_md5 = this._get_remote_md5(remote_location)
if remote_md5 is None:
return False
return remote_md5 == expected_md5
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment