Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
John Tourtellott
ACE3P Extensions
Commits
bb884787
Commit
bb884787
authored
Mar 05, 2020
by
John Tourtellott
Browse files
Add code to check md5 sums on SALI queries
parent
0683399a
Changes
1
Hide whitespace changes
Inline
Side-by-side
simulation-workflows/internal/writers/sali.py
View file @
bb884787
...
...
@@ -70,7 +70,20 @@ class SimulationAssetLocationIndex():
self
.
_sali_folder_id
=
sali_folder
[
'_id'
]
def
query
(
self
,
local_location
,
remote_machine
=
'cori'
,
verify
=
True
,
return_all_metadata
=
False
):
"""Checks for item in the SALI folder with the given model name."""
"""Checks for item in the SALI folder with the given model name.
Return value is one of the following:
* None if the asset is not found in the index.
* None if the asset is found, and the "verify" argument is True, and either the local
or remote asset fails verification.
* The remote location of the asset if it is found and the "verify" argument is False.
* The remote location of that asset if it is found, and the "verify" argument is True,
and both local and remote assets pass verification.
* The internal SALI meta data for the assets if the "return_all_metadata" argument is set
and either the "verify" argument is False or the "verify" argument is True and both
local and remote assets pass verification. The "return_all_metadata" argument is for
test and debug.
"""
filename
=
os
.
path
.
basename
(
local_location
)
# print('query name:', filename)
gen
=
self
.
_girder_client
.
listItem
(
self
.
_sali_folder_id
,
name
=
filename
)
...
...
@@ -79,11 +92,11 @@ class SimulationAssetLocationIndex():
return
None
if
verify
:
# TODO Verify
local
location
md5sum
# TODO Verify remote location exists
# TODO Verify remote location md5sum
remote_location
=
item
.
get
(
'meta'
).
get
(
remote_machine
,
{}).
get
(
'path'
)
print
(
'Warning: simulation asset was NOT verified:'
,
remote_locati
on
)
if
not
self
.
_verify_local_asset
(
item
,
local
_
location
):
return
None
if
not
self
.
_verify_remote_asset
(
item
):
return
N
on
e
if
return_all_metadata
:
return
item
.
get
(
'meta'
)
...
...
@@ -102,20 +115,21 @@ class SimulationAssetLocationIndex():
return
False
filename
=
os
.
path
.
basename
(
local_location
)
# Check if item for this filename is already in girder
gen
=
self
.
_girder_client
.
listItem
(
self
.
_sali_folder_id
,
name
=
filename
)
item
=
self
.
_next_item
(
gen
)
if
item
is
not
None
and
not
can_replace
:
print
(
'Item already exits for {}. Use can_replace flag to overwrite'
.
format
(
filename
))
return
False
# Get local md5 and hostname
local_md5
=
None
with
open
(
local_location
,
'rb'
)
as
fp
:
local_md5
=
hashlib
.
md5
(
fp
.
read
()).
hexdigest
()
local_md5
=
self
.
_get_local_md5
(
local_location
)
if
local_md5
is
None
:
print
(
'Error getting model file md5'
)
return
False
print
(
'local md5:'
,
local_md5
)
hostname
=
socket
.
gethostname
()
print
(
'local hostname:'
,
hostname
)
# Check remote file
# Check for remote file
url
=
'{}/command/{}'
.
format
(
NEWT_URL
,
remote_machine
)
data
=
{
'executable'
:
'/usr/bin/ls {}'
.
format
(
remote_location
),
...
...
@@ -129,26 +143,11 @@ class SimulationAssetLocationIndex():
return
False
# Get md5 for remote file
data
[
'executable'
]
=
'/usr/bin/md5sum {}'
.
format
(
remote_location
)
r
=
self
.
_newt_requests
.
post
(
url
,
data
=
data
)
print
(
'md5sum command returned'
,
r
.
json
())
md5_result
=
r
.
json
()
if
md5_result
[
'error'
]:
print
(
md5_result
[
'error'
])
remote_md5
=
self
.
_get_remote_md5
(
remote_location
,
remote_machine
)
if
remote_md5
is
None
:
return
False
# Valid output is of the form "<md5sum> <path>"
output
=
md5_result
[
'output'
]
remote_md5
=
output
.
split
(
' '
)[
0
]
# Check if item for this filename is already in girder
gen
=
self
.
_girder_client
.
listItem
(
self
.
_sali_folder_id
,
name
=
filename
)
item
=
self
.
_next_item
(
gen
)
if
item
is
not
None
and
not
can_replace
:
print
(
'Item already exits for {}. Use can_replace flag to overwrite'
.
format
(
filename
))
return
False
# Build metadata object
source
=
dict
(
hostname
=
hostname
,
path
=
local_location
,
md5
=
local_md5
)
remote
=
dict
(
path
=
remote_location
,
md5
=
remote_md5
)
metadata
=
dict
(
sources
=
[
source
])
...
...
@@ -159,12 +158,34 @@ class SimulationAssetLocationIndex():
print
(
'createItem returned'
,
item
)
return
True
def
_check_remote_file
(
self
,
remote_path
,
md5
=
None
):
"""Verifies that remote_path exists, and checks md5 if specified.
def
_get_local_md5
(
self
,
local_location
):
""""""
local_md5
=
None
with
open
(
local_location
,
'rb'
)
as
fp
:
local_md5
=
hashlib
.
md5
(
fp
.
read
()).
hexdigest
()
if
local_md5
is
None
:
print
(
'Error getting model file md5'
)
# print('local md5:', local_md5)
return
local_md5
Returns boolean indication success.
"""
return
True
def _get_remote_md5(self, remote_location, remote_machine='cori'):
    """Run md5sum on the remote machine and return the reported digest.

    Posts a "/usr/bin/md5sum <remote_location>" command to the NEWT
    command endpoint for remote_machine using self._newt_requests.

    Args:
        remote_location: path of the file on the remote machine.
        remote_machine: machine name appended to the NEWT command url.

    Returns:
        The first whitespace-separated field of the command output (the
        md5 string), or None if the response carries an error or has no
        output.
    """
    url = '{}/command/{}'.format(NEWT_URL, remote_machine)
    # NOTE(review): remote_location is interpolated unquoted; a path with
    # spaces or shell metacharacters would change the command. Confirm how
    # NEWT parses "executable" before adding quoting.
    data = {
        'executable': '/usr/bin/md5sum {}'.format(remote_location),
        'loginenv': 'true',
    }
    r = self._newt_requests.post(url, data=data)

    # Decode the response body once and reuse it.
    md5_result = r.json()
    print('md5sum command returned', md5_result)

    error = md5_result.get('error')
    if error:
        print(error)
        return None

    # Valid output is of the form "<md5sum> <path>"
    fields = (md5_result.get('output') or '').split()
    if not fields:
        # Empty output cannot contain a digest; signal failure explicitly
        # rather than returning an empty string.
        return None
    return fields[0]
def
_next_item
(
self
,
gen
):
"""Returns next item from generator, or None if empty"""
...
...
@@ -173,3 +194,40 @@ class SimulationAssetLocationIndex():
except
StopIteration
:
return
None
return
entry
def
_verify_local_asset
(
self
,
sali_item
,
local_location
):
"""Checks that the file at local_location matches the source information in the SALI item"""
sources_meta
=
sali_item
.
get
(
'meta'
,
{}).
get
(
'sources'
)
if
sources_meta
is
None
:
print
(
'Internal error: Invalid SALI item {} - no meta.sources list'
.
format
(
sali_item
[
_id
]))
return
False
expected_md5
=
sources_meta
[
0
].
get
(
'md5'
)
if
expected_md5
is
None
:
print
(
'Internal error: Missing sources[0].md5 in SALI item {}'
.
format
(
sali_item
[
_id
]))
return
False
local_md5
=
self
.
_get_local_md5
(
local_location
)
if
local_md5
is
None
:
return
False
# Verify if the 2 md5's match, of course
return
local_md5
==
expected_md5
def
_verify_remote_asset
(
self
,
sali_item
):
"""Checks that the file at local_location matches the source information in the SALI item"""
sources_meta
=
sali_item
.
get
(
'meta'
,
{}).
get
(
'sources'
)
if
sources_meta
is
None
:
print
(
'Internal error: Invalid SALI item {} - no meta.sources list'
.
format
(
sali_item
[
_id
]))
return
False
expected_md5
=
sources_meta
[
0
].
get
(
'md5'
)
if
expected_md5
is
None
:
print
(
'Internal error: Missing sources[0].md5 in SALI item {}'
.
format
(
sali_item
[
_id
]))
return
False
remote_md5
=
this
.
_get_remote_md5
(
remote_location
)
if
remote_md5
is
None
:
return
False
return
remote_md5
==
expected_md5
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment