Commit 97db235a authored by Burlen Loring's avatar Burlen Loring
Browse files

python analysis

Adds PythonAnalysis a class that embeds a Python interpreter.
Includes documentation in web page and a regression test.
parent b4decc94
if (ENABLE_PYTHON)
# TODO -- Python 3
find_package(PythonInterp REQUIRED)
if(PYTHONINTERP_FOUND)
find_program(PYTHON_CONFIG_EXECUTABLE python-config)
......@@ -41,7 +42,7 @@ if (ENABLE_PYTHON)
COMMAND ${swig_cmd} -c++ -python -MM
-I${MPI4PY_INCLUDE_DIR} -I${CMAKE_BINARY_DIR}
-I${CMAKE_CURRENT_BINARY_DIR} -I${CMAKE_CURRENT_SOURCE_DIR}
-I${CMAKE_SOURCE_DIR}/sensei
-I${CMAKE_SOURCE_DIR}/sensei -I${CMAKE_SOURCE_DIR}/python
${input_file} | sed -e 's/[[:space:]\\]\\{1,\\}//g' -e '1,2d' -e '/senseiConfig\\.h/d' > ${output_file}
MAIN_DEPENDENCY ${input_file}
COMMENT "Generating dependency file for ${input}...")
......@@ -51,13 +52,13 @@ if (ENABLE_PYTHON)
COMMAND ${swig_cmd} -c++ -python -MM
-I${MPI4PY_INCLUDE_DIR} -I${CMAKE_BINARY_DIR}
-I${CMAKE_CURRENT_BINARY_DIR} -I${CMAKE_CURRENT_SOURCE_DIR}
-I${CMAKE_SOURCE_DIR}/sensei -DSWIG_BOOTSTRAP
${input_file}
-I${CMAKE_SOURCE_DIR}/sensei -I${CMAKE_SOURCE_DIR}/python
-DSWIG_BOOTSTRAP ${input_file}
COMMAND sed -e s/[[:space:]\\]\\{1,\\}//g -e 1,2d -e /senseiConfig\\.h/d
OUTPUT_FILE ${output_file})
endfunction()
function(wrap_swig input output depend)
function(wrap_swig input output depend ttable)
set(input_file ${CMAKE_CURRENT_SOURCE_DIR}/${input})
set(output_file ${CMAKE_CURRENT_BINARY_DIR}/${output})
set(depend_file ${CMAKE_CURRENT_BINARY_DIR}/${depend})
......@@ -65,10 +66,10 @@ if (ENABLE_PYTHON)
add_custom_command(
OUTPUT ${output_file}
COMMAND ${swig_cmd} -c++ -python -w341,325,401,504
-DSWIG_TYPE_TABLE=senseiPython
-DSWIG_TYPE_TABLE=${ttable}
-I${MPI4PY_INCLUDE_DIR} -I${CMAKE_BINARY_DIR}
-I${CMAKE_CURRENT_BINARY_DIR} -I${CMAKE_CURRENT_SOURCE_DIR}
-I${CMAKE_SOURCE_DIR}/sensei
-I${CMAKE_SOURCE_DIR}/sensei -I${CMAKE_SOURCE_DIR}/python
-o ${output_file} ${input_file}
MAIN_DEPENDENCY ${input_file}
DEPENDS ${depend_file} ${depends}
......
<sensei>
<analysis type="python" script_file="Histogram.py" enabled="1">
<initialize_source>
numBins=10
meshName='mesh'
arrayName='values'
arrayCen=1
</initialize_source>
</analysis>
</sensei>
---
markdown:
gfm: false
---
# PythonAnalysis adaptor
The python analysis adaptor enables the use of a Python script as an analysis
back end. It accomplishes this by embedding a Python interpreter and includes a
minimal set of the sensei python bindings. To author a new python analysis one
must provide a python script that implements three functions: `Initialize`,
`Execute` and `Finalize`. These functions implement the `sensei::AnalysisAdaptor`
API. The `Execute` function is required while `Initialize` and `Finalize`
functions are optional. The `Execute` function is passed a `sensei::DataAdaptor`
instance from which one has access to simulation data structures. If an error
occurs during processing one should `raise` an exception. If the analysis
requires MPI communication, one must make use of the adaptor's MPI communicator
which is stored in the global variable `comm`. Additionally one can provide a
secondary script that is executed prior to the API functions. This script can
set global variables that control runtime behavior.
End users will make use of the `sensei::ConfigurableAnalysis` and point to the
python analysis script. The script can be loaded in one of two ways: via
python's import machinery or via a customized mechanism that reads the file on
MPI rank 0 and broadcasts it to the other ranks. The latter is the recommended
approach.
## ConfigurableAnalysis XML
The `<analysis>` element is used to create and configure the `PythonAnalysis` instance.
| name | type | allowable value(s) | description |
|------|------|--------------------|-------------|
| `type` | attribute | "python" | Creates a `PythonAnalysis` instance |
| `script_module` | attribute | a module name | Names a module that is in the PYTHONPATH. The module should define the 3 analysis adaptor API functions: `Initialize`, `Execute`, and `Finalize`. It is imported during initialization using python's import machinery. \* |
| `script_file` | attribute | a file path | A path to a python script to be loaded and broadcast by rank 0. The script should define the 3 analysis adaptor API functions: `Initialize`, `Execute`, and `Finalize`. \*|
| `enabled` | attribute | 0,1 | When 0 the analysis is skipped |
| `initialize_source` | child element | python source code | A snippet of source code that can be used to control run time behavior. The source code must be properly formatted and indented. The contents of the element are taken verbatim, including newlines, tabs, and spaces. |
\* -- use one of `script_file` or `script_module`. Prefer `script_file`.
See the [example](#histogramxml) below.
## Code Template
The following template provides stubs that one can fill in to write a new python
analysis.
```python
# YOUR IMPORTS HERE
def Initialize():
""" Initialization code """
# YOUR CODE HERE
return
def Execute(dataAdaptor):
""" Use sensei::DataAdaptor instance passed in
dataAdaptor to access and process simulation data """
# YOUR CODE HERE
return
def Finalize():
""" Finalization code """
# YOUR CODE HERE
return
```
## Example
The following example computes a histogram in parallel. It is included in the source code at `sensei/Histogram.py`.
### Histogram.py
```python
import sys
import numpy as np
import vtk.util.numpy_support as vtknp
from vtk import vtkDataObject, vtkCompositeDataSet, vtkMultiBlockDataSet
# default values of control parameters
numBins = 10
meshName = ''
arrayName = ''
arrayCen = vtkDataObject.POINT
outFile = 'hist'
def Initialize():
# check for valid control parameters
if not meshName:
raise RuntimeError('meshName was not set')
if not arrayName:
raise RuntimeError('arrayName was not set')
def Execute(adaptor):
r = comm.Get_rank()
# get the mesh and array we need
mesh = adaptor.GetMesh(meshName, True)
adaptor.AddArray(mesh, meshName, arrayCen, arrayName)
# force composite data to simplify computations
if not isinstance(mesh, vtkCompositeDataSet):
s = comm.Get_size()
mb = vtkMultiBlockDataSet()
mb.SetNumberOfBlocks(s)
mb.SetBlock(r, mesh)
mesh = mb
# compute the min and max over local blocks
mn = sys.float_info.max
mx = -mn
it = mesh.NewIterator()
while not it.IsDoneWithTraversal():
do = it.GetCurrentDataObject()
atts = do.GetPointData() if arrayCen == vtkDataObject.POINT \
else do.GetCellData()
da = vtknp.vtk_to_numpy(atts.GetArray(arrayName))
mn = min(mn, np.min(da))
mx = max(mx, np.max(da))
it.GoToNextItem()
# compute global min and max
mn = comm.allreduce(mn, op=MPI.MIN)
mx = comm.allreduce(mx, op=MPI.MAX)
# compute the histogram over local blocks
it.InitTraversal()
while not it.IsDoneWithTraversal():
do = it.GetCurrentDataObject()
atts = do.GetPointData() if arrayCen == vtkDataObject.POINT \
else do.GetCellData()
da = vtknp.vtk_to_numpy(atts.GetArray(arrayName))
h,be = np.histogram(da, bins=numBins, range=(mn,mx))
hist = hist + h if 'hist' in globals() else h
it.GoToNextItem()
# compute the global histogram on rank 0
h = comm.reduce(hist, root=0, op=MPI.SUM)
# rank 0 write to disk
if r == 0:
ts = adaptor.GetDataTimeStep()
fn = '%s_%s_%d.txt'%(outFile, arrayName, ts)
f = open(fn, 'w')
f.write('num bins : %d\n'%(numBins))
f.write('range : %0.6g %0.6g\n'%(mn, mx))
f.write('bin edges: ')
for v in be:
f.write('%0.6g '%(v))
f.write('\n')
f.write('counts : ')
for v in h:
f.write('%d '%(v))
f.write('\n')
f.close()
def Finalize():
return
```
### Histogram.xml
```xml
<analysis type="python" script_file="./Histogram.py" enabled="1">
<initialize_source>
numBins=10
meshName='mesh'
arrayName='values'
arrayCen=1
</initialize_source>
</analysis>
```
if (ENABLE_PYTHON)
depend_swig(senseiPython.i senseiPython.dep)
wrap_swig(senseiPython.i senseiPython.cxx senseiPython.dep)
wrap_swig(senseiPython.i senseiPython.cxx senseiPython.dep senseiPython)
include_directories(SYSTEM ${PYTHON_INCLUDE_PATH} ${NUMPY_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}
......@@ -8,7 +8,7 @@ if (ENABLE_PYTHON)
PYTHON_ADD_MODULE(_senseiPython ${CMAKE_CURRENT_BINARY_DIR}/senseiPython.cxx)
target_link_libraries(_senseiPython sensei python)
target_link_libraries(_senseiPython PUBLIC sensei python)
add_custom_command(TARGET _senseiPython POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
......
......@@ -24,7 +24,10 @@ import_array();
%include <std_string.i>
%include <std_vector.i>
%include <std_map.i>
%template(vector_string) std::vector<std::string>;
%template(map_string_bool) std::map<std::string, bool>;
%template(map_int_vector_string) std::map<int, std::vector<std::string>>;
%include <mpi4py/mpi4py.i>
%include "vtk.i"
%include "senseiTypeMaps.i"
......
if (ENABLE_SENSEI)
message(STATUS "Enabled: sensei library")
set(sensei_sources AnalysisAdaptor.cxx Autocorrelation.cxx
ConfigurableAnalysis.cxx DataAdaptor.cxx DataRequirements.cxx
Histogram.cxx Error.cxx ProgrammableDataAdaptor.cxx VTKHistogram.cxx
VTKDataAdaptor.cxx VTKUtils.cxx)
# senseiCore
# everything but the Python and configurable analysis adaptors.
set(senseiCore_sources AnalysisAdaptor.cxx Autocorrelation.cxx
DataAdaptor.cxx DataRequirements.cxx Histogram.cxx Error.cxx
ProgrammableDataAdaptor.cxx VTKHistogram.cxx VTKDataAdaptor.cxx
VTKUtils.cxx)
set(sensei_libs mpi pugixml vtk thread ArrayIO timer diy grid)
set(senseiCore_libs mpi pugixml vtk thread ArrayIO timer diy grid)
if(ENABLE_CATALYST)
list(APPEND sensei_sources CatalystAnalysisAdaptor.cxx
list(APPEND senseiCore_sources CatalystAnalysisAdaptor.cxx
CatalystSlice.cxx CatalystUtilities.cxx)
endif()
if(ENABLE_ADIOS)
list(APPEND sensei_sources ADIOSSchema.cxx
list(APPEND senseiCore_sources ADIOSSchema.cxx
ADIOSAnalysisAdaptor.cxx ADIOSDataAdaptor.cxx)
list(APPEND sensei_libs adios)
list(APPEND senseiCore_libs adios)
endif()
if(ENABLE_VTK_M)
list(APPEND sensei_sources
list(APPEND senseiCore_sources
VTKmContourAnalysis.cxx
VTKmContourAnalysis.h)
endif()
if(ENABLE_LIBSIM)
list(APPEND sensei_sources LibsimAnalysisAdaptor.cxx
list(APPEND senseiCore_sources LibsimAnalysisAdaptor.cxx
LibsimImageProperties.cxx)
list(APPEND sensei_libs libsim)
list(APPEND senseiCore_libs libsim)
endif()
if(ENABLE_VTK_IO)
list(APPEND sensei_sources VTKPosthocIO.cxx)
list(APPEND senseiCore_sources VTKPosthocIO.cxx)
if (ENABLE_VTK_MPI)
list(APPEND sensei_sources VTKAmrWriter.cxx)
list(APPEND senseiCore_sources VTKAmrWriter.cxx)
endif()
endif()
add_library(senseiCore ${senseiCore_sources})
target_include_directories(senseiCore PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>)
target_link_libraries(senseiCore PUBLIC ${senseiCore_libs})
install(TARGETS senseiCore EXPORT senseiCore
INCLUDES DESTINATION include ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
install(EXPORT senseiCore DESTINATION lib/cmake
EXPORT_LINK_INTERFACE_LIBRARIES)
set(sensei_sources ConfigurableAnalysis.cxx)
set(sensei_libs senseiCore)
# PythonAnalysis
# an analysis adaptor with a minimal set of Python bindings to enable an
# embedded interpreter to execute run time provided Python code implementing
# AnalysisAdaptor::Execute
if (ENABLE_PYTHON)
depend_swig(PythonAnalysis.i PythonAnalysis.dep)
wrap_swig(PythonAnalysis.i _PythonAnalysis.cxx
PythonAnalysis.dep _PythonAnalysis)
include_directories(SYSTEM ${PYTHON_INCLUDE_PATH} ${NUMPY_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}
${CMAKE_CURRENT_BINARY_DIR})
add_library(_PythonAnalysis SHARED
${CMAKE_CURRENT_BINARY_DIR}/_PythonAnalysis.cxx)
set_target_properties(_PythonAnalysis PROPERTIES PREFIX "")
target_link_libraries(_PythonAnalysis PUBLIC senseiCore python)
add_custom_command(TARGET _PythonAnalysis POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_BINARY_DIR}/PythonAnalysis.py
${CMAKE_CURRENT_BINARY_DIR}/../lib)
install(TARGETS _PythonAnalysis EXPORT _PythonAnalysis
LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(EXPORT _PythonAnalysis DESTINATION lib/cmake
EXPORT_LINK_INTERFACE_LIBRARIES)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/PythonAnalysis.py
DESTINATION lib)
set(python_analyses Histogram.py)
foreach(python_analysis ${python_analyses})
configure_file(${python_analysis}
${CMAKE_CURRENT_BINARY_DIR}/../lib/${python_analysis} COPYONLY)
endforeach()
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION lib FILES_MATCHING PATTERN "*.py")
list(APPEND sensei_libs _PythonAnalysis)
endif()
# sensei
# world facing library that users will link to
# contains senseiCore classes, PythonAnalysis, and ConfigurableAnalysis
add_library(sensei ${sensei_sources})
target_include_directories(sensei PUBLIC
......@@ -49,8 +118,9 @@ if (ENABLE_SENSEI)
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION include FILES_MATCHING PATTERN "*.h")
install(TARGETS sensei EXPORT sensei INCLUDES DESTINATION include
ARCHIVE DESTINATION lib LIBRARY DESTINATION lib)
install(TARGETS sensei EXPORT sensei
INCLUDES DESTINATION include ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
install(EXPORT sensei DESTINATION lib/cmake
EXPORT_LINK_INTERFACE_LIBRARIES)
......
......@@ -26,6 +26,9 @@
#include "LibsimAnalysisAdaptor.h"
#include "LibsimImageProperties.h"
#endif
#ifdef ENABLE_PYTHON
#include "PythonAnalysis.h"
#endif
#include <vtkObjectFactory.h>
#include <vtkSmartPointer.h>
......@@ -135,7 +138,9 @@ struct ConfigurableAnalysis::InternalsType
int AddAutoCorrelation(pugi::xml_node node);
int AddPosthocIO(pugi::xml_node node);
int AddVTKAmrWriter(pugi::xml_node node);
int AddPythonAnalysis(pugi::xml_node node);
public:
// list of all analyses. api calls are forwarded to each
// analysis in the list
AnalysisAdaptorVector Analyses;
......@@ -519,12 +524,9 @@ int ConfigurableAnalysis::InternalsType::AddPosthocIO(pugi::xml_node node)
return -1;
#else
DataRequirements req;
if (req.Initialize(node) ||
(req.GetNumberOfRequiredMeshes() < 1))
if (req.Initialize(node))
{
SENSEI_ERROR("Failed to initialize VTKPosthocIO. "
"At least one mesh is required")
SENSEI_ERROR("Failed to initialize VTKPosthocIO.")
return -1;
}
......@@ -594,6 +596,53 @@ int ConfigurableAnalysis::InternalsType::AddVTKAmrWriter(pugi::xml_node node)
#endif
}
// --------------------------------------------------------------------------
// Creates and configures a PythonAnalysis instance from the given XML
// <analysis> node. Exactly one of the "script_file" or "script_module"
// attributes must be present; an optional "initialize_source" child element
// supplies Python source executed before the script's API functions.
// Returns 0 on success, -1 on error.
int ConfigurableAnalysis::InternalsType::AddPythonAnalysis(pugi::xml_node node)
{
// one of the two script location attributes is required
if (!node.attribute("script_file") && !node.attribute("script_module"))
{
SENSEI_ERROR("Failed to initialize PythonAnalysis. Missing "
"a required attribute: script_file or script_module");
return -1;
}
// pugi returns an empty string for a missing attribute, so at most one
// of these is non-empty
std::string scriptFile = node.attribute("script_file").value();
std::string scriptModule = node.attribute("script_module").value();
// optional snippet run before Initialize/Execute/Finalize; contents are
// taken verbatim from the element text
std::string initSource;
pugi::xml_node inode = node.child("initialize_source");
if (inode)
initSource = inode.text().as_string();
vtkNew<PythonAnalysis> pyAnalysis;
// only override the adaptor's default communicator when one was set
if (this->Comm != MPI_COMM_NULL)
pyAnalysis->SetCommunicator(this->Comm);
pyAnalysis->SetScriptFile(scriptFile);
pyAnalysis->SetScriptModule(scriptModule);
pyAnalysis->SetInitializeSource(initSource);
// Initialize starts the embedded interpreter and loads the script;
// a non-zero return indicates failure
if (pyAnalysis->Initialize())
{
SENSEI_ERROR("Failed to initialize PythonAnalysis")
return -1;
}
// the Analyses vector holds a reference; vtkNew releases its own
this->Analyses.push_back(pyAnalysis.GetPointer());
// report which mechanism (file vs. module) was configured
const char *scriptType = scriptFile.empty() ? "module" : "file";
const char *scriptName =
scriptFile.empty() ? scriptModule.c_str() : scriptFile.c_str();
SENSEI_STATUS("Configured python with " << scriptType
<< " \"" << scriptName << "\"")
return 0;
}
//----------------------------------------------------------------------------
......@@ -661,7 +710,8 @@ int ConfigurableAnalysis::Initialize(const std::string& filename)
|| ((type == "libsim") && !this->Internals->AddLibsim(node))
|| ((type == "PosthocIO") && !this->Internals->AddPosthocIO(node))
|| ((type == "VTKAmrWriter") && !this->Internals->AddVTKAmrWriter(node))
|| ((type == "vtkmcontour") && !this->Internals->AddVTKmContour(node))))
|| ((type == "vtkmcontour") && !this->Internals->AddVTKmContour(node))
|| ((type == "python") && !this->Internals->AddPythonAnalysis(node))))
{
if (rank == 0)
SENSEI_ERROR("Failed to add '" << type << "' analysis")
......
import sys
import numpy as np
import vtk.util.numpy_support as vtknp
from vtk import vtkDataObject, vtkCompositeDataSet, vtkMultiBlockDataSet
# default values of control parameters
numBins = 10
meshName = ''
arrayName = ''
arrayCen = vtkDataObject.POINT
outFile = 'hist'
def Initialize():
    """Validate the control parameters; rank 0 echoes them to stderr.

    Raises RuntimeError when meshName or arrayName was not set via the
    initialize_source element. The MPI communicator 'comm' is provided
    by the PythonAnalysis embedding.
    """
    if comm.Get_rank() == 0:
        sys.stderr.write('Initialize numBins=%d meshName=%s arrayName=%s arrayCen=%d outFile=%s\n'%( \
            numBins, meshName, arrayName, arrayCen, outFile))
    # a mesh and an array must both have been named
    if not meshName:
        raise RuntimeError('meshName was not set')
    if not arrayName:
        raise RuntimeError('arrayName was not set')
def Execute(adaptor):
    """Compute a parallel histogram of the named array.

    adaptor -- a sensei::DataAdaptor instance giving access to simulation
    data. Rank 0 writes '<outFile>_<arrayName>_<step>.txt' with the bin
    edges and counts. The MPI communicator 'comm' and the 'MPI' module are
    provided by the PythonAnalysis embedding.
    """
    r = comm.Get_rank()

    # get the mesh and array we need
    mesh = adaptor.GetMesh(meshName, True)
    adaptor.AddArray(mesh, meshName, arrayCen, arrayName)

    # force composite data to simplify computations
    if not isinstance(mesh, vtkCompositeDataSet):
        s = comm.Get_size()
        mb = vtkMultiBlockDataSet()
        mb.SetNumberOfBlocks(s)
        mb.SetBlock(r, mesh)
        mesh = mb

    # compute the min and max over local blocks
    mn = sys.float_info.max
    mx = -mn
    it = mesh.NewIterator()
    while not it.IsDoneWithTraversal():
        do = it.GetCurrentDataObject()
        atts = do.GetPointData() if arrayCen == vtkDataObject.POINT \
            else do.GetCellData()
        da = vtknp.vtk_to_numpy(atts.GetArray(arrayName))
        mn = min(mn, np.min(da))
        mx = max(mx, np.max(da))
        it.GoToNextItem()

    # compute global min and max
    mn = comm.allreduce(mn, op=MPI.MIN)
    mx = comm.allreduce(mx, op=MPI.MAX)

    # compute the histogram over local blocks.
    # NOTE: accumulate in a local. the previous code tested
    # "'hist' in globals()", which is always False for a function local,
    # so only the last local block's counts survived when a rank held
    # more than one block.
    hist = None
    be = None
    it.InitTraversal()
    while not it.IsDoneWithTraversal():
        do = it.GetCurrentDataObject()
        atts = do.GetPointData() if arrayCen == vtkDataObject.POINT \
            else do.GetCellData()
        da = vtknp.vtk_to_numpy(atts.GetArray(arrayName))
        h, be = np.histogram(da, bins=numBins, range=(mn, mx))
        hist = h if hist is None else hist + h
        it.GoToNextItem()

    # compute the global histogram on rank 0
    h = comm.reduce(hist, root=0, op=MPI.SUM)

    # rank 0 write to disk
    if r == 0:
        ts = adaptor.GetDataTimeStep()
        fn = '%s_%s_%d.txt'%(outFile, arrayName, ts)
        # open() rather than the Python 2 only builtin file()
        f = open(fn, 'w')
        f.write('num bins : %d\n'%(numBins))
        f.write('range : %0.6g %0.6g\n'%(mn, mx))
        f.write('bin edges: ')
        for v in be:
            f.write('%0.6g '%(v))
        f.write('\n')
        f.write('counts : ')
        for v in h:
            f.write('%d '%(v))
        f.write('\n')
        f.close()
        sys.stderr.write('Execute %s written\n'%(fn))
def Finalize():
    """Rank 0 reports completion to stderr; always returns 0 (success)."""
    if comm.Get_rank() == 0:
        sys.stderr.write('Finalize\n')
    return 0
#include "PythonAnalysis.h"
#include "Error.h"
#include <vtkObjectFactory.h>
#include <mpi4py/mpi4py.MPI_api.h>
#include <string>
#include <cstdio>
#include <cstring>
#include <errno.h>
#include <Python.h>