Commit decc7dd4 authored by Kenneth Leiter

ENH: Improve performance of hdf5 writes.

Previously, each time a dataset was written to hdf5, the file was opened
and closed. This was implemented because inside the heavy data writer
we have no idea whether another dataset will be written, so we don't know
whether to keep the file open. After profiling large xdmf writes, it was
determined that closing the file was a bottleneck, so hooks were added to
the heavy data writer to open the file and keep it open while writing.
This is optional, but was incorporated into several higher level classes
like XdmfWriter where we know we can leave a file open until we finish writing
the XML. For our test problem we saw a 7x performance gain.
parent 51b36c5d
......@@ -26,9 +26,73 @@
#include <cstdio>
#include "XdmfArray.hpp"
#include "XdmfArrayType.hpp"
#include "XdmfError.hpp"
#include "XdmfHDF5Controller.hpp"
#include "XdmfHDF5Writer.hpp"
#include "XdmfError.hpp"
/**
 * PIMPL
 *
 * Owns the HDF5 file handle on behalf of XdmfHDF5Writer so that a file can
 * stay open across several dataset writes. A negative handle value means
 * that no file is currently open.
 */
class XdmfHDF5Writer::XdmfHDF5WriterImpl {

public:

  XdmfHDF5WriterImpl() :
    mHDF5Handle(-1)
  {
  }

  /**
   * Closes the file if one is still open.
   */
  ~XdmfHDF5WriterImpl()
  {
    closeFile();
  }

  /**
   * Close the currently open file, if any, and mark the handle as unset.
   * Does nothing when no file is open.
   */
  void
  closeFile()
  {
    if(mHDF5Handle >= 0) {
      // The close status is not inspected; the handle is reset regardless
      // so that a later openFile() starts from a clean state.
      H5Fclose(mHDF5Handle);
      mHDF5Handle = -1;
    }
  }

  /**
   * Open the file at filePath for read/write access, creating it when
   * H5Fis_hdf5 does not report it as an existing hdf5 file. Any file that
   * is already open through this object is closed first.
   *
   * On failure of H5Fopen / H5Fcreate the handle holds the negative value
   * returned by the library, i.e. the object still reports "no file open".
   *
   * @param filePath the location of the hdf5 file on disk.
   */
  void
  openFile(const std::string & filePath)
  {
    if(mHDF5Handle >= 0) {
      // Perhaps we should throw a warning.
      closeFile();
    }

    // Save old error handler and turn off error handling for now. The
    // versioned (*2) get/set calls and callback type are used together so
    // the pair always agrees on the handler signature.
    H5E_auto2_t old_func;
    void * old_client_data;
    H5Eget_auto2(H5E_DEFAULT, &old_func, &old_client_data);
    H5Eset_auto2(H5E_DEFAULT, NULL, NULL);

    // Property list ids are hid_t, which is not guaranteed to fit in int.
    const hid_t fapl = H5P_DEFAULT;

    if(H5Fis_hdf5(filePath.c_str()) > 0) {
      mHDF5Handle = H5Fopen(filePath.c_str(),
                            H5F_ACC_RDWR,
                            fapl);
    }
    else {
      mHDF5Handle = H5Fcreate(filePath.c_str(),
                              H5F_ACC_TRUNC,
                              H5P_DEFAULT,
                              fapl);
    }

    // Restore previous error handler
    H5Eset_auto2(H5E_DEFAULT, old_func, old_client_data);
  }

  hid_t mHDF5Handle;

private:

  // Copying would let two objects close the same handle.
  XdmfHDF5WriterImpl(const XdmfHDF5WriterImpl &); // Not implemented.
  void operator=(const XdmfHDF5WriterImpl &); // Not implemented.

};
shared_ptr<XdmfHDF5Writer>
XdmfHDF5Writer::New(const std::string & filePath,
......@@ -42,12 +106,14 @@ XdmfHDF5Writer::New(const std::string & filePath,
}
/**
 * Construct a writer for the hdf5 file at filePath. The path is forwarded
 * to the XdmfHeavyDataWriter base and a private implementation object is
 * allocated to hold the hdf5 file handle.
 */
XdmfHDF5Writer::XdmfHDF5Writer(const std::string & filePath) :
XdmfHeavyDataWriter(filePath)
XdmfHeavyDataWriter(filePath),
mImpl(new XdmfHDF5WriterImpl())
{
}
/**
 * Destroy the writer and release the private implementation object.
 * Destroying the implementation closes any hdf5 file it still holds open.
 */
XdmfHDF5Writer::~XdmfHDF5Writer()
{
delete mImpl;
}
shared_ptr<XdmfHDF5Controller>
......@@ -66,6 +132,18 @@ XdmfHDF5Writer::createHDF5Controller(const std::string & hdf5FilePath,
count);
}
/**
 * Close the hdf5 file held open by this writer. Delegates to the private
 * implementation, which does nothing when no file is open.
 */
void
XdmfHDF5Writer::closeFile()
{
mImpl->closeFile();
}
/**
 * Open the hdf5 file at mFilePath and keep it open until closeFile() is
 * called. Delegates to the private implementation, which first closes any
 * file it already holds open.
 */
void
XdmfHDF5Writer::openFile()
{
mImpl->openFile(mFilePath);
}
void
XdmfHDF5Writer::visit(XdmfArray & array,
const shared_ptr<XdmfBaseVisitor> visitor)
......@@ -135,25 +213,20 @@ XdmfHDF5Writer::write(XdmfArray & array,
// Open a hdf5 dataset and write to it on disk.
herr_t status;
hsize_t size = array.getSize();
hid_t hdf5Handle;
// Save old error handler and turn off error handling for now
H5E_auto_t old_func;
void * old_client_data;
H5Eget_auto(0, &old_func, &old_client_data);
H5Eset_auto2(0, NULL, NULL);
if(H5Fis_hdf5(hdf5FilePath.c_str()) > 0) {
hdf5Handle = H5Fopen(hdf5FilePath.c_str(), H5F_ACC_RDWR, fapl);
bool closeFile = false;
if(mImpl->mHDF5Handle < 0) {
mImpl->openFile(hdf5FilePath);
closeFile = true;
}
else {
hdf5Handle = H5Fcreate(hdf5FilePath.c_str(),
H5F_ACC_TRUNC,
H5P_DEFAULT,
fapl);
}
hid_t dataset = H5Dopen(hdf5Handle,
hid_t dataset = H5Dopen(mImpl->mHDF5Handle,
dataSetPath.str().c_str(),
H5P_DEFAULT);
......@@ -162,11 +235,14 @@ XdmfHDF5Writer::write(XdmfArray & array,
while(dataset >= 0 && mMode == Default) {
dataSetPath.str(std::string());
dataSetPath << "Data" << ++mDataSetId;
dataset = H5Dopen(hdf5Handle,
dataset = H5Dopen(mImpl->mHDF5Handle,
dataSetPath.str().c_str(),
H5P_DEFAULT);
}
// Restore previous error handler
H5Eset_auto2(0, old_func, old_client_data);
hid_t dataspace = H5S_ALL;
hid_t memspace = H5S_ALL;
......@@ -180,7 +256,7 @@ XdmfHDF5Writer::write(XdmfArray & array,
hid_t property = H5Pcreate(H5P_DATASET_CREATE);
std::vector<hsize_t> chunk_size(dimensions.size(), 1024);
status = H5Pset_chunk(property, dimensions.size(), &chunk_size[0]);
dataset = H5Dcreate(hdf5Handle,
dataset = H5Dcreate(mImpl->mHDF5Handle,
dataSetPath.str().c_str(),
datatype,
memspace,
......@@ -252,10 +328,9 @@ XdmfHDF5Writer::write(XdmfArray & array,
status = H5Sclose(memspace);
}
status = H5Dclose(dataset);
status = H5Fclose(hdf5Handle);
// Restore previous error handler
H5Eset_auto2(0, old_func, old_client_data);
if(closeFile) {
mImpl->closeFile();
}
// Attach a new controller to the array
shared_ptr<XdmfHDF5Controller> newDataController =
......
......@@ -51,7 +51,7 @@ class XDMFCORE_EXPORT XdmfHDF5Writer : public XdmfHeavyDataWriter {
public:
/**
* Construct XdmfHDF5Writer
* Construct XdmfHDF5Writer.
*
* @param filePath the location of the hdf5 file to output to on disk.
* @param clobberFile whether to overwrite the previous file if it exists.
......@@ -63,6 +63,10 @@ public:
virtual ~XdmfHDF5Writer();
void closeFile();
void openFile();
virtual void visit(XdmfArray & array,
const shared_ptr<XdmfBaseVisitor> visitor);
......@@ -104,8 +108,16 @@ protected:
private:
/**
* PIMPL
*/
class XdmfHDF5WriterImpl;
XdmfHDF5Writer(const XdmfHDF5Writer &); // Not implemented.
void operator=(const XdmfHDF5Writer &); // Not implemented.
XdmfHDF5WriterImpl * mImpl;
};
#endif /* XDMFHDF5WRITER_HPP_ */
......@@ -71,6 +71,12 @@ public:
virtual ~XdmfHeavyDataWriter() = 0;
/**
* Close file. This is only needed when the file is opened manually
* through openFile().
*/
virtual void closeFile() = 0;
/**
* Get the path to the heavy data file on disk this writer is writing to.
*
......@@ -93,6 +99,22 @@ public:
*/
bool getReleaseData() const;
/**
* Open file for writing. This is an optional command that can
* improve performance for some writers when writing many datasets
* to a single file. User must call closeFile() after completing
* output.
*
* By default, heavy data files are opened and closed before and after
* writing each dataset to ensure that other writers have access to
* the file (we never know whether we will be writing to the file
* again). This is expensive in some cases, but is always
* safe. Opening the file once and writing many datasets may result
* in improved performance, but the user must tell the writer when
* to open and close the file.
*/
virtual void openFile() = 0;
/**
* Set the mode of operation for this writer.
*
......
......@@ -92,6 +92,10 @@ public:
xmlFreeDoc(mXMLDocument);
xmlCleanupParser();
if(mHeavyDataWriter->getMode() == XdmfHeavyDataWriter::Default) {
mHeavyDataWriter->closeFile();
}
};
void
......@@ -106,6 +110,9 @@ public:
(xmlChar*)"Version",
(xmlChar*)mVersionString.c_str());
xmlDocSetRootElement(mXMLDocument, mXMLCurrentNode);
if(mHeavyDataWriter->getMode() == XdmfHeavyDataWriter::Default) {
mHeavyDataWriter->openFile();
}
}
int mDepth;
......@@ -326,6 +333,7 @@ void
XdmfWriter::visit(XdmfItem & item,
const shared_ptr<XdmfBaseVisitor> visitor)
{
if (mImpl->mDepth == 0) {
mImpl->openFile();
}
......
......@@ -221,6 +221,10 @@ shared_ptr<XdmfUnstructuredGrid>
XdmfExodusReader::read(const std::string & fileName,
const shared_ptr<XdmfHeavyDataWriter> heavyDataWriter) const
{
if(heavyDataWriter) {
heavyDataWriter->openFile();
}
shared_ptr<XdmfUnstructuredGrid> toReturn = XdmfUnstructuredGrid::New();
// Read Exodus II file to XdmfGridUnstructured via Exodus II API
......@@ -628,9 +632,15 @@ XdmfExodusReader::read(const std::string & fileName,
}
ex_close(exodusHandle);
delete [] blockIds;
delete [] numElemsInBlock;
delete [] numNodesPerElemInBlock;
delete [] numElemAttrInBlock;
if(heavyDataWriter) {
heavyDataWriter->closeFile();
}
return toReturn;
}
......@@ -74,6 +74,11 @@ XdmfPartitioner::partition(const shared_ptr<XdmfUnstructuredGrid> gridToPartitio
const MetisScheme metisScheme,
const shared_ptr<XdmfHeavyDataWriter> heavyDataWriter) const
{
if(heavyDataWriter) {
heavyDataWriter->openFile();
}
// Make sure geometry and topology are non null
if(!(gridToPartition->getGeometry() && gridToPartition->getTopology()))
XdmfError::message(XdmfError::FATAL,
......
......@@ -1245,10 +1245,19 @@ XdmfTopologyConverter::convert(const shared_ptr<XdmfUnstructuredGrid> gridToConv
}
if(converter) {
if(heavyDataWriter) {
heavyDataWriter->openFile();
}
shared_ptr<XdmfUnstructuredGrid> toReturn =
converter->convert(gridToConvert,
topologyType,
heavyDataWriter);
if(heavyDataWriter) {
heavyDataWriter->closeFile();
}
delete converter;
return toReturn;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment