From 62b12316a9071f23ad65659cfbc22a9d65bc3500 Mon Sep 17 00:00:00 2001 From: Kenneth Leiter Date: Mon, 11 Feb 2013 16:56:23 -0500 Subject: [PATCH] BUG: Fix bug in hdf5 chunking mechanism for multi-dimensional datasets. Current chunking mechanism produces chunks too large for hdf5 to handle. Reduce sizes of chunks for multi-dimensional hdf5 datasets. Chunks for multi-dimensional hdf5 datasets have shape resembling overarching dataset. Add test for this to TestXdmfArrayWriteRead. --- core/XdmfHDF5Writer.cpp | 57 +++++++++++++++++-- core/XdmfHDF5Writer.hpp | 16 ++++++ core/tests/Cxx/CMakeLists.txt | 8 ++- core/tests/Cxx/TestXdmfArray.cpp | 1 - core/tests/Cxx/TestXdmfArrayWriteRead.cpp | 42 +++++++++++--- .../Cxx/TestXdmfArrayWriteReadHyperSlabs.cpp | 2 - 6 files changed, 105 insertions(+), 21 deletions(-) diff --git a/core/XdmfHDF5Writer.cpp b/core/XdmfHDF5Writer.cpp index 92c2880e..92356011 100644 --- a/core/XdmfHDF5Writer.cpp +++ b/core/XdmfHDF5Writer.cpp @@ -22,14 +22,22 @@ /*****************************************************************************/ #include -#include +#include #include +#include +#include #include "XdmfArray.hpp" #include "XdmfArrayType.hpp" #include "XdmfError.hpp" #include "XdmfHDF5Controller.hpp" #include "XdmfHDF5Writer.hpp" +namespace { + + const static unsigned int DEFAULT_CHUNK_SIZE = 10000; + +} + /** * PIMPL */ @@ -38,6 +46,7 @@ class XdmfHDF5Writer::XdmfHDF5WriterImpl { public: XdmfHDF5WriterImpl(): + mChunkSize(DEFAULT_CHUNK_SIZE), mHDF5Handle(-1) { }; @@ -54,8 +63,8 @@ public: herr_t status = H5Fclose(mHDF5Handle); mHDF5Handle = -1; } - }; - + }; + int openFile(const std::string & filePath, const int fapl) @@ -93,9 +102,9 @@ public: H5Eset_auto2(0, old_func, old_client_data); return toReturn; - } + unsigned int mChunkSize; hid_t mHDF5Handle; }; @@ -146,6 +155,12 @@ XdmfHDF5Writer::closeFile() mImpl->closeFile(); } +unsigned int +XdmfHDF5Writer::getChunkSize() const +{ + return mImpl->mChunkSize; +} + void XdmfHDF5Writer::openFile() { @@ -159,6 +174,12 @@ XdmfHDF5Writer::openFile(const int fapl) fapl); } +void +XdmfHDF5Writer::setChunkSize(const unsigned int chunkSize) +{ + mImpl->mChunkSize = chunkSize; +} + void XdmfHDF5Writer::visit(XdmfArray & array, const shared_ptr visitor) @@ -243,7 +264,6 @@ XdmfHDF5Writer::write(XdmfArray & array, // Open a hdf5 dataset and write to it on disk. herr_t status; - hsize_t size = array.getSize(); // Save old error handler and turn off error handling for now H5E_auto_t old_func; @@ -293,8 +313,27 @@ XdmfHDF5Writer::write(XdmfArray & array, dataspace = H5Screate_simple(dimensions.size(), ¤t_dims[0], &maximum_dims[0]); + + // calculate a proper chunk size - for multidimensional datasets the + // chunk dimensions have similar shape to original dataset hid_t property = H5Pcreate(H5P_DATASET_CREATE); - std::vector chunk_size(dimensions.size(), 1024); + const hsize_t totalDimensionsSize = + std::accumulate(current_dims.begin(), + current_dims.end(), + 1, + std::multiplies()); + const double factor = + std::pow(((double)mImpl->mChunkSize / totalDimensionsSize), + 1.0 / current_dims.size()); + std::vector chunk_size(current_dims.begin(), + current_dims.end()); + for(std::vector::iterator iter = chunk_size.begin(); + iter != chunk_size.end(); ++iter) { + *iter = (hsize_t)(*iter * factor); + if(*iter == 0) { + *iter = 1; + } + } status = H5Pset_chunk(property, dimensions.size(), &chunk_size[0]); dataset = H5Dcreate(mImpl->mHDF5Handle, dataSetPath.str().c_str(), @@ -303,6 +342,11 @@ XdmfHDF5Writer::write(XdmfArray & array, H5P_DEFAULT, property, H5P_DEFAULT); + if(dataset < 0) { + XdmfError::message(XdmfError::FATAL, + "H5Dcreate returned failure in " + "XdmfHDF5Writer::write -- status: " + dataset); + } status = H5Pclose(property); } @@ -315,6 +359,7 @@ XdmfHDF5Writer::write(XdmfArray & array, status = H5Sclose(dataspace); // Resize to fit size of old and new data. + hsize_t size = array.getSize(); hsize_t newSize = size + datasize; status = H5Dset_extent(dataset, &newSize); diff --git a/core/XdmfHDF5Writer.hpp b/core/XdmfHDF5Writer.hpp index 1a498cfa..62ba8fe9 100644 --- a/core/XdmfHDF5Writer.hpp +++ b/core/XdmfHDF5Writer.hpp @@ -65,8 +65,24 @@ public: virtual void closeFile(); + /** + * Get the chunk size used to output datasets to hdf5. + * + * @return chunk size used to output datasets to hdf5. + */ + unsigned int getChunkSize() const; + virtual void openFile(); + /** + * Set the chunk size used to output datasets to hdf5. For + * multidimensional datasets the chunk size is the total number of + * elements in the chunk. + * + * @param chunkSize the number of elements per chunk. + */ + void setChunkSize(const unsigned int chunkSize); + virtual void visit(XdmfArray & array, const shared_ptr visitor); diff --git a/core/tests/Cxx/CMakeLists.txt b/core/tests/Cxx/CMakeLists.txt index dd5abe5b..a2f99648 100644 --- a/core/tests/Cxx/CMakeLists.txt +++ b/core/tests/Cxx/CMakeLists.txt @@ -38,9 +38,11 @@ ADD_TEST_CXX(TestXdmfVersion) CLEAN_TEST_CXX(TestXdmfArray) CLEAN_TEST_CXX(TestXdmfArrayMultidimensional) CLEAN_TEST_CXX(TestXdmfArrayWriteRead - test.h5) -CLEAN_TEST_CXX(TestXdmfArrayWriteRead - testHyperslab.h5) + test.h5, + testHyperslab.h5, + testDimensions.h5, + testLargeArray.h5, + testString.h5) CLEAN_TEST_CXX(TestXdmfHDF5Controller) CLEAN_TEST_CXX(TestXdmfHDF5Writer hdf5WriterTest.h5) diff --git a/core/tests/Cxx/TestXdmfArray.cpp b/core/tests/Cxx/TestXdmfArray.cpp index 2f0fcf57..abafb054 100644 --- a/core/tests/Cxx/TestXdmfArray.cpp +++ b/core/tests/Cxx/TestXdmfArray.cpp @@ -163,7 +163,6 @@ int main(int, char **) array5->setValuesInternal(doubleValues, 3, 1); assert(array5->getSize() == 3); assert(array5->getArrayType() == XdmfArrayType::Float64()); - assert(array5->getValuesString().compare("0 1.1 10.1") == 0); // // SHARED ASSIGNMENTS diff --git a/core/tests/Cxx/TestXdmfArrayWriteRead.cpp b/core/tests/Cxx/TestXdmfArrayWriteRead.cpp index b32ea374..2a5e6d21 100644 --- a/core/tests/Cxx/TestXdmfArrayWriteRead.cpp +++ b/core/tests/Cxx/TestXdmfArrayWriteRead.cpp @@ -42,16 +42,40 @@ int main(int, char **) assert(stringArray->getValuesString().compare("foo bar cat dog blah") == 0); assert(stringArray->getValue(0).compare("foo") == 0); - /* - assert(array->getSize() == 4); - assert(array->getValuesString().compare("1 2 3 4") == 0); + shared_ptr dimensionsArray = XdmfArray::New(); + std::vector dimensions(3); + dimensions[0] = 2; + dimensions[1] = 3; + dimensions[2] = 4; + dimensionsArray->resize(dimensions); + double data[24] = {0.0, 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, 9.0, + 10.0, 11.0, 12.0, 13.0, 14.0, + 15.0, 16.0, 17.0, 18.0, 19.0, + 20.0, 21.0, 22.0, 23.0}; + dimensionsArray->insert(0, data, 24); + shared_ptr dimensionsWriter = + XdmfHDF5Writer::New("testDimensions.h5"); + dimensionsWriter->setChunkSize(4); + dimensionsArray->accept(dimensionsWriter); - array->release(); - assert(array->getValuesString() == ""); - assert(array->getSize() == 4); + dimensionsArray->release(); + dimensionsArray->read(); + assert(dimensionsArray->getSize() == 24); + std::vector readDimensions = dimensionsArray->getDimensions(); + assert(readDimensions.size() == 3); + assert(readDimensions[0] = 2); + assert(readDimensions[1] = 3); + assert(readDimensions[2] = 4); - array->read(); - assert(array->getValuesString().compare("1 2 3 4") == 0); - */ + shared_ptr largeArrayWriter = + XdmfHDF5Writer::New("testLargeArray.h5"); + largeArrayWriter->setChunkSize(1500); + shared_ptr largeArray = XdmfArray::New(); + std::vector largeDimensions(2); + largeDimensions[0] = 1000; + largeDimensions[1] = 3; + largeArray->resize(largeDimensions); + largeArray->accept(largeArrayWriter); } diff --git a/core/tests/Cxx/TestXdmfArrayWriteReadHyperSlabs.cpp b/core/tests/Cxx/TestXdmfArrayWriteReadHyperSlabs.cpp index 169250a2..861c8024 100644 --- a/core/tests/Cxx/TestXdmfArrayWriteReadHyperSlabs.cpp +++ b/core/tests/Cxx/TestXdmfArrayWriteReadHyperSlabs.cpp @@ -2,7 +2,6 @@ #include "XdmfArrayType.hpp" #include "XdmfHDF5Controller.hpp" #include "XdmfHDF5Writer.hpp" -#include int main(int, char **) { @@ -57,6 +56,5 @@ int main(int, char **) assert(array1->getValuesString().compare("1 2") == 0); array2->release(); array2->read(); - std::cout << array2->getValuesString() << std::endl; assert(array2->getValuesString().compare("3 4") == 0); } -- 2.22.0