From f142912e12c19f24adbcae512d4ab7199e54f951 Mon Sep 17 00:00:00 2001 From: Scott Wittenburg Date: Wed, 16 Jul 2025 14:32:02 -0600 Subject: [PATCH] DataSetWriter: Fix overflow encountered at scale --- fides/DataSetWriter.cxx | 95 +++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/fides/DataSetWriter.cxx b/fides/DataSetWriter.cxx index 58e1ec4f..9dffc298 100644 --- a/fides/DataSetWriter.cxx +++ b/fides/DataSetWriter.cxx @@ -13,6 +13,8 @@ #include #include +#include +#include #include #include #include @@ -45,6 +47,21 @@ #include #endif +#if SIZE_MAX == UCHAR_MAX +#define FIDES_MPI_SIZE_T MPI_UNSIGNED_CHAR +#elif SIZE_MAX == USHRT_MAX +#define FIDES_MPI_SIZE_T MPI_UNSIGNED_SHORT +#elif SIZE_MAX == UINT_MAX +#define FIDES_MPI_SIZE_T MPI_UNSIGNED +#elif SIZE_MAX == ULONG_MAX +#define FIDES_MPI_SIZE_T MPI_UNSIGNED_LONG +#elif SIZE_MAX == ULLONG_MAX +#define FIDES_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG +#else +// Pick a "safe" default +#define FIDES_MPI_SIZE_T MPI_UNSIGNED_LONG +#endif + template std::ostream& operator<<(std::ostream& out, const std::vector& v) { @@ -814,16 +831,19 @@ protected: this->DataSetsPerRank.clear(); this->DataSetsPerRank.resize(static_cast(this->NumRanks), 0); - this->DataSetsPerRank[static_cast(this->Rank)] = - static_cast(this->NumberOfDataSets); + this->DataSetsPerRank[static_cast(this->Rank)] = this->NumberOfDataSets; #ifdef FIDES_USE_MPI - MPI_Allreduce( - MPI_IN_PLACE, this->DataSetsPerRank.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); + MPI_Allreduce(MPI_IN_PLACE, + this->DataSetsPerRank.data(), + this->NumRanks, + FIDES_MPI_SIZE_T, + MPI_SUM, + this->Comm); #endif - int tot = std::accumulate(this->DataSetsPerRank.begin(), this->DataSetsPerRank.end(), 0); - this->TotalNumberOfDataSets = static_cast(tot); + this->TotalNumberOfDataSets = + std::accumulate(this->DataSetsPerRank.begin(), this->DataSetsPerRank.end(), std::size_t{ 0 }); this->DataSetOffset = 0; for (size_t i = 0; i < static_cast(this->Rank); i++) @@ -832,8 +852,8 @@ protected: } // Need to determine the point and cell offsets for each block. - std::vector numPoints(static_cast(this->NumRanks), 0); - std::vector numCells(static_cast(this->NumRanks), 0); + std::vector numPoints(static_cast(this->NumRanks), 0); + std::vector numCells(static_cast(this->NumRanks), 0); for (std::size_t i = 0; i < this->NumberOfDataSets; i++) { @@ -843,14 +863,15 @@ protected: } #ifdef FIDES_USE_MPI - MPI_Allreduce(MPI_IN_PLACE, numPoints.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); - MPI_Allreduce(MPI_IN_PLACE, numCells.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, numPoints.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, numCells.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); #endif - tot = std::accumulate(numPoints.begin(), numPoints.end(), 0); - this->TotalNumberOfPoints = static_cast(tot); - tot = std::accumulate(numCells.begin(), numCells.end(), 0); - this->TotalNumberOfCells = static_cast(tot); + this->TotalNumberOfPoints = + std::accumulate(numPoints.begin(), numPoints.end(), std::size_t{ 0 }); + this->TotalNumberOfCells = std::accumulate(numCells.begin(), numCells.end(), std::size_t{ 0 }); this->DataSetPointsOffset = 0; this->DataSetCellsOffset = 0; @@ -882,7 +903,7 @@ protected: int Rank = 0; int NumRanks = 1; - std::vector DataSetsPerRank; + std::vector DataSetsPerRank; std::size_t TotalNumberOfDataSets = 0; std::size_t TotalNumberOfPoints = 0; std::size_t TotalNumberOfCells = 0; @@ -1108,7 +1129,7 @@ protected: { std::size_t numDS = static_cast(this->DataSets.GetNumberOfPartitions()); - std::vector numCoordinates(this->NumRanks * 3, 0); + std::vector numCoordinates(this->NumRanks * 3, 0); this->NumXCoords = 0; this->NumYCoords = 0; @@ -1128,8 +1149,12 @@ protected: numCoordinates[this->Rank * 3 + 2] = this->NumZCoords; #ifdef FIDES_USE_MPI - MPI_Allreduce( - MPI_IN_PLACE, numCoordinates.data(), this->NumRanks * 3, MPI_INT, MPI_SUM, this->Comm); + MPI_Allreduce(MPI_IN_PLACE, + numCoordinates.data(), + this->NumRanks * 3, + FIDES_MPI_SIZE_T, + MPI_SUM, + this->Comm); #endif this->TotalNumberOfXCoords = 0; this->TotalNumberOfYCoords = 0; @@ -1245,10 +1270,10 @@ protected: void ComputeDataModelSpecificGlobalBlockInfo() override { std::size_t numDS = static_cast(this->DataSets.GetNumberOfPartitions()); - std::vector numCoordinates(this->NumRanks, 0); - std::vector numCells(this->NumRanks, 0); - std::vector numPtsInCell(this->NumRanks, 0); - std::vector cellShape(this->NumRanks, 0); + std::vector numCoordinates(this->NumRanks, 0); + std::vector numCells(this->NumRanks, 0); + std::vector numPtsInCell(this->NumRanks, 0); + std::vector cellShape(this->NumRanks, 0); this->NumCoords = 0; this->NumPointsInCell = 0; @@ -1293,10 +1318,13 @@ protected: #ifdef FIDES_USE_MPI MPI_Allreduce( - MPI_IN_PLACE, numCoordinates.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); - MPI_Allreduce(MPI_IN_PLACE, numCells.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); - MPI_Allreduce(MPI_IN_PLACE, numPtsInCell.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); - MPI_Allreduce(MPI_IN_PLACE, cellShape.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); + MPI_IN_PLACE, numCoordinates.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, numCells.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, numPtsInCell.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, cellShape.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); #endif for (int i = 0; i < this->NumRanks; i++) @@ -1459,10 +1487,9 @@ public: protected: void ComputeDataModelSpecificGlobalBlockInfo() override { - std::vector numCoordinates(this->NumRanks, 0); - std::vector numCells(this->NumRanks, 0); - std::vector cellShape(this->NumRanks, 0); - std::vector numConns(this->NumRanks, 0); + std::vector numCoordinates(this->NumRanks, 0); + std::vector numCells(this->NumRanks, 0); + std::vector numConns(this->NumRanks, 0); this->NumCoords = 0; this->NumCells = 0; @@ -1490,9 +1517,11 @@ protected: numConns[this->Rank] = this->NumConns; #ifdef FIDES_USE_MPI MPI_Allreduce( - MPI_IN_PLACE, numCoordinates.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); - MPI_Allreduce(MPI_IN_PLACE, numCells.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); - MPI_Allreduce(MPI_IN_PLACE, numConns.data(), this->NumRanks, MPI_INT, MPI_SUM, this->Comm); + MPI_IN_PLACE, numCoordinates.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, numCells.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); + MPI_Allreduce( + MPI_IN_PLACE, numConns.data(), this->NumRanks, FIDES_MPI_SIZE_T, MPI_SUM, this->Comm); #endif for (int i = 0; i < this->NumRanks; i++) -- GitLab