Commit 6217a332 authored by Burlen Loring's avatar Burlen Loring

memory profiler

* add a sampling memory profiler. uses RSS. outputs using
  MPI I/O in csv format
* add MPI I/O csv output to timer log, and include start
  and end times.
* add config from environment variables
parent d88d2377
......@@ -65,8 +65,10 @@ int main(int argc, char **argv)
return showHelp ? 0 : 1;
}
timer::SetLogging(log || shortlog);
timer::SetTrackSummariesOverTime(shortlog);
if (log | shortlog)
timer::Enable(shortlog);
timer::Initialize();
SENSEI_STATUS("Opening: \"" << input.c_str() << "\" using method \""
<< readmethod.c_str() << "\"")
......@@ -134,7 +136,7 @@ int main(int argc, char **argv)
dataAdaptor = nullptr;
analysisAdaptor = nullptr;
timer::PrintLog(std::cout, comm);
timer::Finalize();
MPI_Finalize();
......
......@@ -18,6 +18,7 @@
#include "senseiConfig.h"
#ifdef ENABLE_SENSEI
#include "bridge.h"
#include "MemoryProfiler.h"
#else
#include "analysis.h"
#endif
......@@ -253,9 +254,10 @@ int main(int argc, char** argv)
std::default_random_engine rng(static_cast<RandomSeedType>(seed));
for (int i = 0; i < world.rank(); ++i) rng(); // different seed for each rank
if (log || shortlog)
timer::Enable(shortlog);
timer::SetLogging(log || shortlog);
timer::SetTrackSummariesOverTime(shortlog);
timer::Initialize();
timer::MarkStartEvent("oscillators::initialize");
Oscillators oscillators;
......@@ -434,10 +436,11 @@ int main(int argc, char** argv)
#endif
timer::MarkEndEvent("oscillators::finalize");
timer::Finalize();
auto duration = std::chrono::duration_cast<ms>(Time::now() - start);
if (world.rank() == 0)
{
fmt::print("Total run time: {}.{} s\n", duration.count() / 1000, duration.count() % 1000);
}
timer::PrintLog(std::cout, world);
}
......@@ -6,7 +6,7 @@
#include <vtkDataObject.h>
#include <vector>
#include <timer/Timer.h>
#include "Timer.h"
namespace BridgeInternals
{
......@@ -20,6 +20,8 @@ void bridge_initialize(MPI_Comm comm, int g_x, int g_y, int g_z,
uint64_t start_extents_z, int tot_blocks_x, int tot_blocks_y, int tot_blocks_z,
int block_id_x, int block_id_y, int block_id_z, const char* config_file)
{
timer::Initialize();
BridgeInternals::GlobalDataAdaptor = vtkSmartPointer<parallel3d::DataAdaptor>::New();
BridgeInternals::GlobalDataAdaptor->SetCommunicator(comm);
......@@ -45,12 +47,12 @@ void bridge_update(int tstep, double time, double *pressure, double* temperature
}
//-----------------------------------------------------------------------------
void bridge_finalize(MPI_Comm comm)
void bridge_finalize()
{
BridgeInternals::GlobalAnalysisAdaptor->Finalize();
BridgeInternals::GlobalAnalysisAdaptor = nullptr;
BridgeInternals::GlobalDataAdaptor = nullptr;
timer::PrintLog(std::cout, comm);
timer::Finalize();
}
......@@ -7,6 +7,7 @@
#ifdef __cplusplus
extern "C" {
#endif
/// This defines the analysis bridge for parallel_3d miniapp.
/// Called before simulation loop
......@@ -19,7 +20,7 @@ void bridge_initialize(MPI_Comm comm, int g_x, int g_y, int g_z,
void bridge_update(int tstep, double time, double *pressure, double* temperature, double* density);
/// Called just before simulation terminates.
void bridge_finalize(MPI_Comm comm);
void bridge_finalize();
#ifdef __cplusplus
} // extern "C"
......
......@@ -250,7 +250,7 @@ int main(int argc, char **argv)
/////////////////////////////
#ifdef ENABLE_SENSEI
bridge_finalize(MPI_COMM_WORLD);
bridge_finalize();
if (config_file) {
free(config_file);
......
......@@ -190,23 +190,28 @@ int ConfigurableAnalysis::InternalsType::TimeInitialization(
AnalysisAdaptorPtr adaptor, std::function<int()> initializer)
{
const char* analysisName = nullptr;
if (timer::GetLogging())
bool logEnabled = timer::Enabled();
if (logEnabled)
{
std::ostringstream initName;
std::ostringstream execName;
std::ostringstream finiName;
auto analysisNumber = this->Analyses.size();
initName << adaptor->GetClassName() << "::" << analysisNumber << "::initialize";
execName << adaptor->GetClassName() << "::" << analysisNumber << "::execute";
finiName << adaptor->GetClassName() << "::" << analysisNumber << "::finalize";
initName << adaptor->GetClassName() << "::" << analysisNumber << "::Initialize";
execName << adaptor->GetClassName() << "::" << analysisNumber << "::Execute";
finiName << adaptor->GetClassName() << "::" << analysisNumber << "::Finalize";
this->LogEventNames.push_back(initName.str());
this->LogEventNames.push_back(execName.str());
this->LogEventNames.push_back(finiName.str());
analysisName = this->LogEventNames[3 * analysisNumber].c_str();
}
timer::MarkStartEvent(analysisName);
}
int result = initializer();
if (logEnabled)
timer::MarkEndEvent(analysisName);
return result;
}
......@@ -243,8 +248,8 @@ int ConfigurableAnalysis::InternalsType::AddHistogram(pugi::xml_node node)
this->Analyses.push_back(histogram.GetPointer());
SENSEI_STATUS("Configured histogram with " << bins
<< " " << assocStr << "data array " << array
<< " on mesh " << mesh)
<< " bins on " << assocStr << " data array \"" << array
<< "\" on mesh \"" << mesh << "\"")
return 0;
}
......@@ -905,6 +910,8 @@ int ConfigurableAnalysis::SetCommunicator(MPI_Comm comm)
//----------------------------------------------------------------------------
int ConfigurableAnalysis::Initialize(const std::string& filename)
{
timer::MarkEvent("ConfigurableAnalysis::Initialize");
int rank = 0;
MPI_Comm_rank(this->GetCommunicator(), &rank);
......@@ -918,6 +925,7 @@ int ConfigurableAnalysis::Initialize(const std::string& filename)
int rv = 0;
pugi::xml_node root = doc.child("sensei");
for (pugi::xml_node node = root.child("analysis");
node; node = node.next_sibling("analysis"))
{
......@@ -949,27 +957,33 @@ int ConfigurableAnalysis::Initialize(const std::string& filename)
//----------------------------------------------------------------------------
bool ConfigurableAnalysis::Execute(DataAdaptor* data)
{
timer::MarkEvent("ConfigurableAnalysis::Execute");
int rank = 0;
MPI_Comm_rank(this->GetCommunicator(), &rank);
int rv = 0;
int ai = 0;
const char* analysisName = nullptr;
AnalysisAdaptorVector::iterator iter = this->Internals->Analyses.begin();
AnalysisAdaptorVector::iterator end = this->Internals->Analyses.end();
for (; iter != end; ++iter, ++ai)
{
if (timer::GetLogging())
const char* analysisName = nullptr;
bool logEnabled = timer::Enabled();
if (logEnabled)
{
analysisName = this->Internals->LogEventNames[3 * ai + 1].c_str();
}
timer::MarkStartEvent(analysisName);
}
if (!(*iter)->Execute(data))
{
if (rank == 0)
SENSEI_ERROR("Failed to execute " << (*iter)->GetClassName())
rv -= 1;
}
if (logEnabled)
timer::MarkEndEvent(analysisName);
}
......@@ -979,32 +993,33 @@ bool ConfigurableAnalysis::Execute(DataAdaptor* data)
//----------------------------------------------------------------------------
int ConfigurableAnalysis::Finalize()
{
timer::MarkEvent("ConfigurableAnalysis::Finalize");
int rank = 0;
MPI_Comm_rank(this->GetCommunicator(), &rank);
int rv = 0;
int ai = 0;
const char* analysisName = nullptr;
AnalysisAdaptorVector::iterator iter = this->Internals->Analyses.begin();
AnalysisAdaptorVector::iterator end = this->Internals->Analyses.end();
for (; iter != end; ++iter, ++ai)
{
if (timer::GetLogging())
bool logEnabled = timer::Enabled();
const char* analysisName = nullptr;
if (logEnabled)
{
analysisName = this->Internals->LogEventNames[3 * ai + 2].c_str();
}
timer::MarkStartEvent(analysisName);
}
if ((*iter)->Finalize())
{
SENSEI_ERROR("Failed to finalize " << (*iter)->GetClassName())
rv -= 1;
}
timer::MarkEndEvent(analysisName);
}
if (timer::GetLogging())
{
timer::PrintLog(std::cout, this->GetCommunicator());
if (logEnabled)
timer::MarkEndEvent(analysisName);
}
return rv;
......
add_library(timer STATIC Timer.cxx)
add_library(timer STATIC MemoryProfiler.cxx Timer.cxx)
target_include_directories(timer SYSTEM INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include/timer>)
target_link_libraries(timer mpi)
target_link_libraries(timer vtk mpi)
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION include/timer FILES_MATCHING PATTERN "*.h")
......
#include "MemoryProfiler.h"
#include <vtksys/SystemInformation.hxx>
#include <vector>
#include <deque>
#include <sstream>
#include <sys/time.h>
#include <cstring>
#include <errno.h>
#include <iostream>
#include <cmath>
#include <iomanip>
#include <limits>
namespace timer
{
/// State shared between the MemoryProfiler object and its sampling thread.
/// Everything the sampling thread touches after start up (Interval, MemUse,
/// TimePt) is accessed by it while holding DataMutex.
struct MemoryProfiler::InternalsType
{
  /// communicator used for the collective log write
  MPI_Comm Comm = MPI_COMM_WORLD;

  /// path of the CSV file the samples are written to
  std::string FileName = "MemProf.csv";

  /// seconds between samples, a negative value tells the thread to exit
  double Interval = 60.0;

  /// recorded memory use, one entry per sample
  std::deque<long long> MemUse;

  /// recorded wall clock time in seconds, one entry per sample
  std::deque<double> TimePt;

  /// handle of the sampling thread, valid once Initialize has succeeded
  pthread_t Thread;

  /// guards Interval, MemUse, and TimePt
  pthread_mutex_t DataMutex = PTHREAD_MUTEX_INITIALIZER;

  /// used to query the memory use of this process
  vtksys::SystemInformation SysInfo;
};
extern "C" void *Profile(void *argp)
{
timer::MemoryProfiler::InternalsType *internals =
reinterpret_cast<timer::MemoryProfiler::InternalsType*>(argp);
while (1)
{
// capture the current time and memory usage.
struct timeval tv;
gettimeofday(&tv, nullptr);
double curTime = tv.tv_sec + tv.tv_usec/1.0e6;
long long curMem = internals->SysInfo.GetProcMemoryUsed();
pthread_mutex_lock(&internals->DataMutex);
// log time and mem use
internals->TimePt.push_back(curTime);
internals->MemUse.push_back(curMem);
// get next interval
double interval = internals->Interval;
pthread_mutex_unlock(&internals->DataMutex);
// check for shut down code
if (interval < 0)
pthread_exit(nullptr);
// suspend the thread for the requested interval
long long secs = floor(interval);
long nsecs = (interval - secs)*1e9;
struct timespec sleepTime = {secs, nsecs};
int ierr = 0;
int tries = 0;
while ((ierr = nanosleep(&sleepTime, &sleepTime)) && (errno == EINTR) && (++tries < 1000));
if (ierr)
{
const char *estr = strerror(errno);
std::cerr << "Error: nanosleep had an error \"" << estr << "\"" << std::endl;
abort();
}
}
return nullptr;
}
// --------------------------------------------------------------------------
/// Allocates the internal state with its default settings. Sampling
/// does not start until Initialize is called.
MemoryProfiler::MemoryProfiler() : Internals(new InternalsType)
{
}
// --------------------------------------------------------------------------
/// Releases the internal state. The sampling thread is not stopped here.
MemoryProfiler::~MemoryProfiler()
{
  // NOTE(review): the sampling thread holds a pointer to this state, it
  // looks like Finalize must have run before destruction -- confirm
  delete Internals;
}
// --------------------------------------------------------------------------
/// Starts the sampling thread, which runs Profile on the internal state.
/// @returns 0 if the thread was created and -1 otherwise.
int MemoryProfiler::Initialize()
{
  // pthread_create reports failure through its return value, it does
  // not set errno. use the returned code to describe the error.
  const int ierr = pthread_create(&this->Internals->Thread,
    nullptr, Profile, this->Internals);

  if (ierr)
  {
    const char *estr = strerror(ierr);
    std::cerr << "Error: Failed to create memory profiler. "
      << estr << std::endl;
    return -1;
  }

  return 0;
}
// --------------------------------------------------------------------------
int MemoryProfiler::Finalize()
{
int rank = 0;
int nRanks = 1;
MPI_Comm_rank(this->Internals->Comm, &rank);
MPI_Comm_size(this->Internals->Comm, &nRanks);
pthread_mutex_lock(&this->Internals->DataMutex);
// tell the thread to quit
this->Internals->Interval = -1;
// create the ascii buffer
// use ascii in the file as a convenince
std::ostringstream oss;
oss.precision(std::numeric_limits<double>::digits10 + 2);
oss.setf(std::ios::scientific, std::ios::floatfield);
if (rank == 0)
oss << "# rank, time, memory kiB" << std::endl;
long nElem = this->Internals->MemUse.size();
for (long i = 0; i < nElem; ++i)
{
oss << rank << ", " << this->Internals->TimePt[i]
<< ", " << this->Internals->MemUse[i] << std::endl;
}
// free resources
this->Internals->TimePt.clear();
this->Internals->MemUse.clear();
pthread_mutex_unlock(&this->Internals->DataMutex);
// cancle the profiler thread
pthread_cancel(this->Internals->Thread);
// compute the file offset
long nBytes = oss.str().size();
std::vector<long> gsizes(nRanks);
gsizes[rank] = nBytes;
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
gsizes.data(), 1, MPI_LONG, this->Internals->Comm);
long offset = 0;
for (int i = 0; i < rank; ++i)
offset += gsizes[i];
long fileSize = 0;
for (int i = 0; i < nRanks; ++i)
fileSize += gsizes[i];
// write the buffer
MPI_File fh;
MPI_File_open(this->Internals->Comm, this->Internals->FileName.c_str(),
MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
MPI_File_set_view(fh, offset, MPI_BYTE, MPI_BYTE,
"native", MPI_INFO_NULL);
MPI_File_write(fh, oss.str().c_str(), nBytes,
MPI_BYTE, MPI_STATUS_IGNORE);
MPI_File_set_size(fh, fileSize);
MPI_File_close(&fh);
// wait for the proiler thread to finish
pthread_join(this->Internals->Thread, nullptr);
return 0;
}
// --------------------------------------------------------------------------
double MemoryProfiler::GetInterval() const
{
return this->Internals->Interval;
}
// --------------------------------------------------------------------------
void MemoryProfiler::SetInterval(double interval)
{
pthread_mutex_lock(&this->Internals->DataMutex);
this->Internals->Interval = interval;
pthread_mutex_unlock(&this->Internals->DataMutex);
}
// --------------------------------------------------------------------------
/// Sets the communicator that Finalize uses to write the log. The
/// handle is stored as is, it is not duplicated.
void MemoryProfiler::SetCommunicator(MPI_Comm comm)
{
  InternalsType &state = *this->Internals;
  state.Comm = comm;
}
// --------------------------------------------------------------------------
void MemoryProfiler::SetFileName(const std::string &fileName)
{
this->Internals->FileName = fileName;
}
// --------------------------------------------------------------------------
/// @returns the path of the file the samples are written to. The pointer
/// refers to the internally stored string.
const char *MemoryProfiler::GetFileName() const
{
  const std::string &path = this->Internals->FileName;
  return path.c_str();
}
}
#ifndef MemoryProfiler_h
#define MemoryProfiler_h
#include <mpi.h>
#include <string>
namespace timer
{
extern "C" void *Profile(void *argp);
/// MemoryProfiler - A sampling memory use profiler
/**
The class samples process memory usage at the specified interval
given in seconds. For each sample the time is acquired. Calling
Initialize starts profiling, and Finalize ends it. During
Finalization the buffers are written using MPI-I/O to the
file name provided. Sampling is done in a separate thread.
*/
class MemoryProfiler
{
public:
// the profiler owns its internal state, copying is not allowed
MemoryProfiler(const MemoryProfiler &) = delete;
void operator=(const MemoryProfiler &) = delete;
/// Allocates the internal state. Sampling is not started.
MemoryProfiler();
/// Frees the internal state.
~MemoryProfiler();
/// Start the thread that samples memory use.
/// Returns 0 on success and -1 if the thread could not be created.
int Initialize();
/// Stop sampling and write the samples to the file using MPI-I/O.
/// MPI collectives are used, all ranks of the communicator must call.
/// Returns 0 on success.
int Finalize();
/// Set the interval in seconds between querying
/// the process's memory use. The default is 60 seconds.
void SetInterval(double interval);
/// Get the interval in seconds between samples.
double GetInterval() const;
/// Set the communicator for parallel I/O.
/// The default is MPI_COMM_WORLD.
void SetCommunicator(MPI_Comm comm);
/// Set the file name to write the data to.
/// The default is MemProf.csv.
void SetFileName(const std::string &fileName);
/// Get the file name the data is written to.
const char *GetFileName() const;
// the thread entry point works directly on the internal state
friend void *timer::Profile(void *argp);
private:
struct InternalsType;
InternalsType *Internals;
};
}
#endif
This diff is collapsed.
......@@ -6,66 +6,111 @@
namespace timer
{
/// @brief Enable/Disable logging (default is `true`).
void SetLogging(bool val);
/// @brief Get whether logging is enabled.
bool GetLogging();
/// @brief Enable/Disable tracking temporal summaries (default is `false`).
///
/// For runs where the event markers are consistent across timesteps
/// i.e. they occur in same order and exactly as many across different
/// timesteps, one can use this to track a summary instead of full values
/// for duration and memory used. This not only keeps the memory overhead
/// for long runs low, but also make it easier to process the generated
/// output.
void SetTrackSummariesOverTime(bool val);
/// @brief Return whether summaries are tracked over time.
bool GetTrackSummariesOverTime();
/// @brief Log start of a log-able event.
///
/// This marks the beginning of a event that must be logged.
/// The @arg eventname must match when calling MarkEndEvent() to
/// mark the end of the event.
void MarkStartEvent(const char* eventname);
/// @brief Log end of a log-able event.
///
/// This marks the end of a event that must be logged.
/// The @arg eventname must match when calling MarkEndEvent() to
/// mark the end of the event.
void MarkStartEvent(const char* eventname);
void MarkEndEvent(const char* eventname);
/// @brief Mark the beginning of a timestep.
///
/// This marks the beginning of a timestep. All MarkStartEvent and
/// MarkEndEvent after this until MarkEndTimeStep are assumed to be
/// happening for a specific timestep and will be combined with subsequent
/// timesteps.
void MarkStartTimeStep(int timestep, double time);
/// @brief Marks the end of the current timestep.
void MarkEndTimeStep();
/// @brief Print log to the output stream.
///
/// Note this triggers collective operations and hence must be called on all
/// ranks. The amount of processes outputting can be reduced by using
/// the moduloOuput which only outputs for (rank % moduloOutput) == 0.
/// The default value for this is 1.
void PrintLog(std::ostream& stream, MPI_Comm world, int moduloOutput = 1);
class MarkEvent
{
const char* EventName;
public:
/// Initialize logging from environment variables, and/or the timer
/// API below. This is a collective call with respect to the timer's
/// communicator.
///
/// If found in the environment the following variables override
/// the current settings
//
/// TIMER_ENABLE : integer turns on or off logging
/// TIMER_ENABLE_SUMMARY : integer turns on or off logging in summary format
/// TIMER_SUMMARY_MODULUS : print rank data when rank % modulus == 0
/// TIMER_LOG_FILE : path to write timer log to
/// MEMPROF_LOG_FILE : path to write memory profiler log to
/// MEMPROF_INTERVAL : number of seconds between memory recordings
///
void Initialize();
/// Finalize the log. this is where logs are written and cleanup occurs.
/// All processes in the communicator must call, and it must be called
/// prior to MPI_Finalize.
void Finalize();
/// Sets the communicator for MPI calls. This must be called prior to
/// initialization.
/// default value: MPI_COMM_NULL
void SetCommunicator(MPI_Comm comm);
/// Sets the path to write the timer log to
/// overriden by TIMER_LOG_FILE environment variable
/// default value; TimerLog.csv
void SetTimerLogFile(const std::string &fileName);
/// Sets the path to write the memory profiler log to
/// overriden by MEMPROF_LOG_FILE environment variable
/// default value: MemProfLog.csv
void SetMemProfLogFile(const std::string &fileName);
/// Sets the number of seconds in between memory use recordings
/// overriden by MEMPROF_INTERVAL environment variable.
void SetMemProfInterval(int interval);
/// Enable/Disable logging. Overriden by TIMER_ENABLE and
/// TIMER_ENABLE_SUMMARY environment variables. In the
/// default format a CSV file is generated capturing each ranks
/// timer events. In the summary format a pretty and brief output
/// is sent to the stderr stream.
/// default value: disabled
void Enable(bool summaryFmt = false);
void Disable();
/// return true if logging is enabled.
bool Enabled();
/// Sets the timer's summary log modulus. Output includes data from
/// MPI ranks where rank % modulus == 0 Overriden by
/// TIMER_SUMMARY_MODULUS environment variable
/// default value; 1000000000
void SetSummaryModulus(int modulus);
/// @brief Log start of an event.
///
/// This marks the beginning of a event that must be logged.
/// The @arg eventname must match when calling MarkEndEvent() to
/// mark the end of the event.
void MarkStartEvent(const char* eventname);
/// @brief Log end of a log-able event.
///
/// This marks the end of a event that must be logged.
/// The @arg eventname must match the name that was passed to
/// MarkStartEvent() to mark the beginning of the event.
void MarkStartEvent(const char* eventname);
void MarkEndEvent(const char* eventname);
/// @brief Mark the beginning of a timestep.
///
/// This marks the beginning of a timestep. All MarkStartEvent and
/// MarkEndEvent after this until MarkEndTimeStep are assumed to be
/// happening for a specific timestep and will be combined with subsequent
/// timesteps.
void MarkStartTimeStep(int timestep, double time);
/// @brief Marks the end of the current timestep.
void MarkEndTimeStep();
/// @brief Print log to the output stream.
///
/// Note this triggers collective operations and hence must be called on all
/// ranks. The amount of processes outputting can be reduced by using
/// the moduloOuput which only outputs for (rank % moduloOutput) == 0.
/// The default value for this is 1.
void PrintLog(std::ostream& stream);
/// MarkEvent -- A helper class that times it's life.
/// A timer event is created that starts at the object's construction
/// and ends at its destruction. The pointer to the event name must
/// be valid throughout the objects life.
class MarkEvent
{
public:
MarkEvent(const char* name) : EventName(name) { MarkStartEvent(name); }
~MarkEvent() { MarkEndEvent(this->EventName); }
};
private:
const char *EventName;
};
}
#endif
#ifndef timerc_h
#define timerc_h

/* Declarations of the timer functions with C linkage, so that they can be
 * called from C as well as from C++.
 * NOTE(review): the names suggest that these forward to the functions of
 * the same name in the C++ timer namespace -- confirm against the
 * implementation. */

/* bool is not a built in type in C prior to C23 */
#ifndef __cplusplus
#include <stdbool.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Turn logging on or off. */
void TIMER_SetLogging(bool val);

/* Mark the start of the event named val. */
void TIMER_MarkStartEvent(const char* val);

/* Mark the end of the event named val. */
void TIMER_MarkEndEvent(const char* val);

/* Mark the start of a time step given its index and simulated time. */
void TIMER_MarkStartTimeStep(int timestep, double time);

/* Mark the end of the current time step. An explicit void parameter list
 * is used because in C an empty list leaves the parameters unspecified. */
void TIMER_MarkEndTimeStep(void);

/* Print the log. */
void TIMER_Print(void);

#ifdef __cplusplus
}
#endif
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment