Commit 26082f87 authored by Aaron Bray

Merge branch 'study/cv_sensitivity' into study/hydrocephalus

parents a044821a 64caa365
......@@ -12,6 +12,7 @@
#include "engine/SEAutoSerialization.h"
#include "system/environment/SEEnvironmentalConditions.h"
#include "system/equipment/electrocardiogram/SEElectroCardioGramWaveformInterpolator.h"
#include "engine/SEOverrides.h"
#include "properties/SEScalar0To1.h"
#include "properties/SEScalarArea.h"
......@@ -48,6 +49,7 @@ PulseConfiguration::PulseConfiguration(SESubstanceManager& substances) : SEEngin
m_DynamicStabilization = nullptr;
m_AutoSerialization = nullptr;
m_WritePatientBaselineFile = eSwitch::Off;
m_InitialOverrides = nullptr;
// Blood Chemistry
m_MeanCorpuscularHemoglobin = nullptr;
......@@ -181,6 +183,7 @@ void PulseConfiguration::Clear()
RemoveStabilization();
SAFE_DELETE(m_AutoSerialization);
m_WritePatientBaselineFile = eSwitch::Off;
SAFE_DELETE(m_InitialOverrides);
// Blood Chemistry
SAFE_DELETE(m_MeanCorpuscularHemoglobin);
......@@ -548,6 +551,25 @@ void PulseConfiguration::RemoveAutoSerialization()
SAFE_DELETE(m_AutoSerialization);
}
bool PulseConfiguration::HasInitialOverrides() const
{
return m_InitialOverrides != nullptr;
}
SEOverrides& PulseConfiguration::GetInitialOverrides()
{
if (m_InitialOverrides == nullptr)
m_InitialOverrides = new SEOverrides();
return *m_InitialOverrides;
}
const SEOverrides* PulseConfiguration::GetInitialOverrides() const
{
return m_InitialOverrides;
}
void PulseConfiguration::RemoveInitialOverrides()
{
SAFE_DELETE(m_InitialOverrides);
}
//////////////////////
/** Blood Chemistry */
//////////////////////
......
......@@ -9,6 +9,7 @@ class SEDynamicStabilization;
class SETimedStabilization;
class SEEnvironmentalConditions;
class SEElectroCardioGramWaveformInterpolator;
class SEOverrides;
#include "engine/SEEngineConfiguration.h"
/**
......@@ -59,6 +60,11 @@ public:
virtual const SEAutoSerialization* GetAutoSerialization() const;
virtual void RemoveAutoSerialization();
// Initial circuit overrides
virtual bool HasInitialOverrides() const;
virtual SEOverrides& GetInitialOverrides();
virtual const SEOverrides* GetInitialOverrides() const;
virtual void RemoveInitialOverrides();
protected:
SESubstanceManager& m_Substances;
......@@ -68,6 +74,8 @@ protected:
SEAutoSerialization* m_AutoSerialization;
eSwitch m_WritePatientBaselineFile;
SEOverrides* m_InitialOverrides;
//////////////////////
/** Blood Chemistry */
//////////////////////
......
......@@ -417,6 +417,7 @@ bool PulseController::Initialize(SEPatient const& patient)
// The initial environment values are needed for the circuits to construct properly
Info("Creating Circuits and Compartments");
CreateCircuitsAndCompartments();
OverrideCircuits();
m_SpareAdvanceTime_s = 0;
m_AirwayMode = eAirwayMode::Free;
......@@ -1431,6 +1432,57 @@ bool PulseController::CreateCircuitsAndCompartments()
return true;
}
// Assumes all overrides are circuit path overrides; overrides that do not match a path
// in the cardiovascular or respiratory circuits are silently skipped
bool PulseController::OverrideCircuits()
{
if (!m_Config->HasInitialOverrides()) return true;
SEOverrides& overrides = m_Config->GetInitialOverrides();
// Apply overrides. Note the use of Force, as these values are locked (for good reason),
// but here we know what we are doing.
SEFluidCircuit& cv = GetCircuits().GetActiveCardiovascularCircuit();
SEFluidCircuit& resp = GetCircuits().GetRespiratoryCircuit();
bool bReturn = true;
for (auto& sp : overrides.GetScalarProperties())
{
SEFluidCircuitPath* path = nullptr;
if (resp.HasPath(sp.name))
{
path = resp.GetPath(sp.name);
}
else if (cv.HasPath(sp.name))
{
path = cv.GetPath(sp.name);
}
if (path == nullptr)
{
continue;
}
if (PressureTimePerVolumeUnit::IsValidUnit(sp.unit))
{ // Assume it's a resistor
const PressureTimePerVolumeUnit& u = PressureTimePerVolumeUnit::GetCompoundUnit(sp.unit);
path->GetResistance().ForceValue(sp.value, u);
path->GetNextResistance().ForceValue(sp.value, u);
path->GetResistanceBaseline().ForceValue(sp.value, u);
}
else if (VolumePerPressureUnit::IsValidUnit(sp.unit))
{ // Assume it's a compliance
const VolumePerPressureUnit& u = VolumePerPressureUnit::GetCompoundUnit(sp.unit);
path->GetCompliance().ForceValue(sp.value, u);
path->GetNextCompliance().ForceValue(sp.value, u);
path->GetComplianceBaseline().ForceValue(sp.value, u);
}
else
{
Error("Could not process circuit override " + sp.name);
bReturn = false;
}
}
return bReturn;
}
void PulseController::SetupCardiovascular()
{
Info("Setting Up Cardiovascular");
......
......@@ -224,6 +224,7 @@ public:
virtual bool GetPatientAssessment(SEPatientAssessment& assessment) const;
virtual bool CreateCircuitsAndCompartments();
virtual bool OverrideCircuits();
protected:
virtual void SetupCardiovascular();
......
......@@ -5,6 +5,7 @@
PUSH_PROTO_WARNINGS()
#include "pulse/cpm/bind/PulseConfiguration.pb.h"
POP_PROTO_WARNINGS()
#include "io/protobuf/PBActions.h"
#include "io/protobuf/PBPulseConfiguration.h"
#include "io/protobuf/PBEngine.h"
#include "io/protobuf/PBEnvironment.h"
......@@ -56,6 +57,9 @@ void PBPulseConfiguration::Serialize(const PULSE_BIND::ConfigurationData& src, P
if (src.writepatientbaselinefile() != CDM_BIND::eSwitch::NullSwitch)
dst.EnableWritePatientBaselineFile((eSwitch)src.writepatientbaselinefile());
if (src.has_initialoverrides())
PBAction::Load(src.initialoverrides(), dst.GetInitialOverrides());
// Blood Chemistry
if (src.has_bloodchemistryconfiguration())
{
......@@ -360,6 +364,8 @@ void PBPulseConfiguration::Serialize(const PulseConfiguration& src, PULSE_BIND::
if (src.HasAutoSerialization())
dst.set_allocated_autoserialization(PBEngine::Unload(*src.m_AutoSerialization));
dst.set_writepatientbaselinefile((CDM_BIND::eSwitch)src.m_WritePatientBaselineFile);
if (src.HasInitialOverrides())
dst.set_allocated_initialoverrides(PBAction::Unload(*src.m_InitialOverrides));
// Blood Chemistry
PULSE_BIND::ConfigurationData_BloodChemistryConfigurationData* bc = dst.mutable_bloodchemistryconfiguration();
......
......@@ -189,6 +189,7 @@ bool SaturationCalculator::Setup()
m_HbO2_g_Per_mol = m_HbO2->GetMolarMass(MassPerAmountUnit::g_Per_mol);
m_HbCO2_g_Per_mol = m_HbCO2->GetMolarMass(MassPerAmountUnit::g_Per_mol);
m_HbO2CO2_g_Per_mol = m_HbO2CO2->GetMolarMass(MassPerAmountUnit::g_Per_mol);
return true;
}
SaturationCalculator::~SaturationCalculator()
......
## Sensitivity Analysis Study
Sensitivity analysis and uncertainty quantification are important when using computational models to simulate the cardiovascular system.
The cardiovascular system in Pulse is represented with a lumped parameter model, where discrete lumped elements are learned during the simulation.
Uncertainty in simulation outputs can arise from uncertainty in the model inputs (e.g., patient height, weight, and age) and in these learned lumped parameters.
This study uses sensitivity analysis to explore how cardiovascular simulation outputs vary with uncertainty in the lumped parameter elements.
In addition to the C++ provided in this folder, we also used several Python scripts to analyze the simulation results, which can be found [here](INSERT LINK)
---
### Building
The sensitivity analysis engine driver will be built as part of the general Pulse build.
---
### Data Model
The data model for this study can be found [here](INSERT LINK).
The data requests used for CSV files can be found [here](INSERT LINK).
---
### Data
The data used in this study consists of combinations of lumped parameter values that override the learned values during the simulation.
The lumped parameter values are generated in [Python](INSERT LINK) and stored in a JSON file.
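As a minimal sketch of how such a file could be produced (assuming SALib's Saltelli sampler, which the analysis scripts below also use; the circuit path names and bounds here are hypothetical placeholders):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~py
import json

from SALib.sample import saltelli

# Hypothetical problem definition; a real study would use Pulse circuit path
# names and physiologically reasonable bounds.
problem = {
    "num_vars": 2,
    "names": ["Aorta1ToBrain1", "Brain1ToBrain2"],  # hypothetical circuit paths
    "bounds": [[1.0, 20.0], [1.0, 20.0]],           # hypothetical value ranges
}
problem_size = 100  # base sample count, N

# Generates N * (2 * num_vars + 2) parameter combinations.
param_values = saltelli.sample(problem, problem_size)

# Record the problem (plus its base sample size) so the analysis scripts can
# regenerate the identical sampling later (see sample_parameters below).
problem["problem_size"] = problem_size
with open("sobol_problem.json", "w") as f:
    json.dump(problem, f)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~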
---
### Running
The study used Python to generate combinations of lumped parameter values that were then used to override learned values in the SensitivityAnalysisDriver.
This command line application has two modes of use, explained in the sections below.
##### 1) Simulation of a single patient with hardcoded overrides for testing purposes
To simulate a single patient with hardcoded overrides, use the following command:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~bash
$ SensitivityAnalysisEngineDriver single
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This will output data in the following folder:
- <b>bin/test_results/sensitivity_analysis</b>
The folder will contain a CSV file of time-varying data and a JSON file of data captured at the end of the simulation.
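For a quick look at the time-varying output, the CSV can be loaded with pandas (the file name below is a hypothetical example):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~py
import pandas as pd

# Hypothetical file name; the driver writes its CSV output into
# bin/test_results/sensitivity_analysis/.
df = pd.read_csv("bin/test_results/sensitivity_analysis/single_patient.csv")
print(df.columns.to_list())  # the requested data channels
print(df.tail())             # values near the end of the run
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~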
##### 2) Simulation of combinations of lumped parameters
A Python script was then used to create combinations of lumped parameter overrides and to generate a SimulationListData JSON file containing all the combinations needed for this study.
To run the generated SimulationListData JSON file, execute the driver with the following arguments:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~bash
$ SensitivityAnalysisEngineDriver sim_list ./test_results/sensitivity_analysis/sim_list.json
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This will generate a ./test_results/sensitivity_analysis/simulations/sim_list_results.json file containing the final stabilized values for each lumped parameter combination.
CSV files for each parameter combination will also be generated.
This sim_list_results.json file was used for data analysis.
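As a sketch of how that file can be consumed (the bookkeeping field names are taken from the parsing code further below; the path is illustrative):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~py
import json

import pandas as pd

# Illustrative path; see the sim_list command above.
with open("./test_results/sensitivity_analysis/simulations/sim_list_results.json") as f:
    results = json.load(f)

# Keep only the stabilized quantities of interest, dropping bookkeeping fields.
meta = ["ID", "Name", "Overrides", "AchievedStabilization",
        "StabilizationTime_s", "TotalSimulationTime_s"]
rows = [{k: v for k, v in sim.items() if k not in meta}
        for sim in results["Simulation"]]
df = pd.DataFrame(rows)
print(df.describe())
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~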
......@@ -41,6 +41,7 @@
#include "engine/SEOverrides.h"
#include "engine/SEEngineTracker.h"
#include "engine/SEDataRequestManager.h"
#include "engine/SEPatientConfiguration.h"
#include "substance/SESubstanceManager.h"
#include "substance/SESubstanceFraction.h"
......@@ -84,6 +85,8 @@ public:
bool Run(pulse::study::bind::sensitivity_analysis::SimulationListData& simList);
bool RunSimulationUntilStable(std::string const& outDir, pulse::study::bind::sensitivity_analysis::SimulationData& sim, const std::string& dataDir="./");
bool RunSimulation(std::string const& outDir, pulse::study::bind::sensitivity_analysis::SimulationData& sim, const std::string& dataDir = "./");
protected:
bool Run();
bool SerializeToString(pulse::study::bind::sensitivity_analysis::SimulationListData& src, std::string& dst, SerializationFormat f) const;
......
# Distributed under the Apache License, Version 2.0.
# See accompanying NOTICE file for details.
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import sys
from sklearn.model_selection import train_test_split
import pulse.study.sensitivity_analysis.ml_surrogate as ml_surrogate
import pulse.study.sensitivity_analysis.analysis_utils as analysis
def gen_heatmaps(sobol_indices_df, quantities_of_interest, results_dir, sobol_index):
"""
Generate heatmaps.
:param sobol_indices_df: Pandas DataFrame - holds Sobol indices
:param quantities_of_interest: List - holds quantities of interest
:param results_dir: String - output directory
:param sobol_index: String - "S1" or "ST"
:return: None
"""
print("Generating heat maps ...")
df = pd.DataFrame()
for col in quantities_of_interest:
df[col] = sobol_indices_df[col, sobol_index]
bins = ["Brain", "Myocardium", "VenaCava", "RightArm", "LeftArm", "RightLeg", "LeftLeg", "Fat", "Muscle", "Skin",
"Liver", "Spleen", "SmallIntestine", "LargeIntestine", "Splanchnic", "Kidney", "Bone", "LeftPulmonary",
"RightPulmonary", "Pericardium"]
save_heatmap(df, results_dir, sobol_index)
found_bin_bool = True
if sobol_index == "S1" and len(quantities_of_interest) > 1:
binned_df = pd.DataFrame(0, columns=quantities_of_interest, index=bins)
for col in df:
for index, value in df[col].items():
found_bin = False
for part in bins:
if part in index:
binned_df.loc[part, col] = value
found_bin = True
break
if not found_bin and found_bin_bool:
print("Could not place {} into a bin.".format(index))
found_bin_bool = False
binned_df.to_csv(os.path.join(results_dir, "global_sensitivity_indices", "sobol_indices_binned_s1.csv"))
save_heatmap(binned_df, results_dir, "{}_binned".format(sobol_index))
def save_heatmap(df, results_dir, sobol_index):
"""
Save heatmaps.
:param df: Pandas DataFrame - holds Sobol indices
:param results_dir: String - output directory
:param sobol_index: String - "S1", "ST", or "S1_binned"
:return: None
"""
plt.figure(figsize=(16, 16))
sns.heatmap(df)
plt.tight_layout()
plt.title(sobol_index)
plt.savefig(os.path.join(results_dir, "global_sensitivity_indices", "heatmap_{}".format(sobol_index)))
def compute_local_sensitivity(parsed_results_df, results_dir):
"""
Compute local sensitivity indices.
:param parsed_results_df: DataFrame - simulation results
:param results_dir: string - results directory
:return: None
"""
file = open(os.path.join(results_dir, "local_sa_problem.json"))
sa_problem = json.load(file)
file.close()
# Results arrive in blocks of 100 runs per perturbed path; accumulate each
# block and record its standard deviation as the local sensitivity measure
values = np.zeros((sa_problem["problem_size"], 1))
std_dev_df = pd.DataFrame(np.nan, columns=parsed_results_df.columns, index=sa_problem["paths"])
counter = 0
for col in parsed_results_df:
for index, value in enumerate(parsed_results_df[col]):
if (index + 1) % 100 == 0 and index > 0:
path = sa_problem["paths"][(index + 1) // 100 - 1]
std_dev_df.at[path, col] = np.std(values)
counter = 0
values[counter] = value
counter += 1
for col in std_dev_df:
std_dev_df[col] = std_dev_df[col] / std_dev_df[col].max()
print(std_dev_df.head())
std_dev_df.to_csv(os.path.join(results_dir, "local_sensitivity_indices", "normal_std_dev.csv"))
plt.figure(figsize=(16, 16))
sns.heatmap(std_dev_df)
plt.tight_layout()
plt.title("STD Local Sensitivity")
plt.savefig(os.path.join(results_dir, "local_sensitivity_indices", "heatmap_normal_std_dev.png"))
def compute_sobol_indices_without_ml(parsed_results_df, results_dir):
"""
Compute global sensitivity indices without machine learning.
:param parsed_results_df: DataFrame - simulation results
:param results_dir: string - results directory
:return: None
"""
print("Computing global sensitivity indices without machine learning ...")
sobol_indices_df = analysis.compute_sobol_indices(parsed_results_df, "all", results_dir)
sobol_indices_df.to_csv(os.path.join(results_dir, "global_sensitivity_indices", "sobol_indices.csv"))
gen_heatmaps(sobol_indices_df, parsed_results_df.columns.to_list(), results_dir, "S1")
gen_heatmaps(sobol_indices_df, parsed_results_df.columns.to_list(), results_dir, "ST")
def compute_sobol_indices_with_ml(parsed_results_df, model_type, quantity_of_interest, results_dir):
"""
Compute global sensitivity indices with machine learning
:param parsed_results_df: DataFrame - simulation results
:param model_type: string - machine learning model type
:param quantity_of_interest: string - quantity of interest
:param results_dir: string - results directory
:return: None
"""
print("Computing global sensitivity indices with machine learning ...")
# create all the results folders we need
if not os.path.exists(os.path.join(results_dir, "machine_learning")):
os.mkdir(os.path.join(results_dir, "machine_learning"))
if not os.path.exists(os.path.join(results_dir, "machine_learning", model_type)):
os.mkdir(os.path.join(results_dir, "machine_learning", model_type))
param_values_df = ml_surrogate.scale_parameters(analysis.sample_parameters(results_dir))
param_values_df_used = param_values_df.loc[list(parsed_results_df.index.values)]
param_values_df_unused = param_values_df.loc[~param_values_df.index.isin(list(parsed_results_df.index.values))]
x_train, x_val, y_train, y_val = train_test_split(
param_values_df_used.to_numpy(), parsed_results_df[quantity_of_interest].to_numpy(), test_size=0.2)
if model_type == "NeuralNetwork":
model = ml_surrogate.neural_network_model(x_train, y_train, x_val, y_val)
elif model_type == "RandomForest":
model = ml_surrogate.random_forest_model(x_train, y_train)
elif model_type == "SupportVectorMachine":
model = ml_surrogate.support_vector_machine_model(x_train, y_train)
else:
raise ValueError("Machine learning model type not supported.")
# test model on the test set
val_prediction = np.squeeze(model.predict(x_val))
ml_surrogate.plot_ml_vs_simulation(y_val, val_prediction, quantity_of_interest, model_type, results_dir)
# predict the remaining simulations
prediction = np.squeeze(model.predict(param_values_df_unused.to_numpy()))
total_samples_df = pd.DataFrame(data=prediction, index=list(param_values_df_unused.index.values),
columns=[quantity_of_interest])
# combine the simulation results with the predicted remaining simulation results
# (pd.concat replaces the deprecated DataFrame.append)
total_samples_df = pd.concat([total_samples_df, parsed_results_df[quantity_of_interest].to_frame()])
# compute the sensitivity indices
sobol_indices_df = analysis.compute_sobol_indices(total_samples_df.sort_index(axis=0),
quantity_of_interest, results_dir)
# output metrics
sobol_indices_df.to_csv(os.path.join(results_dir, "global_sensitivity_indices", "sobol_indices.csv"))
gen_heatmaps(sobol_indices_df, [quantity_of_interest], results_dir, "S1")
gen_heatmaps(sobol_indices_df, [quantity_of_interest], results_dir, "ST")
if __name__ == '__main__':
results_dir = "./test_results/sensitivity_analysis/"
# cv or combined
phys_systems = "cv"
sensitivity_analysis = "local"
parsed_results_df = analysis.load_and_parse_results(results_dir, phys_systems)
if sensitivity_analysis == "local":
if not os.path.exists(os.path.join(results_dir, "local_sensitivity_indices")):
os.mkdir(os.path.join(results_dir, "local_sensitivity_indices"))
elif sensitivity_analysis == "global":
if not os.path.exists(os.path.join(results_dir, "global_sensitivity_indices")):
os.mkdir(os.path.join(results_dir, "global_sensitivity_indices"))
else:
raise ValueError("Sensitivity analysis type must be either 'local' or 'global'.")
if len(sys.argv) == 1:
"""
Compute sensitivity indices (local or global, per the sensitivity_analysis setting above) if
you were able to run all of the simulations that you need.
Often, this requires a very large number of simulations. To use this method, run this script
as follows: python3 analysis.py
"""
if sensitivity_analysis == 'local':
print("Computing local sensitivity parameters ...")
compute_local_sensitivity(parsed_results_df, results_dir)
elif sensitivity_analysis == "global":
print("Computing global sensitivity indices ...")
compute_sobol_indices_without_ml(parsed_results_df, results_dir)
elif len(sys.argv) == 3:
"""
If you ran a subset of the simulations that you need, you can build a surrogate machine learning
model to predict the remaining simulations. Model type options are NeuralNetwork, SupportVectorMachine,
and RandomForest. I found that SupportVectorMachine worked best with a smaller data set
(<2,000 simulations), but NeuralNetwork worked best with larger data sets (>2,000 simulations).
RandomForest generally had worse performance, but it is more explainable because you can determine
the importance of each parameter in your model. You also need to specify the quantity of interest. Because
machine learning models can take a long time to train, we only want to use this method for a single
quantity of interest. To use this method, run this script as follows:
python3 analysis.py arg1 arg2
arg1: either NeuralNetwork, SupportVectorMachine, or RandomForest
arg2: quantity of interest (ex: MeanArterialPressure_mmHg)
"""
model_type = sys.argv[1]
quantity_of_interest = sys.argv[2]
compute_sobol_indices_with_ml(parsed_results_df, model_type, quantity_of_interest, results_dir)
else:
raise ValueError("Please provide either no arguments or the machine learning model type"
"('NeuralNetwork', 'SupportVectorMachines', 'RegressionTrees') followed"
"by the quantity of interest.")
# Distributed under the Apache License, Version 2.0.
# See accompanying NOTICE file for details.
import json
import numpy as np
import os
import pandas as pd
import re
from SALib.analyze import sobol
from SALib.sample import saltelli
from tqdm import tqdm
def between(start_char, end_char, input_str):
"""
Get substring between two characters.
:param start_char: char - initial char
:param end_char: char - end char
:param input_str: string
:return: string - substring between two chars
"""
result = re.search('{}(.*){}'.format(start_char, end_char), input_str)
return result.group(1)
def sample_parameters(results_dir):
"""
Sample parameter values using Saltelli sampling (https://salib.readthedocs.io/en/latest/).
:param results_dir: String - output directory
:return: Pandas DataFrame - holds the sampled parameter values
"""
# load the Sobol problem
file = open(os.path.join(results_dir, "sobol_problem.json"))
sobol_problem = json.load(file)
file.close()
problem_size = sobol_problem["problem_size"]
del sobol_problem["problem_size"]
param_values = saltelli.sample(sobol_problem, problem_size)
param_values_df = pd.DataFrame(data=param_values, columns=sobol_problem["names"])
return param_values_df
def load_and_parse_results(results_dir, phys_systems):
"""
Parse the JSON results files.
:param results_dir: String - output directory
:param phys_systems: String - "cv" or "combined"
:return: Pandas DataFrame - holds results
"""
# for now, we are assuming results are separated into many results files - this may change in the future once
# we settle on a serialization method
file_nums = []
for file in os.listdir(os.path.join(results_dir, "simulations")):
if file.endswith(".json") and file.startswith("simlist_results"):
file_nums.append(int(between("results_", ".json", file)))
if not file_nums:
raise ValueError("No results files found.")
file_nums.sort()
num_sims = file_nums[-1]
print("Counted {} total simulations.".format(num_sims))
# store results in DataFrame; load one results file to determine the available columns
file = open(os.path.join(results_dir, "simulations", "simlist_results_{}.json".format(file_nums[-1])))
results_file = json.load(file)
file.close()
col_list = []
for key in results_file["Simulation"][0]:
if phys_systems == "cv":
if key not in ["ID", "Name", "Overrides", "AchievedStabilization", "StabilizationTime_s",
"TotalSimulationTime_s"]:
col_list.append(key)