/* Distributed under the Apache License, Version 2.0.
   See accompanying NOTICE file for details.*/

#include "LBM.h"
#include <iostream>
#include <fstream>
#include <sstream>
#define _USE_MATH_DEFINES
#include <cmath>
#include <math.h>
#include <cstring>
#include <limits>
#include<Eigen/Dense>

// Frees an array allocated with new[] and resets the pointer to nullptr.
// delete[] on a null pointer is a no-op, so no explicit null test is needed.
// The do/while(0) wrapper makes the expansion a single statement, so the macro
// can be used safely as the body of an unbraced if/else.
#define SAFE_DELETE(ary) \
  do \
  { \
    delete[] (ary); \
    (ary) = nullptr; \
  } while (0)

// CUDA Methods
// Entry points with C linkage that are defined outside this file. LBM::Run() treats a
// return value of zero as success and any other value as failure for all three.
//
// FindNormals: Run() passes its 32-slot float parameter block as p, the per-cell label
// array as g, and the three lattice dimensions. Run() re-counts the labels afterwards,
// so the call is expected to be able to rewrite g.
extern "C" int FindNormals(float* p, int* g, int Mx, int My, int Mz);
// LBMbc: Run() passes the parameter block as p and the boundary-condition table as qBC,
// then reports qBC as "non-dimensional lattice units".
extern "C" int LBMbc(float* p, float* qBC);
// microLBM: Run() passes the parameter block (p), a per-cell field buffer (q: either the
// pressure/flow/stress buffer or the temperature buffer), the labels (g), the
// boundary-condition table (qBC), the two PDF buffers (f, fn) and the lattice dimensions.
extern "C" int microLBM(float* p, float* q, int* g, float* qBC, float* f, float* fn, int Mx, int My, int Mz);

// LBM model codes, should be consistent with LBM.cu
// LBM::Run() stores one of these in slot 0 of the parameter block before each CUDA call
// to select which solver is used.
#define D3Q19     1    //    1: Navier-Stokes D3Q19
#define D3Q19p0   2    //    2: Initial pressure approximation at u=0 for Navier-Stokes D3Q19
#define D3Q19T    3    //    3: Steady-state thermal field for given u D3Q19

// Starts with an empty (0 x 0 x 0) lattice and a unit grid spacing.
LBM::Input::Input()
{
  for (int axis = 0; axis < 3; ++axis)
  {
    dimensions[axis] = 0;
  }
  grid_spacing = 1;
}

// Puts every owned buffer pointer into the "not allocated" state.
// The destructor unconditionally runs Deallocate(), which delete[]s these pointers,
// so they must never hold an indeterminate value, even if Allocate() is never called.
LBM::Output::Output()
{
  labels = nullptr;
  pressure_flows_and_stress = nullptr;
  wall_shear_stress = nullptr;
  temperature = nullptr;
  fPDF = nullptr;
  fnPDF = nullptr;
  boundary_conditions = nullptr;
}
// Releases every buffer this object still owns.
LBM::Output::~Output()
{
  this->Deallocate();
}
bool LBM::Output::Allocate(int dimensions[3])
{
  Deallocate();

  num_cells = dimensions[0]*dimensions[1]*dimensions[2];
  if (num_cells <= 0)
  {
    std::cerr << "Input has no dimensions\n";
    return false;
  }
  num_wall_cells = 0;
  num_inflow_cells = 0;
  num_outflow_cells = 0;
  num_inactive_cells = 0;
  num_interior_cells = 0;
  num_unknown_cells = 0;

  Q_num_cell_values = num_cells * (nQ + nS);
  T_num_cell_values = num_cells * nQ;
  num_boundary_values = nQ * nBound;
  nPDF = num_cells * 19;

  reference_velocity = 0;
  inlet_velocity = 0;
  inlet_area = 0;
  outlet_velocity = 0;
  outlet_area = 0;
  outlet_diameter = 0;
  average_area = 0;
  mass_flow = 0;
  pressure_drop = 0;
  volumetric_flow_rate = 0;

  labels = new int[num_cells];
  pressure_flows_and_stress = new float[Q_num_cell_values];
  wall_shear_stress = new float[num_cells * 2];
  temperature = new float[T_num_cell_values];
  fPDF = new float[nPDF];
  fnPDF = new float[nPDF];
  boundary_conditions = new float[num_boundary_values];
  memset(boundary_conditions, 0, num_boundary_values * sizeof(float));

  return true;
}
// Frees every owned buffer and leaves each pointer null, so the call is safe to repeat.
void LBM::Output::Deallocate()
{
  // delete[] ignores null pointers, so buffers that were never allocated are skipped
  delete[] labels;
  labels = nullptr;

  delete[] pressure_flows_and_stress;
  pressure_flows_and_stress = nullptr;

  delete[] wall_shear_stress;
  wall_shear_stress = nullptr;

  delete[] temperature;
  temperature = nullptr;

  delete[] fPDF;
  fPDF = nullptr;

  delete[] fnPDF;
  fnPDF = nullptr;

  delete[] boundary_conditions;
  boundary_conditions = nullptr;
}

// Tallies the label array into the per-type cell counters and prints the totals.
// Label ranges: -1 inactive, 0 interior, 1..99 wall, 101..199 inlet, 201..299 outlet;
// every other value is counted as unknown.
// Returns false, without touching the counters, when there are no labels to count.
bool LBM::Output::CountCellTypes()
{
  const bool has_labels = (labels != nullptr) && !(num_cells <= 0);
  if (!has_labels)
  {
    return false;
  }

  // Start every tally from zero
  num_wall_cells = 0;
  num_inflow_cells = 0;
  num_outflow_cells = 0;
  num_inactive_cells = 0;
  num_interior_cells = 0;
  num_unknown_cells = 0;

  for (int cell = 0; cell < num_cells; ++cell)
  {
    const int code = labels[cell];

    if (code == -1)
    {
      // Outside of the region of interest
      ++num_inactive_cells;
      continue;
    }
    if (code == 0)
    {
      // Interior of the region of interest
      ++num_interior_cells;
      continue;
    }
    if (code >= 1 && code < 100)
    {
      // Wall boundary condition area
      ++num_wall_cells;
      continue;
    }
    if (code >= 101 && code < 200)
    {
      // Inlet boundary condition area
      ++num_inflow_cells;
      continue;
    }
    if (code >= 201 && code < 300)
    {
      // Outlet boundary condition area
      ++num_outflow_cells;
      continue;
    }
    ++num_unknown_cells;
  }

  std::cout << "Number of Nodes: " << num_cells << ". \n"
            << "Number of Inactive Nodes: " << num_inactive_cells << ". \n"
            << "Number of Interior Nodes: " << num_interior_cells << ". \n"
            << "Number of Wall Nodes: " << num_wall_cells << ". \n"
            << "Number of Inflow Nodes: " << num_inflow_cells << ". \n"
            << "Number of Outflow Nodes: " << num_outflow_cells << ". \n"
            << "Number of Unknown Nodes: " << num_unknown_cells << ". \n";

  return true;
}

// Nothing to set up here; every member is left to its own default initialization.
LBM::LBM() = default;
// Frees the output buffers explicitly when the solver object goes away.
LBM::~LBM()
{
  this->out.Deallocate();
}
bool LBM::Run()
{
  if (in.labels.empty() && in.source_labels.empty())
  {
    std::cerr << "Input has no labels\n";
    return false;
  }
  if (in.labels.empty() && !in.source_labels.empty() && in.source_to_lbm_label_map.empty())
  {
    std::cerr << "When providing source labels, you must provide a soure to LBM label mapping\n";
    return false;
  }
  // Check that the number of labels matches the dimensions
  int num_labels = in.labels.empty() ? in.source_labels.size() : in.labels.size();
  int num_cells = in.dimensions[0] * in.dimensions[1] * in.dimensions[2];
  if (num_labels != num_cells)
  {
    std::cerr << "Number of labels does not match the provided dimensions\n";
    return false;
  }

  if (in.grid_spacing <= 0)
  {
    std::cerr << "Input has no grid spacing\n";
    return false;
  }

  // Allocate our output
  if (!out.Allocate(in.dimensions))
  {
    return false;
  }
  std::stringstream ss;

  std::cout << "Processing inputs...\n";
  std::cout << cfg;

  // Process labels
  size_t num_inactive = 0;
  size_t num_wall = 0;
  size_t num_inlet = 0;
  size_t num_outlet = 0;
  // Pull the initial labels, CUDA can change these
  if (!in.labels.empty())
  {
    std::cout << "Processing provided LBM labels\n";
    for (int i = 0; i < out.num_cells; i++)
    {
      BoundaryTypes t = in.labels[i];
      out.labels[i] = (int)t;
      // Count what we have
      switch (t)
      {
      case BoundaryTypes::Inactive:
        num_inactive++; break;
      case BoundaryTypes::Wall:
        num_wall++; break;
      case BoundaryTypes::Inlet:
        num_inlet++; break;
      case BoundaryTypes::Outlet:
        num_outlet++; break;
      }
    }
  }
  else
  {
    std::cout << "Translating source labels to LBM labels\n";
    for (int i = 0; i < out.num_cells; i++)
    {
      int source_label = in.source_labels[i];
      auto value = in.source_to_lbm_label_map.find(source_label);
      if (value == in.source_to_lbm_label_map.end())
      {
        std::cerr << "A mapping was not provided for source label " << source_label << "\n";
        return false;
      }
      out.labels[i] = (int)value->second;
      // Count what we have
      switch (value->second)
      {
      case BoundaryTypes::Inactive:
        num_inactive++; break;
      case BoundaryTypes::Wall:
        num_wall++; break;
      case BoundaryTypes::Inlet:
        num_inlet++; break;
      case BoundaryTypes::Outlet:
        num_outlet++; break;
      }
    }
  }
  std::cout << "Labeled mesh with " << num_labels << " labels\n";
  std::cout << "  Inactive cells  : " << num_inactive << "\n";
  std::cout << "  Wall Cells      : " << num_wall << "\n";
  std::cout << "  Inlet Cells     : " << num_inlet << "\n";
  std::cout << "  Outlet Cells    : " << num_outlet << "\n";


  float parameters[32];
  //Find Normals, completed in cuda algorithm
  for (int i = 0; i < 32; i++)
    parameters[i] = 0.f;
  parameters[0] = D3Q19;
  // Lattice Dimensions
  parameters[1] = (float)in.dimensions[0];
  parameters[2] = (float)in.dimensions[1];
  parameters[3] = (float)in.dimensions[2];
  // Lattice memory buffer (multiples of dimensions)
  parameters[4] = (float)in.dimensions[0];
  parameters[5] = (float)in.dimensions[1];
  parameters[6] = (float)in.dimensions[2];
  // Print flags
  parameters[30] = 0; // Flag to debug wall normal identification
  parameters[31] = 0; // Flag whether to print indices of wall sites
  std::cout << "Finding Normals...\n";
  int status = FindNormals(parameters, out.labels, in.dimensions[0], in.dimensions[1], in.dimensions[2]);
  if (!status)
  {
    std::cout << "Successfully calculated the normals to the boundary. \n";
  }
  else
  {
    std::cerr << "Error occurred while calculating the normals. \n";
    return false;
  }

  std::cout << "Find Normals Boundary Codes: \n";
  std::cout << "Grid Size (x, y, z) = (" << in.dimensions[0] << "," << in.dimensions[1] << "," << in.dimensions[2] << "). \n";
  out.CountCellTypes();

  // Calculate dimensions
  parameters[10] = (float)out.nBound;
  parameters[11] = (float)out.nQ;
  parameters[12] = 0; // Flag to return PDF's to host
  parameters[20] = in.grid_spacing;
  parameters[22] = cfg.reference_pressure;
  parameters[23] = cfg.speed_of_sound;
  parameters[24] = cfg.viscosity;

  // Calculate average area
  out.inlet_area = out.num_inflow_cells * std::pow(in.grid_spacing, 2);
  out.outlet_area = out.num_outflow_cells * std::pow(in.grid_spacing, 2);
  out.average_area = 0.5f * (out.inlet_area + out.outlet_area);
  out.outlet_diameter = std::pow(out.num_outflow_cells / float(M_PI), 0.5f) * in.grid_spacing * 2.f;

  // Imposed pressure drop
  if (cfg.inlet_boundary_condition == BoundaryCondition::Pressure && cfg.outlet_boundary_condition == BoundaryCondition::Pressure)
  {
    if (cfg.inlet_value <= cfg.outlet_value)
    {
      std::cerr << "!!! Inlet pressure is lower than outlet pressure !!!";
    }
    out.pressure_drop = cfg.inlet_value - cfg.outlet_value;
    out.mass_flow = std::pow(out.average_area, 2) * out.pressure_drop / (8.f * float(M_PI) * cfg.viscosity) / in.dimensions[0] / in.grid_spacing;
    out.volumetric_flow_rate = out.mass_flow / cfg.density;
  }
  // Constant Volumetric Flow Rate
  else if (cfg.inlet_boundary_condition == BoundaryCondition::Flow && cfg.outlet_boundary_condition == BoundaryCondition::Flow)
  {
    if (cfg.inlet_value != cfg.outlet_value)
    {
      std::cerr << "!!! Inlet and outlet flows are NOT the same !!!";
    }
    out.volumetric_flow_rate = cfg.outlet_value;
    out.mass_flow = out.volumetric_flow_rate * cfg.density;
    out.pressure_drop = out.mass_flow * 8.f * float(M_PI) * cfg.viscosity * in.dimensions[0] * in.grid_spacing / std::pow(out.average_area, 2);
  }
  else
  {
    std::cerr << "Unsupported Boundary Condtion Combination.";
    return false;
  }

  // Calculate velocities
  out.inlet_velocity = out.mass_flow / (cfg.density * out.inlet_area);
  out.outlet_velocity = out.mass_flow / (cfg.density * out.outlet_area);
  // Fill out the boundary conditions
  // Interior
  out.boundary_conditions[0] = cfg.reference_pressure;
  out.boundary_conditions[1] = 0;
  out.boundary_conditions[2] = 0;
  out.boundary_conditions[3] = 0;
  // Inactive
  out.boundary_conditions[4] = cfg.reference_pressure;
  out.boundary_conditions[5] = 0;
  out.boundary_conditions[6] = 0;
  out.boundary_conditions[7] = 0;
  // Wall
  out.boundary_conditions[8] = cfg.reference_pressure;
  out.boundary_conditions[9] = 0;
  out.boundary_conditions[10] = 0;
  out.boundary_conditions[11] = 0;
  // Inflow
  out.boundary_conditions[12] = cfg.reference_pressure;
  out.boundary_conditions[13] = out.inlet_velocity;
  out.boundary_conditions[14] = 0;
  out.boundary_conditions[15] = 0;
  // Outflow
  out.boundary_conditions[16] = cfg.reference_pressure - out.pressure_drop;
  out.boundary_conditions[17] = out.outlet_velocity;
  out.boundary_conditions[18] = 0;
  out.boundary_conditions[19] = 0;
  std::cout<<cfg.reference_pressure<<" , "<<out.pressure_drop<<std::endl;

  std::cout << "Boundary Conditions in physical units: \n";
  std::cout << "              " << "  Pressure  " << "  X-Velocity  " << "  Y-Velocity  " << "  Z-Velocity  \n";
  std::cout << "Interior:          " << out.boundary_conditions[0] << "          " << out.boundary_conditions[1] << "           " << out.boundary_conditions[2] << "         " << out.boundary_conditions[3] << "\n";
  std::cout << "Inactive:          " << out.boundary_conditions[4] << "          " << out.boundary_conditions[5] << "           " << out.boundary_conditions[6] << "         " << out.boundary_conditions[7] << "\n";
  std::cout << "Wall:              " << out.boundary_conditions[8] << "          " << out.boundary_conditions[9] << "           " << out.boundary_conditions[10] << "         " << out.boundary_conditions[11] << "\n";
  std::cout << "Inlet:             " << out.boundary_conditions[12] << "          " << out.boundary_conditions[13] << "           " << out.boundary_conditions[14] << "         " << out.boundary_conditions[15] << "\n";
  std::cout << "Outlet:            " << out.boundary_conditions[16] << "          " << out.boundary_conditions[17] << "           " << out.boundary_conditions[18] << "         " << out.boundary_conditions[19] << "\n \n";
  // Convert to unitless
  status = LBMbc(parameters, out.boundary_conditions);
  if (!status)
  {
    std::cout << "Successfully converted boundary conditions to unitless values. \n";
  }
  else
  {
    std::cerr << "Error occurred while converting boundary conditions to unitless values. \n";
    return false;
  }
  std::cout << "Boundary Conditions in non-dimensional lattice units: \n";
  std::cout << "              " << "  Pressure  " << "  X-Velocity  " << "  Y-Velocity  " << "  Z-Velocity  \n";
  std::cout << "Interior:          " << out.boundary_conditions[0] << "          " << out.boundary_conditions[1] << "           " << out.boundary_conditions[2] << "         " << out.boundary_conditions[3] << "\n";
  std::cout << "Inactive:          " << out.boundary_conditions[4] << "          " << out.boundary_conditions[5] << "           " << out.boundary_conditions[6] << "         " << out.boundary_conditions[7] << "\n";
  std::cout << "Wall:              " << out.boundary_conditions[8] << "          " << out.boundary_conditions[9] << "           " << out.boundary_conditions[10] << "         " << out.boundary_conditions[11] << "\n";
  std::cout << "Inlet:             " << out.boundary_conditions[12] << "          " << out.boundary_conditions[13] << "           " << out.boundary_conditions[14] << "         " << out.boundary_conditions[15] << "\n";
  std::cout << "Outlet:            " << out.boundary_conditions[16] << "          " << out.boundary_conditions[17] << "           " << out.boundary_conditions[18] << "         " << out.boundary_conditions[19] << "\n \n";

  out.reference_velocity = parameters[20] / parameters[21];
  std::cout << "Computational Parameters: \n";
  std::cout << "Reference Length        = " << parameters[20] << "\n";
  std::cout << "Reference Time          = " << parameters[21] << "\n";
  std::cout << "Reference Velocity      = " << out.reference_velocity << "\n";
  std::cout << "Inlet Velocity          = " << out.inlet_velocity << "\n";
  std::cout << "Outlet Velocity         = " << out.outlet_velocity << "\n";
  std::cout << "Outlet Diameter         = " << out.outlet_diameter << "\n";
  std::cout << "Outlet Reynolds Number  = " << (out.outlet_velocity * out.outlet_diameter) / cfg.viscosity << "\n";
  std::cout << "Reference Pressure      = " << cfg.reference_pressure << "\n";
  std::cout << "Kinematic Viscosity     = " << cfg.viscosity << "\n";
  std::cout << "BGK Relaxation Time     = " << parameters[9] << "\n\n";
  // Imposed pressure drop
  if (cfg.inlet_boundary_condition == BoundaryCondition::Pressure && cfg.outlet_boundary_condition == BoundaryCondition::Pressure)
  {
    std::cout << "Imposed pressure drop   = " << out.pressure_drop << "\n";
    std::cout << "Initial mass flow rate       = " << out.mass_flow << "\n";
    std::cout << "Initial volumetric flow rate       = " << out.volumetric_flow_rate << "\n";
  }
  // Constant volumetric flow rate
  else if (cfg.inlet_boundary_condition == BoundaryCondition::Flow && cfg.outlet_boundary_condition == BoundaryCondition::Flow)
  {
    std::cout << "Initial pressure drop   = " << out.pressure_drop << "\n";
    std::cout << "Imposed mass flow rate       = " << out.mass_flow << "\n";
    std::cout << "Imposed volumetric flow rate       = " << out.volumetric_flow_rate << "\n";
  }

  // Setup the initial pressure and flow values for every cell
  int iCell = 0;
  for (int z = 0; z < in.dimensions[2]; z++)
  {
    for (int y = 0; y < in.dimensions[1]; y++)
    {
      for (int x = 0; x < in.dimensions[0]; x++)
      {
        int label = out.labels[iCell];
        if (label >= 0 && label < 99) // Wall
        {
          out.temperature[iCell] = cfg.wall_temperature;                                          // Temperature
          out.pressure_flows_and_stress[iCell] = out.boundary_conditions[0];                      // Pressure
          out.pressure_flows_and_stress[out.num_cells + iCell] = out.boundary_conditions[1];      // X Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 2) + iCell] = out.boundary_conditions[2];// Y Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 3) + iCell] = out.boundary_conditions[3];// Z Flow Velocity
        }
        else if (label >= 101 && label < 200) // Inlet
        {
          out.temperature[iCell] = cfg.fluid_temperature;                                          // Temperature
          out.pressure_flows_and_stress[iCell] = out.boundary_conditions[12];                      // Pressure
          out.pressure_flows_and_stress[out.num_cells + iCell] = out.boundary_conditions[13];      // X Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 2) + iCell] = out.boundary_conditions[14];// Y Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 3) + iCell] = out.boundary_conditions[15];// Z Flow Velocity
        }
        else if (label >= 201 && label < 300) // Outlet
        {
          out.temperature[iCell] = cfg.wall_temperature;                                           // Temperature
          out.pressure_flows_and_stress[iCell] = out.boundary_conditions[16];                      // Pressure
          out.pressure_flows_and_stress[out.num_cells + iCell] = out.boundary_conditions[17];      // X Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 2) + iCell] = out.boundary_conditions[18];// Y Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 3) + iCell] = out.boundary_conditions[19];// Z Flow Velocity
        }
        else
        {
          out.temperature[iCell] = cfg.fluid_temperature;                                         // Temperature
          out.pressure_flows_and_stress[iCell] = out.boundary_conditions[4];                      // Pressure
          out.pressure_flows_and_stress[out.num_cells + iCell] = out.boundary_conditions[5];      // X Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 2) + iCell] = out.boundary_conditions[6];// Y Flow Velocity
          out.pressure_flows_and_stress[(out.num_cells * 3) + iCell] = out.boundary_conditions[7];// Z Flow Velocity
        }
        out.pressure_flows_and_stress[(out.num_cells * 4) + iCell] = 0.f;   //Sigma_xy
        out.pressure_flows_and_stress[(out.num_cells * 5) + iCell] = 0.f;   //Sigma_xz
        out.pressure_flows_and_stress[(out.num_cells * 6) + iCell] = 0.f;   //Sigma_yz
        out.pressure_flows_and_stress[(out.num_cells * 7) + iCell] = 0.f;   //Sigma_xx
        out.pressure_flows_and_stress[(out.num_cells * 8) + iCell] = 0.f;   //Sigma_yy
        out.pressure_flows_and_stress[(out.num_cells * 9) + iCell] = 0.f;   //Sigma_zz
        iCell++;
      }
    }
  }
  std::cout << "LBM Setup Complete.\n";
  // Only setup our structures and return
  if (cfg.setup_only)
    return true;

  // Setup call to initiation microLBM call
  parameters[0]  = D3Q19p0;                     // Select pressure Poisson solver D3Q19p0
  parameters[7]  = 24.f * in.dimensions[0];     // Max allowed iterations for Poisson solver
  parameters[8]  = 1.f;                         // Flag whether to initialize PDFs from Q
  parameters[12] = 1.f;                         // Flag to return PDFs to host
  parameters[13] = 1.f;                         // Progress indicator display
  parameters[14] = parameters[7] / 10.f;        // Progress indicator stride
  std::cout << "Running initial microLBM...\n";
  status = microLBM(parameters, out.pressure_flows_and_stress, out.labels, out.boundary_conditions, out.fPDF, out.fnPDF, in.dimensions[0], in.dimensions[1], in.dimensions[2]);
  if (!status)
  {
    std::cout << "Successfully ran initial microLBM call. \n";
  }
  else
  {
    std::cerr << "Error occurred while running the initial microLBM call. \n";
    return false;
  }

  // Advance time
  parameters[0]  = D3Q19;                       // Select pressure Poisson D3Q19 solver
  parameters[7]  = 24.f * in.dimensions[0];     // Max allowed iterations for Poisson solver
  parameters[8]  = 0.f;                         // Flag whether to initialize PDFs from Q
  parameters[12] = 1.f;                         // Flag to return PDFs to host
  parameters[13] = 1.f;                         // Progress indicator display
  parameters[14] = parameters[7] / 10.f;        // Progress indicator stride

  std::cout << "Running microLBM for pressure and velocity...\n";
  status = microLBM(parameters, out.pressure_flows_and_stress, out.labels, out.boundary_conditions, out.fPDF, out.fnPDF, in.dimensions[0], in.dimensions[1], in.dimensions[2]);
  if (!status)
    std::cout << "Successfully ran microLBM for pressure and velocity\n";
  else
  {
    std::cerr << "Error occurred while running microLBM for pressure and velocity\n";
  }
/*
  // Calculate mean flow rate for the outlet pressure for the next iteration
  int label;
  float new_outlet_pressure = 0;
  // Imposed pressure drop
  if (cfg.inlet_boundary_condition == BoundaryCondition::Pressure && cfg.outlet_boundary_condition == BoundaryCondition::Pressure)
  {
    new_outlet_pressure = 1.0f - (pressure_drop_Pa / cfg.reference_pressure);
  }
  // Constant volumetric flow rate
  else if (cfg.inlet_boundary_condition == BoundaryCondition::Flow && cfg.outlet_boundary_condition == BoundaryCondition::Flow)
  {

    for (int i = 0; i < num_cells; i++)
    {
      label = boundary_labels[i];
      if (label >= 201 && label <= 299)
      {
        new_outlet_pressure += pressure_flow[i];
      }
    }
    new_outlet_pressure /= num_outflow_nodes;
  }

  for (int c = 0; c < num_cells; c++)
  {
    label = boundary_labels[c];
    if (label >= 101 && label < 200) // Inlet
    {
      pressure_flow[c] = 1.0;                                                               // Pressure
      pressure_flow[num_cells + c] = inlet_velocity_m_per_s / reference_velocity_m_per_s;   // X Flow Velocity
      pressure_flow[num_cells * 2 + c] = 0.0;                                               // Y Flow Velocity
      pressure_flow[num_cells * 3 + c] = 0.0;                                               // Z Flow Velocity
    }
    else if (label >= 201 && label < 300) // Outlet
    {
      pressure_flow[c] = new_outlet_pressure;                                                // Pressure
      pressure_flow[num_cells + c] = outlet_velocity_m_per_s / reference_velocity_m_per_s;   // X Flow Velocity
      pressure_flow[num_cells * 2 + c] = 0.0;                                                // Y Flow Velocity
      pressure_flow[num_cells * 3 + c] = 0.0;                                                // Z Flow Velocity
    }
    else if (label >= 1 && label <= 99) // Wall
    {
      //Placeholder for future wall boundary conditions, e.g., permeable
    }
    else if (label == -1)
    {
      pressure_flow[c] = 1.0; // Pressure
      pressure_flow[num_cells + c] = 0.0; // X Flow Velocity
      pressure_flow[num_cells * 2 + c] = 0.0; // Y Flow Velocity
      pressure_flow[num_cells * 3 + c] = 0.0; // Z Flow Velocity
    }
  }
*/

  // Calculate temperature
  std::cout << "Running microLBM for temperature...\n";
  for (int j = 0; j < out.num_cells; j++)
  {
    float p = out.pressure_flows_and_stress[j];
    out.temperature[out.num_cells + j] = out.pressure_flows_and_stress[out.num_cells + j] / p;        // X Flow Velocity / Pressure
    out.temperature[out.num_cells * 2 + j] = out.pressure_flows_and_stress[out.num_cells * 2 + j] / p;// Y Flow Velocity / Pressure
    out.temperature[out.num_cells * 3 + j] = out.pressure_flows_and_stress[out.num_cells * 3 + j] / p;// Z Flow Velocity / Pressure
    int label = out.labels[j];
    if (label >= 0 && label < 99) // Wall
    {
      out.temperature[j] = cfg.wall_temperature;
    }
    else if (label >= 101 && label < 200) // Inlet
    {
      out.temperature[j] = cfg.fluid_temperature;
    }
    else if (label >= 201 && label < 300) // Outlet
    {
      out.temperature[j] = cfg.wall_temperature;
    }
    else
    {
      out.temperature[j] = cfg.fluid_temperature;
    }
  }
  parameters[0]  = D3Q19T;                     // Select temperature advection-diffusion solver D3Q19T
  parameters[7]  = 4.f * in.dimensions[0];     // Max allowed iterations for solver
  parameters[8]  = 1;                          // Flag whether to initialize PDFs from Q
  parameters[12] = 1;                          // Flag to return PDFs to host
  parameters[13] = 1;                          // Progress indicator display
  parameters[14] = in.dimensions[0] / 4.f;     // Progress indicator stride
  status = microLBM(parameters, out.temperature, out.labels, out.boundary_conditions, out.fPDF, out.fnPDF, in.dimensions[0], in.dimensions[1], in.dimensions[2]);
  if (!status)
    std::cout << "Successfully ran microLBM for temperature\n";
  else
  {
    std::cerr << "Error occurred while running microLBM for temperature\n";
    return false;
  }

  // Convert pressures, flows, stresses to the provided input units
  for (int i = 0; i<out.num_cells; i++)
  {
    float p = (out.pressure_flows_and_stress[i] * cfg.reference_pressure) - cfg.reference_pressure;
    out.pressure_flows_and_stress[i] = p;
    out.pressure_flows_and_stress[out.num_cells+i] *= out.reference_velocity;
    out.pressure_flows_and_stress[out.num_cells*2+i] *= out.reference_velocity;
    out.pressure_flows_and_stress[out.num_cells*3+i] *= out.reference_velocity;
    out.pressure_flows_and_stress[out.num_cells*4+i] *= cfg.reference_pressure;
    out.pressure_flows_and_stress[out.num_cells*5+i] *= cfg.reference_pressure;
    out.pressure_flows_and_stress[out.num_cells*6+i] *= cfg.reference_pressure;
    out.pressure_flows_and_stress[out.num_cells*7+i] *= cfg.reference_pressure;
    out.pressure_flows_and_stress[out.num_cells*8+i] *= cfg.reference_pressure;
    out.pressure_flows_and_stress[out.num_cells*9+i] *= cfg.reference_pressure;
  }

//  ===============================Compute Wall Shear Stress========================
//  ================================================================================

    std::cout<<"================Computing Wall Shear Stress=============\n\n";

    int count = 0;  // count the wall sites where stress tensor is singular

    for (int i = 0; i < out.num_cells; ++i) {

        int label = out.labels[i];
        float n_x, n_y, n_z;
        float n0_x, n0_y, n0_z;
        float n3_x, n3_y, n3_z;

        float Sxy, Sxz, Syz;
        float Sxx, Syy, Szz;
        float Sx, Sy, Sz;
        float Sx_new, Sy_new, Sz_new;

        float u_x, u_y, u_z;

        // dir below is same as dirD3Q19 in LBM.cu. These are pre-defined geometric normals for boundary voxels
        const float dir[19][3] = { {0,0,0},{0,0,1},{0,0,-1},{0,1,0},{0,-1,0},{1,0,0},{-1,0,0},{0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1},{1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0},{1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1} };

        if (label > 0 && label <= 99) // wall boundary condition area
        {
            // -----------------------------------------------------------
            // Computing Shear Stress by Uing the Default Existing Normals
            //-------------------------------------------------------------
            Sxy = out.pressure_flows_and_stress[out.num_cells * 4 + i];
            Sxz = out.pressure_flows_and_stress[out.num_cells * 5 + i];
            Syz = out.pressure_flows_and_stress[out.num_cells * 6 + i];

            // Components of normals as obtained from the FindNormals function in LBM.cu
            n_x = dir[label][2];
            n_y = dir[label][1];
            n_z = dir[label][0];

//            std::cout<<"label = "<<label<<" normal = "<<n_x<<","<<n_y<<","<<n_z<<"\n";

            Eigen::Vector3f Existing_Normal, Existing_Unit_Normal, Sigma_Default;
            Existing_Normal << n_x, n_y, n_z;
            Existing_Unit_Normal = Existing_Normal/Existing_Normal.norm();
            n0_x = Existing_Unit_Normal(0);
            n0_y = Existing_Unit_Normal(1);
            n0_z = Existing_Unit_Normal(2);

            float tmp = n0_x*n0_y*n0_z;

            Sx = Sxy*n0_y - Syz*tmp;
            Sy = Syz*n0_z - Sxz*tmp;
            Sz = Sxz*n0_x - Sxy*tmp;
            Sigma_Default<< Sx, Sy, Sz;

            out.wall_shear_stress[i] = Sigma_Default.norm();  // This is the result using the existing normals

            // ---------------------------------------------------------------------
            // Computing Shear Stress by Using Wall Normals Algorithm by Stahl et. al.
            //-----------------------------------------------------------------------

            Sxx = out.pressure_flows_and_stress[out.num_cells * 7 + i];
            Syy = out.pressure_flows_and_stress[out.num_cells * 8 + i];
            Szz = out.pressure_flows_and_stress[out.num_cells * 9 + i];

            u_x = out.pressure_flows_and_stress[out.num_cells + i];
            u_y = out.pressure_flows_and_stress[out.num_cells * 2 + i];
            u_z = out.pressure_flows_and_stress[out.num_cells * 3 + i];

            Eigen::Matrix3f Stress_Tensor, An_Eigen_Base;
            Eigen::Vector3f Velocity, N1, N2, N3, New_Unit_Normal, An_Eigen_Set, Sigma_New;
//            Eigen::Vector3f norm_Eigen_Base;

            Stress_Tensor<< Sxx, Sxy, Sxz,
                            Sxy, Syy, Syz,
                            Sxz, Syz, Szz;
//            std::cout<<Stress_Tensor<<"\n";
//            The if statement below checks if there are any wall sites for which analytical computation of the normal is possible.
            if(Stress_Tensor.determinant() >= 0.f && Stress_Tensor.determinant() <= 1E-20)
            {
                //std::cout<<"Sigma is singular\n";
                count++;
            }

            Velocity<< u_x, u_y, u_z;
            N1 = Velocity/Velocity.norm();

            Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> eigensolver(Stress_Tensor);
            An_Eigen_Set  = eigensolver.eigenvalues().cwiseAbs(); // Absolute of the Eigen values for Stress Tensor
            An_Eigen_Base = eigensolver.eigenvectors();           // Corresponding Unit Eigen Vectors of the Stress Tesnor (Bases for the Eigen Space)
//            norm_Eigen_Base <<  An_Eigen_Base.col(0).norm(),  An_Eigen_Base.col(1).norm(),  An_Eigen_Base.col(2).norm();  //Norm of Eigen vectors. Should be 1.

//          Finding the smallest Eigen value and its corresponding Eigen vector. This will be treated as N2.
            float Smallest_Eigen_Value = An_Eigen_Set.minCoeff();
            for(int j = 0; j<3; j++){
                if(An_Eigen_Set(j) == Smallest_Eigen_Value){
                    N2 = An_Eigen_Base.col(j);  // For being in the Bases of an Eigen Space, N2 is already a unit vector
                }
            }
//            std::cout<<"First column"<<An_Eigen_Base.col(0)<<"\n";
//            std::cout<<"Second Column"<<An_Eigen_Base.col(1)<<"\n";
//            std::cout<<"third column"<<An_Eigen_Base.col(2)<<"\n";

//            std::cout<< "Eigen Set:\n"<<An_Eigen_Set <<"\n";
//            std::cout<<An_Eigen_Set(0)<<","<<An_Eigen_Set(1)<<","<<An_Eigen_Set(2)<<"\n";
//            std::cout<< "Smallest Eigen Value = "<<Smallest_Eigen_Value <<"\n";
//            std::cout<<"Eigen Base:\n"<<An_Eigen_Base<<"\n";
//            std::cout<< "Corresponding Eigen Vector:\n"<<N2<<"\n";

//------------------------------------------------------
// Below routine to find the Kernel directly didn't work
//            Eigen::FullPivLU<Eigen::Matrix3f> lu(Stress_Tensor);
//            Eigen::Vector3f A_null_space = lu.kernel();
//std::cout<<"A_null_space"<< A_null_space<<"\n";
//------------------------------------------------------
            N3 = N1.cross(N2);
            New_Unit_Normal = N3/N3.norm();

            n3_x = New_Unit_Normal(0);
            n3_y = New_Unit_Normal(1);
            n3_z = New_Unit_Normal(2);

            float tmp1 = n3_x*n3_y*n3_z;

            Sx_new = Sxy*n3_y - Syz*tmp1;
            Sy_new = Syz*n3_z - Sxz*tmp1;
            Sz_new = Sxz*n3_x - Sxy*tmp1;

            Sigma_New<< Sx_new, Sy_new, Sz_new;

            // Final result using the newly computed normals
            out.wall_shear_stress[out.num_cells + i] = Sigma_New.norm();

        } else {
            out.wall_shear_stress[i] = 0.f;
            out.wall_shear_stress[out.num_cells+i] = 0.f;
        }
    }
    std::cout<<"Out of "<<out.num_wall_cells<<" wall sites, viscous stress tensor is singular at ="<<count<<" sites\n\n";
    std::cout<<"===============Finished Computing Wall Shear Stress=================\n\n";

  return true;
}


// Populates `lbm` from the three files of a legacy scenario.
//
//   config_file : text file; the first 4 lines are skipped as a header, every
//                 following line must start with "<value> <name>".
//   voxel_file  : binary file holding one byte per cell (label codes 0, 1, 10, 20).
//   grid_file   : text file; line 1 holds the three grid dimensions, line 2 the
//                 grid spacing. Nothing past line 2 is read.
//   lbm         : receives cfg.* (from the config file), in.dimensions and
//                 in.grid_spacing (from the grid file) and in.labels (from the
//                 voxel file).
//
// Returns true on success. Returns false, after writing a message to std::cerr,
// when a file cannot be opened, a line cannot be parsed, the dimensions are not
// all positive, or the voxel file holds fewer bytes than the grid has cells.
bool LBM::LoadLegacyScenario(std::string const& config_file,
                             std::string const& voxel_file,
                             std::string const& grid_file,
                             LBM& lbm)
{
  std::string line;
  std::istringstream iss;

  /////////////////
  // CONFIG FILE //
  /////////////////
  std::ifstream cFile(config_file, std::ios::in);
  if (!cFile)
  {
    std::cerr << "Error occurred while opening config file for reading: " << config_file << "\n";
    return false;
  }

  float value;
  std::string name;
  int line_num = 0;
  while (std::getline(cFile, line))
  {
    if (line_num++ < 4)// Skip the header
      continue;
    iss.clear();
    iss.str(line);
    if (!(iss >> value >> name))
    {
      std::cerr << "Error occurred while reading a value from config file: " << config_file << "\n";
      return false;
    }

    if (name == "pRef")
    {
      lbm.cfg.reference_pressure = value;
    }
    else if (name == "cSound")
    {
      lbm.cfg.speed_of_sound = value;
    }
    else if (name == "visc")
    {
      lbm.cfg.viscosity = value;
    }
    else if (name == "alphaT")
    {
      lbm.cfg.thermal_coefficient = value;
    }
    else if (name == "rho")
    {
      lbm.cfg.density = value;
    }
    else if (name == "WallTemp")
    {
      lbm.cfg.wall_temperature = value;
    }
    else if (name == "AirTemp")
    {
      lbm.cfg.fluid_temperature = value;
    }
    else if (name == "iRunMode")
    {
      // Run mode 1 imposes a pressure drop; any other value imposes a flow rate.
      if (value == 1)
      {
        lbm.cfg.inlet_boundary_condition = BoundaryCondition::Pressure;
        lbm.cfg.outlet_boundary_condition = BoundaryCondition::Pressure;
      }
      else
      {
        lbm.cfg.inlet_boundary_condition = BoundaryCondition::Flow;
        lbm.cfg.outlet_boundary_condition = BoundaryCondition::Flow;
      }
    }
    // "delp" and "Qflow" are only honoured when they match the boundary
    // condition currently stored in lbm.cfg, so they depend on "iRunMode"
    // appearing earlier in the file (or on the configuration's prior state).
    else if (name == "delp" && lbm.cfg.inlet_boundary_condition == BoundaryCondition::Pressure)
    {
      lbm.cfg.inlet_value = 0;
      lbm.cfg.outlet_value = -value;
    }
    else if (name == "Qflow" && lbm.cfg.inlet_boundary_condition == BoundaryCondition::Flow)
    {
      // Dividing by 6e4 converts l/min to m^3/s (presumably the unit of the
      // legacy file -- confirm against the file format).
      lbm.cfg.inlet_value = value / 6e04f;
      lbm.cfg.outlet_value = value / 6e04f;
    }
    // Any other name is silently ignored.
  }

  // Imposed volumetric flow rate
  //cfg.inlet_boundary_condition = BoundaryCondition::Flow;
  //cfg.inlet_value = 0.016/6e04;       // m^3/s (Volumetric airflow during inspiration (?). Conversion is made from l/min for numerical consistency)
  //cfg.outlet_boundary_condition = BoundaryCondition::Flow;
  //cfg.outlet_value = 0.016/6e04;

  ///////////////
  // GRID FILE //
  ///////////////

  std::ifstream gFile(grid_file, std::ios::in);
  if (!gFile)
  {
    std::cerr << "Error occurred while opening grid file for reading: " << grid_file << "\n";
    return false;
  }

  // We only need to read the first 2 lines
  // Line 1
  bool const have_dimensions_line = static_cast<bool>(std::getline(gFile, line));
  // str() does not reset the stream state, and the last config line usually
  // leaves eofbit set, so the flags must be cleared before reusing the stream.
  iss.clear();
  iss.str(line);
  if (!have_dimensions_line ||
      !(iss >> lbm.in.dimensions[0] >> lbm.in.dimensions[1] >> lbm.in.dimensions[2]))
  {
    std::cerr << "Error occurred while reading dimensions from grid file: " << grid_file << "\n";
    return false;
  }
  // Line 2
  bool const have_spacing_line = static_cast<bool>(std::getline(gFile, line));
  iss.clear();
  iss.str(line);
  if (!have_spacing_line || !(iss >> lbm.in.grid_spacing))
  {
    std::cerr << "Error occurred while reading grid spacing from grid file: " << grid_file << "\n";
    return false;
  }

  ////////////////
  // VOXEL FILE //
  ////////////////

  lbm.in.labels.clear();
  lbm.in.source_labels.clear();
  lbm.in.source_to_lbm_label_map.clear();

  if (lbm.in.dimensions[0] <= 0 || lbm.in.dimensions[1] <= 0 || lbm.in.dimensions[2] <= 0)
  {
    std::cerr << "Grid dimensions must all be positive in grid file: " << grid_file << "\n";
    return false;
  }
  int const total_cells = lbm.in.dimensions[0] * lbm.in.dimensions[1] * lbm.in.dimensions[2];

  int num_wall_cells = 0;   // Never incremented here; reported as 0 in the summary below
  int num_inflow_cells = 0;
  int num_outflow_cells = 0;
  int num_inactive_cells = 0;
  int num_interior_cells = 0;
  int num_unknown_cells = 0;

  std::ifstream vFile(voxel_file, std::ios::in | std::ios::binary);
  if (!vFile)
  {
    std::cerr << "Error occurred while opening voxelgrid.data file for reading.\n";
    return false;
  }

  // A std::string serves as a self-releasing byte buffer (one byte per cell).
  std::string buffer(static_cast<std::string::size_type>(total_cells), '\0');
  vFile.read(&buffer[0], total_cells);
  if (vFile.gcount() != total_cells)
  {
    // A short read would otherwise leave cells without real data.
    std::cerr << "Voxel file " << voxel_file << " holds " << vFile.gcount()
              << " bytes but the grid requires " << total_cells << ".\n";
    return false;
  }

  for (char const code : buffer)
  {
    switch (code)
    {
    case 0:
      lbm.in.labels.push_back(BoundaryTypes::Inactive); //inactive - outside of the region of interest
      num_inactive_cells++;
      break;
    case 1:
      // NOTE(review): interior cells are stored with the Wall label but counted
      // as interior. Behaviour kept as-is; confirm this is intended.
      lbm.in.labels.push_back(BoundaryTypes::Wall);  //interior of the region of interest
      num_interior_cells++;
      break;
    case 10:
      lbm.in.labels.push_back(BoundaryTypes::Inlet);  //inlet boundary condition area
      num_inflow_cells++;
      break;
    case 20:
      lbm.in.labels.push_back(BoundaryTypes::Outlet);  //outlet boundary condition area
      num_outflow_cells++;
      break;
    default:
      // NOTE(review): no label is stored for an unknown code, so in.labels ends
      // up shorter than total_cells whenever this branch is hit. Behaviour kept
      // as-is; confirm whether a placeholder label should be pushed.
      num_unknown_cells++;
      break;
    }
  }

  std::cout << "Successfully read voxel grid from file. " << voxel_file << "\n";
  std::cout << "Initial Boundary Codes: \n";
  std::cout << "Grid Size (x, y, z) = (" << lbm.in.dimensions[0] << "," << lbm.in.dimensions[1] << "," << lbm.in.dimensions[2] << "). \n";
  std::cout << "Number of Nodes: " << total_cells << ". \n";
  std::cout << "Number of Inactive Nodes: " << num_inactive_cells << ". \n";
  std::cout << "Number of Interior Nodes: " << num_interior_cells << ". \n";
  std::cout << "Number of Wall Nodes: " << num_wall_cells << ". \n";
  std::cout << "Number of Inflow Nodes: " << num_inflow_cells << ". \n";
  std::cout << "Number of Outflow Nodes: " << num_outflow_cells << ". \n";
  std::cout << "Number of Unknown Nodes: " << num_unknown_cells << ". \n";

  return true;
}

// Writes a multi-line, human-readable summary of this configuration to `str`.
// Each label mirrors the key name used in the legacy config file; "Delta P" is
// the inlet value minus the outlet value and "Inflow" is the inlet value.
void LBM::Configuration::ToString(std::ostream& str) const
{
  str << "LBM Configuration : \n";
  str << "  pRef = " << reference_pressure << "\n";
  // Label corrected from "pSound" to match the "cSound" key the loader reads.
  str << "  cSound = " << speed_of_sound << "\n";
  str << "  visc = " << viscosity << "\n";
  str << "  alphaT = " << thermal_coefficient << "\n";
  str << "  rho = " << density << "\n";
  str << "  WallTemp = " << wall_temperature << "\n";
  str << "  AirTemp = " << fluid_temperature << "\n";
  str << "  Delta P = " << (inlet_value - outlet_value) << "\n";
  str << "  Inflow = " << inlet_value << "\n";
}
