//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//============================================================================

#include "FDirectSendCompositor.h"
#include "ImagefCompositor.h"

#include "vtkm_diy_collect.h"
#include "vtkm_diy_image_block.h"
#include "vtkm_diy_utils.h"

/*
#include <vtkh/compositing/MPICollect.hpp>
#include <vtkh/compositing/vtkh_diy_collect.hpp>
#include <vtkh/compositing/vtkh_diy_utils.hpp>

#include <diy/master.hpp>
#include <diy/mpi.hpp>
#include <diy/partners/swap.hpp>
#include <diy/reduce-operations.hpp>
#include <diy/reduce.hpp>
*/

namespace vtkm
{
namespace rendering
{
namespace compositing
{

namespace internal
{
// Functor handed to vtkmdiy::all_to_all. It is invoked in two kinds of
// rounds, told apart by whether the proxy's incoming link is empty:
//
//  * empty incoming link: every local image is cut into one sub-image per
//    decomposer block and the pieces are enqueued to the matching target;
//  * non-empty incoming link: when the first local image carries a composite
//    order (CompositeOrder != -1) the received pieces are gathered, blended
//    with ImagefCompositor::OrderedComposite, and the result is swapped into
//    the block's output image.
struct Redistribute2
{
  using Decomposer = vtkmdiy::RegularDecomposer<vtkmdiy::DiscreteBounds>;

  // Stored by reference: the decomposer has to outlive this functor.
  const Decomposer& m_decomposer;

  Redistribute2(const Decomposer& decomposer)
    : m_decomposer(decomposer)
  {
  }

  // v_block is the type-erased block pointer; it is interpreted as a
  // MultiImageBlock<Imagef>. proxy gives access to the in/out links and queues.
  void operator()(void* v_block, const vtkmdiy::ReduceProxy& proxy) const
  {
    auto* block = static_cast<MultiImageBlock<Imagef>*>(v_block);

    //
    // First round: nothing is incoming yet. Take the images we have, chop
    // each one up into pieces, and send every piece to the domain
    // responsible for that portion of the image.
    //
    if (proxy.in_link().size() == 0)
    {
      // Fmt::Println("Redistribute: first round");
      const int num_pieces = m_decomposer.nblocks;
      const int num_local = static_cast<int>(block->m_images.size());

      std::map<vtkmdiy::BlockID, std::vector<Imagef>> outgoing;

      for (int piece = 0; piece < num_pieces; ++piece)
      {
        vtkmdiy::DiscreteBounds diy_bounds(3);
        m_decomposer.fill_bounds(diy_bounds, piece);
        vtkm::Bounds piece_bounds = vtkm::rendering::compositing::DIYBoundsToVTKM(diy_bounds);

        vtkmdiy::BlockID dest = proxy.out_link().target(piece);

        // One slot per local image, filled with that image's sub-region.
        std::vector<Imagef>& pieces = outgoing[dest];
        pieces.resize(num_local);

        for (int img = 0; img < num_local; ++img)
        {
          // Fmt::Println("Redistribute: sending image {} to {}: {}", img, dest.gid, piece_bounds);
          pieces[img].SubsetFrom(block->m_images[img], piece_bounds);
        }
      }

      for (auto& entry : outgoing)
      {
        proxy.enqueue(entry.first, entry.second);
      }
      return;
    }

    // Later rounds are steered by the properties of the first local image;
    // at(0) throws std::out_of_range when the block holds no images.
    auto& reference_image = block->m_images.at(0);

    if (reference_image.CompositeOrder != -1)
    {
      // Fmt::Println("Redistribute: second round");
      // Blend images according to vis order.
      std::vector<Imagef> received;
      const int num_senders = proxy.in_link().size();
      for (int sender = 0; sender < num_senders; ++sender)
      {
        std::vector<Imagef> incoming;
        const int gid = proxy.in_link().target(sender).gid;
        proxy.dequeue(gid, incoming);
        received.insert(received.end(), incoming.begin(), incoming.end());
      }

      ImagefCompositor compositor;
      compositor.OrderedComposite(received);

      // The first element of the composited list is taken as the result.
      block->m_output.Swap(received[0]);
      return;
    }

    if (reference_image.GetHasTransparency())
    {
      Fmt::Println("Redistribute: third round");
      // Disabled: images with a depth buffer and transparency. Only the
      // diagnostic above runs; the intended blending path is kept for reference.
      /*
      std::vector<Image> images;
      for (int i = 0; i < proxy.in_link().size(); ++i)
      {
        std::vector<Image> incoming;
        int gid = proxy.in_link().target(i).gid;
        proxy.dequeue(gid, incoming);
        const int in_size = incoming.size();
        for (int img = 0; img < in_size; ++img)
        {
          images.emplace_back(incoming[img]);
        }
      }

      ImagefCompositor compositor;
      compositor.ZBufferBlend(images);
      */
    }
  }
};

} //namespace internal

// Nothing to set up explicitly; members are default-initialized.
FDirectSendCompositor::FDirectSendCompositor() = default;

// No explicit cleanup is performed here; members release themselves.
FDirectSendCompositor::~FDirectSendCompositor() = default;

// Composites the local volume images in `images` across the ranks of
// `diy_comm` using two DIY all-to-all exchanges:
//
//   1. internal::Redistribute2 runs over MultiImageBlock<Imagef> blocks that
//      wrap `images` (input) and a local `sub_image` (output).
//   2. CollectImages<Imagef> runs over ImageBlock<Imagef> blocks that wrap
//      `sub_image`.
//
// Afterwards `sub_image` is swapped into images.at(0).
//
// diy_comm: communicator; its size determines the number of blocks (one block
//           per rank).
// images:   local images. images.at(0).OrigBounds supplies the global bounds
//           for the 2D decomposition; std::vector::at throws
//           std::out_of_range when `images` is empty.
void FDirectSendCompositor::CompositeVolume(vtkmdiy::mpi::communicator& diy_comm,
                                            std::vector<Imagef>& images)
{
  vtkmdiy::DiscreteBounds global_bounds =
    vtkm::rendering::compositing::VTKMBoundsToDIY(images.at(0).OrigBounds);

  const int num_threads = 1;
  const int num_blocks = diy_comm.size();
  // Passed as the last argument of vtkmdiy::all_to_all (the `k` of the
  // underlying reduction — see the DIY all_to_all documentation).
  const int magic_k = 8;
  // The image plane is decomposed in two dimensions in both phases.
  const int dims = 2;
  Imagef sub_image;
  //
  // DIY does not seem to like being called with different block types
  // so we isolate them within separate blocks
  //
  {
    // The blocks owned by this master are the MultiImageBlock<Imagef> objects
    // that Redistribute2 operates on, so the destroy callback must delete them
    // as exactly that type. Deleting them through a pointer to the unrelated
    // ImageBlock<Imagef> type would be undefined behaviour.
    vtkmdiy::Master master(diy_comm,
                           num_threads,
                           -1,
                           0,
                           [](void* b)
                           {
                             delete static_cast<MultiImageBlock<Imagef>*>(b);
                           });

    // create an assigner with one block per rank
    vtkmdiy::ContiguousAssigner assigner(num_blocks, num_blocks);

    AddMultiImageBlock<Imagef> create(master, images, sub_image);

    vtkmdiy::RegularDecomposer<vtkmdiy::DiscreteBounds> decomposer(dims, global_bounds, num_blocks);
    decomposer.decompose(diy_comm.rank(), assigner, create);

    vtkmdiy::all_to_all(master, assigner, internal::Redistribute2(decomposer), magic_k);
  }

  {
    // Second phase: this master owns ImageBlock<Imagef> blocks wrapping
    // sub_image, so they are deleted as ImageBlock<Imagef>.
    vtkmdiy::Master master(diy_comm,
                           num_threads,
                           -1,
                           0,
                           [](void* b)
                           {
                             delete static_cast<ImageBlock<Imagef>*>(b);
                           });
    vtkmdiy::ContiguousAssigner assigner(num_blocks, num_blocks);

    vtkmdiy::RegularDecomposer<vtkmdiy::DiscreteBounds> decomposer(dims, global_bounds, num_blocks);
    AddImageBlock<Imagef> all_create(master, sub_image);
    decomposer.decompose(diy_comm.rank(), assigner, all_create);

    // All ranks synchronize before the collection exchange starts.
    diy_comm.barrier();
    //MPI_Barrier(diy_comm);

    //MPICollect(sub_image,diy_comm);
    vtkmdiy::all_to_all(master, assigner, CollectImages<Imagef>(decomposer), magic_k);
  }

  Fmt::Println0("sub_image: {}", sub_image.ToString());
  // Hand the result back to the caller through the first image slot.
  images.at(0).Swap(sub_image);
}

// Returns everything accumulated in m_timing_log so far and resets the log's
// contents to the empty string, so each call reports only what was logged
// since the previous call.
std::string FDirectSendCompositor::GetTimingString()
{
  std::string timings = m_timing_log.str();
  m_timing_log.str("");
  return timings;
}

}
}
} //namespace vtkm::rendering::compositing
