Commit 161ffdff authored by Robert Maynard

Move zfp helper functions to zfp namespace and remove debug code

parent bb74e177
......@@ -69,27 +69,18 @@ public:
const vtkm::Id totalBlocks = (paddedDims / four);
size_t outbits = detail::CalcMem1d(paddedDims, stream.minbits);
size_t outbits = zfp::detail::CalcMem1d(paddedDims, stream.minbits);
vtkm::Id outsize = vtkm::Id(outbits / sizeof(ZFPWord));
vtkm::cont::ArrayHandle<vtkm::Int64> output;
// hopefully this inits/allocates the mem only on the device
vtkm::cont::ArrayHandleConstant<vtkm::Int64> zero(0, outsize);
vtkm::cont::Algorithm::Copy(zero, output);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// {
// Timer timer;
// vtkm::cont::ArrayHandleCounting<vtkm::Id> one(0,1,1);
// vtkm::worklet::DispatcherMapField<detail::MemTransfer> dis;
// dis.Invoke(one,data);
// vtkm::Float64 time = timer.GetElapsedTime();
// std::cout<<"Copy scalars "<<time<<"\n";
// }
// launch 1 thread per zfp block
vtkm::cont::ArrayHandleCounting<vtkm::Id> blockCounter(0, 1, totalBlocks);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// Timer timer;
vtkm::worklet::DispatcherMapField<zfp::Encode1> compressDispatcher(
zfp::Encode1(dims, paddedDims, stream.maxbits));
......
......@@ -76,29 +76,15 @@ public:
vtkm::Id totalBlocks = (paddedDims / four);
detail::CalcMem1d(paddedDims, stream.minbits);
zfp::detail::CalcMem1d(paddedDims, stream.minbits);
output.Allocate(dims);
// hopefully this inits/allocates the mem only on the device
//
//vtkm::cont::ArrayHandleConstant<vtkm::Int64> zero(0, outsize);
//vtkm::cont::Algorithm::Copy(zero, output);
//
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// {
// Timer timer;
// vtkm::cont::ArrayHandleCounting<vtkm::Id> one(0,1,1);
// vtkm::worklet::DispatcherMapField<detail::MemTransfer> dis;
// dis.Invoke(one,output);
// dis.Invoke(one,encodedData);
// vtkm::Float64 time = timer.GetElapsedTime();
// std::cout<<"Copy scalars "<<time<<"\n";
// }
output.Allocate(dims);
// launch 1 thread per zfp block
vtkm::cont::ArrayHandleCounting<vtkm::Id> blockCounter(0, 1, totalBlocks);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// Timer timer;
vtkm::worklet::DispatcherMapField<zfp::Decode1> decompressDispatcher(
zfp::Decode1(dims, paddedDims, stream.maxbits));
......
......@@ -71,26 +71,18 @@ public:
const vtkm::Id totalBlocks = (paddedDims[0] / four) * (paddedDims[1] / (four));
size_t outbits = detail::CalcMem2d(paddedDims, stream.minbits);
size_t outbits = zfp::detail::CalcMem2d(paddedDims, stream.minbits);
vtkm::Id outsize = vtkm::Id(outbits / sizeof(ZFPWord));
vtkm::cont::ArrayHandle<vtkm::Int64> output;
// hopefully this inits/allocates the mem only on the device
vtkm::cont::ArrayHandleConstant<vtkm::Int64> zero(0, outsize);
vtkm::cont::Algorithm::Copy(zero, output);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// {
// Timer timer;
// vtkm::cont::ArrayHandleCounting<vtkm::Id> one(0,1,1);
// vtkm::worklet::DispatcherMapField<detail::MemTransfer> dis;
// dis.Invoke(one,data);
// vtkm::Float64 time = timer.GetElapsedTime();
// }
// launch 1 thread per zfp block
vtkm::cont::ArrayHandleCounting<vtkm::Id> blockCounter(0, 1, totalBlocks);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// Timer timer;
vtkm::worklet::DispatcherMapField<zfp::Encode2> compressDispatcher(
zfp::Encode2(dims, paddedDims, stream.maxbits));
......
......@@ -78,29 +78,16 @@ public:
vtkm::Id totalBlocks = (paddedDims[0] / four) * (paddedDims[1] / (four));
detail::CalcMem2d(paddedDims, stream.minbits);
zfp::detail::CalcMem2d(paddedDims, stream.minbits);
output.Allocate(dims[0] * dims[1]);
// hopefully this inits/allocates the mem only on the device
//
//vtkm::cont::ArrayHandleConstant<vtkm::Int64> zero(0, outsize);
//vtkm::cont::Algorithm::Copy(zero, output);
//
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// {
// Timer timer;
// vtkm::cont::ArrayHandleCounting<vtkm::Id> one(0,1,1);
// vtkm::worklet::DispatcherMapField<detail::MemTransfer> dis;
// dis.Invoke(one,output);
// dis.Invoke(one,encodedData);
output.Allocate(dims[0] * dims[1]);
// vtkm::Float64 time = timer.GetElapsedTime();
// std::cout<<"Copy scalars "<<time<<"\n";
// }
// launch 1 thread per zfp block
vtkm::cont::ArrayHandleCounting<vtkm::Id> blockCounter(0, 1, totalBlocks);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// Timer timer;
vtkm::worklet::DispatcherMapField<zfp::Decode2> decompressDispatcher(
zfp::Decode2(dims, paddedDims, stream.maxbits));
......
......@@ -73,27 +73,18 @@ public:
(paddedDims[0] / four) * (paddedDims[1] / (four) * (paddedDims[2] / four));
size_t outbits = detail::CalcMem3d(paddedDims, stream.minbits);
size_t outbits = zfp::detail::CalcMem3d(paddedDims, stream.minbits);
vtkm::Id outsize = vtkm::Id(outbits / sizeof(ZFPWord));
vtkm::cont::ArrayHandle<vtkm::Int64> output;
// hopefully this inits/allocates the mem only on the device
vtkm::cont::ArrayHandleConstant<vtkm::Int64> zero(0, outsize);
vtkm::cont::Algorithm::Copy(zero, output);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// {
// Timer timer;
// vtkm::cont::ArrayHandleCounting<vtkm::Id> one(0,1,1);
// vtkm::worklet::DispatcherMapField<detail::MemTransfer> dis;
// dis.Invoke(one,data);
// vtkm::Float64 time = timer.GetElapsedTime();
// std::cout<<"Copy scalars "<<time<<"\n";
// }
// launch 1 thread per zfp block
vtkm::cont::ArrayHandleCounting<vtkm::Id> blockCounter(0, 1, totalBlocks);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// Timer timer;
vtkm::worklet::DispatcherMapField<zfp::Encode3> compressDispatcher(
zfp::Encode3(dims, paddedDims, stream.maxbits));
......
......@@ -40,44 +40,6 @@ namespace vtkm
{
namespace worklet
{
namespace detail
{
//size_t CalcMem3d(const vtkm::Id3 dims,
// const int bits_per_block)
//{
// const size_t vals_per_block = 64;
// const size_t size = dims[0] * dims[1] * dims[2];
// size_t total_blocks = size / vals_per_block;
// const size_t bits_per_word = sizeof(ZFPWord) * 8;
// const size_t total_bits = bits_per_block * total_blocks;
// const size_t alloc_size = total_bits / bits_per_word;
// return alloc_size * sizeof(ZFPWord);
//}
//class MemTransfer : public vtkm::worklet::WorkletMapField
//{
//public:
// VTKM_CONT
// MemTransfer()
// {
// }
// using ControlSignature = void(FieldIn<>, WholeArrayInOut<>);
// using ExecutionSignature = void(_1, _2);
// template<typename PortalType>
// VTKM_EXEC
// void operator()(const vtkm::Id id,
// PortalType& outValue) const
// {
// (void) id;
// (void) outValue;
// }
//}; //class MemTransfer
} // namespace detail
class ZFPDecompressor
{
public:
......@@ -111,29 +73,15 @@ public:
(paddedDims[0] / four) * (paddedDims[1] / (four) * (paddedDims[2] / four));
detail::CalcMem3d(paddedDims, stream.minbits);
zfp::detail::CalcMem3d(paddedDims, stream.minbits);
output.Allocate(dims[0] * dims[1] * dims[2]);
// hopefully this inits/allocates the mem only on the device
//
//vtkm::cont::ArrayHandleConstant<vtkm::Int64> zero(0, outsize);
//vtkm::cont::Algorithm::Copy(zero, output);
//
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// {
// Timer timer;
// vtkm::cont::ArrayHandleCounting<vtkm::Id> one(0,1,1);
// vtkm::worklet::DispatcherMapField<detail::MemTransfer> dis;
// dis.Invoke(one,output);
// dis.Invoke(one,encodedData);
// vtkm::Float64 time = timer.GetElapsedTime();
// std::cout<<"Copy scalars "<<time<<"\n";
// }
output.Allocate(dims[0] * dims[1] * dims[2]);
// launch 1 thread per zfp block
vtkm::cont::ArrayHandleCounting<vtkm::Id> blockCounter(0, 1, totalBlocks);
// using Timer = vtkm::cont::Timer<vtkm::cont::DeviceAdapterTagSerial>;
// Timer timer;
vtkm::worklet::DispatcherMapField<zfp::Decode3> decompressDispatcher(
zfp::Decode3(dims, paddedDims, stream.maxbits));
......
......@@ -39,7 +39,7 @@
template <typename T>
void writeArray(vtkm::cont::ArrayHandle<T>& field, std::string filename)
{
auto val = vtkm::worklet::GetVTKMPointer(field);
auto val = vtkm::worklet::zfp::detail::GetVTKMPointer(field);
std::ofstream output(filename, std::ios::binary | std::ios::out);
output.write(reinterpret_cast<char*>(val), field.GetNumberOfValues() * 8);
output.close();
......@@ -60,8 +60,7 @@ void Test1D(int rate)
vtkm::cont::testing::MakeTestDataSet testDataSet;
vtkm::cont::DataSet dataset = testDataSet.Make1DUniformDataSet2();
auto dynField = dataset.GetField("pointvar").GetData();
auto field = dynField.Cast<Handle64>();
//writeArray(field, "orig.zfp");
vtkm::worklet::ZFP1DCompressor compressor;
vtkm::worklet::ZFP1DDecompressor decompressor;
......@@ -98,7 +97,6 @@ void Test2D(int rate)
vtkm::cont::testing::MakeTestDataSet testDataSet;
vtkm::cont::DataSet dataset = testDataSet.Make2DUniformDataSet2();
auto dynField = dataset.GetField("pointvar").GetData();
auto field = dynField.Cast<Handle64>();
vtkm::worklet::ZFP2DCompressor compressor;
vtkm::worklet::ZFP2DDecompressor decompressor;
......@@ -139,7 +137,6 @@ void Test3D(int rate)
vtkm::cont::testing::MakeTestDataSet testDataSet;
vtkm::cont::DataSet dataset = testDataSet.Make3DUniformDataSet3(dims);
auto dynField = dataset.GetField("pointvar").GetData();
;
vtkm::worklet::ZFPCompressor compressor;
vtkm::worklet::ZFPDecompressor decompressor;
......
......@@ -56,9 +56,6 @@ struct BlockReader
m_buffer >>= m_current_bit;
m_block_idx = block_idx;
//std::cout<<"Reader index "<<Index<<"\n";
//print_bits(m_buffer);
//print_bits(Words.Get(Index));
}
inline VTKM_EXEC unsigned int read_bit()
......@@ -121,4 +118,4 @@ private:
} // namespace zfp
} // namespace worklet
} // namespace vtkm
#endif // vtk_m_worklet_zfp_type_info_h
#endif // vtk_m_worklet_zfp_block_reader_h
......@@ -45,46 +45,15 @@ struct BlockWriter
const int m_maxbits;
AtomicPortalType& Portal;
//int debug_index;
VTKM_EXEC BlockWriter(AtomicPortalType& portal, const int& maxbits, const vtkm::Id& block_idx)
: m_current_bit(0)
, m_maxbits(maxbits)
, Portal(portal)
{
m_word_index = (block_idx * maxbits) / vtkm::Int32(sizeof(Word) * 8);
// debug_index = m_word_index;
//std::cout<<"** Block "<<block_idx<<" start "<<m_word_index<<"\n";
m_start_bit = vtkm::Int32((block_idx * maxbits) % vtkm::Int32(sizeof(Word) * 8));
}
template <typename T>
void print_bits(T bits)
{
const int bit_size = sizeof(T) * 8;
for (int i = bit_size - 1; i >= 0; --i)
{
T one = 1;
T mask = one << i;
int val = (bits & mask) >> i;
printf("%d", val);
}
printf("\n");
}
void print()
{
//vtkm::Int64 v = Portal.Add(debug_index,0);
//std::cout<<"current bit "<<m_current_bit<<" debug_index "<<debug_index<<" ";
//print_bits(*reinterpret_cast<vtkm::UInt64*>(&v));
}
// void print(int index)
// {
// vtkm::Int64 v = Portal.Add(index, 0);
// //print_bits(*reinterpret_cast<vtkm::UInt64*>(&v));
// }
inline VTKM_EXEC void Add(const vtkm::Id index, Word& value)
{
UIntInt newval;
......@@ -92,27 +61,11 @@ struct BlockWriter
(void)old;
newval.uintpart = value;
Portal.Add(index, newval.intpart);
//old.uintpart = 0;
//UIntInt expected;
//expected.uintpart = newval.uintpart;
//while(old.uintpart != expected.uintpart)
//{
// expected.uintpart = old.uintpart + newval.uintpart;
// old.intpart = Portal.CompareAndSwap(index, expected.intpart, old.intpart);
//}
}
inline VTKM_EXEC
//void write_bits(const unsigned int &bits, const uint &n_bits, const uint &bit_offset)
vtkm::UInt64
write_bits(const vtkm::UInt64& bits, const unsigned int& n_bits)
inline VTKM_EXEC vtkm::UInt64 write_bits(const vtkm::UInt64& bits, const unsigned int& n_bits)
{
//std::cout<<"write nbits "<<n_bits<<" "<<m_current_bit<<"\n";
//bool print = m_word_index == 0 && m_start_bit == 0;
const int wbits = sizeof(Word) * 8;
//if(bits == 0) { printf("no\n"); return;}
//uint seg_start = (m_start_bit + bit_offset) % wbits;
//int write_index = m_word_index + (m_start_bit + bit_offset) / wbits;
unsigned int seg_start = (m_start_bit + m_current_bit) % wbits;
vtkm::Id write_index = m_word_index;
write_index += vtkm::Id((m_start_bit + m_current_bit) / wbits);
......@@ -129,26 +82,13 @@ struct BlockWriter
Word b = bits - left;
Word add = b << shift;
Add(write_index, add);
//debug_index = write_index;
//if(write_index == 0)
//{
// std::cout<<"*******\n";
// std::cout<<"Current bit "<<m_current_bit<<" writing ";
// print_bits(add);
// print();
// std::cout<<"*******\n";
//}
// n_bits straddles the word boundary
bool straddle = seg_start < sizeof(Word) * 8 && seg_end >= sizeof(Word) * 8;
if (straddle)
{
Word rem = b >> (sizeof(Word) * 8 - shift);
Add(write_index + 1, rem);
//std::cout<<"======\n";
//print_bits(rem);
//std::cout<<"======\n";
// printf("Straddle "); print_bits(rem);
//debug_index = write_index +1;
}
m_current_bit += n_bits;
return bits >> (Word)n_bits;
......@@ -157,16 +97,10 @@ struct BlockWriter
// TODO: optimize
vtkm::UInt32 VTKM_EXEC write_bit(const unsigned int& bit)
{
//bool print = m_word_index == 0 && m_start_bit == 0;
const int wbits = sizeof(Word) * 8;
//if(bits == 0) { printf("no\n"); return;}
//uint seg_start = (m_start_bit + bit_offset) % wbits;
//int write_index = m_word_index + (m_start_bit + bit_offset) / wbits;
unsigned int seg_start = (m_start_bit + m_current_bit) % wbits;
vtkm::Id write_index = m_word_index;
write_index += vtkm::Id((m_start_bit + m_current_bit) / wbits);
//uint seg_end = seg_start;
//int write_index = m_word_index;
unsigned int shift = seg_start;
// we may be asked to write less bits than exist in 'bits'
// so we have to make sure that anything after n is zero.
......@@ -185,4 +119,4 @@ struct BlockWriter
} // namespace zfp
} // namespace worklet
} // namespace vtkm
#endif // vtk_m_worklet_zfp_type_info_h
#endif // vtk_m_worklet_zfp_block_writer_h
......@@ -210,10 +210,6 @@ VTKM_EXEC void decode_ints(ReaderType<BlockSize, PortalType>& reader,
data[i] += (UInt)(x & 1u) << k;
}
}
//for (int i = 0; i < BlockSize; i++)
//{
// std::cout<<"Decomp int "<<i<<" = "<<data[i]<<"\n";
//}
}
template <vtkm::Int32 BlockSize, typename Scalar, typename PortalType>
......@@ -240,16 +236,8 @@ VTKM_EXEC void zfp_decode(Scalar* fblock,
vtkm::UInt32 emax;
if (!zfp::is_int<Scalar>())
{
//std::cout<<"ebits "<<ebits<<"\n";
// read in the shared exponent
//vtkm::UInt64 b = reader.read_bits(ebits - 1);
//print_bits(b);
//std::cout<<"b "<<b<<"\n";
//std::cout<<"ebias "<<zfp::get_ebias<Scalar>()<<"\n";
//emax = vtkm::UInt32(b - zfp::get_ebias<Scalar>());
emax = vtkm::UInt32(reader.read_bits(static_cast<vtkm::Int32>(ebits) - 1));
emax -= static_cast<vtkm::UInt32>(zfp::get_ebias<Scalar>());
//std::cout<<"EMAX "<<emax<<"\n";
}
else
{
......@@ -269,23 +257,11 @@ VTKM_EXEC void zfp_decode(Scalar* fblock,
iblock[idx] = uint2int(ublock[i]);
}
//for (int i = 0; i < BlockSize; i++)
//{
// std::cout<<"before xform tid "<<i<<" "<<iblock[i]<<"\n";
//}
inv_transform<BlockSize> trans;
trans.inv_xform(iblock);
//for (int i = 0; i < BlockSize; i++)
//{
// std::cout<<"tid "<<i<<" "<<iblock[i]<<"\n";
//}
Scalar inv_w = dequantize<Int, Scalar>(1, static_cast<vtkm::Int32>(emax));
//std::cout<<"dequantize factor "<<inv_w<<"\n";
for (vtkm::Int32 i = 0; i < BlockSize; ++i)
{
fblock[i] = inv_w * (Scalar)iblock[i];
......
......@@ -100,28 +100,17 @@ public:
zfpBlock = blockIdx % ZFPDims;
vtkm::Id logicalStart = zfpBlock * vtkm::Id(4);
//std::cout<<"Block ID "<<blockIdx<<"\n";
//std::cout<<"ZFP Block "<<zfpBlock<<"\n";
//std::cout<<"logicalStart Start "<<logicalStart<<"\n";
// get the offset into the field
//vtkm::Id offset = (zfpBlock[2]*4*ZFPDims[1] + zfpBlock[1] * 4)*ZFPDims[0] * 4 + zfpBlock[0] * 4;
//std::cout<<"ZFP block offset "<<offset<<"\n";
bool partial = false;
if (logicalStart + 4 > Dims)
partial = true;
//std::cout<<"Dims "<<Dims<<"\n";
if (partial)
{
const vtkm::Int32 nx =
logicalStart + 4 > Dims ? vtkm::Int32(Dims - logicalStart) : vtkm::Int32(4);
//std::cout<<"Partial block "<<logicalStart<<" offset "<<offset<<"\n";
//std::cout<<"Nx "<<nx<<" "<<ny<<" "<<nz<<"\n";
ScatterPartial1(fblock, scalars, logicalStart, nx);
}
else
{
//std::cout<<"FULL block "<<zfpBlock<<"\n";
Scatter1(fblock, scalars, logicalStart);
}
}
......
......@@ -112,33 +112,22 @@ public:
zfpBlock[1] = (blockIdx / ZFPDims[0]) % ZFPDims[1];
vtkm::Id2 logicalStart = zfpBlock * vtkm::Id(4);
//std::cout<<"Block ID "<<blockIdx<<"\n";
//std::cout<<"ZFP Block "<<zfpBlock<<"\n";
//std::cout<<"logicalStart Start "<<logicalStart<<"\n";
// get the offset into the field
//vtkm::Id offset = (zfpBlock[2]*4*ZFPDims[1] + zfpBlock[1] * 4)*ZFPDims[0] * 4 + zfpBlock[0] * 4;
vtkm::Id offset = logicalStart[0] + logicalStart[1] * Dims[0];
//std::cout<<"ZFP block offset "<<offset<<"\n";
bool partial = false;
if (logicalStart[0] + 4 > Dims[0])
partial = true;
if (logicalStart[1] + 4 > Dims[1])
partial = true;
//std::cout<<"Dims "<<Dims<<"\n";
if (partial)
{
const vtkm::Int32 nx =
logicalStart[0] + 4 > Dims[0] ? vtkm::Int32(Dims[0] - logicalStart[0]) : vtkm::Int32(4);
const vtkm::Int32 ny =
logicalStart[1] + 4 > Dims[1] ? vtkm::Int32(Dims[1] - logicalStart[1]) : vtkm::Int32(4);
//std::cout<<"Partial block "<<logicalStart<<" offset "<<offset<<"\n";
//std::cout<<"Nx "<<nx<<" "<<ny<<" "<<nz<<"\n";
ScatterPartial2(fblock, scalars, Dims, offset, nx, ny);
}
else
{
//std::cout<<"FULL block "<<zfpBlock<<"\n";
Scatter2(fblock, scalars, Dims, offset);
}
}
......
......@@ -115,13 +115,6 @@ public:
zfp::zfp_decode<BlockSize>(
fblock, vtkm::Int32(MaxBits), static_cast<vtkm::UInt32>(blockIdx), stream);
//for(int i = 0; i < BlockSize; ++i)
//{
// std::cout<<" "<<fblock[i];
//}
//std::cout<<"\n";
vtkm::Id3 zfpBlock;
zfpBlock[0] = blockIdx % ZFPDims[0];
zfpBlock[1] = (blockIdx / ZFPDims[0]) % ZFPDims[1];
......@@ -129,13 +122,7 @@ public:
vtkm::Id3 logicalStart = zfpBlock * vtkm::Id(4);
//std::cout<<"Block ID "<<blockIdx<<"\n";
//std::cout<<"ZFP Block "<<zfpBlock<<"\n";
//std::cout<<"logicalStart Start "<<logicalStart<<"\n";
// get the offset into the field
//vtkm::Id offset = (zfpBlock[2]*4*ZFPDims[1] + zfpBlock[1] * 4)*ZFPDims[0] * 4 + zfpBlock[0] * 4;
vtkm::Id offset = (logicalStart[2] * Dims[1] + logicalStart[1]) * Dims[0] + logicalStart[0];
//std::cout<<"ZFP block offset "<<offset<<"\n";
bool partial = false;
if (logicalStart[0] + 4 > Dims[0])
partial = true;
......@@ -143,7 +130,6 @@ public:
partial = true;
if (logicalStart[2] + 4 > Dims[2])
partial = true;
//std::cout<<"Dims "<<Dims<<"\n";
if (partial)
{
const vtkm::Int32 nx =
......@@ -152,13 +138,10 @@ public:
logicalStart[1] + 4 > Dims[1] ? vtkm::Int32(Dims[1] - logicalStart[1]) : vtkm::Int32(4);
const vtkm::Int32 nz =
logicalStart[2] + 4 > Dims[2] ? vtkm::Int32(Dims[2] - logicalStart[2]) : vtkm::Int32(4);
//std::cout<<"Partial block "<<logicalStart<<" offset "<<offset<<"\n";
//std::cout<<"Nx "<<nx<<" "<<ny<<" "<<nz<<"\n";
ScatterPartial3(fblock, scalars, Dims, offset, nx, ny, nz);
}
else
{
//std::cout<<"FULL block "<<zfpBlock<<"\n";
Scatter3(fblock, scalars, Dims, offset);
}
}
......
......@@ -90,11 +90,9 @@ template <typename Int, typename Scalar, vtkm::Int32 BlockSize>
inline VTKM_EXEC void fwd_cast(Int* iblock, const Scalar* fblock, vtkm::Int32 emax)
{
Scalar s = quantize<Scalar>(1, emax);
//std::cout<<"EMAX "<<emax<<" q "<<s<<"\n";
for (vtkm::Int32 i = 0; i < BlockSize; ++i)
{
iblock[i] = static_cast<Int>(s * fblock[i]);
//std::cout<<i<<" f = "<<fblock[i]<<" i = "<<(vtkm::UInt64)iblock[i]<<"\n";
}
}
......@@ -261,10 +259,6 @@ VTKM_EXEC void encode_block(BlockWriter<BlockSize, PortalType>& stream,
UInt ublock[BlockSize];
fwd_order<UInt, Int, BlockSize>(ublock, iblock);
//for(int i = 0; i < BlockSize; ++i)
//{
// std::cout<<"tid "<<i<<" --> nb "<<ublock[i]<<"\n";
//}
vtkm::UInt32 intprec = CHAR_BIT * (vtkm::UInt32)sizeof(UInt);
vtkm::UInt32 kmin =
......@@ -273,7 +267,6 @@ VTKM_EXEC void encode_block(BlockWriter<BlockSize, PortalType>& stream,
vtkm::UInt32 i, m;
vtkm::UInt32 n = 0;
vtkm::UInt64 x;
//std::cout<<"Kmin "<<kmin<<"\n";
/* encode one bit plane at a time from MSB to LSB */
for (vtkm::UInt32 k = intprec; bits && k-- > kmin;)
{
......@@ -283,27 +276,17 @@ VTKM_EXEC void encode_block(BlockWriter<BlockSize, PortalType>& stream,
{
x += (vtkm::UInt64)((ublock[i] >> k) & 1u) << i;
}
//std::cout<<"Bit plane "<<x<<"\n";
/* step 2: encode first n bits of bit plane */
m = vtkm::Min(n, bits);
bits -= m;
//std::cout<<"Bits left "<<bits<<" m "<<m<<"\n";
x = stream.write_bits(x, m);
//std::cout<<"Wrote m "<<m<<" bits\n";
//vtkm::UInt32 temp = bits;
//std::cout<<"rem bitplane "<<x<<"\n";
/* step 3: unary run-length encode remainder of bit plane */
for (; n < BlockSize && bits && (bits--, stream.write_bit(!!x)); x >>= 1, n++)
{
//std::cout<<"outer n "<<n<<" bits "<<bits<<"\n";
for (; n < BlockSize - 1 && bits && (bits--, !stream.write_bit(x & 1u)); x >>= 1, n++)
{
//std::cout<<"n "<<n<<" bits "<<bits<<"\n";
}
}
//temp = temp - bits;
//std::cout<<"rem bits "<<bits<<" intprec "<<intprec<<" k "<<k<<" encoded_bits "<<temp<<"\n";
//stream.print();
}
}
......
......@@ -90,33 +90,6 @@ public:
{
using Scalar = typename InputScalarPortal::ValueType;
// typedef unsigned long long int ull;
// typedef long long int ll;
// const ull blockId = blockIdx.x +
// blockIdx.y * gridDim.x +
// gridDim.x * gridDim.y * blockIdx.z;
// // each thread gets a block so the block index is
// // the global thread index
// const uint block_idx = blockId * blockDim.x + threadIdx.x;
// if(block_idx >= tot_blocks)
// {
// // we can't launch the exact number of blocks
// // so just exit if this isn't real
// return;
// }
// uint2 block_dims;
// block_dims.x = padded_dims.x >> 2;
// block_dims.y = padded_dims.y >> 2;
// // logical pos in 3d array
// uint2 block;
// block.x = (block_idx % block_dims.x) * 4;
// block.y = ((block_idx/ block_dims.x) % block_dims.y) * 4;
// const ll offset = (ll)block.x * stride.x + (ll)block.y * stride.y;
vtkm::Id zfpBlock;
zfpBlock = blockIdx % ZFPDims;
vtkm::Id logicalStart = zfpBlock * vtkm::Id(4);
......@@ -124,10 +97,6 @@ public:
constexpr vtkm::Int32 BlockSize = 4;
Scalar fblock[BlockSize];
// bool partial = false;
// if(block.x + 4 > dims.x) partial = true;
// if(block.y + 4 > dims.y) partial = true;
bool partial = false;
if (logicalStart + 4 > Dims)
partial = true;
......@@ -143,8 +112,6 @@ public:
Gather1(fblock, scalars, logicalStart, 1);
}
//zfp_encode_block<Scalar, ZFP_2D_BLOCK_SIZE>(fblock, maxbits, block_idx, stream);
zfp::ZFPBlockEncoder<BlockSize, Scalar, BitstreamPortal> encoder;
encoder.encode(fblock, static_cast<vtkm::Int32>(MaxBits), vtkm::UInt32(blockIdx), stream);
}
......
......@@ -103,33 +103,6 @@ public:
{
using Scalar = typename InputScalarPortal::ValueType;
// typedef unsigned long long int ull;
// typedef long long int ll;
// const ull blockId = blockIdx.x +
// blockIdx.y * gridDim.x +
// gridDim.x * gridDim.y * blockIdx.z;
// // each thread gets a block so the block index is
// // the global thread index
// const uint block_idx = blockId * blockDim.x + threadIdx.x;
// if(block_idx >= tot_blocks)
// {
// // we can't launch the exact number of blocks
// // so just exit if this isn't real
// return;
// }
// uint2 block_dims;
// block_dims.x = padded_dims.x >> 2;
// block_dims.y = padded_dims.y >> 2;
// // logical pos in 3d array
// uint2 block;
// block.x = (block_idx % block_dims.x) * 4;
// block.y = ((block_idx/ block_dims.x) % block_dims.y) * 4;
// const ll offset = (ll)block.x * stride.x + (ll)block.y * stride.y;
vtkm::Id2 zfpBlock;
zfpBlock[0] = blockIdx % ZFPDims[0];
zfpBlock[1] = (blockIdx / ZFPDims[0]) % ZFPDims[1];
......@@ -139,10 +112,6 @@ public:
constexpr vtkm::Int32 BlockSize = 16;
Scalar fblock[BlockSize];
// bool partial = false;
</