Commit 6f75cd00 authored by Kenneth Moreland's avatar Kenneth Moreland
Browse files

Fix crash in CUDA compiler

Previously when PointLocatorUniformGrid.h was compiled by the CUDA
compiler, the compiler would crash. Apparently during the ptxas
part of the compiler goes into a crazy recursion and runs out of
stack space. This appears to be a long-standing bug in CUDA
(been there for multiple releases) without a clear reason why it
sometimes rears its ugly head. (See for example
https://devtalk.nvidia.com/default/topic/1028825/cuda-programming-and-performance/-ptxas-died-with-status-0xc00000fd-stack_overflow-/)

The problem appears to be when having a doubly or triply nested
loop over a box of values to check in the uniform array. This
appears to fix the problem by converting that to a single for
loop with some index magic to convert that to 3D indices.
parent 439beaae
......@@ -32,9 +32,9 @@
////brute force method /////
template <typename CoordiVecT, typename CoordiPortalT, typename CoordiT>
VTKM_EXEC_CONT vtkm::Id NNSVerify3D(CoordiVecT qc, CoordiPortalT coordiPortal, CoordiT& dis)
VTKM_EXEC_CONT vtkm::Id NNSVerify3D(CoordiVecT qc, CoordiPortalT coordiPortal, CoordiT& dis2)
{
dis = std::numeric_limits<CoordiT>::max();
dis2 = std::numeric_limits<CoordiT>::max();
vtkm::Id nnpIdx = -1;
for (vtkm::Int32 i = 0; i < coordiPortal.GetNumberOfValues(); i++)
......@@ -42,12 +42,11 @@ VTKM_EXEC_CONT vtkm::Id NNSVerify3D(CoordiVecT qc, CoordiPortalT coordiPortal, C
CoordiT splitX = coordiPortal.Get(i)[0];
CoordiT splitY = coordiPortal.Get(i)[1];
CoordiT splitZ = coordiPortal.Get(i)[2];
CoordiT _dis =
vtkm::Sqrt((splitX - qc[0]) * (splitX - qc[0]) + (splitY - qc[1]) * (splitY - qc[1]) +
(splitZ - qc[2]) * (splitZ - qc[2]));
if (_dis < dis)
CoordiT _dis2 = (splitX - qc[0]) * (splitX - qc[0]) + (splitY - qc[1]) * (splitY - qc[1]) +
(splitZ - qc[2]) * (splitZ - qc[2]);
if (_dis2 < dis2)
{
dis = _dis;
dis2 = _dis2;
nnpIdx = i;
}
}
......
......@@ -30,7 +30,7 @@ namespace exec
class PointLocator : public vtkm::VirtualObjectBase
{
public:
VTKM_EXEC virtual void FindNearestNeighbor(vtkm::Vec<vtkm::FloatDefault, 3> queryPoint,
VTKM_EXEC virtual void FindNearestNeighbor(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
vtkm::Id& pointId,
FloatDefault& distanceSquared) const = 0;
};
......
......@@ -28,6 +28,8 @@
#include <vtkm/exec/PointLocator.h>
#include <vtkm/VectorAnalysis.h>
namespace vtkm
{
namespace exec
......@@ -47,10 +49,10 @@ public:
// TODO: should constructor be VTKM_CONT or VTKM_EXEC?
VTKM_CONT
VTKM_EXEC_CONT
PointLocatorUniformGrid() = default;
VTKM_CONT
VTKM_EXEC_CONT
PointLocatorUniformGrid(const vtkm::Vec<vtkm::FloatDefault, 3>& _min,
const vtkm::Vec<vtkm::FloatDefault, 3>& _max,
const vtkm::Vec<vtkm::Id, 3>& _dims,
......@@ -78,65 +80,26 @@ public:
/// \param nearestNeighborId Neareast neighbor in the training dataset for each points in
/// the test set
/// \param distance Distance between query points and their nearest neighbors.
VTKM_EXEC virtual void FindNearestNeighbor(vtkm::Vec<vtkm::FloatDefault, 3> queryPoint,
VTKM_EXEC virtual void FindNearestNeighbor(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
vtkm::Id& nearestNeighborId,
FloatDefault& distance) const override
vtkm::FloatDefault& distance2) const override
{
//std::cout << "FindNeareastNeighbor: " << queryPoint << std::endl;
#if 1
auto nlayers = vtkm::Max(vtkm::Max(Dims[0], Dims[1]), Dims[2]);
//std::cout << "FindNeareastNeighbor: " << queryPoint << std::endl;
vtkm::Id3 ijk = (queryPoint - Min) / this->Dxdydz;
ijk = vtkm::Max(ijk, vtkm::Id3(0));
ijk = vtkm::Min(ijk, this->Dims - vtkm::Id3(1));
vtkm::Vec<vtkm::Id, 3> xyz = (queryPoint - Min) / Dxdydz;
nearestNeighborId = -1;
distance2 = vtkm::Infinity<vtkm::FloatDefault>();
float min_distance = std::numeric_limits<float>::max();
vtkm::Id neareast = -1;
this->FindInCell(queryPoint, ijk, nearestNeighborId, distance2);
for (vtkm::Id layer = 0; layer < nlayers; layer++)
// TODO: This might stop looking before the absolute nearest neighbor is found.
vtkm::Id maxLevel = vtkm::Max(vtkm::Max(this->Dims[0], this->Dims[1]), this->Dims[2]);
for (vtkm::Id level = 1; (nearestNeighborId < 0) && (level < maxLevel); ++level)
{
vtkm::Id minx = vtkm::Max(vtkm::Id(), xyz[0] - layer);
vtkm::Id maxx = vtkm::Min(Dims[0] - 1, xyz[0] + layer);
vtkm::Id miny = vtkm::Max(vtkm::Id(), xyz[1] - layer);
vtkm::Id maxy = vtkm::Min(Dims[1] - 1, xyz[1] + layer);
vtkm::Id minz = vtkm::Max(vtkm::Id(), xyz[2] - layer);
vtkm::Id maxz = vtkm::Min(Dims[2] - 1, xyz[2] + layer);
for (auto i = minx; i <= maxx; i++)
{
for (auto j = miny; j <= maxy; j++)
{
for (auto k = minz; k <= maxz; k++)
{
if (i == (xyz[0] + layer) || i == (xyz[0] - layer) || j == (xyz[1] + layer) ||
j == (xyz[1] - layer) || k == (xyz[2] + layer) || k == (xyz[2] - layer))
{
auto cellid = i + j * Dims[0] + k * Dims[0] * Dims[1];
auto lower = cellLower.Get(cellid);
auto upper = cellUpper.Get(cellid);
for (auto index = lower; index < upper; index++)
{
auto pointid = pointIds.Get(index);
auto point = coords.Get(pointid);
auto dx = point[0] - queryPoint[0];
auto dy = point[1] - queryPoint[1];
auto dz = point[2] - queryPoint[2];
auto distance2 = dx * dx + dy * dy + dz * dz;
if (distance2 < min_distance)
{
neareast = pointid;
min_distance = distance2;
nlayers = layer + 2;
}
}
}
}
}
}
this->FindInBox(queryPoint, ijk, level, nearestNeighborId, distance2);
}
nearestNeighborId = neareast;
distance = vtkm::Sqrt(min_distance);
#endif
}
private:
......@@ -150,6 +113,136 @@ private:
IdPortalType cellIds;
IdPortalType cellLower;
IdPortalType cellUpper;
VTKM_EXEC void FindInCell(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
const vtkm::Id3& ijk,
vtkm::Id& nearestNeighborId,
vtkm::FloatDefault& nearestDistance2) const
{
vtkm::Id cellId = ijk[0] + (ijk[1] * this->Dims[0]) + (ijk[2] * this->Dims[0] * this->Dims[1]);
vtkm::Id lower = cellLower.Get(cellId);
vtkm::Id upper = cellUpper.Get(cellId);
for (vtkm::Id index = lower; index < upper; index++)
{
vtkm::Id pointid = pointIds.Get(index);
vtkm::Vec<vtkm::FloatDefault, 3> point = coords.Get(pointid);
vtkm::FloatDefault distance2 = vtkm::MagnitudeSquared(point - queryPoint);
if (distance2 < nearestDistance2)
{
nearestNeighborId = pointid;
nearestDistance2 = distance2;
}
}
}
VTKM_EXEC void FindInBox(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
const vtkm::Id3& boxCenter,
vtkm::Id level,
vtkm::Id& nearestNeighborId,
vtkm::FloatDefault& nearestDistance2) const
{
if ((boxCenter[0] - level) >= 0)
{
this->FindInXPlane(
queryPoint, boxCenter - vtkm::Id3(level, 0, 0), level, nearestNeighborId, nearestDistance2);
}
if ((boxCenter[0] + level) < this->Dims[0])
{
this->FindInXPlane(
queryPoint, boxCenter + vtkm::Id3(level, 0, 0), level, nearestNeighborId, nearestDistance2);
}
if ((boxCenter[1] - level) >= 0)
{
this->FindInYPlane(
queryPoint, boxCenter - vtkm::Id3(0, level, 0), level, nearestNeighborId, nearestDistance2);
}
if ((boxCenter[1] + level) < this->Dims[1])
{
this->FindInYPlane(
queryPoint, boxCenter - vtkm::Id3(0, level, 0), level, nearestNeighborId, nearestDistance2);
}
if ((boxCenter[2] - level) >= 0)
{
this->FindInZPlane(
queryPoint, boxCenter - vtkm::Id3(0, 0, level), level, nearestNeighborId, nearestDistance2);
}
if ((boxCenter[2] + level) < this->Dims[2])
{
this->FindInZPlane(
queryPoint, boxCenter - vtkm::Id3(0, 0, level), level, nearestNeighborId, nearestDistance2);
}
}
VTKM_EXEC void FindInPlane(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
const vtkm::Id3& planeCenter,
const vtkm::Id3& div,
const vtkm::Id3& mod,
const vtkm::Id3& origin,
vtkm::Id numInPlane,
vtkm::Id& nearestNeighborId,
vtkm::FloatDefault& nearestDistance2) const
{
for (vtkm::Id index = 0; index < numInPlane; ++index)
{
vtkm::Id3 ijk = planeCenter + vtkm::Id3(index) / div +
vtkm::Id3(index % mod[0], index % mod[1], index % mod[2]) + origin;
if ((ijk[0] >= 0) && (ijk[0] < this->Dims[0]) && (ijk[1] >= 0) && (ijk[1] < this->Dims[1]) &&
(ijk[2] >= 0) && (ijk[2] < this->Dims[2]))
{
this->FindInCell(queryPoint, ijk, nearestNeighborId, nearestDistance2);
}
}
}
VTKM_EXEC void FindInXPlane(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
const vtkm::Id3& planeCenter,
vtkm::Id level,
vtkm::Id& nearestNeighborId,
vtkm::FloatDefault& nearestDistance2) const
{
vtkm::Id yWidth = (2 * level) + 1;
vtkm::Id zWidth = (2 * level) + 1;
vtkm::Id3 div = { yWidth * zWidth, yWidth * zWidth, yWidth };
vtkm::Id3 mod = { 1, yWidth, 1 };
vtkm::Id3 origin = { 0, -level, -level };
vtkm::Id numInPlane = yWidth * zWidth;
this->FindInPlane(
queryPoint, planeCenter, div, mod, origin, numInPlane, nearestNeighborId, nearestDistance2);
}
VTKM_EXEC void FindInYPlane(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
vtkm::Id3 planeCenter,
vtkm::Id level,
vtkm::Id& nearestNeighborId,
vtkm::FloatDefault& nearestDistance2) const
{
vtkm::Id xWidth = (2 * level) - 1;
vtkm::Id zWidth = (2 * level) + 1;
vtkm::Id3 div = { xWidth * zWidth, xWidth * zWidth, xWidth };
vtkm::Id3 mod = { xWidth, 1, 1 };
vtkm::Id3 origin = { -level + 1, 0, -level };
vtkm::Id numInPlane = xWidth * zWidth;
this->FindInPlane(
queryPoint, planeCenter, div, mod, origin, numInPlane, nearestNeighborId, nearestDistance2);
}
VTKM_EXEC void FindInZPlane(const vtkm::Vec<vtkm::FloatDefault, 3>& queryPoint,
vtkm::Id3 planeCenter,
vtkm::Id level,
vtkm::Id& nearestNeighborId,
vtkm::FloatDefault& nearestDistance2) const
{
vtkm::Id xWidth = (2 * level) - 1;
vtkm::Id yWidth = (2 * level) - 1;
vtkm::Id3 div = { xWidth * yWidth, xWidth, xWidth * yWidth };
vtkm::Id3 mod = { xWidth, 1, 1 };
vtkm::Id3 origin = { -level + 1, -level + 1, 0 };
vtkm::Id numInPlane = xWidth * yWidth;
this->FindInPlane(
queryPoint, planeCenter, div, mod, origin, numInPlane, nearestNeighborId, nearestDistance2);
}
};
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment