From 1dabc5ce61c106ee2c62e2475afb4b5243a7d0a5 Mon Sep 17 00:00:00 2001 From: "David C. Lonie" <david.lonie@kitware.com> Date: Thu, 3 Mar 2016 13:17:44 -0500 Subject: [PATCH] Documentation updates. --- Common/Core/vtkAbstractArray.h | 3 + Common/Core/vtkArrayDispatch.h | 11 +- Common/Core/vtkAssume.h | 4 + Common/Core/vtkDataArrayAccessor.h | 51 +- Common/Core/vtkGenericDataArray.h | 3 + Common/Core/vtkSetGet.h | 3 + .../Doxygen/ArrayDispatch-VTK-7-1.md | 1277 +++++++++++++++++ Documentation/Doxygen/ChangesVTK-7-1.md | 24 + 8 files changed, 1352 insertions(+), 24 deletions(-) create mode 100644 Documentation/Doxygen/ArrayDispatch-VTK-7-1.md diff --git a/Common/Core/vtkAbstractArray.h b/Common/Core/vtkAbstractArray.h index cf682a7a448..f16dba7dcb3 100644 --- a/Common/Core/vtkAbstractArray.h +++ b/Common/Core/vtkAbstractArray.h @@ -620,6 +620,9 @@ struct vtkArrayDownCast_impl // However, not all arrays support the FastDownCast mechanism. vtkArrayDownCast // exists to select between the two; Arrays that support FastDownCast will use // it, while others will fallback to the slower SafeDownCast. +// +// A more detailed description of this class and related tools can be found +// \ref VTK-7-1-ArrayDispatch "here". template <typename ArrayT> ArrayT* vtkArrayDownCast(vtkAbstractArray *array) { diff --git a/Common/Core/vtkArrayDispatch.h b/Common/Core/vtkArrayDispatch.h index 650c0ed227d..5602546b681 100644 --- a/Common/Core/vtkArrayDispatch.h +++ b/Common/Core/vtkArrayDispatch.h @@ -12,13 +12,16 @@ PURPOSE. See the above copyright notice for more information. =========================================================================*/ -// .NAME vtkArrayDispatch - vtkDataArray code generator +// .NAME vtkArrayDispatch - vtkDataArray code generator/dispatcher. // // vtkArrayDispatch implements a mechanism for generating optimized code for // multiple subclasses of vtkDataArray at once. Using a TypeList based // approach (see vtkTypeList), a templated worker implementation is generated // for a restricted or unrestricted set of vtkDataArray subclasses. // +// A more detailed description of this class and related tools can be found +// \ref VTK-7-1-ArrayDispatch "here". +// // The primary goals of this class are to simplify multi-array dispatch // implementations, and provide tools to lower compilation time and binary // size (i.e. avoiding 'template explosions'). @@ -99,13 +102,17 @@ // There are three components to a dispatch: The dispatcher, the worker, and // the array(s). They are combined like so: // +// @code // bool result = Dispatcher<...>::Execute(array, worker); +// @endcode // // The dispatcher can also be instantiated into an object, e.g.: // +// @code // vtkArrayDispatch::SomeDispatcher<...> myDispatcher; // MyWorker worker; // bool result = myDispatcher.Execute(array, worker); +// @endcode // // Return value: // The Execute method of the dispatcher will return true if a code path matching @@ -122,6 +129,7 @@ // additional input/output data is needed. // // A simple worker implementation for triple dispatch: +// @code // struct MyWorker // { // template <typename Array1T, typename Array2T, typename Array3T> @@ -130,6 +138,7 @@ // // Do work using vtkGenericDataArray API... // } // }; +// @endcode // // Note that optimized implementations (e.g. for AoS arrays vs SoA arrays) can // be supported by providing overloads of operator() that have more restrictive diff --git a/Common/Core/vtkAssume.h b/Common/Core/vtkAssume.h index b0809ea3274..c68857f8e49 100644 --- a/Common/Core/vtkAssume.h +++ b/Common/Core/vtkAssume.h @@ -12,6 +12,7 @@ PURPOSE. See the above copyright notice for more information. =========================================================================*/ +// .NAME VTK_ASSUME - Provide compiler hints for non-obvious conditions. #ifndef vtkAssume_h #define vtkAssume_h @@ -31,6 +32,9 @@ // VTK_ASSUME(array->GetNumberOfComponents() == 3); allows the compiler to // provide faster access through the GetTypedComponent method, as the fixed data // stride in AOS arrays allows advanced optimization of the accesses. +// +// A more detailed description of this class and related tools can be found +// \ref VTK-7-1-ArrayDispatch "here". #define VTK_ASSUME(cond) \ do { \ const bool c = cond; \ diff --git a/Common/Core/vtkDataArrayAccessor.h b/Common/Core/vtkDataArrayAccessor.h index b862fd8634e..191613c6943 100644 --- a/Common/Core/vtkDataArrayAccessor.h +++ b/Common/Core/vtkDataArrayAccessor.h @@ -19,6 +19,9 @@ // vtkDataArrayAccessor provides access to data stored in a vtkDataArray. It // is intended to be used in conjunction with vtkArrayDispatcher. // +// A more detailed description of this class and related tools can be found +// \ref VTK-7-1-ArrayDispatch "here". +// // The goal of this helper template is to allow developers to write a single // templated worker function that will generates code to use the efficient typed // APIs provided by vtkGenericDataArray when the array type is known, but @@ -37,31 +40,33 @@ // // A standard usage pattern of this class would be: // -// // vtkArrayDispatch worker struct: -// struct Worker -// { -// // Templated worker function: -// template <typename ArrayT> -// void operator()(ArrayT *array) -// { -// // The accessor: -// vtkDataArrayAccessor<ArrayT> accessor(array); -// // The data type used by ArrayT's API, use this for -// // temporary/intermediate results: -// typedef typename vtkDataArrayAccessor<ArrayT>::APIType APIType; +// @code +// // vtkArrayDispatch worker struct: +// struct Worker +// { +// // Templated worker function: +// template <typename ArrayT> +// void operator()(ArrayT *array) +// { +// // The accessor: +// vtkDataArrayAccessor<ArrayT> accessor(array); +// // The data type used by ArrayT's API, use this for +// // temporary/intermediate results: +// typedef typename vtkDataArrayAccessor<ArrayT>::APIType APIType; // -// // Do work using accessor to set/get values.... -// } -// }; +// // Do work using accessor to set/get values.... +// } +// }; // -// // Usage: -// Worker worker; -// vtkDataArray *array = ...; -// if (!vtkArrayDispatch::Dispatch::Execute(array, worker)) -// { -// // Dispatch failed: unknown array type. Fallback to vtkDataArray API: -// worker(array); -// } +// // Usage: +// Worker worker; +// vtkDataArray *array = ...; +// if (!vtkArrayDispatch::Dispatch::Execute(array, worker)) +// { +// // Dispatch failed: unknown array type. Fallback to vtkDataArray API: +// worker(array); +// } +// @endcode // // We define Worker::operator() as the templated worker function, restricting // all data accesses to go through the 'accessor' object (methods like diff --git a/Common/Core/vtkGenericDataArray.h b/Common/Core/vtkGenericDataArray.h index 86aae76a7ca..4a35612adcd 100644 --- a/Common/Core/vtkGenericDataArray.h +++ b/Common/Core/vtkGenericDataArray.h @@ -17,6 +17,9 @@ // // .SECTION Description // +// A more detailed description of this class and related tools can be found +// \ref VTK-7-1-ArrayDispatch "here". +// // The vtkGenericDataArray class provides a generic implementation of the // vtkDataArray API. It relies on subclasses providing access to data // via 8 "concept methods", which should be implemented as non-virtual diff --git a/Common/Core/vtkSetGet.h b/Common/Core/vtkSetGet.h index 4ca4b685031..cfbf1f089a5 100644 --- a/Common/Core/vtkSetGet.h +++ b/Common/Core/vtkSetGet.h @@ -688,6 +688,9 @@ virtual double *Get##name() \ return thisClass::New(); \ } +// NOTE: This is no longer the prefer method for dispatching an array to a +// worker template. See vtkArrayDispatch for the new approach. +// // The vtkTemplateMacro is used to centralize the set of types // supported by Execute methods. It also avoids duplication of long // switch statement case lists. diff --git a/Documentation/Doxygen/ArrayDispatch-VTK-7-1.md b/Documentation/Doxygen/ArrayDispatch-VTK-7-1.md new file mode 100644 index 00000000000..f5ac764e54a --- /dev/null +++ b/Documentation/Doxygen/ArrayDispatch-VTK-7-1.md @@ -0,0 +1,1277 @@ +@page VTK-7-1-ArrayDispatch vtkArrayDispatch and Related Tools +@tableofcontents + +# Background # {#VTKAD-Background} + +VTK datasets store most of their important information in subclasses of +`vtkDataArray`. Vertex locations (`vtkPoints::Data`), cell topology +(`vtkCellArray::Ia`), and numeric point, cell, and generic attributes +(`vtkFieldData::Data`) are the dataset features accessed most frequently by VTK +algorithms, and these all rely on the `vtkDataArray` API. + +# Terminology # {#VTKAD-Terminology} + +This page uses the following terms: + +A __ValueType__ is the element type of an array. For instance, `vtkFloatArray` +has a ValueType of `float`. + +An __ArrayType__ is a subclass of `vtkDataArray`. It specifies not only a +ValueType, but an array implementation as well. This becomes important as +`vtkDataArray` subclasses will begin to stray from the typical +"array-of-structs" ordering that has been exclusively used in the past. + +A __dispatch__ is a runtime-resolution of a `vtkDataArray`'s ArrayType, and is +used to call a section of executable code that has been tailored for that +ArrayType. Dispatching has compile-time and run-time components. At +compile-time, the possible ArrayTypes to be used are determined and a worker +code template is generated for each type. At run-time, the type of a specific +array is determined and the proper worker instantiation is called. + +__Template explosion__ refers to a sharp increase in the size of a compiled +binary that results from instantiating a template function or class on many +different types. + +## vtkDataArray ## {#VTKAD-vtkDataArray} + +The data array type hierarchy in VTK has a unique feature when compared to +typical C++ containers: a non-templated base class. All arrays containing +numeric data inherit `vtkDataArray`, a common interface that sports a very +useful API. Without knowing the underlying ValueType stored in data array, an +algorithm or user may still work with any `vtkDataArray` in meaningful ways: +The array can be resized, reshaped, read, and rewritten easily using a generic +API that substitutes double-precision floating point numbers for the array's +actual ValueType. For instance, we can write a simple function that computes +the magnitudes for a set of vectors in one array and store the results in +another using nothing but the typeless `vtkDataArray` API: + +~~~{.cpp} +// 3 component magnitude calculation using the vtkDataArray API. +// Inefficient, but easy to write: +void calcMagnitude(vtkDataArray *vectors, vtkDataArray *magnitude) +{ + vtkIdType numVectors = vectors->GetNumberOfTuples(); + for (vtkIdType tupleIdx = 0; tupleIdx < numVectors; ++tupleIdx) + { + // What data types are magnitude and vectors using? + // We don't care! These methods all use double. + magnitude->SetComponent(tupleIdx, 0, + std::sqrt(vectors->GetComponent(tupleIdx, 0) * + vectors->GetComponent(tupleIdx, 0) + + vectors->GetComponent(tupleIdx, 1) * + vectors->GetComponent(tupleIdx, 1) + + vectors->GetComponent(tupleIdx, 2) * + vectors->GetComponent(tupleIdx, 2)); + } +} +~~~ + +## The Costs of Flexiblity ## {#VTKAD-TheCostsOfFlexiblity} + +However, this flexibility comes at a cost. Passing data through a generic API +has a number of issues: + +__Accuracy__ + +Not all ValueTypes are fully expressible as a `double`. The truncation of +integers with > 52 bits of precision can be a particularly nasty issue. + +__Performance__ + +__Virtual overhead__: The only way to implement such a system is to route the +`vtkDataArray` calls through a run-time resolution of ValueTypes. This is +implemented through the virtual override mechanism of C++, which adds a small +overhead to each API call. + +__Missed optimization__: The virtual indirection described above also prevents +the compiler from being able to make assumptions about the layout of the data +in-memory. This information could be used to perform advanced optimizations, +such as vectorization. + +So what can one do if they want fast, optimized, type-safe access to the data +stored in a `vtkDataArray`? What options are available? + +## The Old Solution: vtkTemplateMacro ## {#VTKAD-vtkTemplateMacro} + +The `vtkTemplateMacro` is described in this section. While it is no longer +considered a best practice to use this construct in new code, it is still +usable and likely to be encountered when reading the VTK source code. Newer +code should use the `vtkArrayDispatch` mechanism, which is detailed later. The +discussion of `vtkTemplateMacro` will help illustrate some of the practical +issues with array dispatching. + +With a few minor exceptions that we won't consider here, prior to VTK 7.1 it +was safe to assume that all numeric `vtkDataArray` objects were also subclasses +of `vtkDataArrayTemplate`. This template class provided the implementation of +all documented numeric data arrays such as `vtkDoubleArray`, `vtkIdTypeArray`, +etc, and stores the tuples in memory as a contiguous array-of-structs (AOS). +For example, if we had an array that stored 3-component tuples as floating +point numbers, we could define a tuple as: + +~~~{.cpp} +struct Tuple { float x; float y; float z; }; +~~~ + +An array-of-structs, or AOS, memory buffer containing this data could be +described as: + +~~~{.cpp} +Tuple ArrayOfStructsBuffer[NumTuples]; +~~~ + +As a result, `ArrayOfStructsBuffer` will have the following memory layout: + +~~~{.cpp} +{ x1, y1, z1, x2, y2, z2, x3, y3, z3, ...} +~~~ + +That is, the components of each tuple are stored in adjacent memory locations, +one tuple after another. While this is not exactly how `vtkDataArrayTemplate` +implemented its memory buffers, it accurately describes the resulting memory +layout. + +`vtkDataArray` also defines a `GetDataType` method, which returns an enumerated +value describing a type. We can used to discover the ValueType stored in the +array. + +Combine the AOS memory convention and `GetDataType()` with a horrific little +method on the data arrays named `GetVoidPointer()`, and a path to efficient, +type-safe access was available. `GetVoidPointer()` does what it says on the +tin: it returns the memory address for the array data's base location as a +`void*`. While this breaks encapsulation and sets off warning bells for the +more pedantic among us, the following technique was safe and efficient when +used correctly: + +~~~{.cpp} +// 3-component magnitude calculation using GetVoidPointer. +// Efficient and fast, but assumes AOS memory layout +template <typename ValueType> +void calcMagnitudeWorker(ValueType *vectors, ValueType *magnitude, + vtkIdType numVectors) +{ + for (vtkIdType tupleIdx = 0; tupleIdx < numVectors; ++tupleIdx) + { + // We now have access to the raw memory buffers, and assuming + // AOS memory layout, we know how to access them. + magnitude[tupleIdx] = + std::sqrt(vectors[3 * tupleIdx + 0] * + vectors[3 * tupleIdx + 0] + + vectors[3 * tupleIdx + 1] * + vectors[3 * tupleIdx + 1] + + vectors[3 * tupleIdx + 2] * + vectors[3 * tupleIdx + 2]); + } +} + +void calcMagnitude(vtkDataArray *vectors, vtkDataArray *magnitude) +{ + assert("Arrays must have same datatype!" && + vtkDataTypesCompare(vectors->GetDataType(), + magnitude->GetDataType())); + switch (vectors->GetDataType()) + { + vtkTemplateMacro(calcMagnitudeWorker<VTK_TT*>( + static_cast<VTK_TT*>(vectors->GetVoidPointer(0)), + static_cast<VTK_TT*>(magnitude->GetVoidPointer(0)), + vectors->GetNumberOfTuples()); + } +} +~~~ + +The `vtkTemplateMacro`, as you may have guessed, expands into a series of case +statements that determine an array's ValueType from the `int GetDataType()` +return value. The ValueType is then `typedef`'d to `VTK_TT`, and the macro's +argument is called for each numeric type returned from `GetDataType`. In this +case, the call to `calcMagnitudeWorker` is made by the macro, with `VTK_TT` +`typedef`'d to the array's ValueType. + +This is the typical usage pattern for `vtkTemplateMacro`. The `calcMagnitude` +function calls a templated worker implementation that uses efficient, raw +memory access to a typesafe memory buffer so that the worker's code can be as +efficient as possible. But this assumes AOS memory ordering, and as we'll +mention, this assumption may no longer be valid as VTK moves further into the +field of in-situ analysis. + +But first, you may have noticed that the above example using `vtkTemplateMacro` +has introduced a step backwards in terms of functionality. In the +`vtkDataArray` implementation, we didn't care if both arrays were the same +ValueType, but now we have to ensure this, since we cast both arrays' `void` +pointers to `VTK_TT`*. What if vectors is an array of integers, but we want to +calculate floating point magnitudes? + +## vtkTemplateMacro with Multiple Arrays ## {#VTKAD-Dual-vtkTemplateMacro} + +The best solution prior to VTK 7.1 was to use two worker functions. The first +is templated on vector's ValueType, and the second is templated on both array +ValueTypes: + +~~~{.cpp} +// 3-component magnitude calculation using GetVoidPointer and a +// double-dispatch to resolve ValueTypes of both arrays. +// Efficient and fast, but assumes AOS memory layout, lots of boilerplate +// code, and the sensitivity to template explosion issues increases. +template <typename VectorType, typename MagnitudeType> +void calcMagnitudeWorker2(VectorType *vectors, MagnitudeType *magnitude, + vtkIdType numVectors) +{ + for (vtkIdType tupleIdx = 0; tupleIdx < numVectors; ++tupleIdx) + { + // We now have access to the raw memory buffers, and assuming + // AOS memory layout, we know how to access them. + magnitude[tupleIdx] = + std::sqrt(vectors[3 * tupleIdx + 0] * + vectors[3 * tupleIdx + 0] + + vectors[3 * tupleIdx + 1] * + vectors[3 * tupleIdx + 1] + + vectors[3 * tupleIdx + 2] * + vectors[3 * tupleIdx + 2]); + } +} + +// Vector ValueType is known (VectorType), now use vtkTemplateMacro on +// magnitude: +template <typename VectorType> +void calcMagnitudeWorker1(VectorType *vectors, vtkDataArray *magnitude, + vtkIdType numVectors) +{ + switch (magnitude->GetDataType()) + { + vtkTemplateMacro(calcMagnitudeWorker2(vectors, + static_cast<VTK_TT*>(magnitude->GetVoidPointer(0)), numVectors); + } +} + +void calcMagnitude(vtkDataArray *vectors, vtkDataArray *magnitude) +{ + // Dispatch vectors first: + switch (vectors->GetDataType()) + { + vtkTemplateMacro(calcMagnitudeWorker1<VTK_TT*>( + static_cast<VTK_TT*>(vectors->GetVoidPointer(0)), + magnitude, vectors->GetNumberOfTuples()); + } +} +~~~ + +This works well, but it's a bit ugly and has the same issue as before regarding +memory layout. Double dispatches using this method will also see more problems +regarding binary size. The number of template instantiations that the compiler +needs to generate is determined by `I = T^D`, where `I` is the number of +template instantiations, `T` is the number of types considered, and `D` is the +number of dispatches. As of VTK 7.1, `vtkTemplateMacro` considers 14 data +types, so this double-dispatch will produce 14 instantiations of +`calcMagnitudeWorker1` and 196 instantiations of `calcMagnitudeWorker2`. If we +tried to resolve 3 `vtkDataArray`s into raw C arrays, 2744 instantiations of +the final worker function would be generated. As more arrays are considered, +the need for some form of restricted dispatch becomes very important to keep +this template explosion in check. + +## Data Array Changes in VTK 7.1 ## {#VTKAD-Changes-in-VTK-71} + +Starting with VTK 7.1, the Array-Of-Structs (AOS) memory layout is no longer +the only `vtkDataArray` implementation provided by the library. The +Struct-Of-Arrays (SOA) memory layout is now available throught the +`vtkSOADataArrayTemplate` class. The SOA layout assumes that the components of +an array are stored separately, as in: + +~~~{.cpp} +struct StructOfArraysBuffer +{ + float *x; // Pointer to array containing x components + float *y; // Same for y + float *z; // Same for z +}; +~~~ + +The new SOA arrays were added to improve interoperability between VTK and +simulation packages for live visualization of in-situ results. Many simulations +use the SOA layout for their data, and natively supporting these arrays in VTK +will allow analysis of live data without the need to explicitly copy it into a +VTK data structure. + +As a result of this change, a new mechanism is needed to efficiently access +array data. `vtkTemplateMacro` and `GetVoidPointer` are no longer an acceptable +solution -- implementing `GetVoidPointer` for SOA arrays requires creating a +deep copy of the data into a new AOS buffer, a waste of both processor time and +memory. + +So we need a replacement for `vtkTemplateMacro` that can abstract away things +like storage details while providing performance that is on-par with raw memory +buffer operations. And while we're at it, let's look at removing the tedium of +multi-array dispatch and reducing the problem of 'template explosion'. The +remainder of this page details such a system. + +# Best Practices for vtkDataArray Post-7.1 # {#VTKAD-BestPractices} + +We'll describe a new set of tools that make managing template instantiations +for efficient array access both easy and extensible. As an overview, the +following new features will be discussed: + +* `vtkGenericDataArray`: The new templated base interface for all numeric +`vtkDataArray` subclasses. +* `vtkArrayDispatch`: Collection of code generation tools that allow concise +and precise specification of restrictable dispatch for up to 3 arrays +simultaneously. +* `vtkArrayDownCast`: Access to specialized downcast implementations from code +templates. +* `vtkDataArrayAccessor`: Provides `Get` and `Set` methods for +accessing/modifying array data as efficiently as possible. Allows a single +worker implementation to work efficiently with `vtkGenericDataArray` +subclasses, or fallback to use the `vtkDataArray` API if needed. +* `VTK_ASSUME`: New abstraction for the compiler `__assume` directive to +provide optimization hints. + +These will be discussed more fully, but as a preview, here's our familiar +`calcMagnitude` example implemented using these new tools: + +~~~{.cpp} +// Modern implementation of calcMagnitude using new concepts in VTK 7.1: +// A worker functor. The calculation is implemented in the function template +// for operator(). +struct CalcMagnitudeWorker +{ + // The worker accepts VTK array objects now, not raw memory buffers. + template <typename VectorArray, typename MagnitudeArray> + void operator()(VectorArray *vectors, MagnitudeArray *magnitude) + { + // This allows the compiler to optimize for the AOS array stride. + VTK_ASSUME(vectors->GetNumberOfComponents() == 3); + VTK_ASSUME(magnitude->GetNumberOfComponents() == 1); + + // These allow this single worker function to be used with both + // the vtkDataArray 'double' API and the more efficient + // vtkGenericDataArray APIs, depending on the template parameters: + vtkDataArrayAccessor<VectorArray> v(vectors); + vtkDataArrayAccessor<MagnitudeArray> m(magnitude); + + vtkIdType numVectors = vectors->GetNumberOfTuples(); + for (vtkIdType tupleIdx = 0; tupleIdx < numVectors; ++tupleIdx) + { + // Set and Get compile to inlined optimizable raw memory accesses for + // vtkGenericDataArray subclasses. + m.Set(tupleIdx, 0, std::sqrt(v.Get(tupleIdx, 0) * v.Get(tupleIdx, 0) + + v.Get(tupleIdx, 1) * v.Get(tupleIdx, 1) + + v.Get(tupleIdx, 2) * v.Get(tupleIdx, 2))); + } + } +}; + +void calcMagnitude(vtkDataArray *vectors, vtkDataArray *magnitude) +{ + // Create our worker functor: + CalcMagnitudeWorker worker; + + // Define our dispatcher. We'll let vectors have any ValueType, but only + // consider float/double arrays for magnitudes. These combinations will + // use a 'fast-path' implementation generated by the dispatcher: + typedef vtkArrayDispatch::Dispatch2ByValueType + < + vtkArrayDispatch::AllTypes, // ValueTypes allowed by first array + vtkArrayDispatch::Reals // ValueTypes allowed by second array + > Dispatcher; + + // Execute the dispatcher: + if (!Dispatcher::Execute(vectors, magnitude, worker)) + { + // If Execute() fails, it means the dispatch failed due to an + // unsupported array type. In this case, it's likely that the magnitude + // array is using an integral type. This is an uncommon case, so we won't + // generate a fast path for these, but instead call an instantiation of + // CalcMagnitudeWorker::operator()<vtkDataArray, vtkDataArray>. + // Through the use of vtkDataArrayAccessor, this falls back to using the + // vtkDataArray double API: + worker(vectors, magnitude); + } +} +~~~ + +# vtkGenericDataArray # {#VTKAD-vtkGenericDataArray} + +The `vtkGenericDataArray` class template drives the new `vtkDataArray` class +hierarchy. The ValueType is introduced here, both as a template parameter and a +class-scope `typedef`. This allows a typed API to be written that doesn't +require conversion to/from a common type (as `vtkDataArray` does with double). +It does not implement any storage details, however. Instead, it uses the CRTP +idiom to forward key method calls to a derived class without using a virtual +function call. By eliminating this indirection, `vtkGenericDataArray` defines +an interface that can be used to implement highly efficient code, because the +compiler is able to see past the method calls and optimize the underlying +memory accesses instead. + +There are two main subclasses of `vtkGenericDataArray`: +`vtkAOSDataArrayTemplate` and `vtkSOADataArrayTemplate`. These implement +array-of-structs and struct-of-arrays storage, respectively. + +# vtkTypeList # {#VTKAD-vtkTypeList} + +Type lists are a metaprogramming construct used to generate a list of C++ +types. They are used in VTK to implement restricted array dispatching. As we'll +see, `vtkArrayDispatch` offers ways to reduce the number of generated template +instantiations by enforcing constraints on the arrays used to dispatch. For +instance, if one wanted to only generate templated worker implementations for +`vtkFloatArray` and `vtkIntArray`, a typelist is used to specify this: + +~~~{.cpp} +// Create a typelist of 2 types, vtkFloatArray and vtkIntArray: +typedef vtkTypeList_Create_2(vtkFloatArray, vtkIntArray) MyArrays; + +Worker someWorker = ...; +vtkDataArray *someArray = ...; + +// Use vtkArrayDispatch to generate code paths for these arrays: +vtkArrayDispatch::DispatchByArray<MyArrays>(someArray, someWorker); +~~~ + +There's not much to know about type lists as a user, other than how to create +them. As seen above, there is a set of macros named `vtkTypeList_Create_X`, +where X is the number of types in the created list, and the arguments are the +types to place in the list. In the example above, the new type list is +typically bound to a friendlier name using a local `typedef`, which is a common +practice. + +The `vtkTypeList.h` header defines some additional type list operations that +may be useful, such as deleting and appending types, looking up indices, etc. +`vtkArrayDispatch::FilterArraysByValueType` may come in handy, too. But for +working with array dispatches, most users will only need to create new ones, or +use one of the following predefined `vtkTypeLists`: + +* `vtkArrayDispatch::Reals`: All floating point ValueTypes. +* `vtkArrayDispatch::Integrals`: All integral ValueTypes. +* `vtkArrayDispatch::AllTypes`: Union of Reals and Integrals. +* `vtkArrayDispatch::Arrays`: Default list of ArrayTypes to use in dispatches. + +The last one is special -- `vtkArrayDispatch::Arrays` is a typelist of +ArrayTypes set application-wide when VTK is built. This `vtkTypeList` of +`vtkDataArray` subclasses is used for unrestricted dispatches, and is the list +that gets filtered when restricting a dispatch to specific ValueTypes. + +Refining this list allows the user building VTK to have some control over the +dispatch process. If SOA arrays are never going to be used, they can be removed +from dispatch calls, reducing compile times and binary size. On the other hand, +a user applying in-situ techniques may want them available, because they'll be +used to import views of intermediate results. + +By default, `vtkArrayDispatch::Arrays` contains all AOS arrays. The `CMake` +option `VTK_DISPATCH_SOA_ARRAYS` will enable SOA array dispatch as well. More +advanced possibilities exist and are described in +`VTK/CMake/vtkCreateArrayDispatchArrayList.cmake`. + +# vtkArrayDownCast # {#VTKAD-vtkArrayDownCast} + +In VTK, all subclasses of `vtkObject` (including the data arrays) support a +downcast method called `SafeDownCast`. It is used similarly to the C++ +`dynamic_cast` -- given an object, try to cast it to a more derived type or +return `NULL` if the object is not the requested type. Say we have a +`vtkDataArray` and want to test if it is actually a `vtkFloatArray`. We can do +this: + +~~~{.cpp} +void DoSomeAction(vtkDataArray *dataArray) +{ + vtkFloatArray *floatArray = vtkFloatArray::SafeDownCast(dataArray); + if (floatArray) + { + // ... (do work with float array) + } +} +~~~ + +This works, but it can pose a serious problem if `DoSomeAction` is called +repeatedly. `SafeDownCast` works by performing a series of virtual calls and +string comparisons to determine if an object falls into a particular class +hierarchy. These string comparisons add up and can actually dominate +computational resources if an algorithm implementation calls `SafeDownCast` in +a tight loop. + +In such situations, it's ideal to restructure the algorithm so that the +downcast only happens once and the same result is used repeatedly, but +sometimes this is not possible. To lessen the cost of downcasting arrays, a +`FastDownCast` method exists for common subclasses of `vtkAbstractArray`. This +replaces the string comparisons with a single virtual call and a few integer +comparisons and is far cheaper than the more general SafeDownCast. However, not +all array implementations support the `FastDownCast` method. + +This creates a headache for templated code. Take the following example: + +~~~{.cpp} +template <typename ArrayType> +void DoSomeAction(vtkAbstractArray *array) +{ + ArrayType *myArray = ArrayType::SafeDownCast(array); + if (myArray) + { + // ... (do work with myArray) + } +} +~~~ + +We cannot use `FastDownCast` here since not all possible ArrayTypes support it. +But we really want that performance increase for the ones that do -- +`SafeDownCast`s are really slow! `vtkArrayDownCast` fixes this issue: + +~~~{.cpp} +template <typename ArrayType> +void DoSomeAction(vtkAbstractArray *array) +{ + ArrayType *myArray = vtkArrayDownCast<ArrayType>(array); + if (myArray) + { + // ... (do work with myArray) + } +} +~~~ + +`vtkArrayDownCast` automatically selects `FastDownCast` when it is defined for +the ArrayType, and otherwise falls back to `SafeDownCast`. This is the +preferred array downcast method for performance, uniformity, and reliability. + +# vtkDataArrayAccessor # {#VTKAD-vtkDataArrayAccessor} + +Array dispatching relies on having templated worker code carry out some +operation. For instance, take this `vtkArrayDispatch` code that locates the +maximum value in an array: + +~~~{.cpp} +// Stores the tuple/component coordinates of the maximum value: +struct FindMax +{ + vtkIdType Tuple; // Result + int Component; // Result + + FindMax() : Tuple(-1), Component(-1) {} + + template <typename ArrayT> + void operator()(ArrayT *array) + { + // The type to use for temporaries, and a temporary to store + // the current maximum value: + typedef typename ArrayT::ValueType ValueType; + ValueType max = std::numeric_limits<ValueType>::min(); + + // Iterate through all tuples and components, noting the location + // of the largest element found. + vtkIdType numTuples = array->GetNumberOfTuples(); + int numComps = array->GetNumberOfComponents(); + for (vtkIdType tupleIdx = 0; tupleIdx < numTuples; ++tupleIdx) + { + for (int compIdx = 0; compIdx < numComps; ++compIdx) + { + if (max < array->GetTypedComponent(tupleIdx, compIdx)) + { + max = array->GetTypedComponent(tupleIdx, compIdx); + this->Tuple = tupleIdx; + this->Component = compIdx; + } + } + } + } +}; + +void someFunction(vtkDataArray *array) +{ + FindMax maxWorker; + vtkArrayDispatch::Dispatch::Execute(array, maxWorker); + // Do work using maxWorker.Tuple and maxWorker.Component... +} +~~~ + +There's a problem, though. Recall that only the arrays in +`vtkArrayDispatch::Arrays` are tested for dispatching. What happens if the +array passed into someFunction wasn't on that list? + +The dispatch will fail, and `maxWorker.Tuple` and `maxWorker.Component` will be +left to their initial values of -1. That's no good. What if `someFunction` is a +critical path where we want to use a fast dispatched worker if possible, but +still have valid results to use if dispatching fails? Well, we can fall back on +the `vtkDataArray` API and do things the slow way in that case. When a +dispatcher is given an unsupported array, Execute() returns false, so let's +just add a backup implementation: + +~~~{.cpp} +// Stores the tuple/component coordinates of the maximum value: +struct FindMax +{ /* As before... */ }; + +void someFunction(vtkDataArray *array) +{ + FindMax maxWorker; + if (!vtkArrayDispatch::Dispatch::Execute(array, maxWorker)) + { + // Reimplement FindMax::operator(), but use the vtkDataArray API's + // "virtual double GetComponent()" instead of the more efficient + // "ValueType GetTypedComponent()" from vtkGenericDataArray. + } +} +~~~ + +Ok, that works. But ugh...why write the same algorithm twice? That's extra +debugging, extra testing, extra maintenance burden, and just plain not fun. + +Enter `vtkDataArrayAccessor`. This utility template does a very simple, yet +useful, job. It provides component and tuple based `Get` and `Set` methods that +will call the corresponding method on the array using either the `vtkDataArray` +or `vtkGenericDataArray` API, depending on the class's template parameter. It +also defines an `APIType`, which can be used to allocate temporaries, etc. This +type is double for `vtkDataArray`s and `vtkGenericDataArray::ValueType` for +`vtkGenericDataArray`s. + +Another nice benefit is that `vtkDataArrayAccessor` has a more compact API. The +only defined methods are Get and Set, and they're overloaded to work on either +tuples or components (though component access is encouraged as it is much, much +more efficient). Note that all non-element access operations (such as +`GetNumberOfTuples`) should still be called on the array pointer using +`vtkDataArray` API. + +Using `vtkDataArrayAccessor`, we can write a single worker template that works +for both `vtkDataArray` and `vtkGenericDataArray`, without a loss of +performance in the latter case. That worker looks like this: + +~~~{.cpp} +// Better, uses vtkDataArrayAccessor: +struct FindMax +{ + vtkIdType Tuple; // Result + int Component; // Result + + FindMax() : Tuple(-1), Component(-1) {} + + template <typename ArrayT> + void operator()(ArrayT *array) + { + // Create the accessor: + vtkDataArrayAccessor<ArrayT> access(array); + + // Prepare the temporary. We'll use the accessor's APIType instead of + // ArrayT::ValueType, since that is appropriate for the vtkDataArray + // fallback: + typedef typename vtkDataArrayAccessor<ArrayT>::APIType ValueType; + ValueType max = std::numeric_limits<ValueType>::min(); + + // Iterate as before, but use access.Get instead of + // array->GetTypedComponent. GetTypedComponent is still used + // when ArrayT is a vtkGenericDataArray, but + // vtkDataArray::GetComponent is now used as a fallback when ArrayT + // is vtkDataArray. + vtkIdType numTuples = array->GetNumberOfTuples(); + int numComps = array->GetNumberOfComponents(); + for (vtkIdType tupleIdx = 0; tupleIdx < numTuples; ++tupleIdx) + { + for (int compIdx = 0; compIdx < numComps; ++compIdx) + { + if (max < access.Get(tupleIdx, compIdx)) + { + max = access.Get(tupleIdx, compIdx); + this->Tuple = tupleIdx; + this->Component = compIdx; + } + } + } + } +}; +~~~ + +Now when we call `operator()` with say, `ArrayT=vtkFloatArray`, we'll get an +optimized, efficient code path. But we can also call this same implementation +with `ArrayT=vtkDataArray` and still get a correct result (assuming that the +`vtkDataArray`'s double API represents the data well enough). + +Using the `vtkDataArray` fallback path is straightforward. At the call site: + +~~~{.cpp} +void someFunction(vtkDataArray *array) +{ + FindMax maxWorker; + if (!vtkArrayDispatch::Dispatch::Execute(array, maxWorker)) + { + maxWorker(array); // Dispatch failed, call vtkDataArray fallback + } + // Do work using maxWorker.Tuple and maxWorker.Component -- now we know + // for sure that they're initialized! +} +~~~ + +Using the above pattern for calling a worker and always going through +`vtkDataArrayAccessor` to `Get`/`Set` array elements ensures that any worker +implementation can be its own fallback path. + +# VTK_ASSUME # {#VTKAD-VTK_ASSUME} + +While performance testing the new array classes, we compared the performance of +a dispatched worker using the `vtkDataArrayAccessor` class to the same +algorithm using raw memory buffers. We managed to achieve the same performance +out of the box for most cases, using both AOS and SOA array implementations. In +fact, with `--ffast-math` optimizations on GCC 4.9, the optimizer is able to +remove all function calls and apply SIMD vectorized instructions in the +dispatched worker, showing that the new array API is thin enough that the +compiler can see the algorithm in terms of memory access. + +But there was one case where performance suffered. If iterating through an AOS +data array with a known number of components using `GetTypedComponent`, the raw +pointer implementation initially outperformed the dispatched array. To +understand why, note that the AOS implementation of `GetTypedComponent` is along +the lines of: + +~~~{.cpp} +ValueType vtkAOSDataArrayTemplate::GetTypedComponent(vtkIdType tuple, + int comp) const +{ + // AOSData is a ValueType* pointing at the base of the array data. + return this->AOSData[tuple * this->NumberOfComponents + comp]; +} +~~~ + +Because `NumberOfComponents` is unknown at compile time, the optimizer cannot +assume anything about the stride of the components in the array. This leads to +missed optimizations for vectorized read/writes and increased complexity in the +instructions used to iterate through the data. + +For such cases where the number of components is, in fact, known at compile +time (due to a calling function performing some validation, for instance), it +is possible to tell the compiler about this fact using `VTK_ASSUME`. + +`VTK_ASSUME` wraps a compiler-specific `__assume` statement, which is used to +pass such optimization hints. Its argument is an expression of some condition +that is guaranteed to always be true. This allows more aggressive optimizations +when used correctly, but be forewarned that if the condition is not met at +runtime, the results are unpredictable and likely catastrophic. + +But if we're writing a filter that only operates on 3D point sets, we know the +number of components in the point array will always be 3. In this case we can +write: + +~~~{.cpp} +VTK_ASSUME(pointsArray->GetNumberOfComponents() == 3); +~~~ + +in the worker function and this instructs the compiler that the array's +internal `NumberOfComponents` variable will always be 3, and thus the stride of +the array is known. Of course, the caller of this worker function should ensure +that this is a 3-component array and fail gracefully if it is not. + +There are many scenarios where `VTK_ASSUME` can offer a serious performance +boost, the case of known tuple size is a common one that's really worth +remembering. + +# vtkArrayDispatch # {#VTKAD-vtkArrayDispatch} + +The dispatchers implemented in the vtkArrayDispatch namespace provide array +dispatching with customizable restrictions on code generation and a simple +syntax that hides the messy details of type resolution and multi-array +dispatch. There are several "flavors" of dispatch available that operate on up +to three arrays simultaneously. + +## Components Of A Dispatch ## {#VTKAD-ComponentsOfADispatch} + +Using the `vtkArrayDispatch` system requires three elements: the array(s), the +worker, and the dispatcher. + +### The Arrays ### {#VTKAD-TheArrays} + +All dispatched arrays must be subclasses of `vtkDataArray`. It is important to +identify as many restrictions as possible. Must every ArrayType be considered +during dispatch, or is the array's ValueType (or even the ArrayType itself) +restricted? If dispatching multiple arrays at once, are they expected to have +the same ValueType? These scenarios are common, and these conditions can be +used to reduce the number of instantiations of the worker template. + +### The Worker ### {#VTKAD-TheWorker} + +The worker is some generic callable. In C++98, a templated functor is a good +choice. In C++14, a generic lambda is a usable option as well. For our +purposes, we'll only consider the functor approach, as C++14 is a long ways off +for core VTK code. + +At a minimum, the worker functor should define `operator()` to make it +callable. This should be a function template with a template parameter for each +array it should handle. For a three array dispatch, it should look something +like this: + +~~~{.cpp} +struct ThreeArrayWorker +{ + template <typename Array1T, typename Array2T, typename Array3T> + void operator()(Array1T *array1, Array2T *array2, Array3T *array3) + { + /* Do stuff... */ + } +}; +~~~ + +At runtime, the dispatcher will call `ThreeWayWorker::operator()` with a set of +`Array1T`, `Array2T`, and `Array3T` that satisfy any dispatch restrictions. + +Workers can be stateful, too, as seen in the `FindMax` worker earlier where the +worker simply identified the component and tuple id of the largest value in the +array. The functor stored them for the caller to use in further analysis: + +~~~{.cpp} +// Example of a stateful dispatch functor: +struct FindMax +{ + // Functor state, holds results that are accessible to the caller: + vtkIdType Tuple; + int Component; + + // Set initial values: + FindMax() : Tuple(-1), Component(-1) {} + + // Template method to set Tuple and Component ivars: + template <typename ArrayT> + void operator()(ArrayT *array) + { + /* Do stuff... */ + } +}; +~~~ + +### The Dispatcher ### {#VTKAD-TheDispatcher} + +The dispatcher is the workhorse of the system. It is responsible for applying +restrictions, resolving array types, and generating the requested template +instantiations. It has responsibilities both at run-time and compile-time. + +During compilation, the dispatcher will identify the valid combinations of +arrays that can be used according to the restrictions. This is done by starting +with a typelist of arrays, either supplied as a template parameter or by +defaulting to `vtkArrayDispatch::Arrays`, and filtering them by ValueType if +needed. For multi-array dispatches, additional restrictions may apply, such as +forcing the second and third arrays to have the same ValueType as the first. It +must then generate the required code for the dispatch -- that is, the templated +worker implementation must be instantiated for each valid combination of +arrays. + +At runtime, it tests each of the dispatched arrays to see if they match one of +the generated code paths. Runtime type resolution is carried out using +`vtkArrayDownCast` to get the best performance available for the arrays of +interest. If it finds a match, it calls the worker's `operator()` method with +the properly typed arrays. If no match is found, it returns `false` without +executing the worker. + +## Restrictions: Why They Matter ## {#VTKAD-RestrictionsWhyTheyMatter} + +We've made several mentions of using restrictions to reduce the number of +template instantiations during a dispatch operation. You may be wondering if it +really matters so much. Let's consider some numbers. + +VTK is configured to use 13 ValueTypes for numeric data. These are the standard +numeric types `float`, `int`, `unsigned char`, etc. By default, VTK will define +`vtkArrayDispatch::Arrays` to use all 13 types with `vtkAOSDataArrayTemplate` +for the standard set of dispatchable arrays. If enabled during compilation, the +SOA data arrays are added to this list for a total of 26 arrays. + +Using these 26 arrays in a single, unrestricted dispatch will result in 26 +instantiations of the worker template. A double dispatch will generate 676 +workers. A triple dispatch with no restrictions creates a whopping 17,576 +functions to handle the possible combinations of arrays. That's a _lot_ of +instructions to pack into the final binary object. + +Applying some simple restrictions can reduce this immensely. Say we know that +the arrays will only contain `float`s or `double`s. This would reduce the +single dispatch to 4 instantiations, the double dispatch to 16, and the triple +to 64. We've just reduced the generated code size significantly. We could even +apply such a restriction to just create some 'fast-paths' and let the integral +types fallback to using the `vtkDataArray` API by using +`vtkDataArrayAccessor`s. Dispatch restriction is a powerful tool for reducing +the compiled size of a binary object. + +Another common restriction is that all arrays in a multi-array dispatch have +the same ValueType, even if that ValueType is not known at compile time. By +specifying this restriction, a double dispatch on all 26 AOS/SOA arrays will +only produce 52 worker instantiations, down from 676. The triple dispatch drops +to 104 instantiations from 17,576. + +Always apply restrictions when they are known, especially for multi-array +dispatches. The savings are worth it. + +## Types of Dispatchers ## {#VTKAD-TypesOfDispatchers} + +Now that we've discussed the components of a dispatch operation, what the +dispatchers do, and the importance of restricting dispatches, let's take a look +at the types of dispatchers available. + +--- + +### vtkArrayDispatch::Dispatch ### {#VTKAD-Dispatch} + +This family of dispatchers take no parameters and perform an unrestricted +dispatch over all arrays in `vtkArrayDispatch::Arrays`. + +__Variations__: +* `vtkArrayDispatch::Dispatch`: Single dispatch. +* `vtkArrayDispatch::Dispatch2`: Double dispatch. +* `vtkArrayDispatch::Dispatch3`: Triple dispatch. + +__Arrays considered__: All arrays in `vtkArrayDispatch::Arrays`. + +__Restrictions__: None. + +__Usecase__: Used when no useful information exists that can be used to apply +restrictions. + +__Example Usage__: + +~~~{.cpp} +vtkArrayDispatch::Dispatch::Execute(array, worker); +~~~ + +--- + +### vtkArrayDispatch::DispatchByArray ### {#VTKAD-DispatchByArray} + +This family of dispatchers takes a `vtkTypeList` of explicit array types to use +during dispatching. They should only be used when an array's exact type is +restricted. If dispatching multiple arrays and only one has such type +restrictions, use `vtkArrayDispatch::Arrays` (or a filtered version) for the +unrestricted arrays. + +__Variations__: +* `vtkArrayDispatch::DispatchByArray`: Single dispatch. +* `vtkArrayDispatch::Dispatch2ByArray`: Double dispatch. +* `vtkArrayDispatch::Dispatch3ByArray`: Triple dispatch. + +__Arrays considered__: All arrays explicitly listed in the parameter lists. + +__Restrictions__: Array must be explicitly listed in the dispatcher's type. + +__Usecase__: Used when one or more arrays have known implementations. + +__Example Usage__: + +An example here would be a filter that processes an input array of some +integral type and produces either a `vtkDoubleArray` or a `vtkFloatArray`, +depending on some condition. Since the input array's implementation is unknown +(it comes from outside the filter), we'll rely on a ValueType-filtered version +of `vtkArrayDispatch::Arrays` for its type. However, we know the output array +is either `vtkDoubleArray` or `vtkFloatArray`, so we'll want to be sure to +apply that restriction: + +~~~{.cpp} +// input has an unknown implementation, but an integral ValueType. +vtkDataArray *input = ...; + +// Output is always either vtkFloatArray or vtkDoubleArray: +vtkDataArray *output = someCondition ? vtkFloatArray::New() + : vtkDoubleArray::New(); + +// Define the valid ArrayTypes for input by filtering +// vtkArrayDispatch::Arrays to remove non-integral types: +typedef typename vtkArrayDispatch::FilterArraysByValueType + < + vtkArrayDispatch::Arrays, + vtkArrayDispatch::Integrals + >::Result InputTypes; + +// For output, create a new vtkTypeList with the only two possibilities: +typedef vtkTypeList_Create_2(vtkFloatArray, vtkDoubleArray) OutputTypes; + +// Typedef the dispatch to a more manageable name: +typedef vtkArrayDispatch::Dispatch2ByArray + < + InputTypes, + OutputTypes + > MyDispatch; + +// Execute the dispatch: +MyDispatch::Execute(input, output, someWorker); +~~~ + +--- + +### vtkArrayDispatch::DispatchByValueType ### {#VTKAD-DispatchByValueType} + +This family of dispatchers takes a vtkTypeList of ValueTypes for each array and +restricts dispatch to only arrays in vtkArrayDispatch::Arrays that have one of +the specified value types. + +__Variations__: +* `vtkArrayDispatch::DispatchByValueType`: Single dispatch. +* `vtkArrayDispatch::Dispatch2ByValueType`: Double dispatch. +* `vtkArrayDispatch::Dispatch3ByValueType`: Triple dispatch. + +__Arrays considered__: All arrays in `vtkArrayDispatch::Arrays` that meet the +ValueType requirements. + +__Restrictions__: Arrays that do not satisfy the ValueType requirements are +eliminated. + +__Usecase__: Used when one or more of the dispatched arrays has an unknown +implementation, but a known (or restricted) ValueType. + +__Example Usage__: + +Here we'll consider a filter that processes three arrays. The first is a +complete unknown. The second is known to hold `unsigned char`, but we don't +know the implementation. The third holds either `double`s or `float`s, but its +implementation is also unknown. + +~~~{.cpp} +// Complete unknown: +vtkDataArray *array1 = ...; +// Some array holding unsigned chars: +vtkDataArray *array2 = ...; +// Some array holding either floats or doubles: +vtkDataArray *array3 = ...; + +// Typedef the dispatch to a more manageable name: +typedef vtkArrayDispatch::Dispatch3ByValueType + < + vtkArrayDispatch::AllTypes, + vtkTypeList_Create_1(unsigned char), + vtkArrayDispatch::Reals + > MyDispatch; + +// Execute the dispatch: +MyDispatch::Execute(array1, array2, array3, someWorker); +~~~ + +--- + +### vtkArrayDispatch::DispatchByArrayWithSameValueType ### {#VTKAD-DispatchByArrayWithSameValueType} + +This family of dispatchers takes a `vtkTypeList` of ArrayTypes for each array +and restricts dispatch to only consider arrays from those typelists, with the +added requirement that all dispatched arrays share a ValueType. + +__Variations__: +* `vtkArrayDispatch::Dispatch2ByArrayWithSameValueType`: Double dispatch. +* `vtkArrayDispatch::Dispatch3ByArrayWithSameValueType`: Triple dispatch. + +__Arrays considered__: All arrays in the explicit typelists that meet the +ValueType requirements. + +__Restrictions__: Combinations of arrays with differing ValueTypes are +eliminated. + +__Usecase__: When one or more arrays are known to belong to a restricted set of +ArrayTypes, and all arrays are known to share the same ValueType, regardless of +implementation. + +__Example Usage__: + +Let's consider a double array dispatch, with `array1` known to be one of four +common array types (AOS `float`, `double`, `int`, and `vtkIdType` arrays), and +the other is a complete unknown, although we know that it holds the same +ValueType as `array1`. + +~~~{.cpp} +// AOS float, double, int, or vtkIdType array: +vtkDataArray *array1 = ...; +// Unknown implementation, but the ValueType matches array1: +vtkDataArray *array2 = ...; + +// array1's possible types: +typedef vtkTypeList_Create_4(vtkFloatArray, vtkDoubleArray, + vtkIntArray, vtkIdTypeArray) Array1Types; + +// array2's possible types: +typedef typename vtkArrayDispatch::FilterArraysByValueType + < + vtkArrayDispatch::Arrays, + vtkTypeList_Create_4(float, double, int, vtkIdType) + > Array2Types; + +// Typedef the dispatch to a more manageable name: +typedef vtkArrayDispatch::Dispatch2ByArrayWithSameValueType + < + Array1Types, + Array2Types + > MyDispatch; + +// Execute the dispatch: +MyDispatch::Execute(array1, array2, someWorker); +~~~ + +--- + +### vtkArrayDispatch::DispatchBySameValueType ### {#VTKAD-DispatchBySameValueType} + +This family of dispatchers takes a single `vtkTypeList` of ValueType and +restricts dispatch to only consider arrays from `vtkArrayDispatch::Arrays` with +those ValueTypes, with the added requirement that all dispatched arrays share a +ValueType. + +__Variations__: +* `vtkArrayDispatch::Dispatch2BySameValueType`: Double dispatch. +* `vtkArrayDispatch::Dispatch3BySameValueType`: Triple dispatch. +* `vtkArrayDispatch::Dispatch2SameValueType`: Double dispatch using +`vtkArrayDispatch::AllTypes`. +* `vtkArrayDispatch::Dispatch3SameValueType`: Triple dispatch using +`vtkArrayDispatch::AllTypes`. + +__Arrays considered__: All arrays in `vtkArrayDispatch::Arrays` that meet the +ValueType requirements. + +__Restrictions__: Combinations of arrays with differing ValueTypes are +eliminated. + +__Usecase__: When one or more arrays are known to belong to a restricted set of +ValueTypes, and all arrays are known to share the same ValueType, regardless of +implementation. + +__Example Usage__: + +Let's consider a double array dispatch, with `array1` known to be one of four +common ValueTypes (`float`, `double`, `int`, and `vtkIdType` arrays), and +`array2` known to have the same ValueType as `array1`. + +~~~{.cpp} +// Some float, double, int, or vtkIdType array: +vtkDataArray *array1 = ...; +// Unknown, but the ValueType matches array1: +vtkDataArray *array2 = ...; + +// The allowed ValueTypes: +typedef vtkTypeList_Create_4(float, double, int, vtkIdType) ValidValueTypes; + +// Typedef the dispatch to a more manageable name: +typedef vtkArrayDispatch::Dispatch2BySameValueType + < + ValidValueTypes + > MyDispatch; + +// Execute the dispatch: +MyDispatch::Execute(array1, array2, someWorker); +~~~ + +--- + +# Advanced Usage # {#VTKAD-AdvancedUsage} + +## Accessing Memory Buffers ## {#VTKAD-AccessingMemoryBuffers} + +Despite the thin `vtkGenericDataArray` API's nice feature that compilers can +optimize memory accesses, sometimes there are still legitimate reasons to +access the underlying memory buffer. This can still be done safely by providing +overloads to your worker's `operator()` method. For instance, +`vtkDataArray::DeepCopy` uses a generic implementation when mixed array +implementations are used, but has optimized overloads for copying between +arrays with the same ValueType and implementation. The worker for this dispatch +is shown below as an example: + +~~~{.cpp} +// Copy tuples from src to dest: +struct DeepCopyWorker +{ + // AoS --> AoS same-type specialization: + template <typename ValueType> + void operator()(vtkAOSDataArrayTemplate<ValueType> *src, + vtkAOSDataArrayTemplate<ValueType> *dst) + { + std::copy(src->Begin(), src->End(), dst->Begin()); + } + + // SoA --> SoA same-type specialization: + template <typename ValueType> + void operator()(vtkSOADataArrayTemplate<ValueType> *src, + vtkSOADataArrayTemplate<ValueType> *dst) + { + vtkIdType numTuples = src->GetNumberOfTuples(); + for (int comp; comp < src->GetNumberOfComponents(); ++comp) + { + ValueType *srcBegin = src->GetComponentArrayPointer(comp); + ValueType *srcEnd = srcBegin + numTuples; + ValueType *dstBegin = dst->GetComponentArrayPointer(comp); + + std::copy(srcBegin, srcEnd, dstBegin); + } + } + + // Generic implementation: + template <typename Array1T, typename Array2T> + void operator()(Array1T *src, Array2T *dst) + { + vtkDataArrayAccessor<Array1T> s(src); + vtkDataArrayAccessor<Array2T> d(dst); + + typedef typename vtkDataArrayAccessor<Array2T>::APIType DestType; + + vtkIdType tuples = src->GetNumberOfTuples(); + int comps = src->GetNumberOfComponents(); + + for (vtkIdType t = 0; t < tuples; ++t) + { + for (int c = 0; c < comps; ++c) + { + d.Set(t, c, static_cast<DestType>(s.Get(t, c))); + } + } + } +}; +~~~ + +# Putting It All Together # {#VTKAD-PuttingItAllTogether} + +Now that we've explored the new tools introduced with VTK 7.1 that allow +efficient, implementation agnostic array access, let's take another look at the +`calcMagnitude` example from before and identify the key features of the +implementation: + +~~~{.cpp} +// Modern implementation of calcMagnitude using new concepts in VTK 7.1: +struct CalcMagnitudeWorker +{ + template <typename VectorArray, typename MagnitudeArray> + void operator()(VectorArray *vectors, MagnitudeArray *magnitude) + { + VTK_ASSUME(vectors->GetNumberOfComponents() == 3); + VTK_ASSUME(magnitude->GetNumberOfComponents() == 1); + + vtkDataArrayAccessor<VectorArray> v(vectors); + vtkDataArrayAccessor<MagnitudeArray> m(magnitude); + + vtkIdType numVectors = vectors->GetNumberOfTuples(); + for (vtkIdType tupleIdx = 0; tupleIdx < numVectors; ++tupleIdx) + { + m.Set(tupleIdx, 0, std::sqrt(v.Get(tupleIdx, 0) * v.Get(tupleIdx, 0) + + v.Get(tupleIdx, 1) * v.Get(tupleIdx, 1) + + v.Get(tupleIdx, 2) * v.Get(tupleIdx, 2))); + } + } +}; + +void calcMagnitude(vtkDataArray *vectors, vtkDataArray *magnitude) +{ + CalcMagnitudeWorker worker; + typedef vtkArrayDispatch::Dispatch2ByValueType + < + vtkArrayDispatch::AllTypes, + vtkArrayDispatch::Reals + > Dispatcher; + + if (!Dispatcher::Execute(vectors, magnitude, worker)) + { + worker(vectors, magnitude); // vtkDataArray fallback + } +} +~~~ + +This implementation: + +* Uses dispatch restrictions to reduce the number of instantiated templated +worker functions. + * Assuming 26 types are in `vtkArrayDispatch::Arrays` (13 AOS + 13 SOA). + * The first array is unrestricted. All 26 array types are considered. + * The second array is restricted to `float` or `double` ValueTypes, which + translates to 4 array types (one each, SOA and AOS). + * 26 * 4 = 104 possible combinations exist. We've eliminated 26 * 22 = 572 + combinations that an unrestricted double-dispatch would have generated (it + would create 676 instantiations). +* The calculation is still carried out at `double` precision when the ValueType +restrictions are not met. + * Just because we don't want those other 572 cases to have special code + generated doesn't necessarily mean that we wouldn't want them to run. + * Thanks to `vtkDataArrayAccessor`, we have a fallback implementation that + reuses our templated worker code. + * In this case, the dispatch is really just a fast-path implementation for + floating point output types. +* The performance should be identical to iterating through raw memory buffers. + * The `vtkGenericDataArray` API is transparent to the compiler. The + specialized instantiations of `operator()` can be heavily optimized since the + memory access patterns are known and well-defined. + * Using `VTK_ASSUME` tells the compiler that the arrays have known strides, + allowing further compile-time optimizations. + +Hopefully this has convinced you that the `vtkArrayDispatch` and related tools +are worth using to create flexible, efficient, typesafe implementations for +your work with VTK. Please direct any questions you may have on the subject to +the VTK mailing lists. diff --git a/Documentation/Doxygen/ChangesVTK-7-1.md b/Documentation/Doxygen/ChangesVTK-7-1.md index f387d15b791..7e3ce76324d 100644 --- a/Documentation/Doxygen/ChangesVTK-7-1.md +++ b/Documentation/Doxygen/ChangesVTK-7-1.md @@ -132,3 +132,27 @@ vtkSMPTools The following back-ends have been removed: + Simple: This is not a production level backend and was only used for debugging purposes. + Kaapi: This backend is no longer maintained. + +vtkDataArray Refactor, vtkArrayDispatch and Related Tools +--------------------------------------------------------- + +The `vtkDataArrayTemplate` template class has been replaced by +`vtkAOSDataArrayTemplate` to distinguish it from the new +`vtkSOADataArrayTemplate`. The former uses Array-Of-Structs component ordering +while the latter uses Struct-Of-Arrays component ordering. These both derive +from the new `vtkGenericDataArray` template class and are an initial +implementation of native support for alternate memory layouts in VTK. + +To facilitate working with these arrays efficiently, several new tools have +been added in this release. They are detailed \ref VTK-7-1-ArrayDispatch "here". + +As part of the refactoring effort, several `vtkDataArrayTemplate` methods were +deprecated and replaced with new, const-correct methods with more meaningful +names. + +The old and new method names are listed below: + ++ `GetTupleValue` is now `GetTypedTuple` ++ `SetTupleValue` is now `SetTypedTuple` ++ `InsertTupleValue` is now `InsertTypedTuple` ++ `InsertNextTupleValue` is now `InsertNextTypedTuple` -- GitLab