Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Steven Walton
VTK-m
Commits
41894a97
Commit
41894a97
authored
Jul 16, 2019
by
Allison Vacanti
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Unroll reduction loops for non-integral types on OpenMP.
parent
e30cb087
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
81 additions
and
13 deletions
+81
-13
Utilities/Scripts/benchSummary.py
Utilities/Scripts/benchSummary.py
+2
-2
vtkm/cont/openmp/internal/FunctorsOpenMP.h
vtkm/cont/openmp/internal/FunctorsOpenMP.h
+79
-11
No files found.
Utilities/Scripts/benchSummary.py
View file @
41894a97
...
...
@@ -105,7 +105,7 @@ if sortOpt:
keys
=
sorted
(
keys
,
key
=
lambda
k
:
benchmarks
[
k
].
mean
)
print
(
"# Summary: (%s)"
%
filename
)
print
(
"%-9s
\t
%-9s
\t
%-s"
%
(
"Mean"
,
"Stdev"
,
"Benchmark (type)"
))
print
(
"%-9s
\t
%-9s
\t
%-
9s
\t
%-
s"
%
(
"Mean"
,
"Stdev"
,
"Stdev%"
,
"Benchmark (type)"
))
for
key
in
keys
:
data
=
benchmarks
[
key
]
print
(
"%9.6f
\t
%9.6f
\t
%s (%s)"
%
(
data
.
mean
,
data
.
stdDev
,
key
.
name
,
key
.
type
))
print
(
"%9.6f
\t
%9.6f
\t
%
9.6f
\t
%
s (%s)"
%
(
data
.
mean
,
data
.
stdDev
,
data
.
stdDev
/
data
.
mean
*
100.
,
key
.
name
,
key
.
type
))
vtkm/cont/openmp/internal/FunctorsOpenMP.h
View file @
41894a97
...
...
@@ -278,6 +278,23 @@ using OpenMPReductionSupported = std::false_type;
struct
ReduceHelper
{
// Integral-type trait used to select a reduction strategy. The primary
// template simply mirrors std::is_integral; the specializations that follow
// adapt it to see through vecs and pairs.
template <typename T>
struct IsIntegral : std::integral_constant<bool, std::is_integral<T>::value>
{
};
template
<
typename
T
,
vtkm
::
IdComponent
Size
>
struct
IsIntegral
<
vtkm
::
Vec
<
T
,
Size
>>
:
public
std
::
is_integral
<
T
>
{
};
// vtkm::Pair specialization: the Pair is reported as integral only when both
// of its member types satisfy std::is_integral.
template <typename FirstT, typename SecondT>
struct IsIntegral<vtkm::Pair<FirstT, SecondT>>
  : std::integral_constant<bool,
                           std::is_integral<FirstT>::value && std::is_integral<SecondT>::value>
{
};
// Generic implementation:
template
<
typename
PortalT
,
typename
ReturnType
,
typename
Functor
>
static
ReturnType
Execute
(
PortalT
portal
,
ReturnType
init
,
Functor
functorIn
,
std
::
false_type
)
...
...
@@ -309,18 +326,11 @@ struct ReduceHelper
if
(
doParallel
)
{
// Use the first (numThreads*2) values for initializing:
ReturnType
accum
;
accum
=
f
(
data
[
2
*
tid
],
data
[
2
*
tid
+
1
]);
// Assign each thread chunks of the remaining values for local reduction
VTKM_OPENMP_DIRECTIVE
(
for
schedule
(
static
))
for
(
vtkm
::
Id
i
=
numThreads
*
2
;
i
<
numVals
;
i
++
)
{
accum
=
f
(
accum
,
data
[
i
]);
}
// Static dispatch to unroll non-integral types:
const
ReturnType
localResult
=
ReduceHelper
::
DoParallelReduction
<
ReturnType
>
(
data
,
numVals
,
tid
,
numThreads
,
f
,
IsIntegral
<
ReturnType
>
{});
threadData
[
static_cast
<
std
::
size_t
>
(
tid
)]
=
accum
;
threadData
[
static_cast
<
std
::
size_t
>
(
tid
)]
=
localResult
;
}
}
// end parallel
...
...
@@ -344,6 +354,64 @@ struct ReduceHelper
return
init
;
}
// Non-integer reduction: unroll the loop manually.
// This gives faster code for floats and non-trivial types.
//
// Computes one thread's partial result of reducing data[0, numVals) with f.
//
//   data       - random-access iterator/pointer to the values being reduced.
//   numVals    - total number of values in data.
//   tid        - index of the calling thread.
//   numThreads - number of threads participating in the reduction.
//   f          - binary reduction functor.
//
// Returns the calling thread's partial result; the caller is responsible for
// combining the per-thread results.
//
// NOTE(review): the seed reads data[2 * tid] and data[2 * tid + 1], so this
// assumes numVals >= 2 * numThreads and 0 <= tid < numThreads -- confirm the
// caller guarantees this before dispatching here.
// NOTE(review): the work-sharing directive below presumably requires every
// thread of the enclosing parallel region to call this function -- confirm
// against the definition of VTKM_OPENMP_DIRECTIVE.
template <typename ReturnType, typename IterType, typename FunctorType>
static ReturnType DoParallelReduction(IterType data,
                                      vtkm::Id numVals,
                                      int tid,
                                      int numThreads,
                                      FunctorType f,
                                      std::false_type /* isIntegral */)
{
  // Use the first (numThreads*2) values for initializing:
  ReturnType accum = f(data[2 * tid], data[2 * tid + 1]);

  // The unrolled loop starts after the seed values, so the groups of four must
  // be measured from that offset rather than from index 0. Otherwise, when the
  // offset is not a multiple of four, the last group would run past unrollEnd:
  // reading beyond numVals, or revisiting values the mop-up loop also handles.
  const vtkm::Id offset = static_cast<vtkm::Id>(numThreads) * 2;
  const vtkm::Id unrollEnd = offset + ((numVals - offset) / 4) * 4;
  vtkm::Id i = offset;

  VTKM_OPENMP_DIRECTIVE(for schedule(static))
  for (i = offset; i < unrollEnd; i += 4)
  {
    // Combine two independent pairs first, then fold both into the accumulator.
    const auto t1 = f(data[i], data[i + 1]);
    const auto t2 = f(data[i + 2], data[i + 3]);
    accum = f(accum, t1);
    accum = f(accum, t2);
  }

  // Let thread 0 mop up any remaining values (fewer than four):
  if (tid == 0)
  {
    for (i = unrollEnd; i < numVals; ++i)
    {
      accum = f(accum, data[i]);
    }
  }

  return accum;
}
// Integer reduction: no unrolling. Ints vectorize easily and unrolling can
// hurt performance.
template
<
typename
ReturnType
,
typename
IterType
,
typename
FunctorType
>
static
ReturnType
DoParallelReduction
(
IterType
data
,
vtkm
::
Id
numVals
,
int
tid
,
int
numThreads
,
FunctorType
f
,
std
::
true_type
/* isIntegral */
)
{
// Use the first (numThreads*2) values for initializing:
ReturnType
accum
=
f
(
data
[
2
*
tid
],
data
[
2
*
tid
+
1
]);
// Assign each thread chunks of the remaining values for local reduction
VTKM_OPENMP_DIRECTIVE
(
for
schedule
(
static
))
for
(
vtkm
::
Id
i
=
numThreads
*
2
;
i
<
numVals
;
i
++
)
{
accum
=
f
(
accum
,
data
[
i
]);
}
return
accum
;
}
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION
// Specialize for vtkm functors with OpenMP special cases:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment