Commit b4e1e73e authored by Kenneth Moreland's avatar Kenneth Moreland
Browse files

Merge branch 'RadixKR'

parents 2e37a636 08fb7c03
......@@ -27,6 +27,7 @@ SET(ICET_SRCS
../strategies/vtree.c
../strategies/bswap.c
../strategies/radixk.c
../strategies/radixkr.c
../strategies/tree.c
../strategies/automatic.c
)
......
......@@ -75,21 +75,31 @@
_depth_format = icetSparseImageGetDepthFormat(INPUT_SPARSE_IMAGE);
_pixel_count = icetSparseImageGetNumPixels(INPUT_SPARSE_IMAGE);
if ( (_color_format != icetImageGetColorFormat(OUTPUT_IMAGE))
|| (_depth_format != icetImageGetDepthFormat(OUTPUT_IMAGE))
if (_color_format != icetImageGetColorFormat(OUTPUT_IMAGE)) {
icetRaiseError("Input/output buffers have different color formats.",
ICET_SANITY_CHECK_FAIL);
}
if (_depth_format != icetImageGetDepthFormat(OUTPUT_IMAGE)) {
icetRaiseError("Input/output buffers have different depth formats.",
ICET_SANITY_CHECK_FAIL);
}
#ifdef PIXEL_COUNT
|| (_pixel_count != PIXEL_COUNT)
if (_pixel_count != PIXEL_COUNT) {
icetRaiseError("Unexpected input pixel count.",
ICET_SANITY_CHECK_FAIL);
}
#else
|| (_pixel_count != icetImageGetNumPixels(OUTPUT_IMAGE))
if (_pixel_count != icetImageGetNumPixels(OUTPUT_IMAGE)) {
icetRaiseError("Unexpected input pixel count.",
ICET_SANITY_CHECK_FAIL);
}
#endif
#ifdef OFFSET
|| (_pixel_count > icetImageGetNumPixels(OUTPUT_IMAGE) - OFFSET)
#endif
)
{
icetRaiseError("Input/output buffers do not agree for decompression.",
if (_pixel_count > icetImageGetNumPixels(OUTPUT_IMAGE) - OFFSET) {
icetRaiseError("Offset pixels outside range of output image.",
ICET_SANITY_CHECK_FAIL);
}
#endif
if (_composite_mode == ICET_COMPOSITE_MODE_Z_BUFFER) {
if (_depth_format == ICET_IMAGE_DEPTH_FLOAT) {
......
......@@ -20,6 +20,10 @@
#include <signal.h>
#ifndef WIN32
#include <unistd.h>
#endif
static IceTEnum currentError = ICET_NO_ERROR;
static IceTEnum currentLevel;
......@@ -125,5 +129,9 @@ void icetDiagnostics(IceTBitField mask)
/* Debugging hook: raises SIGSEGV in the calling process.
 *
 * The section under "#if 0" is compiled out.  When switched on it prints the
 * process id and sleeps for 100 seconds before the signal is raised. */
void icetDebugBreak(void)
{
#if 0
    printf("Waiting for debugger in process %d\n", getpid());
    sleep(100);
#endif
    /* The status returned by raise() is deliberately not examined. */
    (void)raise(SIGSEGV);
}
......@@ -219,6 +219,8 @@ ICET_EXPORT const char *icetGetStrategyName(void);
#define ICET_SINGLE_IMAGE_STRATEGY_BSWAP (IceTEnum)0x7002
#define ICET_SINGLE_IMAGE_STRATEGY_TREE (IceTEnum)0x7003
#define ICET_SINGLE_IMAGE_STRATEGY_RADIXK (IceTEnum)0x7004
#define ICET_SINGLE_IMAGE_STRATEGY_RADIXKR (IceTEnum)0x7005
#define ICET_SINGLE_IMAGE_STRATEGY_BSWAP_FOLDING (IceTEnum)0x7006
ICET_EXPORT void icetSingleImageStrategy(IceTEnum strategy);
......
......@@ -21,8 +21,8 @@ void icetAutomaticCompose(const IceTInt *compose_group,
IceTSizeType *piece_offset)
{
if (group_size > 1) {
icetRaiseDebug("Doing radix-k compose");
icetInvokeSingleImageStrategy(ICET_SINGLE_IMAGE_STRATEGY_RADIXK,
icetRaiseDebug("Doing radix-kr compose");
icetInvokeSingleImageStrategy(ICET_SINGLE_IMAGE_STRATEGY_RADIXKR,
compose_group,
group_size,
image_dest,
......@@ -34,7 +34,7 @@ void icetAutomaticCompose(const IceTInt *compose_group,
*result_image = input_image;
*piece_offset = 0;
} else {
icetRaiseDebug("Clearing pixels");
icetRaiseDebug("Clearing pixels");
icetClearSparseImage(input_image);
*result_image = input_image;
*piece_offset = 0;
......
......@@ -13,14 +13,18 @@
#include <IceTDevDiagnostics.h>
#include <IceTDevImage.h>
#include <string.h>
#define BSWAP_INCOMING_IMAGES_BUFFER ICET_SI_STRATEGY_BUFFER_0
#define BSWAP_OUTGOING_IMAGES_BUFFER ICET_SI_STRATEGY_BUFFER_1
#define BSWAP_SPARE_WORKING_IMAGE_BUFFER ICET_SI_STRATEGY_BUFFER_2
#define BSWAP_IMAGE_ARRAY ICET_SI_STRATEGY_BUFFER_3
#define BSWAP_DUMMY_ARRAY ICET_SI_STRATEGY_BUFFER_4
#define BSWAP_COMPOSE_GROUP_BUFFER ICET_SI_STRATEGY_BUFFER_5
#define BSWAP_SWAP_IMAGES 21
#define BSWAP_TELESCOPE 22
#define BSWAP_FOLD 23
#define BIT_REVERSE(result, x, max_val_plus_one) \
{ \
......@@ -554,7 +558,7 @@ static void bswapComposeNoCombine(const IceTInt *compose_group,
input_image = working_image;
available_image = icetSparseImageNull();
}
/* I am part of the lower group. Do the actual binary swap. */
bswapComposePow2(compose_group,
......@@ -594,7 +598,7 @@ void icetBswapCompose(const IceTInt *compose_group,
IceTSparseImage *result_image,
IceTSizeType *piece_offset)
{
icetRaiseDebug("In bswapCompose");
icetRaiseDebug("In binary-swap compose");
/* Remove warning about unused parameter. Binary swap leaves images evenly
* partitioned, so we have no use of the image_dest parameter. */
......@@ -608,3 +612,141 @@ void icetBswapCompose(const IceTInt *compose_group,
result_image,
piece_offset);
}
/* Binary-swap compose that first "folds" the group down to the size reported
 * by bswapFindPower2(group_size) and then runs bswapComposePow2 on the
 * reduced group.
 *
 * Folding pairs up the leading entries of compose_group, two at a time, once
 * for every process in excess of the reduced size.  The second process of
 * each pair sends its image to the first and drops out; the first composites
 * the received image behind its own and carries on.  Entries after the pairs
 * are copied into the reduced group unchanged.
 *
 * compose_group, group_size: ranks taking part and how many there are.
 * image_dest: ignored.
 * input_image: local image.  When interlacing is active this image is also
 *     used as scratch space.
 * result_image: receives input_image if group_size < 2, the null sparse image
 *     on a process that was folded away, and otherwise whatever
 *     bswapComposePow2 stores.
 * piece_offset: receives 0 in the first two cases above.  Otherwise it is set
 *     by bswapComposePow2 and, when interlacing is active, then replaced by
 *     the value from icetGetInterlaceOffset.
 */
void icetBswapFoldingCompose(const IceTInt *compose_group,
                             IceTInt group_size,
                             IceTInt image_dest,
                             IceTSparseImage input_image,
                             IceTSparseImage *result_image,
                             IceTSizeType *piece_offset)
{
    IceTInt my_rank = icetFindMyRankInGroup(compose_group, group_size);
    IceTInt folded_size = bswapFindPower2(group_size);
    IceTInt num_folds = group_size - folded_size;
    IceTSizeType full_pixel_count = icetSparseImageGetNumPixels(input_image);
    IceTBoolean interlacing;
    IceTSparseImage scratch_image;
    IceTSparseImage current_image;   /* Holds the data composited so far. */
    IceTSparseImage free_image;      /* Target for the next composite. */
    IceTSparseImage leftover_image;  /* Filled in by bswapComposePow2. */
    IceTInt *folded_group;
    IceTInt source_index;            /* Position in compose_group. */
    IceTInt folded_index;            /* Position in folded_group. */
    IceTInt remaining;

    icetRaiseDebug("In binary-swap folding compose");

    (void)image_dest;  /* not used */

    /* Nothing to composite against. */
    if (group_size < 2) {
        *result_image = input_image;
        *piece_offset = 0;
        return;
    }

    /* Interlacing is only turned on for reduced groups larger than 2. */
    interlacing = (folded_size > 2) && icetIsEnabled(ICET_INTERLACE_IMAGES);

    /* One extra image buffer is needed either way.  With interlacing it
     * receives the interlaced copy and the input image becomes the scratch
     * area; without it the roles are the other way around. */
    scratch_image = icetGetStateBufferSparseImage(
                                         BSWAP_SPARE_WORKING_IMAGE_BUFFER,
                                         icetSparseImageGetWidth(input_image),
                                         icetSparseImageGetHeight(input_image));
    if (interlacing) {
        icetSparseImageInterlace(input_image,
                                 folded_size,
                                 BSWAP_DUMMY_ARRAY,
                                 scratch_image);
        current_image = scratch_image;
        free_image = input_image;
    } else {
        current_image = input_image;
        free_image = scratch_image;
    }

    folded_group = icetGetStateBuffer(BSWAP_COMPOSE_GROUP_BUFFER,
                                      sizeof(IceTInt)*folded_size);

    /* Each pass handles one pair (keeper, donor) of adjacent group entries
     * and records the keeper as a member of the reduced group. */
    source_index = 0;
    for (folded_index = 0; folded_index < num_folds; folded_index++) {
        IceTInt keeper_index = source_index;
        IceTInt donor_index = source_index + 1;

        folded_group[folded_index] = compose_group[keeper_index];

        if (my_rank == donor_index) {
            /* Hand my image to the keeper and leave with an empty result. */
            IceTVoid *package_buffer;
            IceTSizeType package_size;

            icetSparseImagePackageForSend(current_image,
                                          &package_buffer, &package_size);
            icetCommSend(package_buffer,
                         package_size,
                         ICET_BYTE,
                         compose_group[keeper_index],
                         BSWAP_FOLD);

            *result_image = icetSparseImageNull();
            *piece_offset = 0;
            return;
        }

        if (my_rank == keeper_index) {
            /* Take in the donor's image and composite it with mine. */
            IceTSizeType incoming_size
                = icetSparseImageBufferSize(full_pixel_count, 1);
            IceTVoid *incoming_buffer
                = icetGetStateBuffer(BSWAP_INCOMING_IMAGES_BUFFER,
                                     incoming_size);
            IceTSparseImage incoming_image;
            IceTSparseImage composited_image;

            icetCommRecv(incoming_buffer,
                         incoming_size,
                         ICET_BYTE,
                         compose_group[donor_index],
                         BSWAP_FOLD);
            incoming_image
                = icetSparseImageUnpackageFromReceive(incoming_buffer);

            icetCompressedCompressedComposite(current_image,
                                              incoming_image,
                                              free_image);

            /* The composite landed in free_image; trade the two roles. */
            composited_image = free_image;
            free_image = current_image;
            current_image = composited_image;
        }

        source_index += 2;
    }

    /* Everything past the pairs moves across one-for-one.  The counts on
     * both sides have to agree or the indexing above went wrong. */
    remaining = group_size - source_index;
    if (remaining != (folded_size - folded_index)) {
        icetRaiseError("Miscounted indices while folding.",
                       ICET_SANITY_CHECK_FAIL);
    }
    memcpy(folded_group + folded_index,
           compose_group + source_index,
           sizeof(IceTInt)*remaining);

    /* Run the binary swap proper on the reduced group. */
    bswapComposePow2(folded_group,
                     folded_size,
                     folded_size,
                     current_image,
                     free_image,
                     result_image,
                     piece_offset,
                     &leftover_image);

    if (interlacing) {
        /* With interlaced input the offset is looked up from the
         * bit-reversed rank in the reduced group. */
        IceTInt global_partition;
        IceTInt folded_rank = icetFindMyRankInGroup(folded_group, folded_size);

        BIT_REVERSE(global_partition, folded_rank, folded_size);
        *piece_offset = icetGetInterlaceOffset(global_partition,
                                               folded_size,
                                               full_pixel_count);
    }
}
......@@ -30,6 +30,8 @@
#include <IceTDevDiagnostics.h>
#include <IceTDevImage.h>
/* #define RADIXK_USE_TELESCOPE */
#define RADIXK_SWAP_IMAGE_TAG_START 2200
#define RADIXK_TELESCOPE_IMAGE_TAG 2300
......@@ -91,6 +93,7 @@ typedef struct radixkPartnerInfoStruct {
} \
}
#ifdef RADIXK_USE_TELESCOPE
/* Finds the largest power of 2 equal to or smaller than x. */
static IceTInt radixkFindPower2(IceTInt x)
{
......@@ -99,6 +102,7 @@ static IceTInt radixkFindPower2(IceTInt x)
pow2 = pow2 >> 1;
return pow2;
}
#endif
static IceTInt radixkFindFloorPow2(IceTInt x)
{
......@@ -784,14 +788,13 @@ static void radixkCompositeIncomingImages(radixkPartnerInfo *partners,
}
}
static void icetRadixkBasicCompose(const IceTInt *compose_group,
static void icetRadixkBasicCompose(const radixkInfo *info,
const IceTInt *compose_group,
IceTInt group_size,
IceTInt total_num_partitions,
IceTSparseImage working_image,
IceTSizeType *piece_offset)
{
radixkInfo info = { NULL, 0 };
IceTSizeType my_offset;
IceTInt current_round;
IceTInt remaining_partitions;
......@@ -812,10 +815,8 @@ static void icetRadixkBasicCompose(const IceTInt *compose_group,
return;
}
info = radixkGetK(group_size, group_rank);
/* num_rounds > 0 is assumed several places throughout this function */
if (info.num_rounds <= 0) {
if (info->num_rounds <= 0) {
icetRaiseError("Radix-k has no rounds?", ICET_SANITY_CHECK_FAIL);
}
......@@ -826,9 +827,9 @@ static void icetRadixkBasicCompose(const IceTInt *compose_group,
my_offset = 0;
remaining_partitions = total_num_partitions;
for (current_round = 0; current_round < info.num_rounds; current_round++) {
for (current_round = 0; current_round < info->num_rounds; current_round++) {
IceTSizeType my_size = icetSparseImageGetNumPixels(working_image);
const radixkRoundInfo *round_info = &info.rounds[current_round];
const radixkRoundInfo *round_info = &info->rounds[current_round];
radixkPartnerInfo *partners = radixkGetPartners(round_info,
remaining_partitions,
compose_group,
......@@ -875,6 +876,8 @@ static void icetRadixkBasicCompose(const IceTInt *compose_group,
return;
}
#ifdef RADIXK_USE_TELESCOPE
static IceTInt icetRadixkTelescopeFindUpperGroupSender(
const IceTInt *my_group,
IceTInt my_group_size,
......@@ -971,9 +974,15 @@ static void icetRadixkTelescopeComposeReceive(const IceTInt *my_group,
{
IceTSparseImage working_image = input_image;
IceTInt upper_sender;
radixkInfo info;
IceTInt my_group_rank;
my_group_rank = icetFindMyRankInGroup(my_group, my_group_size);
info = radixkGetK(my_group_size, my_group_rank);
/* Start with the basic compose of my group. */
icetRadixkBasicCompose(my_group,
icetRadixkBasicCompose(&info,
my_group,
my_group_size,
total_num_partitions,
working_image,
......@@ -1206,7 +1215,7 @@ static void icetRadixkTelescopeCompose(const IceTInt *compose_group,
/* Here is a convenient place to determine the final number of
partitions. */
{
/* Middle argument does not matter. */
/* Group rank does not matter for our purposes. */
radixkInfo info = radixkGetK(main_group_size, 0);
total_num_partitions = radixkGetTotalNumPartitions(&info);
}
......@@ -1280,12 +1289,11 @@ static void icetRadixkTelescopeCompose(const IceTInt *compose_group,
return;
}
info = radixkGetK(main_group_size,
main_group_rank);
info = radixkGetK(main_group_size, main_group_rank);
global_partition = radixkGetFinalPartitionIndex(&info);
*piece_offset = icetGetInterlaceOffset(global_partition,
main_group_size,
total_num_partitions,
original_image_size);
}
......@@ -1293,6 +1301,7 @@ static void icetRadixkTelescopeCompose(const IceTInt *compose_group,
}
void icetRadixkCompose(const IceTInt *compose_group,
IceTInt group_size,
IceTInt image_dest,
......@@ -1308,6 +1317,59 @@ void icetRadixkCompose(const IceTInt *compose_group,
piece_offset);
}
#else
/* Radix-k entry point used when RADIXK_USE_TELESCOPE is not defined.
 *
 * Works out the round information for the local process, optionally
 * interlaces the input image, and hands the work to icetRadixkBasicCompose.
 *
 * compose_group, group_size: ranks taking part and how many there are.
 * image_dest: ignored.
 * input_image: local image to composite.
 * result_image: receives the image that was passed to the basic compose
 *     (the interlaced copy when interlacing is used, else input_image).
 * piece_offset: set by icetRadixkBasicCompose; when interlacing was used and
 *     the result holds at least one pixel it is then replaced by the value
 *     from icetGetInterlaceOffset.
 */
void icetRadixkCompose(const IceTInt *compose_group,
                       IceTInt group_size,
                       IceTInt image_dest,
                       IceTSparseImage input_image,
                       IceTSparseImage *result_image,
                       IceTSizeType *piece_offset)
{
    IceTInt my_rank = icetFindMyRankInGroup(compose_group, group_size);
    radixkInfo round_plan = radixkGetK(group_size, my_rank);
    IceTInt num_partitions = radixkGetTotalNumPartitions(&round_plan);
    IceTBoolean interlace_requested = icetIsEnabled(ICET_INTERLACE_IMAGES);
    IceTSizeType input_pixel_count = icetSparseImageGetNumPixels(input_image);
    IceTBoolean interlacing;
    IceTSparseImage composed_image;

    (void)image_dest;  /* Not used. */

    /* Interlace only when asked to and when there is more than one round. */
    interlacing = interlace_requested && (round_plan.num_rounds > 1);

    if (interlacing) {
        composed_image = icetGetStateBufferSparseImage(
                                         RADIXK_INTERLACED_IMAGE_BUFFER,
                                         icetSparseImageGetWidth(input_image),
                                         icetSparseImageGetHeight(input_image));
        icetSparseImageInterlace(input_image,
                                 num_partitions,
                                 RADIXK_SPLIT_OFFSET_ARRAY_BUFFER,
                                 composed_image);
    } else {
        composed_image = input_image;
    }

    icetRadixkBasicCompose(&round_plan,
                           compose_group,
                           group_size,
                           num_partitions,
                           composed_image,
                           piece_offset);

    *result_image = composed_image;

    if (!interlacing) {
        return;
    }

    /* An interlaced, non-empty piece gets its offset from the interlace
     * lookup rather than from the basic compose. */
    if (0 < icetSparseImageGetNumPixels(composed_image)) {
        IceTInt global_partition = radixkGetFinalPartitionIndex(&round_plan);
        *piece_offset = icetGetInterlaceOffset(global_partition,
                                               num_partitions,
                                               input_pixel_count);
    }
}
#endif
static IceTBoolean radixkTryPartitionLookup(IceTInt group_size)
{
IceTInt *partition_assignments;
......@@ -1422,6 +1484,8 @@ ICET_EXPORT IceTBoolean icetRadixkPartitionLookupUnitTest(void)
return ICET_TRUE;
}
#ifdef RADIXK_USE_TELESCOPE
#define MAIN_GROUP_RANK(idx) (10000 + idx)
#define SUB_GROUP_RANK(idx) (20000 + idx)
static IceTBoolean radixkTryTelescopeSendReceive(IceTInt *main_group,
......@@ -1495,7 +1559,7 @@ static IceTBoolean radixkTryTelescopeSendReceive(IceTInt *main_group,
return ICET_TRUE;
}
ICET_EXPORT IceTBoolean icetRadixTelescopeSendReceiveTest(void)
ICET_EXPORT IceTBoolean icetRadixkTelescopeSendReceiveTest(void)
{
IceTInt main_group_size;
......@@ -1556,3 +1620,13 @@ ICET_EXPORT IceTBoolean icetRadixTelescopeSendReceiveTest(void)
return ICET_TRUE;
}
#else /*!RADIXK_USE_TELESCOPE*/
/* Placeholder for the telescope send/receive unit test, compiled in the
   branch where RADIXK_USE_TELESCOPE is not defined.  It performs no checks
   and always returns ICET_TRUE. */
ICET_EXPORT IceTBoolean icetRadixkTelescopeSendReceiveTest(void)
{
/* Telescope method disabled. */
return ICET_TRUE;
}
#endif /*!RADIXK_USE_TELESCOPE*/
/* -*- c -*- *******************************************************/
/*
* Copyright (C) 2010 Sandia Corporation
* Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
* the U.S. Government retains certain rights in this software.
*
* This source code is released under the New BSD License.
*/
/* The Radix-k algorithm was designed by Tom Peterka at Argonne National
Laboratory.
Copyright (c) University of Chicago
Permission is hereby granted to use, reproduce, prepare derivative works, and
to redistribute to others.
The Radix-k algorithm was ported to IceT by Wesley Kendall from University
of Tennessee at Knoxville.
The derived Radix-kr algorithm was designed by Kenneth Moreland at Sandia
National Laboratories.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include <string.h>
#include <IceT.h>
#include <IceTDevCommunication.h>
#include <IceTDevDiagnostics.h>
#include <IceTDevImage.h>
#define RADIXKR_SWAP_IMAGE_TAG_START 2200
#define RADIXKR_RECEIVE_BUFFER ICET_SI_STRATEGY_BUFFER_0
#define RADIXKR_SEND_BUFFER ICET_SI_STRATEGY_BUFFER_1
#define RADIXKR_SPARE_BUFFER ICET_SI_STRATEGY_BUFFER_2
#define RADIXKR_INTERLACED_IMAGE_BUFFER ICET_SI_STRATEGY_BUFFER_3
#define RADIXKR_PARTITION_INFO_BUFFER ICET_SI_STRATEGY_BUFFER_4
#define RADIXKR_RECEIVE_REQUEST_BUFFER ICET_SI_STRATEGY_BUFFER_5
#define RADIXKR_SEND_REQUEST_BUFFER ICET_SI_STRATEGY_BUFFER_6
#define RADIXKR_FACTORS_ARRAY_BUFFER ICET_SI_STRATEGY_BUFFER_7
#define RADIXKR_SPLIT_OFFSET_ARRAY_BUFFER ICET_SI_STRATEGY_BUFFER_8
#define RADIXKR_SPLIT_IMAGE_ARRAY_BUFFER ICET_SI_STRATEGY_BUFFER_9
/* Information about a single round, including values that are specific to
   the local process (has_image, last_partition, first_rank,
   partition_index). */
typedef struct radixkrRoundInfoStruct {
IceTInt k; /* k value for this round. */
IceTInt r; /* remainder for this round (number of processes dropped). */
IceTInt step; /* Ranks jump by this much in this round. */
IceTInt split_factor; /* Number of new image partitions made from each partition. */
IceTBoolean has_image; /* True if local process collects image data this round. */
IceTBoolean last_partition; /* True if local process is part of the last partition. */
IceTInt first_rank; /* The lowest rank of those participating with this process this round. */
IceTInt partition_index; /* Index of partition at this round (if has_image true). */
} radixkrRoundInfo;
/* The set of rounds for a composite: an array of radixkrRoundInfo entries
   together with its length. */
typedef struct radixkrInfoStruct {
radixkrRoundInfo *rounds; /* Array of per round info. */
IceTInt num_rounds; /* Number of entries in rounds. */
} radixkrInfo;
/* State kept for one partner process: who it is, where its partition sits in
   the image, and the buffers/images used to exchange data with it. */
typedef struct radixkrPartnerInfoStruct {
IceTInt rank; /* Rank of partner. */
IceTSizeType offset; /* Offset of partner's partition in image. */
IceTVoid *receiveBuffer; /* A buffer for receiving data from partner. */
IceTSparseImage sendImage; /* A buffer to hold data being sent to partner */
IceTSparseImage receiveImage; /* Hold for received non-composited image. */
IceTInt compositeLevel; /* Level in compositing tree for round. */
} radixkrPartnerInfo;
/* A group of partners: an array of radixkrPartnerInfo entries together with
   its length. */
typedef struct radixkrPartnerGroupInfoStruct {
radixkrPartnerInfo *partners; /* Array of partners in this group. */
IceTInt num_partners; /* Number of partners in this group. */
} radixkrPartnerGroupInfo;
/* BEGIN_PIVOT_FOR(loop_var, low, pivot, high)...END_PIVOT_FOR() provides a
special looping mechanism that iterates over the numbers pivot, pivot-1,
pivot+1, pivot-2, pivot+2,... until all numbers between low (inclusive) and
high (exclusive) are visited. Any numbers outside [low,high) are skipped.

Usage notes:
- loop_var is assigned by the macro but not declared by it, so the caller
  must declare the variable before BEGIN_PIVOT_FOR.
- Out-of-range values are skipped with a continue statement, so the code
  placed between BEGIN_PIVOT_FOR and END_PIVOT_FOR does not run for them.
- low, pivot, and high each appear several times in the expansion and may
  therefore be evaluated more than once.
- BEGIN_PIVOT_FOR opens two brace levels that END_PIVOT_FOR closes; the two
  macros must always be used as a pair. */
#define BEGIN_PIVOT_FOR(loop_var, low, pivot, high) \
{ \
IceTInt loop_var##_true_iter; \
IceTInt loop_var##_max = 2*( ((pivot) < ((high)+(low))/2) \
? ((high)-(pivot)) : ((pivot)-(low)+1) ); \
for (loop_var##_true_iter = 1; \
loop_var##_true_iter < loop_var##_max; \
loop_var##_true_iter ++) { \
if ((loop_var##_true_iter % 2) == 0) { \
loop_var = (pivot) - loop_var##_true_iter/2; \
if (loop_var < (low)) continue; \
} else { \
loop_var = (pivot) + loop_var##_true_iter/2; \
if ((high) <= loop_var) continue; \
}
#define END_PIVOT_FOR() \
} \
}
/* Returns floor(log2(x)), i.e. the index of the highest set bit of x.
 *
 * x: value to examine.
 * Returns -1 when x is zero or negative (no meaningful logarithm).
 *
 * The count is made by shifting an unsigned copy of x to the right.  Building
 * the comparison value with a signed "1 << lg" instead would shift into the
 * sign bit for large x, and past the width of the type for negative x; both
 * are undefined behavior in C.
 */
static IceTInt radixkrFindFloorLog2(IceTInt x)
{
    IceTUInt remaining;
    IceTInt lg;

    if (x <= 0) {
        return -1;
    }

    /* Every right shift that still leaves a non-zero value accounts for one
     * more power of two contained in x. */
    lg = 0;
    for (remaining = (IceTUInt)x >> 1; remaining != 0; remaining >>= 1) {
        lg++;
    }

    return lg;
}
/* Exchanges the two sparse image handles that image1 and image2 point to. */
static void radixkrSwapImages(IceTSparseImage *image1, IceTSparseImage *image2)
{
    IceTSparseImage held_image2 = *image2;

    *image2 = *image1;
    *image1 = held_image2;
}
/* radixkrGetPartitionIndices
my position in each round forms a num_rounds-dimensional vector
[round 0 pos, round 1 pos, ... round num_rounds-1 pos]
where pos is my position in the group of partners within that round
inputs:
info: holds the number of rounds and k values for each round
group_rank: my rank in composite order (compose_group in icetRadixkrCompose)
outputs:
fills info with step, split_factor, has_image, and partition_index for
each round.
*/
static void radixkrGetPartitionIndices(radixkrInfo info,
IceTInt group_size,
IceTInt group_rank)
{
IceTInt step; /* step size in rank for a lattice direction */
IceTInt total_partitions;
IceTInt current_group_size;
IceTInt current_round;
IceTInt max_image_split;
icetGetIntegerv(ICET_MAX_IMAGE_SPLIT, &max_image_split);