/*****************************************************************************
*
* Copyright (c) 2000 - 2010, Lawrence Livermore National Security, LLC
* Produced at the Lawrence Livermore National Laboratory
* LLNL-CODE-400124
* All rights reserved.
*
* This file is  part of VisIt. For  details, see https://visit.llnl.gov/.  The
* full copyright notice is contained in the file COPYRIGHT located at the root
* of the VisIt distribution or at http://www.llnl.gov/visit/copyright.html.
*
* Redistribution  and  use  in  source  and  binary  forms,  with  or  without
* modification, are permitted provided that the following conditions are met:
*
*  - Redistributions of  source code must  retain the above  copyright notice,
*    this list of conditions and the disclaimer below.
*  - Redistributions in binary form must reproduce the above copyright notice,
*    this  list of  conditions  and  the  disclaimer (as noted below)  in  the
*    documentation and/or other materials provided with the distribution.
*  - Neither the name of  the LLNS/LLNL nor the names of  its contributors may
*    be used to endorse or promote products derived from this software without
*    specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT  HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR  IMPLIED WARRANTIES, INCLUDING,  BUT NOT  LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS FOR A PARTICULAR  PURPOSE
* ARE  DISCLAIMED. IN  NO EVENT  SHALL LAWRENCE  LIVERMORE NATIONAL  SECURITY,
* LLC, THE  U.S.  DEPARTMENT OF  ENERGY  OR  CONTRIBUTORS BE  LIABLE  FOR  ANY
* DIRECT,  INDIRECT,   INCIDENTAL,   SPECIAL,   EXEMPLARY,  OR   CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT  LIMITED TO, PROCUREMENT OF  SUBSTITUTE GOODS OR
* SERVICES; LOSS OF  USE, DATA, OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER
* CAUSED  AND  ON  ANY  THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT
* LIABILITY, OR TORT  (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY  WAY
* OUT OF THE  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
*****************************************************************************/

// ************************************************************************* //
//                              avtMasterSlaveSLAlgorithm.h                  //
// ************************************************************************* //

#ifndef AVT_MASTER_SLAVE_SL_ALGORITHM_H
#define AVT_MASTER_SLAVE_SL_ALGORITHM_H

#include "avtParSLAlgorithm.h"

#ifdef PARALLEL

class SlaveInfo;

// ****************************************************************************
// Class: avtMasterSlaveSLAlgorithm
//
// Purpose:
//    Abstract base class for master-slave algorithm.
//
// Programmer: Dave Pugmire
// Creation:   Mon Jan 26 13:25:58 EST 2009
//
// Modifications:
//
//   Dave Pugmire, Tue Mar 10 12:41:11 EDT 2009
//   Generalized domain to include domain/time. Pathline cleanup.
//
//   Dave Pugmire, Wed Mar 18 17:17:40 EDT 2009
//   Allow masters to share work loads.
//
//   Dave Pugmire, Mon Mar 23 12:48:12 EDT 2009
//   Change how timings are reported/calculated.
//
//   Dave Pugmire, Fri Sep 25 15:35:32 EDT 2009
//   New counters.
//
//   Hank Childs, Fri Jun  4 19:58:30 CDT 2010
//   Use avtStreamlines, not avtStreamlineWrappers.
//
// ****************************************************************************

class avtMasterSlaveSLAlgorithm : public avtParSLAlgorithm
{
  public:
    avtMasterSlaveSLAlgorithm(avtStreamlineFilter *slFilter,
                              int maxCount);
    virtual ~avtMasterSlaveSLAlgorithm();
    
87
    virtual void              Initialize(std::vector<avtStreamline *> &);
88
    virtual const char*       AlgoName() const {return "MasterSlave";}
89 90
    
    virtual void              ResetStreamlinesForContinueExecute();
91
    virtual void              AddStreamlines(std::vector<avtStreamline*> &sls);
92 93 94 95 96 97 98 99

    static avtMasterSlaveSLAlgorithm* Create(avtStreamlineFilter *slFilter,
                                             int maxCount,
                                             int rank,
                                             int nProcs,
                                             int workGroupSz);

  protected:
pugmire's avatar
pugmire committed
100
    int                        maxCnt, case4AThreshold;
101 102 103 104
    int                        NUM_DOMAINS;
    int                        DomToIdx(const DomainType &dom) const
    {
        int n = dom.domain/numTimeSteps + dom.timeStep;
105 106
        //debug5<<"numTS= "<<numTimeSteps<<endl;
        //debug5<<"dom "<<dom<<" ==> "<<n<<endl;
107 108 109 110 111 112
        return n;
    }

    DomainType                 IdxToDom(const int &num) const
    {
        DomainType d(num/numTimeSteps + num%numTimeSteps);
113
        //debug5<<"idx "<<num<<" ==> "<<d<<endl;
114 115
        return d;
    }
116 117 118 119
    
    int                        sleepMicroSec;
    void                       Sleep();
    
120 121 122 123
    static int                 MSG_STATUS, MSG_DONE, MSG_SEND_SL,
                               MSG_LOAD_DOMAIN, MSG_SEND_SL_HINT,
                               MSG_FORCE_SEND_STATUS, MSG_MASTER_STATUS,
                               MSG_OFFLOAD_SL;
124 125 126

    //Statistics and coutners.
    int                       latencyTimer;
pugmire's avatar
pugmire committed
127
    std::vector<double>       latencyHistory;
128
    SLStatistics              SleepTime, LatencyTime, MaxLatencyTime;
pugmire's avatar
pugmire committed
129
    SLStatistics              SleepCnt, LatencySavingCnt, OffloadCnt;
130 131
    virtual void              CompileTimingStatistics();
    virtual void              CompileCounterStatistics();
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
    virtual void              CalculateExtraTime();
    virtual void              ReportTimings(ostream &os, bool totals);
    virtual void              ReportCounters(ostream &os, bool totals);
};


// ****************************************************************************
// Class: avtMasterSLAlgorithm
//
// Purpose:
//    Master portion of the master-slave algorithm.
//
// Programmer: Dave Pugmire
// Creation:   Mon Jan 26 13:25:58 EST 2009
//
// Modifications:
//
//  Dave Pugmire, Sat Mar 28 10:04:01 EDT 2009
//  Report case counter information. Temporary fix for a sporadic bug where
//  it looks like messages are not being delivered to the master. Master will
//  detect when slave set is done and mark done. Modify logic in how masters
//  make decisions. Add domainOffloading (case 5).
//
//   Dave Pugmire, Thu Sep 24 13:52:59 EDT 2009
//   Change Execute to RunAlgorithm.
//
// ****************************************************************************

class avtMasterSLAlgorithm : public avtMasterSlaveSLAlgorithm
{
  public:
    avtMasterSLAlgorithm(avtStreamlineFilter *slFilter,
                         int maxCount,
165
                         int workGrpSz,
166 167 168
                         std::vector<int> &slaves,
                         int master,
                         std::vector<int> &masters);
169 170 171
    virtual ~avtMasterSLAlgorithm();

    virtual const char*       AlgoName() const {return "MasterSlave";}
172
    virtual void              Initialize(std::vector<avtStreamline *> &);
173 174

  protected:
pugmire's avatar
pugmire committed
175
    virtual void              RunAlgorithm();
176 177 178 179 180 181 182 183 184 185 186 187
    virtual void              ProcessMessages();
    virtual void              PostLoopProcessing();    
    std::vector<int>          status, prevStatus;
    virtual void              UpdateStatus();
    virtual void              SendStatus(bool forceSend=false);
    virtual void              ProcessSlaveUpdate(std::vector<int> &);
    virtual void              ProcessMasterUpdate(std::vector<int> &);
    virtual void              ProcessOffloadSL(std::vector<int> &);
    virtual void              ProcessNewStreamlines();
    virtual void              ManageWorkgroup();
    virtual void              ManageSlaves();
    virtual void              ManageMasters();
188 189
    virtual void              CompileTimingStatistics();
    virtual void              CompileCounterStatistics();
190
    virtual void              ReportCounters(ostream &os, bool totals);
191

192 193
    int                       workGroupActiveSLs, workGroupSz;
    bool                      done, slaveUpdate, masterUpdate;
pugmire's avatar
pugmire committed
194 195
    int                       case1Cnt, case2Cnt, case3ACnt, case3BCnt, case3CCnt, case4ACnt, case4BCnt,
                              case5ACnt, case5BCnt, case6Cnt;
196 197
    int                       master;
    std::vector<SlaveInfo>    slaveInfo, masterInfo;
198
    std::vector<int>          slDomCnts, domLoaded, slackers;
199
    std::list<avtStreamline *> activeSLs;
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216

    void                      SendAllSlavesMsg(int msg);
    void                      SendAllSlavesMsg(std::vector<int> &msg);
    void                      SendSlaveMsg(int slave, std::vector<int> &msg);
    void                      FindSlackers(int oobFactor=-1,
                                           bool randomize= true,
                                           bool checkJustUpdated=false);
    bool                      UpdateSlaveStatus(std::vector<int> &);
    void                      PrintStatus();

    void                      Case1(int &counter);
    void                      Case2(int &counter);
    void                      Case3(int overloadFactor,
                                    int NDomainFactor,
                                    int &counter );
    void                      Case4(int oobThreshold,
                                    int &counter);
217 218 219
    void                      Case5(int overworkThreshold,
                                    bool domainCheck,
                                    int &counter);
220 221 222 223 224 225 226 227 228 229 230 231
};


// ****************************************************************************
// Class: avtSlaveSLAlgorithm
//
// Purpose:
//    Slave portion of the master-slave algorithm.
//
// Programmer: Dave Pugmire
// Creation:   Mon Jan 26 13:25:58 EST 2009
//
// Modifications:
//
//   Dave Pugmire, Mon Feb 23 13:38:49 EST 2009
//   Add timeout counter for slaves.
//
//   Dave Pugmire, Wed Apr  1 11:21:05 EDT 2009
//   Add HandleLatencyTimer method.
//
//   Dave Pugmire, Thu Sep 24 13:52:59 EDT 2009
//   Change Execute to RunAlgorithm.
//
// ****************************************************************************

class avtSlaveSLAlgorithm : public avtMasterSlaveSLAlgorithm
{
  public:
    avtSlaveSLAlgorithm(avtStreamlineFilter *slFilter,
                        int maxCount,
                        int masterRank);
    virtual ~avtSlaveSLAlgorithm();

253
    virtual void              Initialize(std::vector<avtStreamline *> &);
254 255 256 257
    virtual void              SendStatus(bool forceSend=false);
    virtual void              UpdateStatus();

  protected:
pugmire's avatar
pugmire committed
258 259
    virtual void              RunAlgorithm();

260
    int                       master, numTerminated, timeout;
261 262
    bool                      workToDo;
    std::vector<int>          status, prevStatus;
263
    std::list<avtStreamline *> activeSLs, oobSLs;
264 265

    void                      ProcessMessages(bool &done, bool &newMsg);
266 267
    void                      HandleLatencyTimer(int activeSLCnt,
                                                 bool checkMaxLatency=true);
268 269 270 271 272 273 274 275 276 277 278 279
};


// ****************************************************************************
// Class: SlaveInfo
//
// Purpose:
//    Class to keep track of slave information.
//
// Programmer: Dave Pugmire
// Creation:   Mon Jan 26 13:25:58 EST 2009
//
// Modifications:
//
//   Dave Pugmire, Wed Mar 18 17:17:40 EDT 2009
//   Allow masters to share work loads.
//
// ****************************************************************************
class SlaveInfo
{
  public:
    // r and nDomains are presumably the slave's rank and the number of
    // domains to track; confirm in the implementation file.
    SlaveInfo( int r, int nDomains );
    ~SlaveInfo() {}

    void AddSL(int slDomain, int domCache);
    void LoadDom( int slDomain );
    void RemoveSL( int dom );
    void Update( std::vector<int> &status, bool debug=false );

    // Clears the justUpdated flag; no other member is touched.
    void Reset() { justUpdated = false; }
    void Debug();

    bool justUpdated, initialized;
    int canGive, canAccept, slCount, slLoadedCount, slOOBCount, rank;
    int domLoadedCount;

    // std:: is spelled out so this header does not depend on a
    // using-declaration supplied by another header.
    std::vector<int> domainCnt;
    std::vector<bool> domainLoaded;
    std::vector<int> domainHistory;
};

#endif

#endif