changa  3.5
 All Classes Files Functions Variables Typedefs Enumerations Friends Macros Groups Pages
ParallelGravity.h
Go to the documentation of this file.
1 
4 #ifndef PARALLELGRAVITY_H
5 #define PARALLELGRAVITY_H
6 
7 #include "config.h"
8 #include <string>
9 #include <map>
10 #include <vector>
11 #include <algorithm>
12 
13 #include "pup_stl.h"
14 #include "ckio.h"
15 
16 #include "Vector3D.h"
17 #include "tree_xdr.h"
18 #include "TipsyFile.h"
19 #include "SFC.h"
20 #include "TreeNode.h"
21 #include "GenericTreeNode.h"
22 #include "Interval.h"
23 #include "parameters.h"
24 #include "param.h"
25 #include "dumpframe.h"
26 #include <liveViz.h>
27 
28 #include "TaggedVector3D.h"
29 
30 #include "codes.h"
31 #include "CacheInterface.h"
32 
33 #ifdef SPCUDA
34 #include "EwaldCUDA.h"
35 #endif
36 
37 #ifdef CUDA
38 #include "cuda_typedef.h"
39 #endif
40 
41 #include "keytype.h"
42 
43 PUPbytes(InDumpFrame);
44 PUPbytes(COOL);
45 PUPbytes(COOLPARAM);
46 
47 #ifdef HPM_COUNTER
48 #include <libhpm.h>
49 #endif
50 
51 #include <map>
52 
53 #define MERGE_REMOTE_REQUESTS_VERBOSE(X) /*CkPrintf x*/
54 
56 inline void CkMustAssert(bool cond, const char *err)
57 {
58  if (!cond) CkAbort("%s", err);
59 }
60 
61 using namespace std;
62 
63 using namespace Tree;
64 
/// Identifiers for the available load balancing strategies.
/// The enumerator values are consecutive, starting from Null == 0.
/// NOTE(review): the `enum LBStrategy {` header was absent from this listing;
/// the name is taken from the PUPbytes(LBStrategy) declaration that follows.
enum LBStrategy {
    Null = 0,
    Multistep,
    Orb3d,
    Multistep_notopo,
    MultistepNode_notopo,
    Orb3d_notopo,
    MultistepOrb,
    HierarchOrb
};
76 PUPbytes(LBStrategy);
77 
78 #ifdef SELECTIVE_TRACING
/// States used by the selective tracing support (SELECTIVE_TRACING builds).
enum TraceState {
    TraceNormal = 0,
    TraceSkip   = 1
};
83 #endif
84 
/// Available domain decomposition methods.
enum DomainsDec {
    SFC_dec          = 0, ///< Space Filling Curve with Morton ordering
    Oct_dec          = 1, ///< Oct tree
    ORB_dec          = 2, ///< Bisect the longest axis, balancing particles
    SFC_peano_dec    = 3, ///< SFC with Peano-Hilbert ordering
    SFC_peano_dec_3D = 4, ///< Joachim Stadel's implementation of P-H ordering
    SFC_peano_dec_2D = 5, ///< 2D version of Peano-Hilbert ordering
    ORB_space_dec    = 6  ///< Bisect space
};
95 
/// Direction of a neighbor: LEFT (0) or RIGHT (1).
enum NborDir {
    LEFT  = 0,
    RIGHT = 1
};
101 PUPbytes(NborDir);
102 
104 const double ddTolerance = 0.1;
105 
106 inline void operator|(PUP::er &p,DomainsDec &d) {
107  int di;
108  if (p.isUnpacking()) {
109  p | di;
110  d = (DomainsDec)di;
111  } else {
112  di = (int)d;
113  p | di;
114  }
115 }
116 
117 #include "GravityParticle.h"
118 
119 class SmoothParams;
120 
123 {
124  public:
125  int64_t nMaxOrderGas;
126  int64_t nMaxOrderDark;
127  int64_t nMaxOrder;
128  void pup(PUP::er& p) {
129  p| nMaxOrderGas;
130  p| nMaxOrderDark;
131  p| nMaxOrder;
132  }
133  };
134 
135 #include "InOutput.h"
136 
137 #include "ParallelGravity.decl.h"
138 
139 extern CProxy_Main mainChare;
140 extern int verbosity;
141 extern bool _cache;
142 extern int _nocache;
143 extern int _cacheLineDepth;
144 extern unsigned int _yieldPeriod;
146 extern double dExtraStore;
147 extern double dMaxBalance;
148 extern double dFracLoadBalance;
149 extern double dGlassDamper;
150 extern int bUseCkLoopPar;
151 extern GenericTrees useTree;
152 extern CProxy_TreePiece treeProxy;
153 #ifdef REDUCTION_HELPER
154 extern CProxy_ReductionHelper reductionHelperProxy;
155 #endif
156 extern CProxy_LvArray lvProxy; // Proxy for the liveViz array
157 extern CProxy_LvArray smoothProxy; // Proxy for smooth reduction
158 extern CProxy_LvArray gravityProxy; // Proxy for gravity reduction
159 extern CProxy_TreePiece streamingProxy;
160 extern CProxy_DataManager dMProxy;
161 extern CProxy_IntraNodeLBManager nodeLBMgrProxy;
162 extern unsigned int numTreePieces;
163 extern unsigned int particlesPerChare;
164 extern int nIOProcessor;
165 
166 extern CProxy_DumpFrameData dfDataProxy;
167 extern CProxy_PETreeMerger peTreeMergerProxy;
168 extern CProxy_CkCacheManager<KeyType> cacheGravPart;
169 extern CProxy_CkCacheManager<KeyType> cacheSmoothPart;
170 extern CProxy_CkCacheManager<KeyType> cacheNode;
171 
173 extern CkGroupID dataManagerID;
174 
175 extern int boundaryEvaluationUE;
176 extern int weightBalanceUE;
177 extern int networkProgressUE;
178 extern int nodeForceUE;
179 extern int partForceUE;
180 
181 extern int tbFlushRequestsUE;
182 extern int prefetchDoneUE;
183 
184 extern int _prefetch;
185 extern int _randChunks;
186 extern int _numChunks;
187 extern unsigned int bucketSize;
188 
189 //jetley
190 extern int localNodesPerReq;
191 extern int remoteNodesPerReq;
192 extern int remoteResumeNodesPerReq;
193 extern int localPartsPerReq;
194 extern int remotePartsPerReq;
195 extern int remoteResumePartsPerReq;
196 
197 extern double largePhaseThreshold;
198 
199 extern cosmoType theta;
200 extern cosmoType thetaMono;
201 
202 extern int numInitDecompBins;
203 extern int octRefineLevel;
204 
/// Message type that declares no members of its own; everything it carries
/// comes from its CMessage_dummyMsg base.
206 class dummyMsg : public CMessage_dummyMsg{
207 public:
208 };
209 
210 #if COSMO_STATS > 0
211 class TreePieceStatistics {
212  u_int64_t nodesOpenedLocal;
213  u_int64_t nodesOpenedRemote;
214  u_int64_t nodeInterLocal;
215  u_int64_t nodeInterRemote;
216  u_int64_t particleInterLocal;
217  u_int64_t particleInterRemote;
218  u_int64_t openCriterionCalls;
219  int nActive;
220 
221  TreePieceStatistics() : nodesOpenedLocal(0), nodesOpenedRemote(0),
222  nodeInterLocal(0), nodeInterRemote(0), particleInterLocal(0),
223  particleInterRemote(0), openCriterionCalls(0), nActive(0) { }
224 
225  public:
226  TreePieceStatistics(u_int64_t nol, u_int64_t nor, u_int64_t occ, u_int64_t nil, u_int64_t nir,
227  u_int64_t pil, u_int64_t pir, int na) :
228  nodesOpenedLocal(nol), nodesOpenedRemote(nor), nodeInterLocal(nil),
229  nodeInterRemote(nir), particleInterLocal(pil), particleInterRemote(pir),
230  openCriterionCalls(occ), nActive(na) { }
231 
232  void printTo(CkOStream &os) {
233  os << " TreePiece: " << nActive << " particles active." << endl;
234  os << " TreePiece: " << nodesOpenedLocal << " local nodes opened, ";
235  os << nodesOpenedRemote << " remote" << endl;
236  os << " TreePiece: " << openCriterionCalls << " num of open criterion calls" << endl;
237  os << " TreePiece: " << nodeInterLocal << " local particle-node interactions, ";
238  os << nodeInterRemote << " remote" << endl;
239  os << " TreePiece: " << particleInterLocal << " local particle-particle interactions, ";
240  os << particleInterRemote << " remote" << endl;
241  os << " TreePiece: "
242  << (particleInterLocal + particleInterRemote)/(double) nActive
243  << " particles, "
244  << (nodeInterLocal + nodeInterRemote)/(double) nActive
245  << " nodes per particle" << endl;
246  }
247 
248  static CkReduction::reducerType sum;
249 
250  static CkReductionMsg *sumFn(int nMsg, CkReductionMsg **msgs) {
251  TreePieceStatistics ret;
252  for (int i=0; i<nMsg; ++i) {
253  CkAssert(msgs[i]->getSize() == sizeof(TreePieceStatistics));
254  TreePieceStatistics *data = (TreePieceStatistics *)msgs[i]->getData();
255  ret.nodesOpenedLocal += data->nodesOpenedLocal;
256  ret.nodesOpenedRemote += data->nodesOpenedRemote;
257  ret.openCriterionCalls += data->openCriterionCalls;
258  ret.nodeInterLocal += data->nodeInterLocal;
259  ret.nodeInterRemote += data->nodeInterRemote;
260  ret.particleInterLocal += data->particleInterLocal;
261  ret.particleInterRemote += data->particleInterRemote;
262  ret.nActive += data->nActive;
263  }
264  return CkReductionMsg::buildNew(sizeof(TreePieceStatistics), &ret);
265  }
266 };
267 #endif
268 
270 class ComputeChunkMsg : public CMessage_ComputeChunkMsg {
271  ComputeChunkMsg() {} // not available
272  public:
273  int chunkNum;
274 
275  ComputeChunkMsg(int i) : chunkNum(i) {
276  }
277 };
278 
284 class ORBSplittersMsg : public CMessage_ORBSplittersMsg{
285 public:
287  int length;
289  double *pos;
291  char *dim;
293  CkCallback cb;
294 
295  ORBSplittersMsg(int len, CkCallback callback): length (len), cb(callback) {}
296 
297 };
298 
300 class ParticleShuffleMsg : public CMessage_ParticleShuffleMsg{
301 public:
302  int nloads;
303  int n;
304  int nSPH;
305  int nStar;
306  double *loads;
307  unsigned int *parts_per_phase;
308  GravityParticle *particles;
309  extraSPHData *pGas;
310  extraStarData *pStar;
311  ParticleShuffleMsg(int nload, int npart, int nsph, int nstar):
312  nloads(nload), n(npart), nSPH(nsph), nStar(nstar) {}
313 };
314 
315 #ifdef PUSH_GRAVITY
316 #include "ckmulticast.h"
317 
318 struct BucketMsg : public CkMcastBaseMsg, public CMessage_BucketMsg {
319  GenericTreeNode *buckets;
320  int numBuckets;
321  ExternalGravityParticle *particles;
322  int numParticles;
323  int whichTreePiece;
324 };
325 #endif
326 
329 {
330  public:
331  int index; /* chare index */
332  int nAddGas;
333  int nDelGas;
334  int nAddDark;
335  int nDelDark;
336  int nAddStar;
337  int nDelStar;
338 
339  void pup(PUP::er& p) {
340  p | index;
341  p | nAddGas;
342  p | nDelGas;
343  p | nAddDark;
344  p | nDelDark;
345  p | nAddStar;
346  p | nDelStar;
347  }
348  };
349 
350 /*
351  * Multistepping routines
352  *
353  * Each major timestep can have MAXSUBSTEPS substeps where MAXSUBSTEPS
354  * is a large power of 2 that can fit in an integer: 1 << MAXRUNG,
355  * with MAXRUNG something like 30.
356  * A given particle is on a "Rung" such that it takes 1 << Rung
357  * substeps per major timestep. That is, its force is updated every
358  * 1 << (MAXRUNG - Rung) smallest substeps.
359  *
360  * Needed routines:
361  * DtToRung(): take an ideal timestep, the major timestep and
362  * determine a rung.
363  */
364 
/// Highest rung; a major timestep is divided into 1 << MAXRUNG substeps.
const int MAXRUNG = 30;
/// Number of smallest substeps in one major timestep.
const int MAXSUBSTEPS = 1 << MAXRUNG;
/// Reciprocal of MAXSUBSTEPS.
const double MAXSUBSTEPS_INV = 1.0 / static_cast<double>(MAXSUBSTEPS);
368 
370 inline int RungToSubsteps(int iRung) {
371  CkAssert(iRung <= MAXRUNG);
372  return 1 << (MAXRUNG - iRung);
373 }
374 
/// Determine the rung for a particle from its ideal timestep.
/// The rung is the smallest r such that 2^r >= ceil(dDelta/dTideal).
///
/// The step count is clamped before conversion to int, because converting an
/// out-of-range or non-finite double to int is undefined behavior.  A ratio
/// too large for an int (including dTideal == 0 and NaN) saturates, which
/// yields rung 31.
/// NOTE(review): 31 is above MAXRUNG (30), so callers that range-check the
/// rung should still see the timestep as too small — confirm at call sites.
///
/// @param dDelta  major (base) timestep.
/// @param dTideal ideal timestep for the particle.
/// @return the rung; 0 when the ideal timestep is at least dDelta.
inline int DtToRung(double dDelta, double dTideal) {
    const double dMaxSteps = 2147483647.0;  // largest 32-bit int value
    const double dSteps = ceil(dDelta/dTideal);

    // One step or fewer (including negative ratios): base rung.
    if (dSteps <= 1.0)
        return 0;

    // NaN fails the comparison and therefore saturates as well.
    int iSteps = (dSteps < dMaxSteps) ? (int) dSteps : 2147483647;

    int iRung = 0;
    iSteps--;
    while(iSteps > 0) {
        iRung++;
        iSteps >>= 1;
    }
    return iRung;
}
388 
391 inline double RungToDt(double dDelta, int iRung) {
392  return dDelta*RungToSubsteps(iRung)*MAXSUBSTEPS_INV;
393 }
394 
396 const int PHASE_FEEDBACK = MAXRUNG + 1;
397 
400 
401 inline double PoverRhoFloorJeans(double dResolveJeans, GravityParticle *p)
402 {
403  /*
404  * Add pressure floor to keep Jeans Mass
405  * resolved. In comparison with Agertz et
406  * al. 2009, dResolveJeans should be 3.0:
407  * P_min = 3*G*max(h,eps)^2*rho^2
408  * Note that G = 1 in our code
409  */
410 #ifdef JEANSSOFTONLY
411  double l2 = p->soft*p->soft;
412 #else
413  double l2 = 0.25*p->fBall*p->fBall;
414 #ifdef JEANSSOFT
415  double e2 = p->soft*p->soft;
416  if (l2 < e2) l2 = e2; /* Jeans scale can't be smaller than softening */
417 #endif
418 #endif
419  return l2*dResolveJeans*p->fDensity;
420 }
421 
423 const double GAMMA_JEANS = 2.0;
424 const double GAMMA_NONCOOL = 5.0/3.0;
425 
426 
/// Main chare class (derives from CBase_Main).  Holds simulation-wide
/// bookkeeping -- particle totals, simulation time, parsed parameters,
/// output and I/O state, timing records -- and declares the methods that
/// set up, advance, and write out a simulation.  Only declarations are
/// visible in this header; see the implementation for their behavior.
436 class Main : public CBase_Main {
437  CkArgMsg *args;
438  std::string basefilename;
440  OutputParams *pOutput;
441  // NChilada file names used to generate the XML description
442  CkVec<std::string> *NCgasNames;
443  CkVec<std::string> *NCdarkNames;
444  CkVec<std::string> *NCstarNames;
446  CkCallback cbIO;
448  Ck::IO::File fIOFile;
449  CProxy_Sorter sorter;
 // Particle totals, overall and per family.
450  int64_t nTotalParticles;
451  int64_t nTotalSPH;
452  int64_t nTotalDark;
453  int64_t nTotalStar;
455  int64_t nSink;
456  int64_t nMaxOrderGas; /* Maximum iOrders */
457  int64_t nMaxOrderDark;
458  int64_t nMaxOrder;
459 
460  double dTime; /* Simulation time */
461  double dTime0;
462  double dEcosmo; /* variables for integrating
463  Layzer-Irvine eq. */
464  double dUOld;
465  double dTimeOld;
466  PRM prm; /* parameter parsing info */
467  Parameters param; /* actual parameters */
468  CkVec<double> vdOutTime; // Desired output times
469  int iOut;
470  /*
471  ** Tracking for frame dumping function
472  */
473  int bDumpFrame;
474  struct DumpFrameContext **df;
475  int bIsRestarting;
477  int bHaveAlpha;
478  int bChkFirst; /* alternate between 0 and 1 for checkpoint */
479  double dSimStartTime; // Start time for entire simulation
480  int iStop; /* indicate we're stopping the
481  simulation early */
482  int64_t nActiveGrav;
483  int64_t nActiveSPH;
484 
485 #ifdef CUDA
 // Double-valued counterparts of the *PerReq globals declared extern above.
 // NOTE(review): how they are used is not visible in this header.
486  double localNodesPerReqDouble;
487  double remoteNodesPerReqDouble;
488  double remoteResumeNodesPerReqDouble;
489  double localPartsPerReqDouble;
490  double remotePartsPerReqDouble;
491  double remoteResumePartsPerReqDouble;
492 #endif
493 
494 #ifdef CHECK_TIME_WITHIN_BIGSTEP
495  double wallTimeStart;
496 #endif
497 
 // One record of timing accumulators: a count plus one double per
 // simulation phase.  clear() resets every field to zero.
500  class timing_fields {
501  public:
502  int count;
503  double tGrav;
504  double tuDot;
505  double tDD;
506  double tLoadB;
507  double tTBuild;
508  double tAdjust;
509  double tEmergAdjust;
510  double tKick;
511  double tDrift;
512  double tCache;
513  public:
515  void clear() {
516  count = 0;
517  tGrav = tuDot = tDD = tLoadB = tTBuild = tAdjust
518  = tEmergAdjust = tKick = tDrift = tCache = 0.0;
519  }
520  };
521 
522  CkVec<timing_fields> timings;
523  void writeTimings(int iStep);
524 
525 #ifdef SELECTIVE_TRACING
 // State for selective tracing; only present in SELECTIVE_TRACING builds.
526  int monitorRung;
527  int monitorStart;
528  int numTraceIterations;
529  int numSkipIterations;
530  int numMaxTrace;
531  int traceIteration;
532  int traceState;
533  bool projectionsOn;
534 
535  void turnProjectionsOn(int activeRung);
536  void turnProjectionsOff();
537 #endif
538 
539 
540 public:
541 
 // Construction: normal startup from command-line arguments, and the
 // migration constructor.
542  Main(CkArgMsg* m);
543  Main(CkMigrateMessage *m);
544 
545  void niceExit();
546  void setupICs();
547  void initialForces();
548  void doSimulation();
549  void restart(CkCheckpointStatusMsg *msg);
550  void waitForGravity(const CkCallback &cb, double startTime,
551  int activeRung);
552  void advanceBigStep(int);
553  void domainDecomp(int iPhase);
554  void loadBalance(int iPhase);
555  void buildTree(int iPhase);
556  void startGravity(const CkCallback& cbGravity, int iActiveRung,
557  double *startTime) ;
558  void externalGravity(int iActiveRung);
559  void updateuDot(int iActiveRung, const double duKick[],
560  const double dStartTime[], int bUpdateState, int bAll);
561  void kick(bool bClosing, int iActiveRung, int nextMaxRung,
562  const CkCallback &cbGravity, double gravStartTime);
563  int adjust(int iKickRung);
564  void rungStats();
565  void countActive(int activeRung);
566  void emergencyAdjust(int iRung);
567  void starCenterOfMass();
568  void calcEnergy(double, double, const char *);
569  void getStartTime();
570  void getOutTimes();
571  int bOutTime();
572  void writeOutput(int iStep) ;
573  void outputBinary(OutputParams& params, int bParaWrite,
574  const CkCallback& cb);
 // Callbacks taking Ck::IO messages or a generic CkMessage.
575  void cbOpen(Ck::IO::FileReadyMsg *msg);
576  void cbIOReady(Ck::IO::SessionReadyMsg *msg);
577  void cbIOComplete(CkMessage *msg);
578  void cbIOClosed(CkMessage *msg);
579  std::string getNCNextOutput(OutputParams& params);
580  void writeNCXML(std::string filename);
581  void NCXMLattrib(ofstream *desc, CkVec<std::string> *names, std::string family);
582  void updateSoft();
583  void growMass(double dTime, double dDelta);
584  void initSph();
585  void initCooling();
586  void initStarLog();
587  int ReadASCII(char *extension, int nDataPerLine, double *dDataOut);
588  void restartGas();
589  void doSph(int activeRung, int bNeedDensity = 1);
590  void AGORAfeedbackPreCheck(double dTime, double dDelta, double dTimeToSF);
591  void FormStars(double dTime, double dDelta);
592  void StellarFeedback(double dTime, double dDelta);
593  void outputBlackHoles(double dTime);
594  void SetSink();
595  void FormSinks(double dTime, double dDelta, int iKickRung);
596  void doSinks(double dTime, double dDelta, int iKickRung);
597  int DumpFrameInit(double dTime, double dStep, int bRestart);
598  void DumpFrame(double dTime, double dStep);
599  int nextMaxRungIncDF(int nextMaxRung);
600  void addDelParticles();
601  void memoryStats();
602  void memoryStatsCache();
603  void pup(PUP::er& p);
604  void liveVizImagePrep(liveVizRequestMsg *msg);
605  void doSIDM(double dTime,double dDelta, int activeRung); /* SIDM */
606 };
607 
608 /* IBM brain damage */
609 #undef hz
/// One Ewald table entry: an h vector (hx, hy, hz) plus two factors
/// (hCfac, hSfac).
struct ewaldTable {
    double hx;
    double hy;
    double hz;
    double hCfac;
    double hSfac;
};
typedef ewaldTable EWT;
615 
616 // jetley
617 class MissRecord;
618 class State;
619 
621 typedef struct OffsetNodeStruct
622 {
623  GenericTreeNode *node;
624  int offsetID;
625 }OffsetNode;
626 
627 #if INTERLIST_VER > 0
628 typedef CkQ<OffsetNode> CheckList;
631 typedef CkVec<OffsetNode> UndecidedList;
633 typedef CkVec<UndecidedList> UndecidedLists;
634 #endif
635 
637 typedef struct particlesInfoR{
638  ExternalGravityParticle* particles;
639  int numParticles;
640  Vector3D<cosmoType> offset;
641 #if defined CHANGA_REFACTOR_PRINT_INTERACTIONS || defined CHANGA_REFACTOR_WALKCHECK_INTERLIST || defined CUDA
642  NodeKey key;
643 #endif
644 #if COSMO_DEBUG > 1
645  GenericTreeNode *nd;
646 #endif
648 
650 typedef struct particlesInfoL{
651  GravityParticle* particles;
652  int numParticles;
653  Vector3D<cosmoType> offset;
654 #if defined CHANGA_REFACTOR_PRINT_INTERACTIONS || defined CHANGA_REFACTOR_WALKCHECK_INTERLIST || defined CUDA
655  NodeKey key;
656 #endif
657 #if COSMO_DEBUG > 1 || defined CUDA
658  GenericTreeNode *nd;
659 #endif
660 } LocalPartInfo;
661 
668 typedef struct LoopParDataStruct {
669  CkVec<GenericTreeNode*> lowNodes;
670  CkVec<int> bucketids;
672  CkVec<int> chunkids;
673  CkVec<CkVec<OffsetNode> > clists;
674  CkVec<CkVec<RemotePartInfo> > rpilists;
675  CkVec<CkVec<LocalPartInfo> > lpilists;
677 } LoopParData;
678 
679 
680 
681 #ifdef CUDA
/// Range occupied by a bucket's active particles in the array filled by
/// TreePiece::getDMParticles(): `start` is the fill index at which the
/// bucket begins and `size` the number of entries written for it.
struct BucketActiveInfo {
    int start;
    int size;
};
686 #endif
687 
688 class SmoothCompute;
689 #include "Compute.h"
690 
691 #if INTERLIST_VER > 0 && defined CUDA
692 template<typename T> class GenericList;
693 #endif
694 
697  TreePiece *clientTreePiece;
698  GenericTreeNode *clientNode;
699 
701  clientTreePiece(NULL),
702  clientNode(NULL)
703  {}
704 
706  clientTreePiece(tp),
707  clientNode(node)
708  {}
709 };
710 
713  GenericTreeNode *targetNode;
714  CkVec<NonLocalMomentsClient> clients;
715 
717  targetNode(NULL)
718  {}
719 
721  targetNode(node)
722  {}
723 
724  void addClient(const NonLocalMomentsClient &cli){
725  clients.push_back(cli);
726  }
727 };
728 
730 class TreePiece : public CBase_TreePiece {
731  // jetley
732  friend class PrefetchCompute;
733  friend class GravityCompute;
734  friend class SmoothCompute;
735  friend class KNearestSmoothCompute;
736  friend class ReSmoothCompute;
737  friend class MarkSmoothCompute;
738  friend class ListCompute;
739  friend class NearNeighborState;
740  friend class ReNearNeighborState;
741  friend class MarkNeighborState;
742  friend class BottomUpTreeWalk;
743 #if INTERLIST_VER > 0 && defined CUDA
744  friend class DataManager;
745  template<typename T> friend class GenericList;
746 #endif
747 
748  friend class RemoteTreeBuilder;
749  friend class LocalTreeBuilder;
750 
752  TreeWalk *sTopDown;
753  TreeWalk *twSmooth;
754 #if INTERLIST_VER > 0
755  TreeWalk *sInterListWalk;
756  // clearable, used for resumed walks
757  State *sInterListStateRemoteResume;
758 #endif
759  Compute *sGravity, *sPrefetch;
760  SmoothCompute *sSmooth;
761 
762  Opt *sLocal, *sRemote, *sPref;
763  Opt *optSmooth;
764 
765  State *sPrefetchState;
767  State *sLocalGravityState, *sRemoteGravityState, *sSmoothState;
768  typedef std::map<KeyType, CkVec<int>* > SmPartRequestType;
769  // buffer of requests for smoothParticles.
770  SmPartRequestType smPartRequests;
771 
772  CkVec<ActiveWalk> activeWalks;
773  int completedActiveWalks; // XXX this should be part of the gravity
774  // walk state.
775  int nCacheAccesses; // keep track of outstanding cache accesses to
776  // know when writebacks complete. XXX this
777  // should be part of the smooth state
778 
780  unsigned int nPrevActiveParts;
781  std::vector<double> savedPhaseLoad;
782  std::vector<unsigned int> savedPhaseParticle;
784  std::vector<double> savedPhaseLoadTmp;
786  std::vector<unsigned int> savedPhaseParticleTmp;
787 
788  int memWithCache, memPostCache; // store memory usage.
789  int nNodeCacheEntries, nPartCacheEntries; // store memory usage.
790 
791 #ifdef PUSH_GRAVITY
792  bool doMerge;
793  bool createdSpanningTree;
794  CProxySection_TreePiece allTreePieceSection;
795  CkVec<GravityParticle> foreignParticles;
796  CkVec<double> foreignParticleAccelerations;
797 
798  map<int,CkSectionInfo> cookieJar;
799 
800  BucketMsg *createBucketMsg();
801  void unpackBuckets(BucketMsg *, GenericTreeNode *&foreignBuckets, int &numForeignBuckets);
802  void calculateForces(GenericTreeNode *foreignBuckets, int numForeignBuckets);
803 
804 #endif
805 
806  public:
807 
808 #ifdef PUSH_GRAVITY
809  void startPushGravity(int am, double myTheta);
810  void recvPushBuckets(BucketMsg *);
811  void recvPushAccelerations(CkReductionMsg *);
812 #endif
813 
814 #if COSMO_PRINT_BK > 1
815  State *getSRemoteGravityState(){ return sRemoteGravityState; }
816  State *getSLocalGravityState(){ return sLocalGravityState; }
817 #endif
818  void memCacheStats(const CkCallback &cb);
819  void addActiveWalk(int iAwi, TreeWalk *tw, Compute *c, Opt *o, State *s);
820 
822  void markWalkDone();
824  void finishWalk();
826  void markSmoothWalkDone();
828  void finishSmoothWalk();
829 
830  int getIndex() {
831  return thisIndex;
832  }
833 
835  void addToNodeInterRemote(int chunk, int howmany){
836  nodeInterRemote[chunk] += howmany;
837  }
838 
840  void addToParticleInterRemote(int chunk, int howmany){
841  particleInterRemote[chunk] += howmany;
842  }
843 
845  void addToNodeInterLocal(int howmany){
846  nodeInterLocal += howmany;
847  }
848 
850  void addToParticleInterLocal(int howmany){
851  particleInterLocal += howmany;
852  }
853 
856  void initiatePrefetch(int chunk);
858  void startRemoteChunk();
859 
862  return myNumParticles;
863  }
864 
866  GravityParticle *getParticles(){return myParticles;}
867 
868 
869 #ifdef CUDA
870  // this variable holds the number of buckets active at
871  // the start of an iteration
872  // it is used to ascertain how many buckets still need to
873  // be processed via the stateReady function with regard
874  // to their local and remote-no-resume walks.
875  // if all numActiveBuckets
876  // have been processed but we still have leftover nodes/particles
877  // in the list of interactions to be sent to the gpu, we flush
878  // the list
879  int numActiveBuckets;
880  int myNumActiveParticles;
881  // First and Last indices of GPU particle
882  int FirstGPUParticleIndex;
883  int LastGPUParticleIndex;
884  int NumberOfGPUParticles;
885  BucketActiveInfo *bucketActiveInfo;
886 
887  int getNumBuckets(){
888  return numBuckets;
889  }
890 
891  void callFreeRemoteChunkMemory(int chunk);
892 
893  int getActiveRung(){ return activeRung; }
894 #ifdef HAPI_INSTRUMENT_WRS
895  int getInstrumentId(){ return instrumentId; }
896 #endif
897  // returns either all particles or only active particles,
898  // depending on fraction of active particles to their
899  // total count.
900  int getDMNumParticles(){
901  if(largePhase()){
902  return myNumParticles;
903  }
904  else{
905  return myNumActiveParticles;
906  }
907  }
908 
909  int getNumActiveParticles(){
910  return myNumActiveParticles;
911  }
912 
913  void calculateNumActiveParticles(){
914  myNumActiveParticles = 0;
915  for(int i = 1; i <= myNumParticles; i++){
916  if(myParticles[i].rung >= activeRung){
917  myNumActiveParticles++;
918  }
919  }
920  }
921 
922  bool largePhase(){
923  return (1.0*myNumActiveParticles/myNumParticles) >= largePhaseThreshold;
924  }
925 
926  void getDMParticles(CompactPartData *fillArray, int &fillIndex){
927  NumberOfGPUParticles = 0;
928  FirstGPUParticleIndex = fillIndex;//This is for the GPU Ewald
929  if(largePhase()){
930  for(int b = 0; b < numBuckets; b++){
931  GenericTreeNode *bucket = bucketList[b];
932  int buckstart = bucket->firstParticle;
933  int buckend = bucket->lastParticle;
934  GravityParticle *buckparts = bucket->particlePointer;
935  bucket->bucketArrayIndex = fillIndex;
936  for(int i = buckstart; i <= buckend; i++){
937  fillArray[fillIndex] = buckparts[i-buckstart];
938  fillIndex++;
939  }
940  }
941  }
942  else{
943  for(int b = 0; b < numBuckets; b++){
944  GenericTreeNode *bucket = bucketList[b];
945  if(bucket->rungs < activeRung){
946  continue;
947  }
948  BucketActiveInfo *binfo = &(bucketActiveInfo[b]);
949 
950  int buckstart = bucket->firstParticle;
951  int buckend = bucket->lastParticle;
952  GravityParticle *buckparts = bucket->particlePointer;
953 
954  binfo->start = fillIndex;
955  for(int i = buckstart; i <= buckend; i++){
956  if(buckparts[i-buckstart].rung >= activeRung){
957  fillArray[fillIndex] = buckparts[i-buckstart];
958  fillIndex++;
959  }
960  }
961  binfo->size = fillIndex-binfo->start;
962  }
963  }
964  //This is for the GPU Ewald
965  if(FirstGPUParticleIndex == fillIndex){
966  //This means no particle is on GPU
967  FirstGPUParticleIndex = -1;
968  LastGPUParticleIndex = -1;
969  NumberOfGPUParticles = 0;
970  }
971  else{
972  LastGPUParticleIndex = fillIndex - 1;
973  NumberOfGPUParticles = LastGPUParticleIndex - FirstGPUParticleIndex + 1;
974  }
975  }
976 
977  bool isActive(int partNum){
978  return myParticles[partNum].rung >= activeRung;
979  }
980 
981  void clearMarkedBuckets(CkVec<GenericTreeNode *> &markedBuckets);
982  void clearMarkedBucketsAll();
983 
984 #ifdef HAPI_TRACE
985  long long localNodeInteractions;
986  long long localPartInteractions;
987  long long remoteNodeInteractions;
988  long long remotePartInteractions;
989  long long remoteResumeNodeInteractions;
990  long long remoteResumePartInteractions;
991 #endif
992 
993 #ifdef HAPI_INSTRUMENT_WRS
994  int instrumentId;
995 
996  double localNodeListConstructionTime;
997  double remoteNodeListConstructionTime;
998  double remoteResumeNodeListConstructionTime;
999  double localPartListConstructionTime;
1000  double remotePartListConstructionTime;
1001  double remoteResumePartListConstructionTime;
1002 
1003  int nLocalNodeReqs;
1004  int nRemoteNodeReqs;
1005  int nRemoteResumeNodeReqs;
1006  int nLocalPartReqs;
1007  int nRemotePartReqs;
1008  int nRemoteResumePartReqs;
1009 
1010 #endif
1011 
1012 #endif
1013 
1014  void continueStartRemoteChunk(int chunk);
1015 #ifdef CUDA
1016  void updateParticles(intptr_t data, int partIndex);
1017 #endif
1018  void continueWrapUp();
1019 
1020 #if INTERLIST_VER > 0
1021  void getBucketsBeneathBounds(GenericTreeNode *&node, int &start, int &end);
1022  void updateBucketState(int start, int end, int n, int chunk, State *state);
1023  void updateUnfinishedBucketState(int start, int end, int n, int chunk, State *state);
1024 #endif
1025 
1026 #if defined CHANGA_REFACTOR_WALKCHECK || defined CHANGA_REFACTOR_WALKCHECK_INTERLIST
1027  void addToBucketChecklist(int bucketIndex, NodeKey k){
1028  bucketcheckList[bucketIndex].insert(k);
1029  if(bucketIndex == TEST_BUCKET && thisIndex == TEST_TP)
1030  CkPrintf("[%d] add %ld\n", thisIndex, k);
1031  }
1032 #endif
1033 
1034 #if INTERLIST_VER > 0
1035  GenericTreeNode *getStartAncestor(int current, int previous, GenericTreeNode *dflt);
1036 #endif
1037  inline GenericTreeNode *keyToNode(const Tree::NodeKey k){
1039  NodeLookupType::iterator iter = nodeLookupTable.find(k);
1040  if (iter != nodeLookupTable.end()) return iter->second;
1041  else return NULL;
1042  }
1043 
1044  GenericTreeNode *getBucket(int i){
1045  return bucketList[i];
1046  }
1047 
1049  double dStartTime;
1050 
1051 private:
1052  // liveViz
1053  liveVizRequestMsg * savedLiveVizMsg;
1054 
1055  LBStrategy foundLB;
1056  // jetley - saved first internal node
1057  Vector3D<float> savedCentroid;
1059  int iPrevRungLB;
1061  int iActiveRungLB;
1062 
1065  CkCallback callback;
1067  CkCallback cbGravity;
1069  CkCallback cbSmooth;
1070  CkCallback after_dd_callback;
1072  unsigned int myNumParticles;
1073  unsigned int numActiveParticles;
1075  GravityParticle* myParticles;
1076  int nbor_msgs_count_;
1078  int nStore;
1080  bool bBucketsInited;
1081 
1082  // Temporary location to hold the particles that have come from outside this
1083  // TreePiece. This is used in the case where we migrate the particles and
1084  // detect completion of migration using QD.
1085  std::vector<GravityParticle> myTmpShuffleParticle;
1086  std::vector<extraSPHData> myTmpShuffleSphParticle;
1087  std::vector<extraStarData> myTmpShuffleStarParticle;
1088  ParticleShuffleMsg* myShuffleMsg;
1091  int myTreeParticles;
1092  public:
1096  int64_t nTotalSPH;
1098  int64_t nTotalDark;
1100  int64_t nTotalStar;
1101  private:
1103  unsigned int myNumSPH;
1105  extraSPHData *mySPHParticles;
1107  int nStoreSPH;
1109  unsigned int myNumStar;
1111  extraStarData *myStarParticles;
1113  int nStoreStar;
1115  int64_t nMaxOrder;
1117  int64_t nStartRead;
1119  int myIOParticles;
1120 
1122  std::vector<GenericTreeNode *> bucketList;
1123 
1125  std::vector<GravityParticle> mySortedParticles;
1127  std::vector<extraSPHData> mySortedParticlesSPH;
1129  std::vector<extraStarData> mySortedParticlesStar;
1131  std::vector<ParticleShuffleMsg*> incomingParticlesMsg;
1133  int incomingParticlesArrived;
1138  bool incomingParticlesSelf;
1139 
1141  double piecemass;
1142 
1145  int cnt;
1148  int packed;
1149 
1150  // the index assigned by the CacheManager upon registration
1151  int localIndex;
1152 
1153  //unsigned int numSplitters;
1154  //SFC::Key* splitters;
1155  CProxy_TreePiece pieces;
1157  //CProxy_DataManager dataManager; // unused...
1159  DataManager* dm;
1160 
1163 #ifndef REDUCTION_HELPER
1164  CkVec<int64_t> myBinCounts;
1165 #endif
1166  std::vector<int> myBinCountsORB;
1168  int myPlace;
1171  SFC::Key leftSplitter, rightSplitter;
1172 
1173  std::string basefilename;
1174  // Bounding box of the entire simulation
1175  OrientedBox<float> boundingBox;
1176  unsigned iterationNo;
1178  GenericTreeNode* root;
1180  NodePool *pTreeNodes;
1181 
1182  typedef std::map<NodeKey, CkVec<int>* > MomentRequestType;
1186  MomentRequestType momentRequests;
1187 
1189  //double theta; -- moved as readonly
1191  //double thetaMono; -- moved as readonly
1193  int activeRung;
1194 
1196  int bPeriodic;
1197  int bComove;
1199  double dRhoFac;
1200  Vector3D<cosmoType> fPeriod;
1201  int nReplicas;
1202  int bEwald; /* Perform Ewald */
1203  double fEwCut;
1204  double dEwhCut;
1205  EWT *ewt;
1206  int nMaxEwhLoop;
1207  int nEwhLoop;
1208 #ifdef HEXADECAPOLE
1209  MOMC momcRoot; /* complete moments of root */
1210 #endif
1211  bool bEwaldInited;
1213 
1214  int bGasCooling;
1215 #ifndef COOLING_NONE
1216  clDerivsData *CoolData;
1217 #endif
1218  std::vector<int> iSeTab;
1221  int nSetupWriteStage;
1222  int64_t nStartWrite; // Particle number at which this piece starts
1223  // to write file.
1224 
1226  NodeLookupType nodeLookupTable;
1227 
1229  //u_int64_t prefetchWaiting;
1231  Tree::NodeKey *prefetchRoots;
1233  OrientedBox<cosmoType> prefetchReq;
1234 
1236  int numChunks;
1237 
1238 #if COSMO_STATS > 0
1239  u_int64_t myNumMACChecks;
1240  u_int64_t nodesOpenedLocal;
1241  u_int64_t nodesOpenedRemote;
1242  u_int64_t numOpenCriterionCalls;
1243 #endif
1244  u_int64_t nodeInterLocal;
1247  u_int64_t *nodeInterRemote;
1249  u_int64_t particleInterLocal;
1251  u_int64_t *particleInterRemote;
1252 
1253  int nActive; // number of particles that are active
1254 
1256  unsigned int numBuckets;
1257 #if INTERLIST_VER > 0
1258  int prevRemoteBucket;
1261  int prevBucket;
1262 #endif
1263  unsigned int ewaldCurrentBucket;
1270  BucketGravityRequest *bucketReqs;
1271 
1273  //CacheManager *localCache;
1274 
1275  // Entries used for the CacheManager
1276  EntryTypeGravityParticle gravityParticleEntry;
1277  EntryTypeSmoothParticle smoothParticleEntry;
1278  EntryTypeGravityNode gravityNodeEntry;
1279 
1280 #if COSMO_DEBUG > 1 || defined CHANGA_REFACTOR_WALKCHECK || defined CHANGA_REFACTOR_WALKCHECK_INTERLIST
1281  typedef std::vector< std::multiset<Tree::NodeKey> > DebugList;
1283  DebugList bucketcheckList;
1284 #endif
1285 
1287 
1289  bool firstTime;
1290 
1292  std::vector<int> tempBinCounts;
1293 
1295  std::list<GravityParticle *> orbBoundaries;
1296 
1298  int myExpectedCount;
1301  int myExpectedCountSPH;
1304  int myExpectedCountStar;
1305 
1307  unsigned int chunkRootLevel;
1308 
// Heap arrays owned by this piece: both constructors start them at NULL and
// the destructor releases them with delete[].
// NOTE(review): presumably the boxes and split dimensions used while building
// the ORB tree -- confirm against buildORBTree().
1311  OrientedBox<float>* boxes;
1312  char* splitDims;
1313 
1316  int phase;
1317 
1318  double myTotalMass;
1319 
1320  #if INTERLIST_VER > 0
1321 
1322 
1323  public:
1324 #endif
1325  // called when a chunk has been used completely (chunkRemaining[chunk] == 0)
1326  void finishedChunk(int chunk);
1327 
1329  bool (*compFuncPtr[3])(GravityParticle,GravityParticle);
1330  double tmpTime;
1331  double totalTime;
1332  public:
1333 
1334 #ifdef SPCUDA
1335  EwaldData *h_idata;
1336  CkCallback *cbEwaldGPU;
1337 #endif
1338  void EwaldGPU();
1339  void EwaldGPUComplete();
1340 
1341 #if COSMO_DEBUG > 1 || defined CHANGA_REFACTOR_WALKCHECK || defined CHANGA_REFACTOR_WALKCHECK_INTERLIST
1342  void checkWalkCorrectness();
1344  void combineKeys(Tree::NodeKey key,int bucket);
1345 #endif
1346 
1347  /* DEBUGGING */
1348  void quiescence();
1349  /*END DEBUGGING */
1350 
1351  NodeLookupType &getNodeLookupTable() {
1352  return nodeLookupTable;
1353  }
1354 
1355  int getNumNodes() {
1356  return nodeLookupTable.size();
1357  }
1358 
1360  void deleteTree() {
1361  if(pTreeNodes != NULL) {
1362  delete pTreeNodes;
1363  pTreeNodes = NULL;
1364  root = NULL;
1365  nodeLookupTable.clear();
1366  }
1367  else {
1368  if (root != NULL) {
1369  root->fullyDelete();
1370  delete root;
1371  root = NULL;
1372  nodeLookupTable.clear();
1373  }
1374  }
1375  }
1376 
1380  void calculateRemoteMoments(GenericTreeNode* node);
1381 
1385  void checkTree(GenericTreeNode* node);
1386 
1390  bool nodeOwnership(const Tree::NodeKey nkey, int &firstOwner, int &lastOwner);
1391 
1392 
1395  void initBuckets();
1396  template <class Tsmooth>
1397  void initBucketsSmooth(Tsmooth tSmooth);
1398  void smoothNextBucket();
1399  void reSmoothNextBucket();
1400  void markSmoothNextBucket();
1401  void smoothBucketComputation();
1405  void startNextBucket();
1409  void doAllBuckets();
1410  void reconstructNodeLookup(GenericTreeNode *node);
1411  //void rebuildSFCTree(GenericTreeNode *node,GenericTreeNode *parent,int *);
1412 
1413 public:
1414  TreePiece() : pieces(thisArrayID), root(0),
1415  iPrevRungLB (-1), sTopDown(0), sGravity(0),
1416  sPrefetch(0), sLocal(0), sRemote(0), sPref(0), sSmooth(0),
1417  nPrevActiveParts(0) {
1418  dm = NULL;
1419  foundLB = Null;
1420  iterationNo=0;
1421  usesAtSync = true;
1422  pTreeNodes = NULL;
1423  bucketReqs=NULL;
1424  nCacheAccesses = 0;
1425  memWithCache = 0;
1426  memPostCache = 0;
1427  nNodeCacheEntries = 0;
1428  nPartCacheEntries = 0;
1429  completedActiveWalks = 0;
1430  myPlace = -1;
1431  nSetupWriteStage = -1;
1432  //openingDiffCount=0;
1433  chunkRootLevel=0;
1434  //splitters = NULL;
1435 #if COSMO_STATS > 0
1436  nodesOpenedLocal = 0;
1437  nodesOpenedRemote = 0;
1438  nodeInterLocal = 0;
1439  particleInterLocal = 0;
1440 
1441  numOpenCriterionCalls=0;
1442 
1443  piecemass = 0.0;
1444 #endif
1445  packed=0; /* output accelerations in x,y,z
1446  packed format */
1447  cnt=0; /* Counter over x,y,z when not packed */
1448  particleInterRemote = NULL;
1449  nodeInterRemote = NULL;
1450 
1451 #if INTERLIST_VER > 0
1452  sInterListWalk = NULL;
1453 #endif
1454 #ifdef CUDA
1455  numActiveBuckets = -1;
1456 #ifdef HAPI_TRACE
1457  localNodeInteractions = 0;
1458  localPartInteractions = 0;
1459  remoteNodeInteractions = 0;
1460  remotePartInteractions = 0;
1461  remoteResumeNodeInteractions = 0;
1462  remoteResumePartInteractions = 0;
1463 #endif
1464 #endif
1465 
1466  tmpTime=0.0;
1467  totalTime=0.0;
1468  // temporarely set to -1, it will updated after the tree is built
1469  numChunks=-1;
1470  prefetchRoots = NULL;
1471  ewt = NULL;
1472  nMaxEwhLoop = 100;
1473  bEwaldInited = false;
1474 
1475  incomingParticlesMsg.clear();
1476  incomingParticlesArrived = 0;
1477  incomingParticlesSelf = false;
1478 
1479  myParticles = NULL;
1480  mySPHParticles = NULL;
1481  myStarParticles = NULL;
1482  myNumParticles = myNumSPH = myNumStar = 0;
1483  nStore = nStoreSPH = nStoreStar = 0;
1484  bBucketsInited = false;
1485  myTreeParticles = -1;
1486  orbBoundaries.clear();
1487  boxes = NULL;
1488  splitDims = NULL;
1489  bGasCooling = 0;
1490 
1491 #ifdef PUSH_GRAVITY
1492  createdSpanningTree = false;
1493 #endif
1494 
1495  localTreeBuildComplete = false;
1496  }
1497 
1498  TreePiece(CkMigrateMessage* m): pieces(thisArrayID) {
1499 
1500  usesAtSync = true;
1501  //localCache = NULL;
1502  dm = NULL;
1503  bucketReqs = NULL;
1504  numChunks=-1;
1505  nCacheAccesses = 0;
1506  memWithCache = 0;
1507  memPostCache = 0;
1508  nNodeCacheEntries = 0;
1509  nPartCacheEntries = 0;
1510  completedActiveWalks = 0;
1511  prefetchRoots = NULL;
1512  //remaining Chunk = NULL;
1513  ewt = NULL;
1514  bEwaldInited = false;
1515  root = NULL;
1516  pTreeNodes = NULL;
1517 
1518  sTopDown = 0;
1519  sGravity = NULL;
1520  sPrefetch = NULL;
1521  sSmooth = NULL;
1522 #if INTERLIST_VER > 0
1523  sInterListWalk = NULL;
1524 #endif
1525 
1526  incomingParticlesMsg.clear();
1527  incomingParticlesArrived = 0;
1528  incomingParticlesSelf = false;
1529 
1530  cnt=0;
1531  nodeInterRemote = NULL;
1532  particleInterRemote = NULL;
1533 
1534  orbBoundaries.clear();
1535  boxes = NULL;
1536  splitDims = NULL;
1537  bBucketsInited = false;
1538  myTreeParticles = -1;
1539 
1540 
1541  localTreeBuildComplete = false;
1542  }
1543 
1544  private:
1545  void freeWalkObjects();
1546  void allocateStars() {
1547  nStoreStar = (int) (myNumStar*(1.0 + dExtraStore));
1548  // Stars tend to form out of gas, so make sure there is
1549  // enough room.
1550  nStoreStar += 12 + (int) (myNumSPH*dExtraStore);
1551  myStarParticles = new extraStarData[nStoreStar];
1552  }
1553 
1554  public:
1555  ~TreePiece() {
1556  if (verbosity>1) ckout <<"Deallocating treepiece "<<thisIndex<<endl;
1557  if(nStore > 0) delete[] myParticles;
1558  if(nStoreSPH > 0) delete[] mySPHParticles;
1559  if(nStoreStar > 0) delete[] myStarParticles;
1560  delete[] nodeInterRemote;
1561  delete[] particleInterRemote;
1562  delete[] bucketReqs;
1563  delete[] ewt;
1564 
1565  deleteTree();
1566 
1567  if(boxes!= NULL ) delete[] boxes;
1568  if(splitDims != NULL) delete[] splitDims;
1569 
1570 #ifndef COOLING_NONE
1571  if(bGasCooling)
1572  CoolDerivsFinalize(CoolData);
1573 #endif
1574  if (verbosity>1) ckout <<"Finished deallocation of treepiece "<<thisIndex<<endl;
1575  }
1576 
1577  void setPeriodic(int nReplicas, Vector3D<cosmoType> fPeriod, int bEwald,
1578  double fEwCut, double fEwhCut, int bPeriod,
1579  int bComove, double dRhoFac);
1580  void BucketEwald(GenericTreeNode *req, int nReps,double fEwCut);
1581  void EwaldInit();
1582  void calculateEwald(dummyMsg *m);
1583  void calculateEwaldUsingCkLoop(dummyMsg *msg, int yield_num);
1584  void callBucketEwald(int id);
1585  void doParallelNextBucketWork(int id, LoopParData* lpdata);
1586  void initCoolingData(const CkCallback& cb);
1587  // Scale velocities (needed to convert to canonical momenta for
1588  // comoving coordinates.)
1589  void velScale(double dScale, const CkCallback& cb);
1590 
1594  void loadNChilada(const std::string& filename, const double dTuFac,
1595  const CkCallback& cb);
1596  void readFloatBinary(OutputParams& params, int bParaRead,
1597  const CkCallback& cb);
1604  void loadTipsy(const std::string& filename, const double dTuFac,
1605  const bool bDoublePos,
1606  const bool bDoubleVel,
1607  const CkCallback& cb);
1609  void readTipsyArray(OutputParams& params, const CkCallback& cb);
1610  void resetMetals(const CkCallback& cb);
1611  void getMaxIOrds(const CkCallback& cb);
1612  void RestartEnergy(double dTuFac, const CkCallback& cb);
1613  void findTotalMass(const CkCallback &cb);
1614  void recvTotalMass(CkReductionMsg *msg);
1615 
1616  // Write a Tipsy file
1617  void writeTipsy(Tipsy::TipsyWriter& w,
1618  const double dvFac, // scale velocities
1619  const double duTFac, // convert temperature
1620  const bool bDoublePos,
1621  const bool bDoubleVel,
1622  const int bCool);
1623  // Find position in the file to start writing
1624  void setupWrite(int iStage, u_int64_t iPrevOffset,
1625  const std::string& filename, const double dTime,
1626  const double dvFac, const double duTFac,
1627  const bool bDoublePos,
1628  const bool bDoubleVel,
1629  const int bCool, const CkCallback& cb);
1630  // control parallelism in the write
1631  void parallelWrite(int iPass, const CkCallback& cb,
1632  const std::string& filename, const double dTime,
1633  const double dvFac, // scale velocities
1634  const double duTFac, // convert temperature
1635  const bool bDoublePos,
1636  const bool bDoubleVel,
1637  const int bCool);
1638  // serial output
1639  void serialWrite(u_int64_t iPrevOffset, const std::string& filename,
1640  const double dTime,
1641  const double dvFac, const double duTFac,
1642  const bool bDoublePos,
1643  const bool bDoubleVel,
1644  const int bCool, const CkCallback& cb);
1645  // setup for serial output
1646  void oneNodeWrite(int iIndex,
1647  int iOutParticles,
1648  int iOutSPH,
1649  int iOutStar,
1650  GravityParticle *particles, // particles to
1651  // write
1652  extraSPHData *pGas, // SPH data
1653  extraStarData *pStar, // Star data
1654  int *piSPH, // SPH data offsets
1655  int *piStar, // Star data offsets
1656  const u_int64_t iPrevOffset,
1657  const std::string& filename, // output file
1658  const double dTime, // time or expansion
1659  const double dvFac, // velocity conversion
1660  const double duTFac, // temperature
1661  // conversion
1662  const bool bDoublePos,
1663  const bool bDoubleVel,
1664  const int bCool,
1665  const CkCallback &cb);
1666  // Reorder for output
1667  void reOrder(int64_t nMaxOrder, const CkCallback& cb);
1668  // move particles around for output
1669  void ioShuffle(CkReductionMsg *msg);
1670  void ioAcceptSortedParticles(ParticleShuffleMsg *);
1673  void resetObjectLoad(const CkCallback& cb);
1674  // Assign keys after loading tipsy file and finding Bounding box
1675  void assignKeys(CkReductionMsg* m);
1676  void evaluateBoundaries(SFC::Key* keys, const int n, int isRefine, const CkCallback& cb);
1677  void unshuffleParticles(CkReductionMsg* m);
1678  void acceptSortedParticles(ParticleShuffleMsg *);
1679  void shuffleAfterQD();
1680  void unshuffleParticlesWoDD(const CkCallback& cb);
1681  void acceptSortedParticlesFromOther(ParticleShuffleMsg *);
1682  void setNumExpectedNeighborMsgs();
1683 
1684  /*****ORB Decomposition*******/
1685  void initORBPieces(const CkCallback& cb);
1686  void initBeforeORBSend(unsigned int myCount, unsigned int myCountGas,
1687  unsigned int myCountStar,
1688  const CkCallback& cb, const CkCallback& cback);
1689  void sendORBParticles();
1690  void acceptORBParticles(const GravityParticle* particles, const int n);
1691  void acceptORBParticles(const GravityParticle* particles, const int n,
1692  const extraSPHData *pGas, const int nGasIn,
1693  const extraStarData *pStar, const int nStarIn);
1694  void finalizeBoundaries(ORBSplittersMsg *splittersMsg);
1695  void evaluateParticleCounts(ORBSplittersMsg *splittersMsg);
1696  /*****************************/
1697 
1698  void kick(int iKickRung, double dDelta[MAXRUNG+1], int bClosing,
1699  int bNeedVPred, int bGasIsothermal, double dMaxEnergy, double duDelta[MAXRUNG+1],
1700  double gammam1, double dThermalCondSatCoeff,
1701  double dMultiPhaseMaxTime, double dMultiPhaseMinTemp, double dEvapCoeff, const CkCallback& cb);
1702  void drift(double dDelta, int bNeedVPred, int bGasIsothermal, double dvDelta,
1703  double duDelta, int nGrowMass, bool buildTree, double dMaxEnergy,
1704  const CkCallback& cb);
1705  void initAccel(int iKickRung, const CkCallback& cb);
1706 #ifdef COOLING_MOLECULARH
1707  void distribLymanWerner(const CkCallback& cb);
1708 #endif /*COOLING_MOLECULARH*/
1709 
1710  void applyFrameAcc(int iKickRung, Vector3D<double> frameAcc, const CkCallback& cb);
1717  void externalGravity(int activeRung, const ExternalGravity exGrav,
1718  const CkCallback& cb);
1739  void adjust(int iKickRung, int bEpsAccStep, int bGravStep,
1740  int bSphStep, int bViscosityLimitdt,
1741  double dEta, double dEtaCourant, double dEtauDot,
1742  double dDiffCoeff, double dEtaDiffusion,
1743  double dDelta, double dAccFac,
1744  double dCosmoFac, double dhMinOverSoft,
1745  double dResolveJeans,
1746  int bDoGas,
1747  const CkCallback& cb);
1753  void truncateRung(int iCurrMaxRung, const CkCallback& cb);
1754  void rungStats(const CkCallback& cb);
1755  void countActive(int activeRung, const CkCallback& cb);
1757  void countType(int iType, const CkCallback& cb);
1758  void outputBlackHoles(const std::string& pszFileName, double dvFac,
1759  long lFPos, const CkCallback &cb);
1761  void SetSink(double dSinkMassMin, const CkCallback &cb);
1763  void SinkStep(int iCurrSinkRung, int iKickRung, const CkCallback &cb);
1764  void formSinks(int bJeans, double dJConst2, int bDensity,
1765  double dDensityCut, double dTime, int iKickRung, int bSimple,
1766  const CkCallback &cb);
1767  void emergencyAdjust(int iRung, double dDelta, double dDeltaThresh,
1768  const CkCallback &cb);
1769  void assignDomain(const CkCallback& cb);
1770  void starCenterOfMass(const CkCallback& cb);
1771  void calcEnergy(const CkCallback& cb);
1773  void newParticle(GravityParticle *p);
1774  void adjustTreePointers(GenericTreeNode *node, GravityParticle *newParts);
1776  void colNParts(const CkCallback &cb);
1778  void newOrder(const NewMaxOrder *nStarts, const int n, const CkCallback &cb) ;
1779 
1781  void setNParts(int64_t _nTotalSPH, int64_t _nTotalDark,
1782  int64_t _nTotalStar, const CkCallback &cb);
1784  void setSoft(const double dSoft, const CkCallback &cb);
1785  void physicalSoft(const double dSoftMax, const double dFac,
1786  const int bSoftMaxMul, const CkCallback& cb);
1787  void growMass(int nGrowMass, double dDeltaM, const CkCallback& cb);
1788  void InitEnergy(double dTuFac, double z, double dTime, double gammam1,
1789  const CkCallback& cb);
1790  void updateuDot(int activeRung, double duDelta[MAXRUNG+1],
1791  double dStartTime[MAXRUNG+1], int bCool, int bAll,
1792  int bUpdateState, double gammam1, double dResolveJeans, const CkCallback& cb);
1793  void ballMax(int activeRung, double dFac, const CkCallback& cb);
1794  void sphViscosityLimiter(int bOn, int activeRung, const CkCallback& cb);
1795  void getAdiabaticGasPressure(double gamma, double gammam1, double dTuFac, double dThermalCondCoeff,
1796  double dThermalCond2Coeff, double dThermalCondSatCoeff, double dThermalCond2SatCoeff,
1797  double dEvapMinTemp, double dDtCourantFac, double dResolveJeans, const CkCallback &cb);
1798  void getCoolingGasPressure(double gamma, double gammam1, double dThermalCondCoeff,
1799  double dThermalCond2Coeff, double dThermalCondSatCoeff, double dThermalCond2SatCoeff,
1800  double dEvapMinTemp, double dDtCourantFac, double dResolveJeans, const CkCallback &cb);
1801 #ifdef SPLITGAS
1802  void SplitGas(double dInitGasMass, const CkCallback& cb);
1803 #endif
1804  inline COOL* Cool() {return dm->Cool;}
1806  void initRand(int iRand, const CkCallback &cb);
1807  void FormStars(Stfm param, double dTime, double dDelta, double dCosmoFac,
1808  const CkCallback& cb);
1809  void flushStarLog(const CkCallback& cb);
1810  void Feedback(const Fdbk &fb, double dTime, double dDelta,
1811  const CkCallback& cb);
1812  void massMetalsEnergyCheck(int bPreDist, const CkCallback& cb);
1813  void SetTypeFromFileSweep(int iSetMask, char *file,
1814  struct SortStruct *ss, int nss, int *pniOrder, int *pnSet);
1815  void setTypeFromFile(int iSetMask, char *file, const CkCallback& cb);
1816  void getCOM(const CkCallback& cb, int bLiveViz);
1817  void getCOMByType(int iType, const CkCallback& cb, int bLiveViz);
1818  void DumpFrame(InDumpFrame in, const CkCallback& cb, int liveVizDump) ;
1819  void liveVizDumpFrameInit(liveVizRequestMsg *msg);
1820  void setProjections(int bOn);
1821 
1823 #ifdef PUSH_GRAVITY
1824  void buildTree(int bucketSize, const CkCallback& cb, bool merge);
1825 #else
1826  void buildTree(int bucketSize, const CkCallback& cb);
1827 #endif
1828 
1830  void startOctTreeBuild(CkReductionMsg* m);
1831  void recvBoundary(SFC::Key key, NborDir dir);
1832  void recvdBoundaries(CkReductionMsg* m);
1833 
1834  /********ORB Tree**********/
1835  //void receiveBoundingBoxes(BoundingBoxes *msg);
1836  void startORBTreeBuild(CkReductionMsg* m);
1837  OrientedBox<float> constructBoundingBox(GenericTreeNode *node,int level, int numChild);
1838  void buildORBTree(GenericTreeNode * node, int level);
1839  /**************************/
1840 
1842  bool sendFillReqNodeWhenNull(CkCacheRequestMsg<KeyType> *msg);
1844  void requestRemoteMoments(const Tree::NodeKey key, int sender);
1845  void receiveRemoteMoments(const Tree::NodeKey key, Tree::NodeType type,
1846  int firstParticle, int numParticles, int remIdx,
1847  const MultipoleMoments& moments, const OrientedBox<double>& box,
1848  const OrientedBox<double>& boxBall,
1849  const unsigned int iParticleTypes, const int64_t nSPH);
1850 
1855  void calculateGravityLocal();
1858  void commenceCalculateGravityLocal();
1859 
1864  void calculateGravityRemote(ComputeChunkMsg *msg);
1865  void executeCkLoopParallelization(LoopParData *lpdata, int startbucket,
1866  int yield_num, int chunkNum, State* gravityState);
1867  int doBookKeepingForTargetActive(int curbucket, int end, int chunkNum, bool
1868  updatestate, State* gravityState);
1869  int doBookKeepingForTargetInactive(int chunkNum, bool updatestate,
1870  State* gravityState);
1871 
1873  void calculateSmoothLocal();
1874  void calculateReSmoothLocal();
1875  void calculateMarkSmoothLocal();
1876  void nextBucketSmooth(dummyMsg *msg);
1877  void nextBucketReSmooth(dummyMsg *msg);
1878  void nextBucketMarkSmooth(dummyMsg *msg);
1879 #if INTERLIST_VER > 0
1880 #if 0
1881  void calculateForceRemoteBucket(int bucketIndex, int chunk);
1882  void calculateForceLocalBucket(int bucketIndex);
1883 #endif
1884 #endif
1885 
1890  void startGravity(int am, double myTheta, const CkCallback& cb);
1892  void setupSmooth();
1900  void startSmooth(SmoothParams *p, int iLowhFix, int nSmooth,
1901  double dfBall2OverSoft2, const CkCallback &cb);
1902  void startReSmooth(SmoothParams *p, const CkCallback& cb);
1903  void startMarkSmooth(SmoothParams *p, const CkCallback& cb);
1904 
1905  void finishNodeCache(const CkCallback& cb);
1906 
1908  GenericTreeNode* requestNode(int remoteIndex, Tree::NodeKey lookupKey,
1909  int chunk, int reqID, int awi, void *source);
1912  void fillRequestNode(CkCacheRequestMsg<KeyType> *msg);
1917 #if 0
1918  void receiveNode(GenericTreeNode &node, int chunk, unsigned int reqID);
1919 #endif
1920  const GenericTreeNode* lookupNode(Tree::NodeKey key);
1923  const GravityParticle* lookupParticles(int begin);
1924 
1927  void finishBucket(int iBucket);
1928 
1935  void cachedWalkBucketTree(GenericTreeNode* node, int chunk, int reqID);
1936 
1937 #if INTERLIST_VER > 0
1938  void calculateForcesNode(OffsetNode node, GenericTreeNode *myNode,
1939  int level,int chunk);
1940  void calculateForces(OffsetNode node, GenericTreeNode *myNode,
1941  int level,int chunk);
1942 #endif
1943 
1944  ExternalGravityParticle *requestParticles(Tree::NodeKey key,int chunk,
1945  int remoteIndex,int begin,
1946  int end,int reqID, int awi,
1947  void *source);
1948  GravityParticle *requestSmoothParticles(Tree::NodeKey key, int chunk,
1949  int remoteIndex, int begin,int end,
1950  int reqID, int awi, void *source);
1951  void fillRequestParticles(CkCacheRequestMsg<KeyType> *msg);
1952  void fillRequestSmoothParticles(CkCacheRequestMsg<KeyType> *msg);
1953  void flushSmoothParticles(CkCacheFillMsg<KeyType> *msg);
1954  void processReqSmoothParticles();
1955 
1956  void getParticleInfoForLB(int64_t active_part, int64_t total_part);
1957  void startlb(const CkCallback &cb, int activeRung);
1958  void setTreePieceLoad(int activeRung);
1959  void populateSavedPhaseData(int phase, double tpload, unsigned int activeparts);
1960  bool havePhaseData(int phase);
1961  void savePhaseData(std::vector<double> &loads, std::vector<unsigned int>
1962  &parts_per_phase, double* shuffleloads, unsigned int *shuffleparts,
1963  int shufflelen);
1964  void ResumeFromSync();
1965 
1966  void outputASCII(OutputParams& params, int bParaWrite,
1967  const CkCallback& cb);
1968  void oneNodeOutVec(OutputParams& params, Vector3D<double>* avOut,
1969  int nPart, int iIndex, int bDone,
1970  const CkCallback& cb) ;
1971  void oneNodeOutArr(OutputParams& params, double* adOut,
1972  int nPart, int iIndex, int bDone,
1973  const CkCallback& cb) ;
1974  void oneNodeOutIntArr(OutputParams& params, int *aiOut,
1975  int nPart, int iIndex, const CkCallback& cb);
1976  void outputBinaryStart(OutputParams& params, int64_t nStart,
1977  const CkCallback& cb);
1978  void outputBinary(Ck::IO::Session, OutputParams& params);
1979  void minmaxNCOut(OutputParams& params, const CkCallback& cb);
1980 
1981  void outputStatistics(const CkCallback& cb);
1983  void collectStatistics(const CkCallback &cb);
1984 
1992  void nextBucket(dummyMsg *m);
1993  void nextBucketUsingCkLoop(dummyMsg *m);
1994 
1995  void report();
1996  void printTreeViz(GenericTreeNode* node, std::ostream& os);
1997  void printTree(GenericTreeNode* node, std::ostream& os);
1998  void pup(PUP::er& p);
1999 
2000  // jetley
2001  // need this in TreeWalk
2002  GenericTreeNode *getRoot() {return root;}
2003  // need this in Compute
2004  inline Vector3D<cosmoType> decodeOffset(int reqID) {
2005  int offsetcode = reqID >> 22;
2006  int x = (offsetcode & 0x7) - 3;
2007  int y = ((offsetcode >> 3) & 0x7) - 3;
2008  int z = ((offsetcode >> 6) & 0x7) - 3;
2009 
2010  Vector3D<cosmoType> offset(x*fPeriod.x, y*fPeriod.y, z*fPeriod.z);
2011 
2012  return offset;
2013  }
2014 
2015  void receiveNodeCallback(GenericTreeNode *node, int chunk, int reqID, int awi, void *source);
2016  void receiveParticlesCallback(ExternalGravityParticle *egp, int num, int chunk, int reqID, Tree::NodeKey &remoteBucket, int awi, void *source);
2017  void receiveParticlesFullCallback(GravityParticle *egp, int num, int chunk, int reqID, Tree::NodeKey &remoteBucket, int awi, void *source);
2018  void doAtSync();
2019 
2020  void balanceBeforeInitialForces(const CkCallback &cb);
2021 
2022  // For merging of remote moment requests
2023  // before sending messages during tree building
2024  public:
2025  void sendRequestForNonLocalMoments(GenericTreeNode *pickedNode);
2026  void mergeNonLocalRequestsDone();
2027  //void addTreeBuildMomentsClient(GenericTreeNode *targetNode, TreePiece *client, GenericTreeNode *clientNode);
2028  std::map<NodeKey,NonLocalMomentsClientList>::iterator createTreeBuildMomentsEntry(GenericTreeNode *pickedNode);
2029 
2030 
2031  private:
2032  // XXX - hashtable instead of map
2033  std::map<NodeKey,NonLocalMomentsClientList> nonLocalMomentsClients;
2034  bool localTreeBuildComplete;
2035  int getResponsibleIndex(int first, int last);
2036 
2037 
2038  GenericTreeNode *boundaryParentReady(GenericTreeNode *parent);
2039  void accumulateMomentsFromChild(GenericTreeNode *parent, GenericTreeNode *child);
2040 
2041  void deliverMomentsToClients(GenericTreeNode *);
2042  void deliverMomentsToClients(const std::map<NodeKey,NonLocalMomentsClientList>::iterator &it);
2043  void treeBuildComplete();
2044  void processRemoteRequestsForMoments();
2045  void sendParticlesDuringDD(bool withqd);
2046  void mergeAllParticlesAndSaveCentroid();
2047  bool otherIdlePesAvail();
2048 
2049 };
2050 
2053 class LvArray : public CBase_LvArray {
2054  public:
2055  LvArray() {}
2056  LvArray(CkMigrateMessage* m) {}
2057  } ;
2058 
/// Given a requestID, return the bucket number.
2059 int decodeReqID(int);
/// Given a bucket number and a periodic offset, encode these into one number.
2060 int encodeOffset(int reqID, int x, int y, int z);
/// Is this request ID for a periodic replica?
2061 bool bIsReplica(int reqID);
// NOTE(review): presumably prints the tree rooted at node to os -- confirm
// against the definition.
2062 void printGenericTree(GenericTreeNode* node, std::ostream& os) ;
2063 //bool compBucket(GenericTreeNode *ln,GenericTreeNode *rn);
2064 
2065 #ifdef REDUCTION_HELPER
2066 
2067 class TreePieceCounter : public CkLocIterator {
2068  public:
2069  TreePieceCounter() { reset(); }
2070  void addLocation(CkLocation &loc);
2071  void reset();
2072 
2073  public:
2074  CkVec<TreePiece *> presentTreePieces;
2075 };
2076 
2077 
2078 
2079 class ReductionHelper : public CBase_ReductionHelper {
2080  public:
2081  ReductionHelper();
2082  ReductionHelper(CkMigrateMessage *);
2083  void pup(PUP::er &p);
2084 
2085  void countTreePieces(const CkCallback &cb);
2086  void reduceBinCounts(int nBins, int64_t *binCounts, const CkCallback &cb);
2087  void evaluateBoundaries(SFC::Key *keys, const int n, int isRefine, const CkCallback& cb);
2088  void evaluateBoundaries(const CkBitVector &binsToSplit, const CkCallback& cb);
2089 
2090  private:
2091  void senseLocalTreePieces();
2092 
2093  private:
2094 
2095  CkVec<int64_t> myBinCounts;
2096  int numTreePiecesCheckedIn;
2097 
2098  TreePieceCounter localTreePieces;
2099  std::vector<SFC::Key> splitters;
2100 };
2101 #endif
2102 
2104 class NonEmptyTreePieceCounter : public CkLocIterator {
2105  public:
2106  NonEmptyTreePieceCounter() { reset(); }
2107  void addLocation(CkLocation &loc);
2108  void reset();
2109 
2110  public:
2111  int count;
2112 };
2113 
2114 #endif //PARALLELGRAVITY_H
int decodeReqID(int)
Given a requestID, return the bucket number.
Definition: TreePiece.cpp:3584
LBStrategy
Load balancers that need the spatial information.
Definition: ParallelGravity.h:66
void addToNodeInterRemote(int chunk, int howmany)
accumulate node interaction count for statistics
Definition: ParallelGravity.h:835
Cache interface to the particles for smooth calculations. This cache is a writeback cache...
Definition: CacheInterface.h:73
Definition: smooth.h:173
struct OffsetNodeStruct OffsetNode
Remote Cell interaction lists for all tree levels.
CkVec< CkVec< OffsetNode > > clists
Cell interactions.
Definition: ParallelGravity.h:673
int nIOProcessor
Number of pieces to be doing I/O at once.
Definition: ParallelGravity.cpp:119
double RungToDt(double dDelta, int iRung)
Given the size of the big step, and a rung, return the corresponding timestep size.
Definition: ParallelGravity.h:391
CProxy_IntraNodeLBManager nodeLBMgrProxy
Proxy for Managing IntraNode load balancing with ckloop.
Definition: ParallelGravity.cpp:81
A representation of a multipole expansion.
Definition: MultipoleMoments.h:163
NborDir
Directions for sending boundaries.
Definition: ParallelGravity.h:97
Class to build the remote part of the tree. Fills in Boundary and NonLocal nodes. ...
Definition: Compute.h:286
List of clients needing a particular moment.
Definition: ParallelGravity.h:712
Base class for all tree-based computations.
Definition: Compute.h:26
int verbosity
verbosity level. Higher is more verbose.
Definition: ParallelGravity.cpp:63
Input parameters for cooling.
Definition: cooling_boley.h:56
int64_t nTotalParticles
Total Particles in the simulation.
Definition: ParallelGravity.h:1094
CkQ< OffsetNode > CheckList
Queue of nodes to check for interactions.
Definition: ParallelGravity.h:629
int _numChunks
Definition: ParallelGravity.cpp:121
Message to efficiently start entry methods with no arguments.
Definition: ParallelGravity.h:206
Parameters and methods to implement star formation.
Definition: starform.h:10
double dFracLoadBalance
Definition: ParallelGravity.cpp:104
Coefficients for the Fourier space part of the Ewald sum.
Definition: ParallelGravity.h:611
int localNodesPerReq
GPU related settings.
Definition: ParallelGravity.cpp:131
Hold parameters of the run.
Definition: parameters.h:14
const int PHASE_FEEDBACK
slot in MultistepLB to hold feedback phase load information
Definition: ParallelGravity.h:396
GravityParticle * particlePointer
Pointer to the first particle in this node.
Definition: GenericTreeNode.h:117
CkVec< CkVec< RemotePartInfo > > rpilists
Remote particle interactions.
Definition: ParallelGravity.h:674
Class for new maxOrder broadcast.
Definition: ParallelGravity.h:122
double dGlassDamper
for doing load balancing.
Definition: ParallelGravity.cpp:106
Message to start a remote gravity walk.
Definition: ParallelGravity.h:270
Class to compute gravity using a "bucket walk".
Definition: Compute.h:105
Used to count non-empty treepieces on the local processor.
Definition: ParallelGravity.h:2104
int _nocache
Disable the cache (always off)
Definition: ParallelGravity.cpp:93
unsigned int _yieldPeriod
The number of buckets to process in the local gravity walk before yielding the processor.
Definition: ParallelGravity.cpp:99
Compute for the remote node prefetch walk.
Definition: Compute.h:226
bool bIsReplica(int reqID)
is this a periodic replica?
Definition: TreePiece.cpp:3573
Computation over "inverse" nearest neighbors.
Definition: smooth.h:224
External gravity parameters and routines.
Definition: externalGravity.h:7
Cache interface to particles for the gravity calculation. This is a read-only cache of particles...
Definition: CacheInterface.h:37
CProxy_CkCacheManager< KeyType > cacheSmoothPart
Proxy for the smooth particle cache group.
Definition: ParallelGravity.cpp:75
GenericTrees useTree
type of tree to use.
Definition: ParallelGravity.cpp:110
Computations for Stadel-style interaction list walk.
Definition: Compute.h:148
CProxy_PETreeMerger peTreeMergerProxy
Proxy for the PETreeMerger group.
Definition: ParallelGravity.cpp:86
CkCallback cb
Callback for reduction of particle counts.
Definition: ParallelGravity.h:293
CProxy_LvArray lvProxy
Proxy for the liveViz array.
Definition: ParallelGravity.cpp:69
int64_t nTotalStar
Total Star Particles.
Definition: ParallelGravity.h:1100
unsigned int numTreePieces
Number of pieces into which to divide the tree.
Definition: ParallelGravity.cpp:115
double dExtraStore
fraction of extra particle storage
Definition: ParallelGravity.cpp:102
Base class for output parameters.
Definition: InOutput.h:16
int RungToSubsteps(int iRung)
Given a rung, return the number of substeps in one big step.
Definition: ParallelGravity.h:370
const double ddTolerance
tolerance for unequal pieces in SFC based decompositions.
Definition: ParallelGravity.h:104
Extra data needed for SPH.
Definition: GravityParticle.h:50
Definition: DataManager.h:60
Base class for walking trees.
Definition: TreeWalk.h:11
int _cacheLineDepth
Size of a Node Cache line, specified by how deep in the tree it goes.
Definition: ParallelGravity.cpp:96
const double GAMMA_JEANS
Adiabatic index to use with the Jeans pressure floor.
Definition: ParallelGravity.h:423
int DtToRung(double dDelta, double dTideal)
Given the size of the big step, and a desired timestep, return the rung of the largest timestep less ...
Definition: ParallelGravity.h:377
Object containing the parameter information.
Definition: param.h:38
struct particlesInfoR RemotePartInfo
Remote particles in an interaction list.
void addToNodeInterLocal(int howmany)
accumulate node interaction count for statistics
Definition: ParallelGravity.h:845
CkVec< int > chunkids
remote walk chunk number
Definition: ParallelGravity.h:672
client that has requested a moment.
Definition: ParallelGravity.h:696
int weightBalanceUE
Weight balancing during Oct decomposition user event (for Projections tracing).
Definition: ParallelGravity.cpp:149
Class to build the local part of the tree. Builds Internal nodes.
Definition: Compute.h:305
double * pos
Positions of splits.
Definition: ParallelGravity.h:289
cosmoType theta
BH-like opening criterion.
Definition: ParallelGravity.cpp:142
struct LoopParDataStruct LoopParData
Data needed for the CkLoop intranode parallelization.
CProxy_TreePiece treeProxy
Proxy for the TreePiece chare array.
Definition: ParallelGravity.cpp:65
int _prefetch
Prefetch nodes for the remote walk.
Definition: ParallelGravity.cpp:120
Object to bookkeep a Bucket MarkSmooth Walk.
Definition: smooth.h:261
char * dim
Dimension of splits.
Definition: ParallelGravity.h:291
CProxy_CkCacheManager< KeyType > cacheNode
Proxy for the tree node cache group.
Definition: ParallelGravity.cpp:77
Base class for tree nodes.
Definition: GenericTreeNode.h:59
Class for shadow arrays to avoid reduction conflicts in overlapping liveViz, SPH and gravity...
Definition: ParallelGravity.h:2053
CProxy_CkCacheManager< KeyType > cacheGravPart
Proxy for the gravity particle cache group.
Definition: ParallelGravity.cpp:73
CProxy_DumpFrameData dfDataProxy
Proxy for the dumpframe image data (DumpFrameData).
Definition: ParallelGravity.cpp:84
Base class for maintaining the state of a tree walk.
Definition: State.h:6
Information needed to calculate gravity.
Definition: GravityParticle.h:33
CkVec< OffsetNode > UndecidedList
Vector of nodes that are undecided at this level.
Definition: ParallelGravity.h:631
int numInitDecompBins
Definition: ParallelGravity.cpp:182
Data needed for a TreePiece to render its part of the image.
Definition: dumpframe.h:116
Message for evaluating splits for the ORB domain decomposition.
Definition: ParallelGravity.h:284
Object to bookkeep a Bucket Walk.
Definition: GravityParticle.h:18
Moment tensor components for complete multipoles.
Definition: moments.h:40
int lastParticle
An index to the last particle contained by this node, myNumParticles+1 means outside the node...
Definition: GenericTreeNode.h:108
Data needed for the CkLoop intranode parallelization.
Definition: ParallelGravity.h:668
int boundaryEvaluationUE
Boundary evaluation user event (for Projections tracing).
Definition: ParallelGravity.cpp:147
Definition: TreeWalk.h:82
int encodeOffset(int reqID, int x, int y, int z)
Given a bucket number and a periodic offset, encode these into one number.
Definition: TreePiece.cpp:3593
int rungs
Definition: GenericTreeNode.h:122
Particle data needed on the GPU to calculate gravity.
Definition: cuda_typedef.h:231
Fundamental structure that holds particle and tree data.
Definition: ParallelGravity.h:730
CkVec< CkVec< LocalPartInfo > > lpilists
Local particle interactions.
Definition: ParallelGravity.h:675
Overall flow control of the simulation.
Definition: ParallelGravity.h:436
void CkMustAssert(bool cond, const char *err)
CkAssert() replacement that works even in production mode.
Definition: ParallelGravity.h:56
Heating/Cooling context: parameters and tables.
Definition: cooling_boley.h:83
Stellar/Supernova feedback parameters and routines.
Definition: feedback.h:45
Object to bookkeep a Bucket Smooth Walk.
Definition: smooth.h:27
Global simulation parameters for dumpframe.
Definition: dumpframe.h:209
int firstParticle
An index for the first particle contained by this node, 0 means outside the node. ...
Definition: GenericTreeNode.h:106
int64_t nTotalDark
Total Dark Particles.
Definition: ParallelGravity.h:1098
A base class from which parameters for all smooth operations can be derived.
Definition: smoothparams.h:8
CProxy_TreePiece streamingProxy
A potentially optimized proxy for the tree pieces. Its use is deprecated.
Definition: ParallelGravity.cpp:113
unsigned int particlesPerChare
Number of particles per TreePiece. Used to determine the number of TreePieces.
Definition: ParallelGravity.cpp:118
Class for computation over a set smoothing length.
Definition: smooth.h:186
int getNumParticles()
Return the number of particles on this TreePiece.
Definition: ParallelGravity.h:861
unsigned int bucketSize
Definition: ParallelGravity.cpp:125
int bUseCkLoopPar
Use CkLoop for node parallelization.
Definition: ParallelGravity.cpp:127
CkVec< UndecidedList > UndecidedLists
Vector of undecided lists, one for each level.
Definition: ParallelGravity.h:633
DomainsDec
Possible domain decomposition methods.
Definition: ParallelGravity.h:86
void addToParticleInterLocal(int howmany)
Accumulate particle interaction count for statistics.
Definition: ParallelGravity.h:850
CkVec< GenericTreeNode * > lowNodes
Definition: ParallelGravity.h:669
Structure to hold information specific to GPU Ewald.
Definition: EwaldCUDA.h:65
double dStartTime
Time read in from input file.
Definition: ParallelGravity.h:1049
Class to count added and deleted particles.
Definition: ParallelGravity.h:328
void addToParticleInterRemote(int chunk, int howmany)
Accumulate particle interaction count for statistics.
Definition: ParallelGravity.h:840
Cache interface to the Tree Nodes.
Definition: CacheInterface.h:96
Class for computation over k nearest neighbors.
Definition: smooth.h:121
TreePiece * tp
TreePiece that owns this data.
Definition: ParallelGravity.h:676
Base class for optimizing walk actions.
Definition: Opt.h:12
cosmoType thetaMono
Definition: ParallelGravity.cpp:143
Fundamental type for a particle.
Definition: GravityParticle.h:316
double PoverRhoFloorJeans(double dResolveJeans, GravityParticle *p)
Pressure floor to force Jeans length to be larger than the spatial resolution.
Definition: ParallelGravity.h:401
CProxy_Main mainChare
Proxy for Charm Main Chare.
Definition: ParallelGravity.cpp:61
Utility to pool allocations of tree nodes.
Definition: GenericTreeNode.h:324
CProxy_LvArray gravityProxy
Definition: ParallelGravity.cpp:71
void deleteTree()
Delete tree nodes if allocated.
Definition: ParallelGravity.h:1360
Extra data needed for Stars.
Definition: GravityParticle.h:243
Local particles in an interaction list.
Definition: ParallelGravity.h:650
void printGenericTree(GenericTreeNode *node, std::ostream &os)
Print a text version of a tree.
Definition: TreePiece.cpp:6020
int octRefineLevel
Definition: ParallelGravity.cpp:186
Context for calculating cooling derivatives.
Definition: cooling_boley.h:108
CProxy_LvArray smoothProxy
Proxy for smooth reductions.
Definition: ParallelGravity.cpp:70
Super class for Smooth and Resmooth computation.
Definition: smooth.h:78
struct particlesInfoL LocalPartInfo
Local particles in an interaction list.
bool _cache
Use the cache (always on)
Definition: ParallelGravity.cpp:91
Remote Cell interaction lists for all tree levels.
Definition: ParallelGravity.h:621
int length
Number of splits.
Definition: ParallelGravity.h:287
struct ewaldTable EWT
Coefficients for the Fourier space part of the Ewald sum.
double fBall
Neighbor search radius for smoothing.
Definition: GravityParticle.h:326
GravityParticle * getParticles()
Return the pointer to the particles on this TreePiece.
Definition: ParallelGravity.h:866
Message for shuffling particles during domain decomposition.
Definition: ParallelGravity.h:300
int _randChunks
Definition: ParallelGravity.cpp:123
Structure for efficiently reading iOrders.
Definition: TreePiece.cpp:2430
DomainsDec domainDecomposition
The type of domain decomposition to use.
Definition: ParallelGravity.cpp:101
CProxy_DataManager dMProxy
Proxy for the DataManager.
Definition: ParallelGravity.cpp:79
Remote particles in an interaction list.
Definition: ParallelGravity.h:637
int64_t nTotalSPH
Total Gas Particles.
Definition: ParallelGravity.h:1096
double dMaxBalance
Max piece imbalance for load balancing.
Definition: ParallelGravity.cpp:103
CkGroupID dataManagerID
The group ID of your DataManager. You must set this!
Definition: ParallelGravity.cpp:157