changa  3.5
 All Classes Files Functions Variables Typedefs Enumerations Friends Macros Groups Pages
State.h
1 #ifndef __STATE_H__
2 #define __STATE_H__
3 #include "ParallelGravity.h"
4 
// Base class for maintaining the state of a tree walk.
//
// NOTE: this is a rendered listing in which the documented member lines are
// not shown (the embedded line numbers jump 7->13, 18->22 and 25->33, and
// skip 34).  The member index that accompanies the listing places these
// declarations in the gaps:
//   State.h:10  int bWalkDonePending       (no description in the index)
//   State.h:12  int currentBucket          - the bucket we have started to walk
//   State.h:21  int myNumParticlesPending  - how many buckets are unfinished
//                                            (the index flags the name as a
//                                            misnomer)
//   State.h:32  int numPendingChunks       - number of pending chunks
6 class State {
7  public:
13 
14  // Shifted variable into state. There is an issue of redundancy
15  // here, though: in addition to local state, remote and remote-resume
16  // state also have this variable but have no use for it, since only
17  // a single copy is required.
18  // Could have made this the third element in the array below.
    // (Presumably this refers to the member at State.h:21, whose declaration
    // line is not shown in this listing - confirm against the header.)
22 
23  // Again, redundant variables, since only remote-no-resume
24  // walks use this variable to see how many chunks have
25  // been used.
    // (Presumably this refers to the member at State.h:32 - confirm.)
33 
35  // Counters to keep track of outstanding remote processor requests tied to each bucket (position 0) and chunk (position 1).
36  int *counterArrays[2];
    // Virtual, with an empty body: nothing in this class releases the arrays
    // that counterArrays points to.
37  virtual ~State() {}
38 };
39 
40 #if INTERLIST_VER > 0
41 #if defined CUDA
42 #include "HostCUDA.h"
43 #include "DataManager.h"
44 #include "ck128bitHash.h"
45 
46 class DoubleWalkState;
47 
48 template<typename T>
49 class GenericList{
50  public:
51  CkVec<CkVec<T> > lists;
52  int totalNumInteractions;
53 
54  GenericList() : totalNumInteractions(0) {}
55 
56  void reset(){
57  // clear all bucket lists:
58  for(int i = 0; i < lists.length(); i++){
59  lists[i].length() = 0;
60  }
61  totalNumInteractions = 0;
62  }
63 
64  void free(){
65  for(int i = 0; i < lists.length(); i++){
66  lists[i].free();
67  }
68  lists.free();
69  totalNumInteractions = 0;
70  }
71 
72  void init(int numBuckets, int numper){
73  lists.resize(numBuckets);
74  for(int i = 0; i < numBuckets; i++){
75  lists[i].reserve(numper);
76  }
77  }
78 
79  CudaRequest *serialize(TreePiece *tp);
80  void getBucketParameters(TreePiece *tp,
81  int bucket,
82  int &bucketStart, int &bucketSize){
83  //std::map<NodeKey, int>&lpref){
84  // bucket is listed in this offload
85  GenericTreeNode *bucketNode = tp->bucketList[bucket];
86 
87  bucketSize = bucketNode->lastParticle - bucketNode->firstParticle + 1;
88  bucketStart = bucketNode->bucketArrayIndex;
89  CkAssert(bucketStart >= 0);
90  }
91 
92  void getActiveBucketParameters(TreePiece *tp,
93  int bucket,
94  int &bucketStart, int &bucketSize){
95  //std::map<NodeKey, int>&lpref){
96  // bucket is listed in this offload
97  GenericTreeNode *bucketNode = tp->bucketList[bucket];
98  BucketActiveInfo *binfo = &(tp->bucketActiveInfo[bucket]);
99 
100  //bucketSize = bucketNode->lastParticle - bucketNode->firstParticle + 1;
101  //bucketStart = bucketNode->bucketArrayIndex;
102  bucketSize = tp->bucketActiveInfo[bucket].size;
103  bucketStart = tp->bucketActiveInfo[bucket].start;
104  CkAssert(bucketStart >= 0);
105  }
106 
107  void push_back(int b, T &ilc, DoubleWalkState *state, TreePiece *tp);
108 
109 
110 };
111 
112 #endif
113 
// Hold state where both the targets and sources are tree walked.
//
// NOTE: this is a rendered listing in which the documented member lines are
// not shown (the embedded line numbers skip 119-125, 127, 129, 132-136,
// 171-172 and 195-200).  The member index that accompanies the listing
// places these declarations in the gaps:
//   State.h:121  CheckList *chklists
//   State.h:124  UndecidedLists undlists
//   State.h:200  GenericTreeNode *lowestNode
117 class DoubleWalkState : public State {
118  public:
    // Lists of cells to be computed. One list for each level.
126  CkVec<CkVec<OffsetNode> >clists;
    // Lists of local particles to be computed. One list for each level.
128  CkVec<CkVec<LocalPartInfo> >lplists;
    // Lists of remote particles to be computed. One list for each level.
130  CkVec<CkVec<RemotePartInfo> >rplists;
131 
    // No description is available in this listing.  Not initialized by the
    // constructor below.
137  bool *placedRoots;
138  // To tell a remote-resume state from a remote-no-resume state.
    // Not initialized by the constructor below.
139  bool resume;
140 
141 #ifdef CUDA
    // Interaction counts at which nodeOffloadReady() / partOffloadReady()
    // start returning true.  Not initialized by the constructor below.
142  int nodeThreshold;
143  int partThreshold;
144 
    // Per-bucket interaction lists for cells and for particles.
145  GenericList<ILCell> nodeLists;
146  GenericList<ILPart> particleLists;
147 
148 #ifdef HAPI_INSTRUMENT_WRS
    // Wall-clock start stamps written by the ...ConstructionTimeStart()
    // helpers near the end of the class.
149  double nodeListTime;
150  double partListTime;
151 #endif
152 
    // Not initialized by the constructor below.
153  CkVec<CudaMultipoleMoments> *nodes;
154  CkVec<CompactPartData> *particles;
155 
156  // During 'small' rungs, buckets are marked when
157  // they are included for computation in the request's
158  // aux. particle array. These markings should be
159  // cleared before the assembly of the next request is
160  // begun. For this purpose, we keep track of buckets
161  // marked during the construction of a request.
162  //
163  // NB: for large rungs, we don't mark buckets while
164  // compiling requests. For such rungs, since all
165  // particles are shipped at the beginning of the iteration,
166  // we have them marked at that time. Since all particles
167  // are available on the gpu for these rungs, we do not clear
168  // the markings when requests are sent out.
169  CkVec<GenericTreeNode *> markedBuckets;
170 
    // Maps keyed by NodeKey with int values.  The describing comment
    // (lines 171-172 of the header) is not shown in this listing.
173  std::unordered_map<NodeKey,int> nodeMap;
174  std::unordered_map<NodeKey,int> partMap;
175 
    // True once nodeLists.totalNumInteractions has reached nodeThreshold.
176  bool nodeOffloadReady(){
177  return nodeLists.totalNumInteractions >= nodeThreshold;
178  }
179 
    // True once particleLists.totalNumInteractions has reached partThreshold.
180  bool partOffloadReady(){
181  return particleLists.totalNumInteractions >= partThreshold;
182  }
183 
184 #ifdef HAPI_INSTRUMENT_WRS
    // Setters for the two offload thresholds; only available in
    // instrumented builds.
185  void updateNodeThreshold(int t){
186  nodeThreshold = t;
187  }
188  void updatePartThreshold(int t){
189  partThreshold = t;
190  }
191 #endif
192 
193 #endif
194 
201  int level;
202 
    // Starts with chklists and lowestNode set to 0 and level set to -1; no
    // other member appears in the initializer list.  Under CUDA, partMap is
    // given room for 100 entries up front (nodeMap is not reserved).
203  DoubleWalkState() : chklists(0), lowestNode(0), level(-1) {
204 #ifdef CUDA
205  partMap.reserve(100);
206 #endif
207  }
208 
    // NOTE(review): the helpers below are guarded by HAPI_INSTRUMENT_WRS
    // only, while nodeListTime and partListTime are declared inside the CUDA
    // block above.  This relies on HAPI_INSTRUMENT_WRS never being defined
    // without CUDA - confirm with the build configuration.
209 #ifdef HAPI_INSTRUMENT_WRS
    // Record the current CmiWallTimer() value as the node-list start stamp.
210  void nodeListConstructionTimeStart(){
211  nodeListTime = CmiWallTimer();
212  }
213 
    // Return the wall time elapsed since nodeListConstructionTimeStart().
214  double nodeListConstructionTimeStop(){
215  return CmiWallTimer()-nodeListTime;
216  }
217 
    // Record the current CmiWallTimer() value as the particle-list start stamp.
218  void partListConstructionTimeStart(){
219  partListTime = CmiWallTimer();
220  }
221 
    // Return the wall time elapsed since partListConstructionTimeStart().
222  double partListConstructionTimeStop(){
223  return CmiWallTimer()-partListTime;
224  }
225 
226 #endif
227 };
228 #endif // INTERLIST_VER
229 
230 class NullState : public State {
231 };
232 
233 #endif
CkQ< OffsetNode > CheckList
Queue of nodes to check for interactions.
Definition: ParallelGravity.h:629
CkVec< CkVec< OffsetNode > > clists
Lists of cells to be computed. One list for each level.
Definition: State.h:126
int currentBucket
The bucket we have started to walk.
Definition: State.h:12
bool * placedRoots
Definition: State.h:137
CkVec< CkVec< RemotePartInfo > > rplists
Lists of remote particles to be computed. One list for each level.
Definition: State.h:130
int * counterArrays[2]
Counters to keep track of outstanding remote processor requests tied to each bucket (position 0) and chunk (position 1).
Definition: State.h:36
GenericTreeNode
Base class for tree nodes.
Definition: GenericTreeNode.h:59
State
Base class for maintaining the state of a tree walk.
Definition: State.h:6
int lastParticle
An index to the last particle contained by this node, myNumParticles+1 means outside the node...
Definition: GenericTreeNode.h:108
GenericTreeNode * lowestNode
Definition: State.h:200
TreePiece
Fundamental structure that holds particle and tree data.
Definition: ParallelGravity.h:730
CkVec< CkVec< LocalPartInfo > > lplists
Lists of local particles to be computed. One list for each level.
Definition: State.h:128
int firstParticle
An index for the first particle contained by this node, 0 means outside the node. ...
Definition: GenericTreeNode.h:106
CudaRequest
Data and parameters for requesting gravity calculations on the GPU.
Definition: HostCUDA.h:107
DoubleWalkState
Hold state where both the targets and sources are tree walked.
Definition: State.h:117
unsigned int bucketSize
Definition: ParallelGravity.cpp:125
CkVec< UndecidedList > UndecidedLists
Vector of undecided lists, one for each level.
Definition: ParallelGravity.h:633
NullState
Definition: State.h:230
int myNumParticlesPending
Keep track of how many buckets are unfinished. XXX note the misnomer.
Definition: State.h:21
CheckList * chklists
Definition: State.h:121
UndecidedLists undlists
Definition: State.h:124
int bWalkDonePending
Definition: State.h:10
int numPendingChunks
Number of pending chunks.
Definition: State.h:32