changa  3.5
 All Classes Files Functions Variables Typedefs Enumerations Friends Macros Groups Pages
gravity.h
1 #ifndef __GRAVITY_H__
2 #define __GRAVITY_H__
3 
4 #include "TreeNode.h"
5 #include "GenericTreeNode.h"
6 #include "Space.h"
7 #include "SSEdefs.h"
8 
9 extern cosmoType theta;
10 extern cosmoType thetaMono;
11 
12 /*
13 ** see (A1) and (A2) of TREESPH: A UNIFICATION OF SPH WITH THE
14 ** HIERARCHICAL TREE METHOD by Lars Hernquist and Neal Katz.
15 ** APJ Supplemant Series 70:416-446, 1989
16 **
17 ** Higher derivative terms c and d for use with quadrupole spline
18 ** softening (Joachim Stadel, Dec. 94).
19 */
20 #if !CMK_SSE
21 inline
22 void SPLINEQ(cosmoType invr,cosmoType r2,cosmoType twoh,cosmoType& a,
23  cosmoType& b,cosmoType& c,cosmoType& d)
24 {
25  cosmoType u,dih,dir=(invr);
26  if ((r2) < (twoh)*(twoh)) {
27  dih = COSMO_CONST(2.0)/(twoh);
28  u = dih/dir;
29  if (u < COSMO_CONST(1.0)) {
30  a = dih*(COSMO_CONST(7.0)/COSMO_CONST(5.0)
31  - COSMO_CONST(2.0)/COSMO_CONST(3.0)*u*u
32  + COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
33  - COSMO_CONST(1.0)/COSMO_CONST(10.0)*u*u*u*u*u);
34  b = dih*dih*dih*(COSMO_CONST(4.0)/COSMO_CONST(3.0)
35  - COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
36  + COSMO_CONST(1.0)/COSMO_CONST(2.0)*u*u*u);
37  c = dih*dih*dih*dih*dih*(COSMO_CONST(12.0)/COSMO_CONST(5.0)
38  - COSMO_CONST(3.0)/COSMO_CONST(2.0)*u);
39  d = COSMO_CONST(3.0)/COSMO_CONST(2.0)*dih*dih*dih*dih*dih*dih*dir;
40  }
41  else {
42  a = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir
43  + dih*(COSMO_CONST(8.0)/COSMO_CONST(5.0)
44  - COSMO_CONST(4.0)/COSMO_CONST(3.0)*u*u + u*u*u
45  - COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
46  + COSMO_CONST(1.0)/COSMO_CONST(30.0)*u*u*u*u*u);
47  b = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir*dir*dir
48  + dih*dih*dih*(COSMO_CONST(8.0)/COSMO_CONST(3.0) - COSMO_CONST(3.0)*u
49  + COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
50  - COSMO_CONST(1.0)/COSMO_CONST(6.0)*u*u*u);
51  c = COSMO_CONST(-1.0)/COSMO_CONST(5.0)*dir*dir*dir*dir*dir
52  + COSMO_CONST(3.0)*dih*dih*dih*dih*dir
53  + dih*dih*dih*dih*dih*(COSMO_CONST(-12.0)/COSMO_CONST(5.0)
54  + COSMO_CONST(1.0)/COSMO_CONST(2.0)*u);
55  d = -dir*dir*dir*dir*dir*dir*dir
56  + COSMO_CONST(3.0)*dih*dih*dih*dih*dir*dir*dir
57  - COSMO_CONST(1.0)/COSMO_CONST(2.0)*dih*dih*dih*dih*dih*dih*dir;
58  }
59  }
60  else {
61  a = dir;
62  b = a*a*a;
63  c = COSMO_CONST(3.0)*b*a*a;
64  d = COSMO_CONST(5.0)*c*a*a;
65  }
66 }
67 #else
68 inline
69 void SPLINEQ(SSEcosmoType invr, SSEcosmoType r2, SSEcosmoType twoh,
70  SSEcosmoType& a, SSEcosmoType& b,
71  SSEcosmoType& c, SSEcosmoType& d)
72 {
73  SSEcosmoType dir;
74  dir = invr;
75  SSEcosmoType select0 = r2 < twoh * twoh;
76  int compare0 = movemask(select0);
77  // make common case fast, at the cost of some redundant code
78  // in the infrequent case
79  if (!compare0) {
80  a = dir;
81  b = a*a*a;
82  c = COSMO_CONST(3.0)*b*a*a;
83  d = COSMO_CONST(5.0)*c*a*a;
84  }
85  else {
86  SSEcosmoType u, dih;
87  SSEcosmoType select1;
88  SSEcosmoType a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2;
89  int compare1;
90 
91  dih = COSMO_CONST(2.0)/twoh;
92  u = dih/dir;
93 
94  if ((~compare0) & cosmoMask) {
95  a0 = dir;
96  b0 = a0*a0*a0;
97  c0 = COSMO_CONST(3.0)*b0*a0*a0;
98  d0 = COSMO_CONST(5.0)*c0*a0*a0;
99  }
100 
101  select1 = u < COSMO_CONST(1.0);
102  compare1 = movemask(select1);
103  if (compare1) {
104  a1 = dih*(COSMO_CONST(7.0)/COSMO_CONST(5.0)
105  - COSMO_CONST(2.0)/COSMO_CONST(3.0)*u*u
106  + COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
107  - COSMO_CONST(1.0)/COSMO_CONST(10.0)*u*u*u*u*u);
108  b1 = dih*dih*dih*(COSMO_CONST(4.0)/COSMO_CONST(3.0)
109  - COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
110  + COSMO_CONST(1.0)/COSMO_CONST(2.0)*u*u*u);
111  c1 = dih*dih*dih*dih*dih*(COSMO_CONST(12.0)/COSMO_CONST(5.0)
112  - COSMO_CONST(3.0)/COSMO_CONST(2.0)*u);
113  d1 = COSMO_CONST(3.0)/COSMO_CONST(2.0)*dih*dih*dih*dih*dih*dih*dir;
114  }
115  if ((~compare1) & cosmoMask) {
116  a2 = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir
117  + dih*(COSMO_CONST(8.0)/COSMO_CONST(5.0)
118  - COSMO_CONST(4.0)/COSMO_CONST(3.0)*u*u + u*u*u
119  - COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
120  + COSMO_CONST(1.0)/COSMO_CONST(30.0)*u*u*u*u*u);
121  b2 = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir*dir*dir
122  + dih*dih*dih*(COSMO_CONST(8.0)/COSMO_CONST(3.0) - COSMO_CONST(3.0)*u
123  + COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
124  - COSMO_CONST(1.0)/COSMO_CONST(6.0)*u*u*u);
125  c2 = COSMO_CONST(-1.0)/COSMO_CONST(5.0)*dir*dir*dir*dir*dir
126  + COSMO_CONST(3.0)*dih*dih*dih*dih*dir
127  + dih*dih*dih*dih*dih*(COSMO_CONST(-12.0)/COSMO_CONST(5.0)
128  + COSMO_CONST(1.0)/COSMO_CONST(2.0)*u);
129  d2 = -dir*dir*dir*dir*dir*dir*dir
130  + COSMO_CONST(3.0)*dih*dih*dih*dih*dir*dir*dir
131  - COSMO_CONST(1.0)/COSMO_CONST(2.0)*dih*dih*dih*dih*dih*dih*dir;
132  }
133 
134  a = andnot(select0, a0)
135  | (select0 & ((select1 & a1) | andnot(select1, a2)));
136  b = andnot(select0, b0)
137  | (select0 & ((select1 & b1) | andnot(select1, b2)));
138  c = andnot(select0, c0)
139  | (select0 & ((select1 & c1) | andnot(select1, c2)));
140  d = andnot(select0, d0)
141  | (select0 & ((select1 & d1) | andnot(select1, d2)));
142  }
143 }
144 #endif
145 
146 #if !CMK_SSE
147 inline
148 void SPLINE(cosmoType r2, cosmoType twoh, cosmoType &a, cosmoType &b)
149 {
150  cosmoType r, u,dih,dir;
151  r = sqrt(r2);
152 
153  if (r < (twoh)) {
154  dih = COSMO_CONST(2.0)/(twoh);
155  u = r*dih;
156  if (u < COSMO_CONST(1.0)) {
157  a = dih*(COSMO_CONST(7.0)/COSMO_CONST(5.0)
158  - COSMO_CONST(2.0)/COSMO_CONST(3.0)*u*u
159  + COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
160  - COSMO_CONST(1.0)/COSMO_CONST(10.0)*u*u*u*u*u);
161  b = dih*dih*dih*(COSMO_CONST(4.0)/COSMO_CONST(3.0)
162  - COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
163  + COSMO_CONST(1.0)/COSMO_CONST(2.0)*u*u*u);
164  }
165  else {
166  dir = COSMO_CONST(1.0)/r;
167  a = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir
168  + dih*(COSMO_CONST(8.0)/COSMO_CONST(5.0)
169  - COSMO_CONST(4.0)/COSMO_CONST(3.0)*u*u + u*u*u
170  - COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
171  + COSMO_CONST(1.0)/COSMO_CONST(30.0)*u*u*u*u*u);
172  b = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir*dir*dir
173  + dih*dih*dih*(COSMO_CONST(8.0)/COSMO_CONST(3.0) - COSMO_CONST(3.0)*u
174  + COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
175  - COSMO_CONST(1.0)/COSMO_CONST(6.0)*u*u*u);
176  }
177  }
178  else {
179  a = COSMO_CONST(1.0)/r;
180  b = a*a*a;
181  }
182 }
183 #else
184 inline
185 void SPLINE(SSEcosmoType r2, SSEcosmoType twoh,
186  SSEcosmoType &a, SSEcosmoType &b)
187 {
188  SSEcosmoType r;
189  r = sqrt(r2);
190  SSEcosmoType select0 = r < twoh;
191  int compare0 = movemask(select0);
192  // make common case fast, at the cost of some redundant code
193  // in the infrequent case
194  if (!compare0) {
195  a = COSMO_CONST(1.0)/r;
196  b = a * a * a;
197  }
198  else {
199  SSEcosmoType u, dih, dir;
200  SSEcosmoType a0, b0, a1, b1, a2, b2;
201  SSEcosmoType select1;
202  int compare1;
203 
204  dih = COSMO_CONST(2.0)/twoh;
205  u = r * dih;
206 
207  if ((~compare0) & cosmoMask) {
208  a0 = COSMO_CONST(1.0)/r;
209  b0 = a0 * a0 * a0;
210  }
211 
212  select1 = u < COSMO_CONST(1.0);
213  compare1 = movemask(select1);
214  if (compare1) {
215  a1 = dih*(COSMO_CONST(7.0)/COSMO_CONST(5.0)
216  - COSMO_CONST(2.0)/COSMO_CONST(3.0)*u*u
217  + COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
218  - COSMO_CONST(1.0)/COSMO_CONST(10.0)*u*u*u*u*u);
219  b1 = dih*dih*dih*(COSMO_CONST(4.0)/COSMO_CONST(3.0)
220  - COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
221  + COSMO_CONST(1.0)/COSMO_CONST(2.0)*u*u*u);
222  }
223  if ((~compare1) & cosmoMask) {
224  dir = COSMO_CONST(1.0)/r;
225  a2 = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir
226  + dih*(COSMO_CONST(8.0)/COSMO_CONST(5.0)
227  - COSMO_CONST(4.0)/COSMO_CONST(3.0)*u*u + u*u*u
228  - COSMO_CONST(3.0)/COSMO_CONST(10.0)*u*u*u*u
229  + COSMO_CONST(1.0)/COSMO_CONST(30.0)*u*u*u*u*u);
230  b2 = COSMO_CONST(-1.0)/COSMO_CONST(15.0)*dir*dir*dir
231  + dih*dih*dih*(COSMO_CONST(8.0)/COSMO_CONST(3.0) - COSMO_CONST(3.0)*u
232  + COSMO_CONST(6.0)/COSMO_CONST(5.0)*u*u
233  - COSMO_CONST(1.0)/COSMO_CONST(6.0)*u*u*u);
234  }
235 
236  a = andnot(select0, a0)
237  | (select0 & ((select1 & a1) | andnot(select1, a2)));
238  b = andnot(select0, b0)
239  | (select0 & ((select1 & b1) | andnot(select1, b2)));
240  }
241 }
242 #endif
243 
244 //
245 // Return true if the soften nodes overlap, or if the source node's
246 // softening overlaps the bounding box; i.e. the forces involve softening
247 // @param node Node to test
248 // @param myNode
249 // @param offset Periodic offset applied to node
250 //
251 inline
252 int openSoftening(Tree::GenericTreeNode *node, Tree::GenericTreeNode *myNode,
253  Vector3D<cosmoType> offset)
254 {
255  Sphere<cosmoType> s(node->moments.cm + offset, 2.0*node->moments.soft);
256  Sphere<cosmoType> myS(myNode->moments.cm, 2.0*myNode->moments.soft);
257  if(Space::intersect(myS, s))
258  return true;
259  return Space::intersect(myNode->boundingBox, s);
260 }
261 
262 #ifdef CMK_VERSION_BLUEGENE
263 static int forProgress = 0;
264 #endif
265 
266 #if !CMK_SSE
267 inline int partBucketForce(ExternalGravityParticle *part,
268  Tree::GenericTreeNode *req,
269  GravityParticle *particles,
270  Vector3D<cosmoType> offset, int activeRung) {
271  int computed = 0;
272  Vector3D<cosmoType> r;
273  cosmoType rsq;
274  cosmoType twoh, a, b;
275 
276  for(int j = req->firstParticle; j <= req->lastParticle; ++j) {
277  if (particles[j].rung >= activeRung) {
278 #ifdef CMK_VERSION_BLUEGENE
279  if (++forProgress > 200) {
280  forProgress = 0;
281 #ifdef COSMO_EVENTS
282  traceUserEvents(networkProgressUE);
283 #endif
284  CmiNetworkProgress();
285  }
286 #endif
287  computed++;
288  r = offset + part->position - particles[j].position;
289  rsq = r.lengthSquared();
290  twoh = part->soft + particles[j].soft;
291  if(rsq != 0) {
292  SPLINE(rsq, twoh, a, b);
293  cosmoType idt2 = (particles[j].mass + part->mass)*b; // (timescale)^-2
294  // of interaction
295  particles[j].treeAcceleration += r * (b * part->mass);
296  particles[j].potential -= part->mass * a;
297  if(idt2 > particles[j].dtGrav)
298  particles[j].dtGrav = idt2;
299  }
300  }
301  }
302  return computed;
303 }
304 #else
305 inline int partBucketForce(ExternalGravityParticle *part,
306  Tree::GenericTreeNode *req,
307  GravityParticle **activeParticles,
308  Vector3D<cosmoType> offset, int nActiveParts) {
309  Vector3D<SSEcosmoType> r;
310  SSEcosmoType rsq;
311  SSEcosmoType twoh, a, b;
312 
313  for (int i=0; i<nActiveParts; i+=SSE_VECTOR_WIDTH) {
314 #ifdef CMK_VERSION_BLUEGENE
315  if (++forProgress > 200) {
316  forProgress = 0;
317 #ifdef COSMO_EVENTS
318  traceUserEvents(networkProgressUE);
319 #endif
320  CmiNetworkProgress();
321  }
322 #endif
323  Vector3D<SSEcosmoType>
324  packedPos(SSELoad(SSEcosmoType, activeParticles, i, ->position.x),
325  SSELoad(SSEcosmoType, activeParticles, i, ->position.y),
326  SSELoad(SSEcosmoType, activeParticles, i, ->position.z));
327  SSELoad(SSEcosmoType packedSoft, activeParticles, i, ->soft);
328 
329  r = -packedPos + offset + part->position;
330  rsq = r.lengthSquared();
331  twoh = part->soft + packedSoft;
332  SSEcosmoType select = rsq > COSMO_CONST(0.0);
333  int compare = movemask(select);
334  if(compare) {
335  SPLINE(rsq, twoh, a, b);
336  if ((~compare) & cosmoMask) {
337  a = select & a;
338  b = select & b;
339  }
340  SSEcosmoType SSELoad(packedMass, activeParticles, i, ->mass);
341  SSEcosmoType SSELoad(packedDtGrav, activeParticles, i, ->dtGrav);
342  Vector3D<SSEcosmoType>
343  packedAcc(SSELoad(SSEcosmoType,
344  activeParticles, i, ->treeAcceleration.x),
345  SSELoad(SSEcosmoType,
346  activeParticles, i, ->treeAcceleration.y),
347  SSELoad(SSEcosmoType,
348  activeParticles, i, ->treeAcceleration.z));
349  SSEcosmoType SSELoad(packedPotential, activeParticles, i, ->potential);
350  SSEcosmoType idt2 = (packedMass + part->mass) * b;
351  idt2 = max(idt2, packedDtGrav);
352  packedAcc += r * (b * part->mass);
353  packedPotential -= part->mass * a;
354  SSEStore(packedAcc.x, activeParticles, i, ->treeAcceleration.x);
355  SSEStore(packedAcc.y, activeParticles, i, ->treeAcceleration.y);
356  SSEStore(packedAcc.z, activeParticles, i, ->treeAcceleration.z);
357  SSEStore(packedPotential, activeParticles, i, ->potential);
358  SSEStore(idt2, activeParticles, i, ->dtGrav);
359  }
360  }
361  return nActiveParts;
362 }
363 
364 inline int partBucketForce(ExternalGravityParticle *part,
365  Tree::GenericTreeNode *req,
366  GravityParticle *particles,
367  Vector3D<cosmoType> offset, int activeRung) {
368  int nActiveParts = 0;
369  GravityParticle dummyPart = particles[0];
370  dummyPart.soft = 0.0;
371 
372  GravityParticle **activeParticles =
373  (GravityParticle**)alloca((req->lastParticle - req->firstParticle
374  + 1 + FORCE_INPUT_LIST_PAD)
375  * sizeof(GravityParticle*));
376  for (int j=req->firstParticle; j <= req->lastParticle; ++j) {
377  if (particles[j].rung >= activeRung)
378  activeParticles[nActiveParts++] = &particles[j];
379  }
380 
381  activeParticles[nActiveParts] = &dummyPart;
382 #if SSE_VECTOR_WIDTH == 4
383  activeParticles[nActiveParts+1] = &dummyPart;
384  activeParticles[nActiveParts+2] = &dummyPart;
385 #endif
386 
387  int ret = partBucketForce(part, req, activeParticles, offset, nActiveParts);
388  return ret;
389 }
390 
391 #endif
392 
393 
394 //
395 // Calculated forces on active particles in a bucket due to the
396 // multipole of a TreeNode. Return number of multipoles evaluated.
397 //
398 #if !CMK_SSE
399 inline
400 int nodeBucketForce(Tree::GenericTreeNode *node,
401  Tree::GenericTreeNode *req,
402  GravityParticle *particles,
403  Vector3D<cosmoType> offset,
404  int activeRung)
405 
406 {
407  int computed = 0;
408  Vector3D<cosmoType> r;
409  cosmoType rsq;
410  MultipoleMoments &m = node->moments;
411 
412  Vector3D<cosmoType> cm(m.cm + offset);
413 
414 #ifdef HEXADECAPOLE
415  if(openSoftening(node, req, offset)) {
416  ExternalGravityParticle tmpPart;
417  tmpPart.mass = m.totalMass;
418  tmpPart.soft = m.soft;
419  tmpPart.position = m.cm;
420  return partBucketForce(&tmpPart, req, particles, offset, activeRung);
421  }
422 #endif
423  for(int j = req->firstParticle; j <= req->lastParticle; ++j) {
424  if (particles[j].rung >= activeRung) {
425  particles[j].interMass += m.totalMass;
426 #ifdef CMK_VERSION_BLUEGENE
427  if (++forProgress > 200) {
428  forProgress = 0;
429 #ifdef COSMO_EVENTS
430  traceUserEvents(networkProgressUE);
431 #endif
432  CmiNetworkProgress();
433  }
434 #endif
435  computed++;
436  r = Vector3D<cosmoType>(particles[j].position) - cm;
437  rsq = r.lengthSquared();
438  cosmoType dir = COSMO_CONST(1.0)/sqrt(rsq);
439 #ifdef HEXADECAPOLE
440  cosmoType magai;
441  momEvalFmomrcm(&m.mom, m.getRadius(), dir, r.x, r.y, r.z,
442  &particles[j].potential,
443  &particles[j].treeAcceleration.x,
444  &particles[j].treeAcceleration.y,
445  &particles[j].treeAcceleration.z, &magai);
446  cosmoType idt2 = (particles[j].mass + m.totalMass)*dir*dir*dir;
447 #else
448  cosmoType twoh, a, b, c, d;
449  twoh = CONVERT_TO_COSMO_TYPE(m.soft + particles[j].soft);
450  SPLINEQ(dir, rsq, twoh, a, b, c, d);
451  cosmoType qirx = CONVERT_TO_COSMO_TYPE m.xx*r.x
452  + CONVERT_TO_COSMO_TYPE m.xy*r.y + CONVERT_TO_COSMO_TYPE m.xz*r.z;
453  cosmoType qiry = CONVERT_TO_COSMO_TYPE m.xy*r.x
454  + CONVERT_TO_COSMO_TYPE m.yy*r.y + CONVERT_TO_COSMO_TYPE m.yz*r.z;
455  cosmoType qirz = CONVERT_TO_COSMO_TYPE m.xz*r.x
456  + CONVERT_TO_COSMO_TYPE m.yz*r.y + CONVERT_TO_COSMO_TYPE m.zz*r.z;
457  cosmoType qir = COSMO_CONST(0.5)*(qirx*r.x + qiry*r.y + qirz*r.z);
458  cosmoType tr = COSMO_CONST(0.5)*(CONVERT_TO_COSMO_TYPE m.xx
459  + CONVERT_TO_COSMO_TYPE m.yy
460  + CONVERT_TO_COSMO_TYPE m.zz);
461  cosmoType qir3 = b*CONVERT_TO_COSMO_TYPE m.totalMass + d*qir - c*tr;
462  particles[j].potential -= CONVERT_TO_COSMO_TYPE m.totalMass * a
463  + c*qir - b*tr;
464  particles[j].treeAcceleration.x -= qir3*r.x - c*qirx;
465  particles[j].treeAcceleration.y -= qir3*r.y - c*qiry;
466  particles[j].treeAcceleration.z -= qir3*r.z - c*qirz;
467  cosmoType idt2 = (CONVERT_TO_COSMO_TYPE particles[j].mass
468  + CONVERT_TO_COSMO_TYPE m.totalMass)*b;
469 #endif
470  if(idt2 > particles[j].dtGrav)
471  particles[j].dtGrav = idt2;
472  }
473  }
474  return computed;
475 }
476 
477 #elif CMK_SSE
478 inline
479 int nodeBucketForce(Tree::GenericTreeNode *node,
480  Tree::GenericTreeNode *req,
481  GravityParticle *particles,
482  Vector3D<cosmoType> offset,
483  int activeRung)
484 
485 {
486  Vector3D<SSEcosmoType> r;
487  SSEcosmoType rsq;
488  SSEcosmoType twoh;
489  SSEcosmoType a,b,c,d;
490  MultipoleMoments m = node->moments;
491  Vector3D<cosmoType> cm(m.cm + offset);
492  int nActiveParts = 0;
493  GravityParticle dummyPart = particles[0];
494  dummyPart.soft = 0.0;
495 
496  GravityParticle **activeParticles =
497  (GravityParticle**)alloca((req->lastParticle - req->firstParticle
498  + 1 + FORCE_INPUT_LIST_PAD)
499  * sizeof(GravityParticle*));
500  for (int j=req->firstParticle; j <= req->lastParticle; ++j) {
501  if (particles[j].rung >= activeRung)
502  activeParticles[nActiveParts++] = &particles[j];
503  }
504 
505  activeParticles[nActiveParts] = &dummyPart;
506 #if SSE_VECTOR_WIDTH == 4
507  activeParticles[nActiveParts+1] = &dummyPart;
508  activeParticles[nActiveParts+2] = &dummyPart;
509 #endif
510 
511 #ifdef HEXADECAPOLE
512  if(openSoftening(node, req, offset)) {
513  ExternalGravityParticle tmpPart;
514  tmpPart.mass = m.totalMass;
515  tmpPart.soft = m.soft;
516  tmpPart.position = m.cm;
517  int ret = partBucketForce(&tmpPart, req, activeParticles,
518  offset, nActiveParts);
519  return ret;
520  }
521 #endif
522  for (int i=0; i<nActiveParts; i+=SSE_VECTOR_WIDTH) {
523 #ifdef CMK_VERSION_BLUEGENE
524  if (++forProgress > 200) {
525  forProgress = 0;
526 #ifdef COSMO_EVENTS
527  traceUserEvents(networkProgressUE);
528 #endif
529  CmiNetworkProgress();
530  }
531 #endif
532  Vector3D<SSEcosmoType>
533  packedPos(SSELoad(SSEcosmoType, activeParticles, i, ->position.x),
534  SSELoad(SSEcosmoType, activeParticles, i, ->position.y),
535  SSELoad(SSEcosmoType, activeParticles, i, ->position.z));
536  r = packedPos - cm;
537  rsq = r.lengthSquared();
538  SSEcosmoType dir = COSMO_CONST(1.0)/sqrt(rsq);
539  Vector3D<SSEcosmoType>
540  packedAcc(SSELoad(SSEcosmoType,
541  activeParticles, i, ->treeAcceleration.x),
542  SSELoad(SSEcosmoType,
543  activeParticles, i, ->treeAcceleration.y),
544  SSELoad(SSEcosmoType,
545  activeParticles, i, ->treeAcceleration.z));
546  SSEcosmoType SSELoad(packedPotential, activeParticles, i, ->potential);
547  SSEcosmoType SSELoad(packedMass, activeParticles, i, ->mass);
548  SSEcosmoType SSELoad(packedDtGrav, activeParticles, i, ->dtGrav);
549 #ifdef HEXADECAPOLE
550  SSEcosmoType magai;
551  momEvalFmomrcm(&m.mom, m.getRadius(), dir, r.x, r.y, r.z,
552  &packedPotential,
553  &packedAcc.x,
554  &packedAcc.y,
555  &packedAcc.z, &magai);
556  SSEcosmoType idt2 = (packedMass + m.totalMass)*dir*dir*dir;
557 #else
558  SSELoad(SSEcosmoType packedSoft, activeParticles, i, ->soft);
559  twoh = CONVERT_TO_COSMO_TYPE m.soft + packedSoft;
560  SPLINEQ(dir, rsq, twoh, a, b, c, d);
561  SSEcosmoType qirx = CONVERT_TO_COSMO_TYPE m.xx*r.x
562  + CONVERT_TO_COSMO_TYPE m.xy*r.y + CONVERT_TO_COSMO_TYPE m.xz*r.z;
563  SSEcosmoType qiry = CONVERT_TO_COSMO_TYPE m.xy*r.x
564  + CONVERT_TO_COSMO_TYPE m.yy*r.y + CONVERT_TO_COSMO_TYPE m.yz*r.z;
565  SSEcosmoType qirz = CONVERT_TO_COSMO_TYPE m.xz*r.x
566  + CONVERT_TO_COSMO_TYPE m.yz*r.y + CONVERT_TO_COSMO_TYPE m.zz*r.z;
567  SSEcosmoType qir = COSMO_CONST(0.5)*(qirx*r.x + qiry*r.y + qirz*r.z);
568  SSEcosmoType tr = COSMO_CONST(0.5)*(CONVERT_TO_COSMO_TYPE m.xx
569  + CONVERT_TO_COSMO_TYPE m.yy
570  + CONVERT_TO_COSMO_TYPE m.zz);
571  SSEcosmoType qir3 = b*CONVERT_TO_COSMO_TYPE m.totalMass + d*qir - c*tr;
572  packedPotential -= CONVERT_TO_COSMO_TYPE m.totalMass *a
573  + c * qir - b * tr;
574  packedAcc.x -= qir3*r.x - c*qirx;
575  packedAcc.y -= qir3*r.y - c*qiry;
576  packedAcc.z -= qir3*r.z - c*qirz;
577  SSEcosmoType idt2 = (packedMass + CONVERT_TO_COSMO_TYPE m.totalMass)*b;
578 #endif
579  SSEStore(packedAcc.x, activeParticles, i, ->treeAcceleration.x);
580  SSEStore(packedAcc.y, activeParticles, i, ->treeAcceleration.y);
581  SSEStore(packedAcc.z, activeParticles, i, ->treeAcceleration.z);
582  SSEStore(packedPotential, activeParticles, i, ->potential);
583  idt2 = max(idt2, packedDtGrav);
584  SSEStore(idt2, activeParticles, i, ->dtGrav);
585  }
586  return nActiveParts;
587 }
588 #endif
589 
596 
597 inline bool
598 openCriterionBucket(Tree::GenericTreeNode *node,
599  Tree::GenericTreeNode *bucketNode,
600  Vector3D<cosmoType> offset // Offset of node
601  ) {
602 
603 #if COSMO_STATS > 0
604  node->used = true;
605 #endif
606  // Always open node if this many particles or fewer.
607  const int nMinParticleNode = 6;
608  if(node->particleCount <= nMinParticleNode) {
609  return true;
610  }
611 
612  // Note that some of this could be pre-calculated into an "opening radius"
613  cosmoType radius = TreeStuff::opening_geometry_factor * node->moments.getRadius() / theta;
614  if(radius < node->moments.getRadius())
615  radius = node->moments.getRadius();
616 
617  Sphere<cosmoType> s(node->moments.cm + offset, radius);
618 
619 #ifdef HEXADECAPOLE
620  if(!Space::intersect(bucketNode->boundingBox, s)) {
621  // Well separated, now check softening
622  if(!openSoftening(node, bucketNode, offset)) {
623  return false; // passed both tests: will be a Hex interaction
624  }
625  else { // Open as monopole?
626  radius = TreeStuff::opening_geometry_factor*node->moments.getRadius()/thetaMono;
627  Sphere<cosmoType> sM(node->moments.cm + offset, radius);
628  return Space::intersect(bucketNode->boundingBox, sM);
629  }
630  }
631  return true;
632 #else
633  return Space::intersect(bucketNode->boundingBox, s);
634 #endif
635 }
636 
651 
652 inline int openCriterionNode(Tree::GenericTreeNode *node,
653  Tree::GenericTreeNode *myNode,
654  Vector3D<cosmoType> offset
655  ) {
656 
657 #if COSMO_STATS > 0
658  node->used = true;
659 #endif
660  // Always open node if this many particles or fewer.
661  const int nMinParticleNode = 6;
662  if(node->particleCount <= nMinParticleNode) {
663  return 1;
664  }
665 
666  // Note that some of this could be pre-calculated into an "opening radius"
667  cosmoType radius = TreeStuff::opening_geometry_factor * node->moments.getRadius() / theta;
668  if(radius < node->moments.getRadius())
669  radius = node->moments.getRadius();
670 
671  Sphere<cosmoType> s(node->moments.cm + offset, radius);
672 
673  if(myNode->getType()==Tree::Bucket || myNode->getType()==Tree::CachedBucket || myNode->getType()==Tree::NonLocalBucket){
674  if(Space::intersect(myNode->boundingBox, s))
675  return 1;
676  else
677 #ifdef HEXADECAPOLE
678  {
679  // Well separated, now check softening
680  if(!openSoftening(node, myNode, offset)) {
681  return 0; // passed both tests: will be a Hex interaction
682  }
683  else { // Open as monopole?
684  radius = TreeStuff::opening_geometry_factor*node->moments.getRadius()/thetaMono;
685  Sphere<cosmoType> sM(node->moments.cm + offset, radius);
686  if(Space::intersect(myNode->boundingBox, sM))
687  return 1;
688  else
689  return 0;
690  }
691  }
692 #else
693  return 0;
694 #endif
695  }
696  else{
697  if(Space::intersect(myNode->boundingBox, s)){
698  if(Space::contained(myNode->boundingBox,s))
699  return 1;
700  else
701  return -1;
702  }
703  else
704 #ifdef HEXADECAPOLE
705  {
706  // Well separated, now check softening
707  if(!openSoftening(node, myNode, offset)) {
708  return 0; // passed both tests: will be a Hex interaction
709  }
710  else { // Open as monopole?
711  radius = TreeStuff::opening_geometry_factor*node->moments.getRadius()/thetaMono;
712  Sphere<cosmoType> sM(node->moments.cm + offset, radius);
713  if(Space::intersect(myNode->boundingBox, sM))
714  return 1;
715  else
716  return 0;
717  }
718  }
719 #else
720  return 0;
721 #endif
722  }
723 }
724 
725 #endif
A representation of a multipole expansion.
Definition: MultipoleMoments.h:163
NodeType getType() const
return Tree::NodeType of node
Definition: GenericTreeNode.h:166
MultipoleMoments moments
The moments for the gravity computation.
Definition: GenericTreeNode.h:94
unsigned int particleCount
Total number of particles contained (across all chares)
Definition: GenericTreeNode.h:115
cosmoType theta
BH-like opening criterion.
Definition: ParallelGravity.cpp:142
Base class for tree nodes.
Definition: GenericTreeNode.h:59
Information needed to calculate gravity.
Definition: GravityParticle.h:33
int lastParticle
An index to the last particle contained by this node, myNumParticles+1 means outside the node...
Definition: GenericTreeNode.h:108
int firstParticle
An index for the first particle contained by this node, 0 means outside the node. ...
Definition: GenericTreeNode.h:106
OrientedBox< cosmoType > boundingBox
The axis-aligned bounding box of this node.
Definition: GenericTreeNode.h:98
cosmoType totalMass
The total mass represented by this expansion.
Definition: MultipoleMoments.h:172
cosmoType thetaMono
Definition: ParallelGravity.cpp:143
Fundamental type for a particle.
Definition: GravityParticle.h:316
cosmoType dtGrav
Definition: GravityParticle.h:322
Vector3D< cosmoType > cm
The center of mass (zeroth order multipole)
Definition: MultipoleMoments.h:174