changa  3.5
 All Classes Files Functions Variables Typedefs Enumerations Friends Macros Groups Pages
SSEdefs.h
1 #ifndef __SSEDEFS_H__
2 #define __SSEDEFS_H__
3 
4 #include "cosmoType.h"
5 
/*
 * AVX was requested via CMK_USE_AVX.  If the compiler is not targeting AVX
 * (__AVX__ is undefined), force the request off by redefining CMK_USE_AVX
 * to 0; otherwise emit a build-time notice that the AVX path is in use.
 */
#if CMK_USE_AVX && !defined(__AVX__)
  #undef CMK_USE_AVX
  #define CMK_USE_AVX 0
#elif CMK_USE_AVX
  #warning "using AVX"
#endif
14 
/*
 * SSE2 was requested via CMK_USE_SSE2 but the compiler is not targeting it
 * (__SSE2__ is undefined): turn the request off by redefining the macro to 0.
 */
#if CMK_USE_SSE2
  #ifndef __SSE2__
    #undef CMK_USE_SSE2
    #define CMK_USE_SSE2 0
  #endif
#endif
19 
/*
 * Summary flag: CMK_SSE is defined as 1 when either vector path (SSE2 or AVX)
 * is enabled at this point, and is left undefined otherwise.
 */
#if CMK_USE_SSE2 || CMK_USE_AVX
  #define CMK_SSE 1
#endif
23 
/*
 * Select the vector wrapper type and the gather/scatter helper macros for the
 * enabled instruction set (CMK_USE_AVX / CMK_USE_SSE2) and the configured
 * precision (COSMO_FLOAT defined => single precision).
 *
 * Each supported configuration provides:
 *   SSE_VECTOR_WIDTH      lanes per SSEcosmoType (4 or 2).
 *   FORCE_INPUT_LIST_PAD  SSE_VECTOR_WIDTH - 1 in every configuration here
 *                         (presumably list padding so a full vector can be
 *                         loaded at the tail -- confirm against callers).
 *   SSEcosmoType          SSEDouble or SSEFloat from the included SSE-*.h.
 *   SSELoad(where, arr, idx, field)
 *                         expands to `where(...)` whose arguments are
 *                         (arr)[(idx)+k]field for k = 0 .. SSE_VECTOR_WIDTH-1.
 *                         `field` is a member-access suffix spliced in
 *                         textually, so it cannot be parenthesized.
 *   SSEStore(what, arr, idx, field)
 *                         writes the lanes of `what` to (arr)[(idx)+k]field
 *                         for k = 0 .. SSE_VECTOR_WIDTH-1.
 *   cosmoMask             constant with SSE_VECTOR_WIDTH low bits set.
 *
 * `arr` and `idx` are parenthesized in the expansions so that arguments such
 * as `base + off` or `i << 1` index the intended elements.
 *
 * NOTE(review): SSEStore expands to a bare brace block, not do { } while (0),
 * so it must not be used as the unbraced body of an `if` that has an `else`.
 * The form is kept because callers are not visible from this header.
 */
#if CMK_USE_AVX
  #ifdef COSMO_FLOAT
    #error "single-precision AVX is not supported"
  #else
    /* AVX, double precision: four lanes per vector. */
    #include "SSE-Double.h"
    #define SSE_VECTOR_WIDTH 4
    #define FORCE_INPUT_LIST_PAD 3
    typedef SSEDouble SSEcosmoType;
    #define SSELoad(where, arr, idx, field) \
        where((arr)[(idx)]field, (arr)[(idx)+1]field, \
              (arr)[(idx)+2]field, (arr)[(idx)+3]field)
    /* Lanes go through a scratch array; its name is deliberately unusual so
     * it cannot capture an identifier appearing in the macro arguments. */
    #define SSEStore(what, arr, idx, field) { \
        double sseStoreLanes_[4]; \
        storeu(sseStoreLanes_, what); \
        (arr)[(idx)]field = sseStoreLanes_[0]; \
        (arr)[(idx)+1]field = sseStoreLanes_[1]; \
        (arr)[(idx)+2]field = sseStoreLanes_[2]; \
        (arr)[(idx)+3]field = sseStoreLanes_[3]; \
    }
    enum {cosmoMask=0xf};
  #endif
#elif CMK_USE_SSE2
  #ifdef COSMO_FLOAT
    #define SSE_COSMO_FLOAT
    #if defined(__SSE2__)
      /* SSE2, single precision: four lanes per vector. */
      #include "SSE-Float.h"
      #define SSE_VECTOR_WIDTH 4
      #define FORCE_INPUT_LIST_PAD 3
      typedef SSEFloat SSEcosmoType;
      #define SSELoad(where, arr, idx, field) \
          where((arr)[(idx)]field, (arr)[(idx)+1]field, \
                (arr)[(idx)+2]field, (arr)[(idx)+3]field)
      #define SSEStore(what, arr, idx, field) { \
          float sseStoreLanes_[4]; \
          storeu(sseStoreLanes_, what); \
          (arr)[(idx)]field = sseStoreLanes_[0]; \
          (arr)[(idx)+1]field = sseStoreLanes_[1]; \
          (arr)[(idx)+2]field = sseStoreLanes_[2]; \
          (arr)[(idx)+3]field = sseStoreLanes_[3]; \
      }
      enum {cosmoMask=0xf};
    #else
      #error "SSE not available"
    #endif
  #else
    #if defined(__SSE2__) && !defined(SSE_COSMO_FLOAT)
      /* SSE2, double precision: two lanes per vector, stored one lane at a
       * time with storel (to idx) and storeh (to idx+1). */
      #include "SSE-Double.h"
      #define SSE_VECTOR_WIDTH 2
      #define FORCE_INPUT_LIST_PAD 1
      typedef SSEDouble SSEcosmoType;
      #define SSELoad(where, arr, idx, field) \
          where((arr)[(idx)]field, (arr)[(idx)+1]field)
      #define SSEStore(what, arr, idx, field) { \
          storel(&(arr)[(idx)]field, what); \
          storeh(&(arr)[(idx)+1]field, what); \
      }
      enum {cosmoMask=0x3};
    #else
      #error "SSE not available"
    #endif
  #endif
#endif
81 
82 #endif