/* If SSE2 use was requested (CMK_USE_SSE2 nonzero) but the compiler does not
 * define __SSE2__, CMK_USE_SSE2 is redefined to 0 here.
 * NOTE(review): the lines between the #if and the #define, and the closing
 * #endif, are not visible in this excerpt. */
15 #if CMK_USE_SSE2 && !defined(__SSE2__)
17 #define CMK_USE_SSE2 0
/* Everything below is compiled only when CMK_USE_AVX or CMK_USE_SSE2 is
 * nonzero. */
20 #if CMK_USE_AVX || CMK_USE_SSE2
/* Compile-time rejection of single-precision AVX.
 * NOTE(review): the condition selecting this #error is not visible in this
 * excerpt. */
26 #error "single-precision AVX is not supported"
/* Double-precision configuration with four lanes per vector.
 * NOTE(review): presumably the AVX path (four doubles per register) -- the
 * selecting condition is not visible here; confirm. */
28 #include "SSE-Double.h"
29 #define SSE_VECTOR_WIDTH 4
/* One less than SSE_VECTOR_WIDTH; presumably the padding required on input
 * lists so a full vector can always be read -- confirm against users. */
30 #define FORCE_INPUT_LIST_PAD 3
31 typedef SSEDouble SSEcosmoType;
/* SSELoad(where, arr, idx, field): invokes `where` with the `field` member
 * (field is pasted textually after arr[i], e.g. a ".member" suffix) of the
 * four consecutive elements arr[idx] .. arr[idx+3]. */
32 #define SSELoad(where, arr, idx, field) where(arr[idx]field, arr[idx+1]field, arr[idx+2]field, arr[idx+3]field)
/* SSEStore(what, arr, idx, field): writes four values p[0] .. p[3] into the
 * `field` member of arr[idx] .. arr[idx+3].
 * NOTE(review): the declaration of `p`, how it is filled from `what`, and the
 * closing brace of the macro are not visible in this excerpt. */
33 #define SSEStore(what, arr, idx, field) { \
36 arr[idx]field = p[0]; \
37 arr[idx+1]field = p[1]; \
38 arr[idx+2]field = p[2]; \
39 arr[idx+3]field = p[3]; \
/* Single-precision configuration with four lanes per vector.
 * SSE_COSMO_FLOAT marks that the float variant was selected; it is tested
 * later to skip the double-precision SSE2 configuration.
 * NOTE(review): the condition selecting this branch is not visible here. */
45 #define SSE_COSMO_FLOAT
47 #include "SSE-Float.h"
48 #define SSE_VECTOR_WIDTH 4
/* One less than SSE_VECTOR_WIDTH; presumably the padding required on input
 * lists so a full vector can always be read -- confirm against users. */
49 #define FORCE_INPUT_LIST_PAD 3
50 typedef SSEFloat SSEcosmoType;
/* SSELoad(where, arr, idx, field): invokes `where` with the `field` member
 * of the four consecutive elements arr[idx] .. arr[idx+3]. */
51 #define SSELoad(where, arr, idx, field) where(arr[idx]field, arr[idx+1]field, arr[idx+2]field, arr[idx+3]field)
/* SSEStore(what, arr, idx, field): writes four values p[0] .. p[3] into the
 * `field` member of arr[idx] .. arr[idx+3].
 * NOTE(review): the declaration of `p`, how it is filled from `what`, and the
 * closing brace of the macro are not visible in this excerpt. */
52 #define SSEStore(what, arr, idx, field) { \
55 arr[idx]field = p[0]; \
56 arr[idx+1]field = p[1]; \
57 arr[idx+2]field = p[2]; \
58 arr[idx+3]field = p[3]; \
62 #error("SSE not available");
/* Double-precision SSE2 configuration with two lanes per vector; used when
 * the compiler defines __SSE2__ and the float variant (SSE_COSMO_FLOAT) has
 * not been selected. */
65 #if defined(__SSE2__) && !defined(SSE_COSMO_FLOAT)
66 #include "SSE-Double.h"
67 #define SSE_VECTOR_WIDTH 2
/* One less than SSE_VECTOR_WIDTH; presumably the padding required on input
 * lists so a full vector can always be read -- confirm against users. */
68 #define FORCE_INPUT_LIST_PAD 1
69 typedef SSEDouble SSEcosmoType;
/* SSELoad(where, arr, idx, field): invokes `where` with the `field` member
 * of the two consecutive elements arr[idx] and arr[idx+1]. */
70 #define SSELoad(where, arr, idx, field) where(arr[idx]field, arr[idx+1]field)
/* SSEStore(what, arr, idx, field): passes `what` to storel() with the address
 * of arr[idx]field and to storeh() with the address of arr[idx+1]field.
 * NOTE(review): storel/storeh are defined elsewhere (presumably they write
 * the low and high lane respectively -- confirm); the closing brace of the
 * macro is not visible in this excerpt. */
71 #define SSEStore(what, arr, idx, field) { \
72 storel(&arr[idx]field, what); \
73 storeh(&arr[idx+1]field, what); \
77 #error("SSE not available");