Actual source code: sseenabled.c
  2: #include <petscsys.h> /*I "petscsys.h" I*/
  4: #ifdef PETSC_HAVE_SSE
  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
  9: #include <string.h>
 13: PetscErrorCode  PetscSSEHardwareTest(PetscBool  *flag)
 14: {
 16:   char *vendor;
 17:   char Intel[13]="GenuineIntel";
 18:   char AMD[13]  ="AuthenticAMD";
 21:   PetscMalloc(13*sizeof(char),&vendor);
 22:   strcpy(vendor,"************");
 23:   CPUID_GET_VENDOR(vendor);
 24:   if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
 25:     /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
 26:     /* to denote availability of SSE Support */
 27:     unsigned long myeax,myebx,myecx,myedx;
 28:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 29:     if (myedx & SSE_FEATURE_FLAG) {
 30:       *flag = PETSC_TRUE;
 31:     } else {
 32:       *flag = PETSC_FALSE;
 33:     }
 34:   }
 35:   PetscFree(vendor);
 36:   return(0);
 37: }
 39: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and return an error code.
*/
 48: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
 50: static void PetscSSEDisabledHandler(int sig) {
 51:   signal(SIGILL,SIG_IGN);
 52:   exit(-1);
 53: }
 57: PetscErrorCode  PetscSSEOSEnabledTest_Linux(PetscBool  *flag)
 58: {
 59:   int status, pid = 0;
 61:   signal(SIGILL,PetscSSEDisabledHandler);
 62:   pid = fork();
 63:   if (pid==0) {
 64:     SSE_SCOPE_BEGIN;
 65:       XOR_PS(XMM0,XMM0);
 66:     SSE_SCOPE_END;
 67:     exit(0);
 68:   } else {
 69:     wait(&status);
 70:   }
 71:   if (!status) {
 72:     *flag = PETSC_TRUE;
 73:   } else {
 74:     *flag = PETSC_FALSE;
 75:   }
 76:   return(0);
 77: }
 79: #else
/*
   Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
   Windows ME/2000 do not disable SSE hardware.
*/
 84: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 85: #endif 
 89: PetscErrorCode  PetscSSEOSEnabledTest_TRUE(PetscBool  *flag)
 90: {
 92:   if (flag) {
 93:     *flag = PETSC_TRUE;
 94:   }
 95:   return(0);
 96: }
 98: #else  /* Not defined PETSC_HAVE_SSE */
100: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
101: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
105: PetscErrorCode  PetscSSEEnabledTest_FALSE(PetscBool  *flag)
106: {
108:   if (flag) {
109:     *flag = PETSC_FALSE;
110:   }
111:   return(0);
112: }
114: #endif /* defined PETSC_HAVE_SSE */
/*@C
     PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
     set can be used.  Some operating systems do not allow the use of these instructions despite
     hardware availability.

     Collective on MPI_Comm

     Input Parameter:
.    comm - the MPI Communicator

     Output Parameters:
+    lflag - Local Flag:  PETSC_TRUE if enabled in this process
-    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

     Notes:
     PETSC_NULL can be specified for lflag or gflag if either of these values is not desired.

     Options Database Keys:
.    -disable_sse - Disable use of hand tuned Intel SSE implementations

     Level: developer
@*/
140: static PetscBool  petsc_sse_local_is_untested  = PETSC_TRUE;
141: static PetscBool  petsc_sse_enabled_local      = PETSC_FALSE;
142: static PetscBool  petsc_sse_global_is_untested = PETSC_TRUE;
143: static PetscBool  petsc_sse_enabled_global     = PETSC_FALSE;
144: PetscErrorCode  PetscSSEIsEnabled(MPI_Comm comm,PetscBool  *lflag,PetscBool  *gflag) {
146:   PetscBool  disabled_option;
150:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
151:     disabled_option = PETSC_FALSE;
153:     PetscOptionsBool("-disable_sse",
154:                             "Disable use of hand tuned Intel SSE implementations <true,false>.",
155:                             "PetscSSEIsEnabled",disabled_option,&disabled_option,PETSC_NULL);
156:     if (disabled_option) {
157:       petsc_sse_local_is_untested  = PETSC_FALSE;
158:       petsc_sse_enabled_local      = PETSC_FALSE;
159:       petsc_sse_global_is_untested = PETSC_FALSE;
160:       petsc_sse_enabled_global     = PETSC_FALSE;
161:     }
163:     if (petsc_sse_local_is_untested) {
164:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
165:       if (petsc_sse_enabled_local) {
166:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
167:       }
168:       petsc_sse_local_is_untested = PETSC_FALSE;
169:     }
171:     if (gflag && petsc_sse_global_is_untested) {
172:       MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
173:       petsc_sse_global_is_untested = PETSC_FALSE;
174:     }
175:   }
177:   if (lflag) {
178:     *lflag = petsc_sse_enabled_local;
179:   }
180:   if (gflag) {
181:     *gflag = petsc_sse_enabled_global;
182:   }
183:   return(0);
184: }