Changeset 7
- Timestamp:
- 07/11/10 18:40:47 (23 months ago)
- Location:
- trunk/cuCga/src
- Files:
-
- 5 modified
Legend:
- Unmodified
- Added
- Removed
-
trunk/cuCga/src/ca.cu
r6 r7 22 22 #include <assert.h> 23 23 24 void createLattice(Lattice *hLat, int mode) 24 void 25 createLattice(Lattice * hLat, int mode) 25 26 { 26 int i; 27 int idx,amount; 28 extern long idum; 29 memset(hLat->cells,'0',LAT_SIZE); 30 hLat->density = 0; 31 switch(mode) 32 { 27 int i; 28 int idx, amount; 29 extern long idum; 30 int lat = 0; 31 for (lat = 0; lat < MAX_IC; lat++) { 32 memset(hLat[lat].cells, '0', LAT_SIZE); 33 hLat[lat].density = 0; 34 switch (mode) { 33 35 case 1: 34 36 { 35 hLat->density = (size_t)(ran2(&idum) * LAT_SIZE); 36 amount=0; 37 while(amount != hLat->density) 38 { 39 idx = (int)(ran2(&idum) * LAT_SIZE); 40 if(hLat->cells[idx] == '0') 41 { 42 hLat->cells[idx]='1'; 43 amount++; 44 } 45 } 46 break; 37 hLat[lat].density = (size_t) (ran2(&idum) * LAT_SIZE); 38 amount = 0; 39 while (amount != hLat[lat].density) { 40 idx = (int) (ran2(&idum) * LAT_SIZE); 41 if (hLat[lat].cells[idx] != '1') { 42 hLat[lat].cells[idx] = '1'; 43 amount++; 44 } 45 } 46 break; 47 47 } 48 48 case 2: 49 49 { 50 for(i=0;i<LAT_SIZE;i++) 51 { 52 hLat->cells[i] = (ran2(&idum) < 0.5 ? '0':'1'); 53 if(hLat->cells[i]=='1') hLat->density++;54 }55 break;50 for (i = 0; i < LAT_SIZE; i++) { 51 hLat[lat].cells[i] = (ran2(&idum) < 0.5 ? '0' : '1'); 52 if (hLat[lat].cells[i] == '1') 53 hLat[lat].density++; 54 } 55 break; 56 56 } 57 } 57 58 } 58 59 } 59 60 60 void createPopulation(Individual *hPop) 61 void 62 createPopulation(Individual * hPop) 61 63 { 62 int i,j; 63 extern long idum; 64 for(i=0;i<POPULATION;i++) 65 { 66 for(j=0;j<RULE_SIZE;j++) 67 hPop[i].rule[j] = (ran2(&idum) < 0.5 ? '0':'1'); 68 memcpy(hPop[i].rule,TST_RULE,RULE_SIZE); 64 int i, j; 65 extern long idum; 66 for (i = 0; i < POPULATION; i++) { 67 for (j = 0; j < RULE_SIZE; j++) 68 hPop[i].rule[j] = (ran2(&idum) < 0.5 ? '0' : '1'); 69 memcpy(hPop[i].rule, TST_RULE, RULE_SIZE); 69 70 } 70 71 } 71 -
trunk/cuCga/src/kernel.cu
r6 r7 21 21 #include "structs.h" 22 22 23 #ifndef __KERNEL_CU 24 #define __KERNEL_CU 25 26 __device__ int bin2dec(char *bin) 23 __device__ int 24 bin2dec(char *bin) 27 25 { 28 int num,i,j,b; 29 num=0; 30 j=0; 31 for(i=RADIUS*2; i>=0;i--) 32 { 33 b = (bin[i]=='0'?0:1); 34 num += b*powf(2,j++); 26 int num, i, j, b; 27 num = 0; 28 j = 0; 29 for (i = RADIUS * 2; i >= 0; i--) { 30 b = (bin[i] == '0' ? 0 : 1); 31 num += b * powf(2, j++); 35 32 } 36 33 return num; 37 34 } 38 35 39 __global__ void caKernel2(struct Lattice *dLat, struct Individual *dInd) 36 __global__ void 37 caKernel(struct Lattice * dLat, struct Individual * dInd) 40 38 { 41 int latIdx = blockIdx.x; 42 int indIdx = blockIdx.y; 43 int idx = threadIdx.x; 44 Lattice curr,next; 45 int run,i; 46 char bin[RADIUS*2+1]; 47 int stPos; 48 //Copy to a local lattice 49 curr.density = dLat[latIdx].density; 50 curr.cells[idx] = dLat[latIdx].cells[idx]; 51 for(run=0;run<CA_RUNS;run++) 52 { 53 __syncthreads(); //Wait for all threads to finish 54 if(idx-RADIUS < 0) 55 stPos = LAT_SIZE+(idx-RADIUS); 39 int latId = blockIdx.x; 40 int idx = threadIdx.x; 41 Lattice next; 42 char bin[RADIUS * 2 + 1]; 43 int run, i; 44 int stPos; 45 46 for (run = 0; run < CA_RUNS; run++) { 47 __syncthreads(); 48 if (idx - RADIUS < 0) 49 stPos = LAT_SIZE + (idx - RADIUS); 56 50 else 57 stPos = idx -RADIUS;58 for (i=0;i<RADIUS*2+1;i++)59 {60 bin[i] = curr.cells[stPos++];61 if(stPos == LAT_SIZE)stPos = 0;51 stPos = idx - RADIUS; 52 for (i = 0; i < RADIUS * 2 + 1; i++) { 53 bin[i] = dLat[latId].cells[stPos++]; 54 if (stPos == LAT_SIZE) 55 stPos = 0; 62 56 } 63 next.cells[idx] = dInd[indIdx].rule[ bin2dec(bin) ]; 64 __syncthreads(); //Wait for all threads to finish 65 curr.cells[idx] = next.cells[idx]; 57 next.cells[idx] = dInd->rule[bin2dec(bin)]; 58 __syncthreads(); 59 //Wait for all threads to finish 60 61 // Copy the next to the current one 62 dLat[latId].cells[idx] = next.cells[idx]; 66 63 } 67 //Calculate the density (serialized)68 int density = 0;69 if (idx==0)70 {71 for (i=0;i<LAT_SIZE;i++)72 density +=(next.cells[i]=='1'?1:0);73 if ( (curr.density > LAT_SIZE/2 && density == LAT_SIZE) ||74 (curr.density < LAT_SIZE/2 && density == 0))75 dInd [indIdx].fitness++;64 //Calculate the final density 65 __syncthreads(); 66 if (idx == 0) { 67 int density = 0; 68 for (i = 0; i < LAT_SIZE; i++) 69 density += (dLat[latId].cells[i] == '1' ? 1 : 0); 70 if ((dLat[latId].density > LAT_SIZE / 2 && density == LAT_SIZE) || 71 (dLat[latId].density < LAT_SIZE / 2 && density == 0)) 72 dInd->fitness++; 76 73 } 77 74 } 78 75 79 __global__ void caKernel(struct Lattice *dLat, struct Individual *dInd) 76 __global__ void 77 caKernel2(struct Lattice * dLat, struct Individual * dInd) 80 78 { 81 int latId = blockIdx.x;82 int idx = threadIdx.x;83 Lattice next;84 char bin[RADIUS*2+1];85 int run,i;86 int stPos;79 struct Lattice cur, next; 80 int latId = blockIdx.x; 81 int popId = blockIdx.y; 82 int pos = threadIdx.x; 83 int stPos, run, i; 84 char bin[RADIUS * 2 + 1]; 87 85 88 if(latId < MAX_IC && idx < LAT_SIZE) 89 { 90 for(run = 0; run < CA_RUNS; run++) 91 { 92 __syncthreads(); 93 if(idx - RADIUS < 0) 94 stPos = LAT_SIZE + (idx-RADIUS); 95 else 96 stPos = idx-RADIUS; 97 for(i=0;i<RADIUS*2+1;i++) 98 { 99 bin[i] = dLat[latId].cells[stPos++]; 100 if(stPos==LAT_SIZE) stPos=0; 101 } 102 next.cells[idx] = dInd->rule[bin2dec(bin)]; 103 __syncthreads(); //Wait for all threads to finish 86 //Copy lattice to current. 87 next.density = dLat[latId].density; 88 cur.density = dLat[latId].density; 89 cur.cells[pos] = dLat[latId].cells[pos]; 90 __syncthreads(); 104 91 105 //Copy the next to the current one 106 dLat[latId].cells[idx]=next.cells[idx]; 92 //Execute CA for CA_RUNS 93 for (run = 0; run < CA_RUNS; run++) { 94 __syncthreads(); 95 stPos = pos - RADIUS; 96 if (stPos < 0) 97 stPos = LAT_SIZE + (pos - RADIUS); 98 for (i = 0; i < RADIUS * 2 + 1; i++) { 99 bin[i] = cur.cells[stPos++]; 100 if (stPos == LAT_SIZE) 101 stPos = 0; 107 102 } 108 //Calculate the final density 103 next.cells[pos] = dInd[popId].rule[bin2dec(bin)]; 104 //Copy from next to cur 109 105 __syncthreads(); 110 if(idx == 0) 111 { 112 int density=0; 113 for(i=0;i<LAT_SIZE;i++) 114 density+=(dLat[latId].cells[i]=='1'?1:0); 115 //__syncthreads(); 116 if( (dLat[latId].density > LAT_SIZE/2 && density == LAT_SIZE) || 117 (dLat[latId].density < LAT_SIZE/2 && density == 0) ) 118 dInd->fitness++; 119 // __syncthreads(); 120 } 106 cur.cells[pos] = next.cells[pos]; 121 107 } 108 __syncthreads(); 109 110 dLat[latId].cells[pos] = next.cells[pos]; 122 111 } 123 #endif //__KERNEL_CU 112 113 __global__ void resetFitness(struct Individual *dInd) 114 { 115 int id = threadIdx.x; 116 dInd[id].fitness=0; 117 } -
trunk/cuCga/src/kernel.h
r6 r7 23 23 24 24 __global__ void caKernel2(struct Lattice *dLat, struct Individual *dInd); 25 __global__ void resetFitness(struct Individual *dInd); 25 26 __global__ void caKernel(struct Lattice *dLat, struct Individual *dInd); 26 27 __device__ int bin2dec(char *bin); -
trunk/cuCga/src/main.cu
r6 r7 25 25 #include "ca.h" 26 26 27 long idum = 0;27 long idum = 0; 28 28 29 int main(int argc, char *argv[]) 29 int 30 cuErrorCheck(cudaError_t dError, const char *prefix) 30 31 { 31 Lattice *hLat = NULL; 32 Individual *hPop = NULL; 33 Lattice *dLat; 34 Individual *dPop; 35 cudaError_t dError = cudaSuccess; 36 size_t latMem = sizeof(Lattice)*MAX_IC; 37 size_t popMem = sizeof(Individual)*POPULATION; 32 if (dError != cudaSuccess) { 33 fprintf(stderr, "[Cuda-ERROR] %s: %s\n", prefix, cudaGetErrorString(dError)); 34 return -1; 35 } 36 return 0; 37 } 38 38 39 //Starts the random sequence 40 idum = -time(NULL); 39 int 40 main(int argc, char *argv[]) 41 { 42 struct Individual *dPop, *hPop; 43 struct Lattice *dLat, *hLat; 44 size_t latMem = sizeof(struct Lattice) * MAX_IC; 45 size_t popMem = sizeof(struct Individual) * POPULATION; 46 cudaError_t dError = cudaSuccess; 41 47 42 //Allocate memory for the host 43 if((hLat=(Lattice*)malloc(latMem))==NULL) 44 { 48 //Allocate host memory 49 if ((hPop = (struct Individual *) malloc(popMem)) == NULL) { 45 50 perror("malloc"); 46 51 return EXIT_FAILURE; 47 52 } 48 memset(hLat,'\0',latMem); 49 if((hPop=(Individual*)malloc(popMem))==NULL) 50 { 53 if ((hLat = (struct Lattice *) malloc(latMem)) == NULL) { 51 54 perror("malloc"); 55 free(hPop); 56 return EXIT_FAILURE; 57 } 58 //Allocate device memory 59 dError = cudaMalloc((void **) &dPop, popMem); 60 if (cuErrorCheck(dError, "cudaMalloc:") < 0) { 61 free(hPop); 52 62 free(hLat); 53 63 return EXIT_FAILURE; 54 64 } 55 memset(hPop,'\0',popMem); 56 57 58 //Create initial lattices (test) 59 int i; 60 for(i=0;i<MAX_IC;i++) 61 createLattice(&hLat[i],1); 62 //Create initial population 63 createPopulation(hPop); 64 65 //Device memory 66 dError = cudaMalloc((void**)&dLat,latMem); 67 if(dError!=cudaSuccess) 68 { 69 fprintf(stderr,"[CUDA-ERROR] cudaMalloc(): %s\n",cudaGetErrorString(dError)); 65 dError = cudaMalloc((void **) &dLat, latMem); 66 if (cuErrorCheck(dError, "cudaMalloc:") < 0) { 67 free(hPop); 70 68 free(hLat); 71 free(hPop);69 cudaFree(dPop); 72 70 return EXIT_FAILURE; 73 71 } 74 dError = cudaMalloc((void**)&dPop,popMem); 75 if(dError!=cudaSuccess) 76 { 77 fprintf(stderr,"[CUDA-ERROR] cudaMalloc(): %s\n",cudaGetErrorString(dError)); 78 cudaFree(dLat); 72 //Start the PRNG sequence 73 idum = -time(NULL); 74 75 //Create host population 76 createPopulation(hPop); 77 //Pass population to device 78 dError = cudaMemcpy(dPop, hPop, popMem, cudaMemcpyHostToDevice); 79 if (cuErrorCheck(dError, "cudaMemcpy:") < 0) { 80 free(hPop); 79 81 free(hLat); 80 free(hPop);81 return EXIT_FAILURE;82 }83 //Pass memory84 dError = cudaMemcpy(dPop,hPop,popMem,cudaMemcpyHostToDevice);85 if(dError!=cudaSuccess)86 {87 fprintf(stderr,"[CUDA-ERROR] cudaMemcpy(): %s\n",cudaGetErrorString(dError));88 82 cudaFree(dPop); 89 83 cudaFree(dLat); 84 } 85 86 int i,g; 87 for(g=0;g<GA_RUNS;g++) 88 { 89 createLattice(hLat,1); 90 dError = cudaMemcpy(dLat,hLat,latMem,cudaMemcpyHostToDevice); 91 if (cuErrorCheck(dError, "cudaMemcpy:") < 0) { 92 free(hPop); 93 free(hLat); 94 cudaFree(dPop); 95 cudaFree(dLat); 96 } 97 //First reset the fitness of the population on the device 98 resetFitness<<<1,POPULATION>>>(dPop); 99 //Now execute the CA for each individual in each IC (this way is way faster than using a big kernel) 100 for(i=0;i<POPULATION;i++) 101 { 102 caKernel<<<MAX_IC,LAT_SIZE>>>(dLat,&dPop[i]); 103 cudaThreadSynchronize(); 104 } 105 } 106 107 //Get back population 108 dError = cudaMemcpy(hPop, dPop, popMem, cudaMemcpyDeviceToHost); 109 if (cuErrorCheck(dError, "cudaMemcpy:") < 0) { 110 free(hPop); 90 111 free(hLat); 91 free(hPop);92 return EXIT_FAILURE;93 }94 dError = cudaMemcpy(dLat,hLat,latMem,cudaMemcpyHostToDevice);95 if(dError!=cudaSuccess)96 {97 fprintf(stderr,"[CUDA-ERROR] cudaMemcpy(): %s\n",cudaGetErrorString(dError));98 112 cudaFree(dPop); 99 113 cudaFree(dLat); 114 return EXIT_FAILURE; 115 } 116 dError = cudaMemcpy(hLat, dLat, latMem, cudaMemcpyDeviceToHost); 117 if (cuErrorCheck(dError, "cudaMemcpy:") < 0) { 118 free(hPop); 100 119 free(hLat); 101 free(hPop);102 return EXIT_FAILURE;103 }104 105 //Call the kernel (only once for testing)106 /*107 dim3 grids(MAX_IC,POPULATION);108 caKernel2<<<grids,LAT_SIZE>>>(dLat,dPop);109 dError = cudaGetLastError();110 if(dError!=cudaSuccess)111 {112 fprintf(stderr,"[CUDA-ERROR] CUDA Kernel: %s\n",cudaGetErrorString(dError));113 120 cudaFree(dPop); 114 121 cudaFree(dLat); 115 free(hLat);116 free(hPop);117 122 return EXIT_FAILURE; 118 123 } 119 */ 120 for(i=0;i<POPULATION;i++) 121 { 122 caKernel<<<MAX_IC,LAT_SIZE>>>(dLat,&dPop[i]); 123 dError = cudaGetLastError(); 124 if(dError!=cudaSuccess) 125 { 126 fprintf(stderr,"[CUDA-ERROR] CUDA Kernel: %s\n",cudaGetErrorString(dError)); 127 cudaFree(dPop); 128 cudaFree(dLat); 129 free(hLat); 130 free(hPop); 131 return EXIT_FAILURE; 132 } 124 for (i = 0; i < MAX_IC; i++) { 125 printf("%2d:%s\n", i, hLat[i].cells); 133 126 } 134 //get memory 135 dError = cudaMemcpy(hPop,dPop,popMem,cudaMemcpyDeviceToHost); 136 if(dError!=cudaSuccess) 137 { 138 fprintf(stderr,"[CUDA-ERROR] cudaMemcpy(): %s\n",cudaGetErrorString(dError)); 139 cudaFree(dPop); 140 cudaFree(dLat); 141 free(hLat); 142 free(hPop); 143 return EXIT_FAILURE; 144 } 145 146 //Output (should evolve) 147 // for(i=0;i<MAX_IC;i++) 148 // printf("%d:%s\n",i,hLat[i].cells); 149 // printf("Final fitness:%zu\n",hPop[0].fitness); 150 printf("Fitness:"); 151 for(i=0;i<POPULATION;i++) 152 printf("[%zu]",hPop[i].fitness); 127 //Print population fitness 128 printf("Fitness:"); 129 for (i = 0; i < POPULATION; i++) { 130 printf("[%zu]", hPop[i].fitness); 131 } 153 132 printf("\n"); 154 133 155 134 //Free host memory 135 free(hPop); 156 136 free(hLat); 157 free(hPop);158 137 //Free device memory 138 cudaFree(dPop); 159 139 cudaFree(dLat); 160 cudaFree(dPop); 140 161 141 return EXIT_SUCCESS; 162 142 } 163 -
trunk/cuCga/src/random.cu
r5 r7 22 22 23 23 float 24 ran0 (long *idum)24 ran0(long *idum) 25 25 { 26 long k;27 float ans;26 long k; 27 float ans; 28 28 29 29 *idum ^= MASK; … … 38 38 39 39 float 40 ran1 (long *idum)40 ran1(long *idum) 41 41 { 42 int j;43 long k;44 static long iy = 0;45 static long iv[NTAB];46 float temp;42 int j; 43 long k; 44 static long iy = 0; 45 static long iv[NTAB]; 46 float temp; 47 47 48 if (*idum <= 0 || !iy) 49 { 50 if (-(*idum) < 1) 51 *idum = 1; 52 for (j = NTAB + 7; j >= 0; j--) 53 { 54 k = (*idum) / IQ; 55 *idum = IA * (*idum - k * IQ) - IR * k; 56 if (*idum < 0) 57 *idum += IM; 58 if (j < NTAB) 59 iv[j] = *idum; 60 } 61 iy = iv[0]; 48 if (*idum <= 0 || !iy) { 49 if (-(*idum) < 1) 50 *idum = 1; 51 for (j = NTAB + 7; j >= 0; j--) { 52 k = (*idum) / IQ; 53 *idum = IA * (*idum - k * IQ) - IR * k; 54 if (*idum < 0) 55 *idum += IM; 56 if (j < NTAB) 57 iv[j] = *idum; 62 58 } 59 iy = iv[0]; 60 } 63 61 k = (*idum) / IQ; 64 62 *idum = IA * (*idum - k * IQ) - IR * k; … … 75 73 76 74 float 77 ran2 (long *idum)75 ran2(long *idum) 78 76 { 79 int j;80 long k;81 static long idum2 = 123456789;82 static long iy = 0;83 static long iv[NTAB];84 float temp;77 int j; 78 long k; 79 static long idum2 = 123456789; 80 static long iy = 0; 81 static long iv[NTAB]; 82 float temp; 85 83 86 if (*idum < 0) 87 { 88 if (-(*idum) < 1) 89 *idum = 1; 90 else 91 *idum = -(*idum); 92 idum2 = (*idum); 93 for (j = NTAB + 7; j >= 0; j--) 94 { 95 k = (*idum) / IQ1; 96 *idum = IA1 * (*idum - k * IQ1) - k * IR1; 97 if (*idum < 0) 98 *idum += IM1; 99 if (j < NTAB) 100 iv[j] = *idum; 101 } 102 iy = iv[0]; 84 if (*idum < 0) { 85 if (-(*idum) < 1) 86 *idum = 1; 87 else 88 *idum = -(*idum); 89 idum2 = (*idum); 90 for (j = NTAB + 7; j >= 0; j--) { 91 k = (*idum) / IQ1; 92 *idum = IA1 * (*idum - k * IQ1) - k * IR1; 93 if (*idum < 0) 94 *idum += IM1; 95 if (j < NTAB) 96 iv[j] = *idum; 103 97 } 98 iy = iv[0]; 99 } 104 100 k = (*idum) / IQ1; 105 101 *idum = IA1 * (*idum - k * IQ1) - k * IR1; … … 122 118 123 119 int 124 timeSeed (void)120 timeSeed(void) 125 121 { 126 time_t now = time(NULL);127 unsigned char *p = (unsigned char *) &now;128 int seed = 0;129 size_t i;130 for (i = 0; i < sizeof (now); i++)122 time_t now = time(NULL); 123 unsigned char *p = (unsigned char *) &now; 124 int seed = 0; 125 size_t i; 126 for (i = 0; i < sizeof(now); i++) 131 127 seed = seed * (UCHAR_MAX + 2U) + p[i]; 132 128 return seed; … … 134 130 135 131 double 136 uniformDeviate (int seed)132 uniformDeviate(int seed) 137 133 { 138 134 return seed * (1.0 / (RAND_MAX + 1.0));
