Commit e059d41d authored by karius's avatar karius

first complete 6d quadrature

parent 52d6f660
......@@ -160,7 +160,7 @@ TransformationGrid::TransformationGrid(const std::pair<float3,float3> bounding_b
cudaMemcpy(d_translation_offset,h_translation_offset,sizeof(*h_translation_offset),cudaMemcpyHostToDevice);
CudaCheckError();
//rotations
TransformationGrid::read_matrix_SO3_sampling("rot_samples/N14_M1260_IcoC7.dat",h_rotations,h_rotations_euler,h_num_rotations);
TransformationGrid::read_matrix_SO3_sampling("rot_samples/N23_M5880_IcoC7.dat",h_rotations,h_rotations_euler,h_num_rotations);
cudaMemcpy(d_num_rotations,h_num_rotations,sizeof(uint),cudaMemcpyHostToDevice);
//allocate and populate rotational nodes
cudaMalloc((void**)&d_rotations,h_num_rotations[0]*sizeof(float4));
......@@ -177,13 +177,18 @@ TransformationGrid::TransformationGrid(const std::pair<float3,float3> bounding_b
__host__ void TransformationGrid::quadrature_test_function(void){
//being the volume of the search space
float translation_normalization = h_translation_total_shift->x*h_translation_total_shift->y*h_translation_total_shift->z;
float translation_normalization = 1/(h_translation_total_shift->x*h_translation_total_shift->y*h_translation_total_shift->z);
//being 8*pi^2 for the euler parametrisation of SO(3)
float rotation_normalization = 2/M_PI;
//normalization for 6D space
// float six_d_normalization_inv = 1/(translation_normalization*rotation_normalization);
float six_d_normalization_inv = rotation_normalization;
for (uint i=0;i<h_num_transformations[0];i++){h_scores[i] = six_d_normalization_inv/sinf(h_rotations_euler[i%h_num_rotations[0]].y);}
float six_d_normalization = translation_normalization*rotation_normalization;
// float six_d_normalization_inv = rotation_normalization;
int rot_index;
int trans_index;
for (uint i=0;i<h_num_transformations[0];i++){
rot_index = i/h_num_translations[0];
h_scores[i] = six_d_normalization/sinf(h_rotations_euler[rot_index].y);
}
cudaMemcpy(d_scores,h_scores,h_num_transformations[0]*sizeof(*h_scores),cudaMemcpyHostToDevice);
CudaCheckError();
}
......@@ -337,17 +342,23 @@ void TransformationGrid::print_quadrature_weights(void){
}
template <uint blockSize>
template <uint blockSize, bool secondary>
__global__
void translation_quadrature(TransformationGrid grid, float * d_tmp_quadrature){
__shared__ float reduce[blockSize];
uint num_scores = grid.d_num_transformations[0];
if (secondary){
num_scores = grid.d_num_translations[0];
}
uint grid_dim = blockDim.x*gridDim.x;
uint num_blocks = num_scores/blockDim.x + 1;
float * d_scores = grid.d_scores;
float * d_scores;
if (secondary){
d_scores = grid.d_tmp_rot_quadrature_scores;
} else {
d_scores = grid.d_scores;
}
uint t;
int4 transformation_index;
float weight;
......@@ -360,6 +371,8 @@ void translation_quadrature(TransformationGrid grid, float * d_tmp_quadrature){
weight *= grid.d_chebychev_weights[grid.d_translation_refinement_dim[0] + transformation_index.y];
weight *= grid.d_chebychev_weights[grid.d_translation_refinement_dim[0] + grid.d_translation_refinement_dim[1] + transformation_index.z];
reduce[threadIdx.x] += weight*d_scores[t];
// printf("%f\n",d_scores[t]);
// reduce[threadIdx.x] += weight*0.9966911009097003;
t += grid_dim;
}
__syncthreads();
......@@ -368,10 +381,10 @@ void translation_quadrature(TransformationGrid grid, float * d_tmp_quadrature){
if (threadIdx.x < 64)reduce[threadIdx.x] += reduce[threadIdx.x + 64];__syncthreads();
if (threadIdx.x < 32)reduce[threadIdx.x] += reduce[threadIdx.x + 32];__syncthreads();
if (threadIdx.x < 16)reduce[threadIdx.x] += reduce[threadIdx.x + 16];__syncthreads();
if (threadIdx.x < 8)reduce[threadIdx.x] += reduce[threadIdx.x + 8];__syncthreads();
if (threadIdx.x < 4)reduce[threadIdx.x] += reduce[threadIdx.x + 4];__syncthreads();
if (threadIdx.x < 2)reduce[threadIdx.x] += reduce[threadIdx.x + 2];__syncthreads();
if (threadIdx.x < 1)reduce[threadIdx.x] += reduce[threadIdx.x + 1];__syncthreads();
if (threadIdx.x < 8) reduce[threadIdx.x] += reduce[threadIdx.x + 8];__syncthreads();
if (threadIdx.x < 4) reduce[threadIdx.x] += reduce[threadIdx.x + 4];__syncthreads();
if (threadIdx.x < 2) reduce[threadIdx.x] += reduce[threadIdx.x + 2];__syncthreads();
if (threadIdx.x < 1) reduce[threadIdx.x] += reduce[threadIdx.x + 1];__syncthreads();
//safe block result in tmp mem, supposed to hold grid_dim values
d_tmp_quadrature[blockIdx.x] = reduce[0]; __syncthreads();
......@@ -384,17 +397,15 @@ void rotation_quadrature(TransformationGrid grid){
uint num_translations = grid.d_num_translations[0];
uint num_rotations = grid.d_num_rotations[0];
//valid for t-designs
float normalize_f = 1/(__uint2float_rn(grid.d_num_rotations[0]));
float * d_scores = grid.d_scores;
float * d_reduced_scores = grid.d_tmp_rot_quadrature_scores;
uint t;
uint b = blockIdx.x;
reduce[threadIdx.x] = 0.0;
__syncthreads();
t = blockIdx.x*blockDim.x + threadIdx.x;
while(b < num_translations){
t = threadIdx.x;
reduce[threadIdx.x] = 0.0;
__syncthreads();
while (t < num_rotations){
//order of scores is: (r0,t0);(r0,t1);(r0,t2); ... ;(r0,t_{N_t-1});
// (r1,t0);(r1,t1);(r1,t2); ... ;(r1,t_{N_t-1});
......@@ -402,9 +413,13 @@ void rotation_quadrature(TransformationGrid grid){
// . .
// . .
// (r_{N_r-1},t0);(r_{N_r-1},t1);(r_{N_r-1},t2); ... ;(r_{N_r-1},t_{N_t-1});
reduce[threadIdx.x] += normalize_f*d_scores[t*num_translations];
reduce[threadIdx.x] += d_scores[t*num_translations+b];
// if (b ==1 && threadIdx.x ==0){
// printf("%i,%i,%f\n",t,t*num_translations,d_scores[t*num_translations]);
// }
t += blockDim.x;
}
__syncthreads();
//reduction, blocksize max 128 hard coded
if (threadIdx.x < 64)reduce[threadIdx.x] += reduce[threadIdx.x + 64];__syncthreads();
if (threadIdx.x < 32)reduce[threadIdx.x] += reduce[threadIdx.x + 32];__syncthreads();
......@@ -415,33 +430,37 @@ void rotation_quadrature(TransformationGrid grid){
if (threadIdx.x < 1)reduce[threadIdx.x] += reduce[threadIdx.x + 1];__syncthreads();
//safe block result in tmp mem, supposed to hold grid_dim values
d_reduced_scores[b] = reduce[0]; __syncthreads();
if (threadIdx.x == 0) d_reduced_scores[b] = reduce[0]/num_rotations; __syncthreads();
// if (threadIdx.x == 0){
// printf("%f\n",reduce[0]/num_rotations);
// }
b += gridDim.x;
}
}
__host__
void TransformationGrid::quadrature(float* const &result){
const uint blockSize = 128;
uint grid_dim = h_num_transformations[0]/blockSize + 1;
uint rot_quat_grid_dim = 64;
float * h_tmp_quadrature = (float *) malloc(grid_dim*sizeof(*h_tmp_quadrature));
float * d_tmp_quadrature;
cudaMalloc((void **)&d_tmp_quadrature,grid_dim*sizeof(*d_tmp_quadrature));
const uint block_size = 128;
// template <uint blockSize>
// __global__
// void rotation_quadrature(TransformationGrid grid){
rotation_quadrature<blockSize><<<rot_quat_grid_dim,blockSize>>>(*this);
rotation_quadrature<block_size><<<rot_quat_grid_dim,block_size>>>(*this);
cudaDeviceSynchronize();
// template <int blockSize>
uint secondary_quadrature_grid_dim = h_num_translations[0]/block_size + 1;
float * h_quadrature_result = (float *) malloc(secondary_quadrature_grid_dim*sizeof(*h_quadrature_result));
float * d_quadrature_result;
cudaMalloc((void **)&d_quadrature_result,secondary_quadrature_grid_dim*sizeof(*d_quadrature_result));
float * d_tmp_quadrature;
// __global__
// void translation_quadrature(TransformationGrid grid, float * d_result, float * d_tmp_quadrature)
// translation_quadrature<blockSize><<<grid_dim,blockSize>>>(*this,d_tmp_quadrature);
// void translation_secondary_quadrature(TransformationGrid grid, float * d_result){
translation_quadrature<block_size,true><<<secondary_quadrature_grid_dim,block_size>>>(*this,d_quadrature_result);
cudaDeviceSynchronize();
cudaMemcpy(h_quadrature_result,d_quadrature_result,secondary_quadrature_grid_dim*sizeof(*result),cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
cudaMemcpy(h_tmp_quadrature,d_tmp_quadrature,grid_dim*sizeof(*h_tmp_quadrature),cudaMemcpyDeviceToHost);
result[0] = 0.0;
for (int i=0;i<grid_dim;i++)result[0] += h_tmp_quadrature[i];
for (int i=0;i<secondary_quadrature_grid_dim;i++)result[0]+=h_quadrature_result[i];
}
__host__
......@@ -497,13 +516,13 @@ void TransformationGrid::print(){
printf("Number of transformations: %u \n",h_num_transformations[0]);
printf("Translation offsets: %f,%f,%f \n",h_translation_offset[0].x,h_translation_offset[0].y,h_translation_offset[0].z);
}
__host__
float3 TransformationGrid::transform(const float4& t){
//basis: 0,0,1 "north"
return make_float3(2*(t.y*t.w + t.x*t.z),2*(t.z*t.w - t.x*t.y),1-2*(t.y*t.y + t.z*t.z));
}
//
//
//__host__
//float3 TransformationGrid::transform(const float4& t){
// //basis: 0,0,1 "north"
// return make_float3(2*(t.y*t.w + t.x*t.z),2*(t.z*t.w - t.x*t.y),1-2*(t.y*t.y + t.z*t.z));
//}
__host__
thrust::device_vector<thrust::tuple<float4,float3,float>> TransformationGrid::n_best_from_file(const char * file_name, uint n){
......
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment