Commit dcbb9ed2 authored by karius's avatar karius

new stuff

parent 40019b19
......@@ -65,6 +65,16 @@ void TransformationGrid::read_matrix_SO3_sampling(const char * file_name,float4
h_rotations[c] = curr_q;
c++;
}
float3 curr_euler;
std::ofstream fho("./test_eulers.txt");
for (int i=0;i<c;i++){
h_quat_to_euler(h_rotations[i],curr_euler);
fho << curr_euler.x << "," << curr_euler.y << "," << curr_euler.z << "\n";
printf("%f,%f,%f,%f\n",h_rotations[i].x,h_rotations[i].y,h_rotations[i].z,h_rotations[i].w);
printf("%f,%f,%f\n",curr_euler.x,curr_euler.y,curr_euler.z);
}
fho.close();
}
__device__
......@@ -873,7 +883,6 @@ void float_min_max(float * d_data,size_t num_data, float * d_min, float * d_max)
while (b < num_blocks){
int t = b*blockDim.x + threadIdx.x;
while (t < num_data){
printf("%i\n",t);
_min[threadIdx.x] = fminf(_min[threadIdx.x],d_data[t]);
_max[threadIdx.x] = fmaxf(_max[threadIdx.x],d_data[t]);
t += blockDim.x;
......@@ -907,37 +916,74 @@ void float_min_max(float * d_data,size_t num_data, float * d_min, float * d_max)
__syncthreads();
if (threadIdx.x < 4){
// printf("%i %f %f %f\n",threadIdx.x,_min[threadIdx.x],_min[threadIdx.x + 4],fminf(_min[threadIdx.x],_min[threadIdx.x + 4]));
_min[threadIdx.x] = fminf(_min[threadIdx.x],_min[threadIdx.x + 4]);
_max[threadIdx.x] = fmaxf(_max[threadIdx.x],_max[threadIdx.x + 4]);
}
__syncthreads();
if (threadIdx.x < 2){
// printf("%i %f %f %f\n",threadIdx.x,_min[threadIdx.x],_min[threadIdx.x + 2],fminf(_min[threadIdx.x],_min[threadIdx.x + 2]));
_min[threadIdx.x] = fminf(_min[threadIdx.x],_min[threadIdx.x + 2]);
_max[threadIdx.x] = fmaxf(_max[threadIdx.x],_max[threadIdx.x + 2]);
}
__syncthreads();
d_min[0] = fminf(_min[1],_min[0]);
d_max[0] = fminf(_max[1],_max[0]);
if (blockIdx.x == 0 && threadIdx.x ==0){
d_min[0] = fminf(_min[0],_min[1]);
d_max[0] = fmaxf(_max[0],_max[1]);
printf("Min:%f %f\n",d_min[0],fminf(_min[0],_min[1]));
}
}
// Kernel that rescales d_score into d_scaled_score using a minimum and maximum
// obtained from a nested float_min_max launch, then accumulates the scaled
// values into a per-block shared array and tree-reduces that array.
//
// Parameters (as used in the body below):
//   d_score        - input values, read at indices [0, num_scores)
//   d_scaled_score - output, written as d_score[g]/(max-min) - min/(max-min)
//   d_min, d_max   - single-element buffers filled by float_min_max and read as
//                    d_min[0] / d_max[0]
//   num_scores     - number of elements processed
//
// NOTE(review): this span shows two signatures and two float_min_max launches.
// It looks like the pre- and post-change lines of a diff rendered without +/-
// markers; presumably the variant using the __shared__ _min/_max arrays is the
// older one - confirm against the real file before editing.
template<size_t block_size>
__global__
void shift_and_scale_envelope_score_kernel(float * d_score, float * d_scaled_score, size_t num_scores){
__shared__ float _min[1];
__shared__ float _max[1];
void shift_and_scale_envelope_score_kernel(float * d_score, float * d_scaled_score, float *d_min, float *d_max, size_t num_scores){
// One accumulator slot per thread; the reduction below indexes up to
// threadIdx.x + 64, so it presumes at least 128 slots - TODO confirm that
// block_size == blockDim.x == 128 is the only configuration used.
__shared__ float reduce[block_size];
int num_blocks = num_scores/blockDim.x + 1;
// A single thread of the whole grid issues the nested min/max kernel launch.
if (threadIdx.x == 0 && blockIdx.x == 0){
float_min_max<block_size><<<32,block_size>>>(d_score,num_scores,_min,_max);
float_min_max<block_size><<<32,block_size>>>(d_score,num_scores,d_min,d_max);
}
// NOTE(review): __syncthreads() is a barrier for the threads of one block
// only; blocks other than block 0 are not held back by it, yet every block
// reads d_min[0]/d_max[0] below - verify the ordering guarantee.
__syncthreads();
// NOTE(review): this call sits inside a __global__ function, i.e. it is the
// device-side variant. As far as I know that variant is deprecated/removed in
// recent CUDA toolkits - confirm against the targeted toolkit version.
cudaDeviceSynchronize();
// Grid-stride loop: each thread starts at its global index and advances by the
// total number of launched threads.
int grid_size = blockDim.x*gridDim.x;
int g = blockIdx.x*blockDim.x + threadIdx.x;
while(g<num_scores){
// Algebraically (d_score[g] - min) / (max - min).
// NOTE(review): no guard for d_max[0] == d_min[0] (division by zero).
d_scaled_score[g] = d_score[g]/(d_max[0] - d_min[0]) - d_min[0]/(d_max[0] - d_min[0]);
g+=grid_size;
}
__syncthreads();
//initiate
// NOTE(review): only block 0 zeroes its reduce[] array, but every block adds
// into its own reduce[] below; shared memory is not zero-initialized, so the
// other blocks appear to start from undefined values - confirm intent.
if (blockIdx.x == 0){
reduce[threadIdx.x] = 0;
}
__syncthreads();
//t-Design, would usually need weights!
// Accumulate scaled scores into this thread's slot.
// NOTE(review): the inner loop advances t by blockDim.x all the way to
// num_scores, so a pass that starts at chunk b also walks every later chunk;
// combined with the outer loop over b this looks like it adds some elements
// more than once - confirm this is intended.
// NOTE(review): d_scaled_score entries written by other blocks are read here
// with only the block-level barrier above in between.
int b = blockIdx.x;
while (b < num_blocks){
int t = b*blockDim.x + threadIdx.x;
while (t < num_scores){
reduce[threadIdx.x] += d_scaled_score[t];
t += blockDim.x;
}
b += gridDim.x;
}
// Tree reduction over reduce[], halving the active range each step with a
// barrier after every step.
// NOTE(review): there is no __syncthreads() between the accumulation loop
// above and the first step, although that step reads slots written by other
// threads.
if (threadIdx.x < 64) reduce[threadIdx.x] += reduce[threadIdx.x + 64];__syncthreads();
if (threadIdx.x < 32) reduce[threadIdx.x] += reduce[threadIdx.x + 32];__syncthreads();
if (threadIdx.x < 16) reduce[threadIdx.x] += reduce[threadIdx.x + 16];__syncthreads();
if (threadIdx.x < 8) reduce[threadIdx.x] += reduce[threadIdx.x + 8];__syncthreads();
if (threadIdx.x < 4) reduce[threadIdx.x] += reduce[threadIdx.x + 4];__syncthreads();
if (threadIdx.x < 2) reduce[threadIdx.x] += reduce[threadIdx.x + 2];__syncthreads();
if (threadIdx.x < 1) reduce[threadIdx.x] += reduce[threadIdx.x + 1];__syncthreads();
// NOTE(review): the reduced value in reduce[0] is not written to any output
// before the kernel returns; the empty printf below is the last statement.
printf("");
}
// Host-side wrapper that launches shift_and_scale_envelope_score_kernel<128>
// on the given device buffers and returns without further processing.
//
// NOTE(review): two signatures and two launch lines appear in this span. It
// looks like the before/after lines of a diff shown without +/- markers;
// presumably the <<<1,1>>> three-argument form is the older one - confirm
// against the real file.
// NOTE(review): the launch is not followed by any error query or
// synchronization here, so launch failures would not surface in this function.
__host__
void TransformationGrid::shift_and_scale_envelope_score(float * d_score, float * d_normalized_score, size_t num_scores){
shift_and_scale_envelope_score_kernel<128><<<1,1>>>(d_score,d_normalized_score,num_scores);
void TransformationGrid::shift_and_scale_envelope_score(float * d_score, float * d_normalized_score,float *d_min, float *d_max, size_t num_scores){
// 32 blocks of 128 threads; the thread count matches the <128> template
// argument that sizes the kernel's shared reduce[] array.
shift_and_scale_envelope_score_kernel<128><<<32,128>>>(d_score,d_normalized_score,d_min,d_max,num_scores);
}
TransformationGrid::~TransformationGrid() {
......
......@@ -53,7 +53,7 @@ public:
__host__ static void h_quat_to_euler(const float4 & rotations_quat,float3 &rotations_euler);
__host__ static void h_matrix_to_quat(float * const& m,float4 & quaternion);
__host__ static void read_matrix_SO3_sampling(const char * file_name,float4 *& h_rotations, uint *& h_num_rotations);
__host__ static void shift_and_scale_envelope_score(float * d_score, float * d_normalized_score,size_t num_scores);
__host__ static void shift_and_scale_envelope_score(float * d_score, float * d_normalized_score,float *d_min, float *d_max, size_t num_scores);
TransformationGrid(const std::pair<float3,float3> & bounding_box_outer,const std::pair<float3,float3> & bounding_box_inner,
float * const& translation_offset,const float4 & angle_box, const float & alpha, const float & tau_in);
//Testgrid constructor
......
No preview for this file type
......@@ -134,13 +134,14 @@ void chamfer_score(float4 * d_coords, Density difference_map, size_t num_particl
template <size_t block_size>
__global__
void envelope_score(Density binarized_map, float * d_workspaces, uint * d_linear_offsets, float4 * d_coords,size_t num_particles , TransformationGrid grid){
void envelope_score(Density binarized_map, float * d_workspaces, float * d_value_buffers, uint * d_linear_offsets, float4 * d_coords,size_t num_particles , TransformationGrid grid){
__shared__ float4 rotation;
__shared__ float4 translation;
__shared__ float reduce[block_size];
float * d_data = binarized_map.d_data;
uint pixel_vol = binarized_map.d_pixel_vol();
float * d_workspace = d_workspaces + blockIdx.x*pixel_vol;
float * d_value_buffer = d_value_buffers + blockIdx.x*num_particles;
uint * d_linear_offset = d_linear_offsets + blockIdx.x*num_particles;
int t,b;
uint particle_pixel_linear;
......@@ -163,7 +164,7 @@ void envelope_score(Density binarized_map, float * d_workspaces, uint * d_linear
//transformation to shared
if(threadIdx.x == 0){
grid.d_transformation_to_memory(b,&rotation,&translation,grid.d_translation_offset);
// printf("Block %i handling transformation %i\n",blockIdx.x,t);
// printf("Block %i handling, iteration %i transformation %f %f %f %f - %f %f %f\n",blockIdx.x,b,rotation.x,rotation.y,rotation.z,rotation.w,translation.x,translation.y,translation.z);
}
__syncthreads();
//imprint particle ids, iterate over particles
......@@ -175,15 +176,19 @@ void envelope_score(Density binarized_map, float * d_workspaces, uint * d_linear
coord_rot += translation;
//linearize and imprint
particle_pixel_linear = coord_to_linear_offset(&coord_rot,d_pixel_dim,d_pixel_size);
d_linear_offset[t] = particle_pixel_linear;
d_value_buffer[t] = d_data[particle_pixel_linear];
// d_value_buffer[t] += blockIdx.x;
if (d_workspace[particle_pixel_linear] == -1){
d_workspace[particle_pixel_linear] = 2;
}
else if (d_workspace[particle_pixel_linear] == 0){
d_workspace[particle_pixel_linear] = -2;
}
d_linear_offsets[t] = particle_pixel_linear;
t += blockDim.x;
}
__syncthreads();
//reduce, iterate over pixels
t = threadIdx.x;
......@@ -205,13 +210,23 @@ void envelope_score(Density binarized_map, float * d_workspaces, uint * d_linear
if (threadIdx.x < 2) reduce[threadIdx.x] += reduce[threadIdx.x + 2]; __syncthreads();
if (threadIdx.x < 1) reduce[threadIdx.x] += reduce[threadIdx.x + 1]; __syncthreads();
grid.d_scores[b] = reduce[0];
__syncthreads();
// if (threadIdx.x == 0 && blockIdx.x == 0){
// printf("%i %i %f\n",blockIdx.x,b,reduce[0]);
// }
// //replace, iterate over particles
t = threadIdx.x;
while(t < num_particles){
d_workspace[d_linear_offsets[t]] = d_data[d_linear_offsets[t]];
d_workspace[d_linear_offset[t]] = d_data[d_linear_offset[t]];
t += blockDim.x;
}
__syncthreads();
// if (blockIdx.x == 0 && threadIdx.x == 0){
// float s = 0;
// for (int i=0;i<pixel_vol;i++) s+= d_workspace[i];
// printf("%i %f\n",b,s);
// }
b+=gridDim.x;
}
}
......@@ -565,7 +580,7 @@ BOOST_AUTO_TEST_CASE(envelope_test_simpler)
float tau_in = 1.0;
std::pair<float3,float3> bounding_box_outer = density.get_bounding_box();
std::pair<float3,float3> bounding_box_inner = pixel_coords.get_rotationally_safe_bounding_box();
TransformationGrid transformation_grid(bounding_box_outer,bounding_box_inner,binarized_map.h_mid_point,tau_in,"rot_samples/N23_M5880_IcoC7.dat");
TransformationGrid transformation_grid(bounding_box_outer,bounding_box_inner,binarized_map.h_mid_point,tau_in,"rot_samples/N07_M168_OctaC7.dat");
CudaCheckError();
printf("Transformation num: %u\n",transformation_grid.h_num_transformations[0]);
printf("Translation num: %u\n",transformation_grid.h_num_translations[0]);
......@@ -575,23 +590,31 @@ BOOST_AUTO_TEST_CASE(envelope_test_simpler)
// TransformationGrid transformation_grid(binarized_map.h_mid_point);
const size_t envelope_score_block_dim = 256;
size_t envelope_score_grid_dim = work_spaces;
size_t envelope_workspace_volume = work_spaces*pixel_coords.particle_count()*sizeof(uint);
size_t envelope_workspace_replaced_indices_volume = work_spaces*pixel_coords.particle_count()*sizeof(uint);
size_t envelope_workspace_replaced_values_buffer_volume = work_spaces*pixel_coords.particle_count()*sizeof(float);
//allocate workspace resources
float * d_replaced_value_buffer;
cudaMalloc((void **)&d_replaced_value_buffer,envelope_workspace_replaced_values_buffer_volume);
uint * d_replaced_linear_indices;
cudaMalloc((void **)&d_replaced_linear_indices,envelope_workspace_volume);
cudaMalloc((void **)&d_replaced_linear_indices,envelope_workspace_replaced_indices_volume);
CudaCheckError();
float * d_workspaces;
cudaMalloc((void **)&d_workspaces,work_spaces*binarized_map.h_pixel_vol()*sizeof(float));
printf("Allocating %u workspaces for %u pixels each occupying %u bytes of memory ...\n",work_spaces,binarized_map.h_pixel_vol(),envelope_workspace_volume);
printf("Allocating %u workspaces for %u pixels each occupying %u bytes of memory ...\n",work_spaces,binarized_map.h_pixel_vol(),envelope_workspace_replaced_indices_volume + envelope_workspace_replaced_values_buffer_volume);
CudaCheckError();
//(Density binarized_map, float * d_workspaces, uint * d_linear_offsets, float4 * d_coords,size_t num_particles , TransformationGrid grid)
envelope_score<envelope_score_block_dim><<<envelope_score_grid_dim,envelope_score_block_dim>>>
(binarized_map, d_workspaces,d_replaced_linear_indices,pixel_coords.d_data,pixel_coords.particle_count(),transformation_grid);
(binarized_map, d_workspaces,d_replaced_value_buffer, d_replaced_linear_indices,pixel_coords.d_data,pixel_coords.particle_count(),transformation_grid);
// transformation_grid.write_to_csv("test/env_score.csv");
float * d_normalized_scores;
cudaMalloc((void **)&d_normalized_scores,transformation_grid.h_num_transformations[0]*sizeof(*d_normalized_scores));
transformation_grid.shift_and_scale_envelope_score(transformation_grid.d_scores,d_normalized_scores,transformation_grid.h_num_transformations[0]);
printf("\n\n");
float * d_min;
cudaMalloc((void **)&d_min,sizeof(*d_min));
float * d_max;
cudaMalloc((void **)&d_max,sizeof(*d_max));
transformation_grid.shift_and_scale_envelope_score(transformation_grid.d_scores,d_normalized_scores,d_min,d_max,transformation_grid.h_num_transformations[0]);
printf("Finished testing envelope score ... \n\n");
}
//BOOST_AUTO_TEST_CASE(chamfer_test_simpler)
......
9
24
1. 0 0 0 1. 0 0 0 1.
1. 0 0 0 0 -1. 0 1. 0
0 0 1. 0 1. 0 -1. 0 0
0 -1. 0 1. 0 0 0 0 1.
1. 0 0 0 0 1. 0 -1. 0
0 0 -1. 0 1. 0 1. 0 0
0 1. 0 -1. 0 0 0 0 1.
0 0 1. 1. 0 0 0 1. 0
0 0 -1. 1. 0 0 0 -1. 0
0 0 1. -1. 0 0 0 -1. 0
0 0 -1. -1. 0 0 0 1. 0
0 1. 0 0 0 1. 1. 0 0
0 1. 0 0 0 -1. -1. 0 0
0 -1. 0 0 0 -1. 1. 0 0
0 -1. 0 0 0 1. -1. 0 0
1. 0 0 0 -1. 0 0 0 -1.
-1. 0 0 0 1. 0 0 0 -1.
-1. 0 0 0 -1. 0 0 0 1.
0 1. 0 1. 0 0 0 0 -1.
0 -1. 0 -1. 0 0 0 0 -1.
-1. 0 0 0 0 1. 0 1. 0
-1. 0 0 0 0 -1. 0 -1. 0
0 0 1. 0 -1. 0 1. 0 0
0 0 -1. 0 -1. 0 -1. 0 0
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
No preview for this file type
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment