Commit 93570620 authored by karius's avatar karius

first advances in 3D translational quadrature

parent 887575e6
......@@ -382,28 +382,122 @@ void TransformationGrid::print_transformations(void){
float4 rot[1];
float4 trans[1];
for (uint u=0;u < h_num_transformations[0];u++){
h_transformation_to_memory(u,&rot[0],&trans[0],h_translation_offset);
h_transformation_to_memory(u,&rot[0],&trans[0]);
printf("%f %f %f | %f %f %f %f\n",trans[0].x,trans[0].y,trans[0].z,rot[0].x,rot[0].y,rot[0].z,rot[0].w);
}
}
//TODO: use a jump table instead of clumsy checks
__device__
void TransformationGrid::d_transformation_to_memory(const uint& linear_index, float4 * const& rotation, float4 * const& translation, float4 * const& translation_offset){
void TransformationGrid::d_transformation_to_memory(const uint& linear_index, float4 * const& rotation, float4 * const& translation){
int4 transformation_index = d_transformation_index_from_linear(linear_index);
*rotation = d_rotations[transformation_index.w];
translation->x = translation_offset->x + 0.5*d_chebychev_nodes[transformation_index.x]*d_translation_total_shift->x;
translation->y = translation_offset->y + 0.5*d_chebychev_nodes[d_translation_refinement_dim[0] + transformation_index.y]*d_translation_total_shift->y;
translation->z = translation_offset->z + 0.5*d_chebychev_nodes[d_translation_refinement_dim[0] + d_translation_refinement_dim[1] + transformation_index.z]*d_translation_total_shift->z;
translation->x = d_translation_offset->x + 0.5*d_chebychev_nodes[transformation_index.x]*d_translation_total_shift->x;
translation->y = d_translation_offset->y + 0.5*d_chebychev_nodes[d_translation_refinement_dim[0] + transformation_index.y]*d_translation_total_shift->y;
translation->z = d_translation_offset->z + 0.5*d_chebychev_nodes[d_translation_refinement_dim[0] + d_translation_refinement_dim[1] + transformation_index.z]*d_translation_total_shift->z;
translation->w = 0;
}
__host__
void TransformationGrid::h_transformation_to_memory(const uint& linear_index, float4 * const& rotation, float4 * const& translation, float4 * const& translation_offset){
void TransformationGrid::print_quadrature_weights(void){
int t = 0;
int4 transformation_index;
float weight;
float sum = 0.0;
while(t < h_num_transformations[0]){
transformation_index = h_transformation_index_from_linear(t);
weight = h_chebychev_weights[transformation_index.x];
weight *= h_chebychev_weights[h_translation_refinement_dim[0] + transformation_index.y];
weight *= h_chebychev_weights[h_translation_refinement_dim[0] + h_translation_refinement_dim[1] + transformation_index.z];
printf("%i | %i | %i - %e\n",transformation_index.x,transformation_index.y,transformation_index.z,weight);
sum += weight;
t++;
}
printf("Total sum of weights: %e\n",sum/8);
}
template <uint blockSize>
__global__
void translation_quadrature(TransformationGrid grid, float * d_tmp_quadrature){
__shared__ float reduce[blockSize];
uint num_scores = grid.d_num_transformations[0];
uint grid_dim = blockDim.x*gridDim.x;
uint num_blocks = num_scores/blockDim.x + 1;
float * d_scores = grid.d_scores;
uint t;
int4 transformation_index;
float weight;
reduce[threadIdx.x] = 0.0;
__syncthreads();
t = blockIdx.x*blockDim.x + threadIdx.x;
while(t < num_scores){
transformation_index = grid.d_transformation_index_from_linear(t);
weight = grid.d_chebychev_weights[transformation_index.x];
weight *= grid.d_chebychev_weights[grid.d_translation_refinement_dim[0] + transformation_index.y];
weight *= grid.d_chebychev_weights[grid.d_translation_refinement_dim[0] + grid.d_translation_refinement_dim[1] + transformation_index.z];
// reduce[threadIdx.x] += weight*d_scores[t];
reduce[threadIdx.x] += weight;
t += grid_dim;
}
__syncthreads();
//reduction, blocksize max 128 hard coded
if (threadIdx.x < 64)reduce[threadIdx.x] += reduce[threadIdx.x + 64];__syncthreads();
if (threadIdx.x < 32)reduce[threadIdx.x] += reduce[threadIdx.x + 32];__syncthreads();
if (threadIdx.x < 16)reduce[threadIdx.x] += reduce[threadIdx.x + 16];__syncthreads();
if (threadIdx.x < 8)reduce[threadIdx.x] += reduce[threadIdx.x + 8];__syncthreads();
if (threadIdx.x < 4)reduce[threadIdx.x] += reduce[threadIdx.x + 4];__syncthreads();
if (threadIdx.x < 2)reduce[threadIdx.x] += reduce[threadIdx.x + 2];__syncthreads();
if (threadIdx.x < 1)reduce[threadIdx.x] += reduce[threadIdx.x + 1];__syncthreads();
//safe block result in tmp mem, supposed to hold grid_dim values
d_tmp_quadrature[blockIdx.x] = reduce[0]; __syncthreads();
}
__host__
void TransformationGrid::quadrature(float* const &result){
const uint blockSize = 128;
uint grid_dim = h_num_transformations[0]/blockSize + 1;
float * h_tmp_quadrature = (float *) malloc(grid_dim*sizeof(*h_tmp_quadrature));
float * d_tmp_quadrature;
cudaMalloc((void **)&d_tmp_quadrature,grid_dim*sizeof(*d_tmp_quadrature));
// template <int blockSize>
// __global__
// void translation_quadrature(TransformationGrid grid, float * d_result, float * d_tmp_quadrature)
translation_quadrature<blockSize><<<grid_dim,blockSize>>>(*this,d_tmp_quadrature);
cudaDeviceSynchronize();
cudaMemcpy(h_tmp_quadrature,d_tmp_quadrature,grid_dim*sizeof(*h_tmp_quadrature),cudaMemcpyDeviceToHost);
result[0] = 0.0;
for (int i=0;i<grid_dim;i++)result[0] += h_tmp_quadrature[i];
result[0] /=8;
}
__host__
void TransformationGrid::gaussian(void){
float4 * rotation = (float4 *) malloc(sizeof(*rotation));
float4 * translation = (float4 *) malloc(sizeof(*translation));
float sigma = h_translation_offset->x/4;
float mu = h_translation_offset->x;
float N = 1.0/(powf(sigma,3.0)*powf(2*M_PI,1.5));
for (uint i = 0; i < h_num_transformations[0];i++){
h_transformation_to_memory(i,rotation,translation);
h_scores[i] = N*expf(-powf(translation->x-mu,2.0)/powf(sigma,2.0)-powf(translation->y-mu,2.0)/powf(sigma,2.0)-powf(translation->z-mu,2.0)/powf(sigma,2.0));
}
cudaMemcpy(d_scores,h_scores,h_num_transformations[0]*sizeof(*d_scores),cudaMemcpyHostToDevice);
CudaCheckError();
}
__host__
void TransformationGrid::h_transformation_to_memory(const uint& linear_index, float4 * const& rotation, float4 * const& translation){
int4 transformation_index = h_transformation_index_from_linear(linear_index);
*rotation = h_rotations[transformation_index.w];
translation->x = translation_offset->x + 0.5*h_chebychev_nodes[transformation_index.x]*h_translation_total_shift->x;
translation->y = translation_offset->y + 0.5*h_chebychev_nodes[h_translation_refinement_dim[0] + transformation_index.y]*transformation_index.y*h_translation_total_shift->y;
translation->z = translation_offset->z + 0.5*h_chebychev_nodes[h_translation_refinement_dim[0] + h_translation_refinement_dim[1] + transformation_index.z]*h_translation_total_shift->z;
translation->x = h_translation_offset->x + 0.5*h_chebychev_nodes[transformation_index.x]*h_translation_total_shift->x;
translation->y = h_translation_offset->y + 0.5*h_chebychev_nodes[h_translation_refinement_dim[0] + transformation_index.y]*h_translation_total_shift->y;
translation->z = h_translation_offset->z + 0.5*h_chebychev_nodes[h_translation_refinement_dim[0] + h_translation_refinement_dim[1] + transformation_index.z]*h_translation_total_shift->z;
translation->w = 0;
}
......@@ -418,7 +512,7 @@ void TransformationGrid::write_to_csv(const char * file_name){
//Header: #rotations,#translations,#scores
fprintf(fp,"%u,%u,1\n",*h_num_rotations,*h_num_translations);
for (int i = 0; i < *h_num_transformations;i++){
h_transformation_to_memory(i,&rotation,&translation,h_translation_offset);
h_transformation_to_memory(i,&rotation,&translation);
fprintf(fp, "%.9f,%.9f,%.9f,%.9f,%.9f,%.9f,%.9f,%.9f\n",rotation.x,rotation.y,rotation.z,rotation.w,translation.x,translation.y,translation.z,h_scores[i]);
}
fclose(fp);
......@@ -618,7 +712,7 @@ thrust::device_vector<thrust::tuple<float4,float3,float>> TransformationGrid::n_
// float buffer[8];
// float4 rotation;
// float3 translation;
// float score;
// float score;TransformationGrid
// std::string line;
// std::stringstream lineStream;
// std::string cell;
......
......@@ -84,13 +84,16 @@ public:
uint * d_num_transformations;
__device__ int4 d_transformation_index_from_linear(const uint& linear_index);
__host__ int4 h_transformation_index_from_linear(const uint& linear_index);
__device__ void d_transformation_to_memory(const uint& i, float4 * const& rotation, float4 * const& translation, float4 * const& translation_offset);
__host__ void h_transformation_to_memory(const uint& i, float4 * const& rotation, float4 * const& translation, float4 * const& translation_offset);
__device__ void d_transformation_to_memory(const uint& i, float4 * const& rotation, float4 * const& translation);
__host__ void h_transformation_to_memory(const uint& i, float4 * const& rotation, float4 * const& translation);
__host__ void write_to_csv(const char * file_name);
__host__ int write_to_png(const char * filename);
__host__ void normalize_chamfer_score(void);
__host__ void refine_translation_grid(uint & i, float * h_translation);
__host__ void calculate_current_chebychev_nodes(void);
__host__ void gaussian(void);
__host__ void quadrature(float* const &result);
__host__ void print_quadrature_weights(void);
// static __host__ void evaluate(const char * file_name);
__host__ static thrust::device_vector<thrust::tuple<float4,float3,float>> n_best_from_file(const char * file_name, uint n);
__host__ static float3 transform(const float4& t);
......
No preview for this file type
......@@ -115,7 +115,7 @@ void chamfer_score(float4 * d_coords, Density difference_map, size_t num_particl
while (t < grid.d_num_transformations[0]){
//transformation to shared
if(threadIdx.x==0)
grid.d_transformation_to_memory(t,&rotation,&translation,grid.d_translation_offset);
grid.d_transformation_to_memory(t,&rotation,&translation);
__syncthreads();
// printf("Handling transformation %i corresponding to %f %f %f %f | %f %f %f...\n",t,rotation.x,rotation.y,rotation.z,rotation.w,translation.x,translation.y,translation.z);
p = threadIdx.x;
......@@ -164,7 +164,7 @@ void envelope_score(Density binarized_map, float * d_workspaces, float * d_value
while (b < grid.d_num_transformations[0]){
//transformation to shared
if(threadIdx.x == 0){
grid.d_transformation_to_memory(b,&rotation,&translation,grid.d_translation_offset);
grid.d_transformation_to_memory(b,&rotation,&translation);
printf("Block %i handling, iteration %i transformation %f %f %f %f - %f %f %f\n",blockIdx.x,b,rotation.x,rotation.y,rotation.z,rotation.w,translation.x,translation.y,translation.z);
}
__syncthreads();
......@@ -315,7 +315,7 @@ void kai_score(float4 * d_coords, int * d_segmentations, uint * d_labels, float
while (b < grid.d_num_transformations[0]){
//transformation to shared
if (threadIdx.x == 0) grid.d_transformation_to_memory(b,&rotation,&translation,grid.d_translation_offset);
if (threadIdx.x == 0) grid.d_transformation_to_memory(b,&rotation,&translation);
if (threadIdx.x == 0) printf("%u: %f %f %f %f:",b,rotation.x,rotation.y,rotation.z,rotation.w);
// if (threadIdx.x == 0) printf("%u: %f %f %f \n",b,translation.x,translation.y,translation.z);
__syncthreads();
......@@ -533,97 +533,118 @@ void kai_score(float4 * d_coords, int * d_segmentations, uint * d_labels, float
// printf("\n\n");
//}
//
BOOST_AUTO_TEST_CASE(envelope_test_simpler)
{
printf(" ============================= Testing Envelope score ==========================\n");
printf(" ============================= ++++++++++++++++++++++ ==========================\n");
//memory management
nvmlInit_v2();
//checkout free memory
nvmlMemory_t memory_info;
nvmlDevice_t device_handle;
nvmlDeviceGetHandleByIndex_v2(0,&device_handle);
nvmlDeviceGetMemoryInfo(device_handle, &memory_info);
float above_value = -1.0;
float below_value = 0.0;
float coord_dim_outer[3] = {70,70,70};
float coord_dim[3] = {50,50,50};
float distance_threshold = 0.5;
uint pixel_dim_outer[3] = {70,70,70};
uint pixel_dim[3] = {50,50,50};
uint3 cube_pixel_dim= {30,30,30};
Density density;
Density::from_bounds(density, &coord_dim_outer[0],&pixel_dim_outer[0]);
// Density density = Density::from_mrc("test/tauA_Brf1_TBP_negstain_30A.mrc");
density.init_device();
density.to_device();
density.mid_cube_with_surface(cube_pixel_dim,1,1.0,1.5);
Density binarized_map;
density.cut_and_binarize(binarized_map,1.0,above_value,below_value,{20,20,20});
Density to_fit_density;
Density::from_bounds(to_fit_density, &coord_dim[0],&pixel_dim[0]);
to_fit_density.mid_cube_with_surface(cube_pixel_dim,1,1.0,1.5);
//every pixel a particle, for test reasons
thrust::device_vector<uint> td_on_surface_to_fit;
size_t on_surface_num_to_fit;
to_fit_density.indices_in_range(td_on_surface_to_fit,true,1.25,2,on_surface_num_to_fit);
Particles pixel_coords = to_fit_density.particles_from_density(td_on_surface_to_fit);
pixel_coords.origin_to_center();
float memory_usage_factor = 0.5;
size_t density_memory_usage = pixel_coords.particle_count()*sizeof(uint) + binarized_map.h_pixel_vol()*sizeof(float);
unsigned long int max = 100;
uint work_spaces = std::min(max,((uint)(((float)memory_info.free)*memory_usage_factor))/density_memory_usage);
//set up TransformationGrid
float tau_in = 1.0;
std::pair<float3,float3> bounding_box_outer = density.get_bounding_box();
std::pair<float3,float3> bounding_box_inner = pixel_coords.get_rotationally_safe_bounding_box();
//BOOST_AUTO_TEST_CASE(envelope_test_simpler)
// {
// printf(" ============================= Testing Envelope score ==========================\n");
// printf(" ============================= ++++++++++++++++++++++ ==========================\n");
// //memory management
// nvmlInit_v2();
// //checkout free memory
// nvmlMemory_t memory_info;
// nvmlDevice_t device_handle;
// nvmlDeviceGetHandleByIndex_v2(0,&device_handle);
// nvmlDeviceGetMemoryInfo(device_handle, &memory_info);
//
// float above_value = -1.0;
// float below_value = 0.0;
// float coord_dim_outer[3] = {70,70,70};
// float coord_dim[3] = {50,50,50};
// float distance_threshold = 0.5;
// uint pixel_dim_outer[3] = {70,70,70};
// uint pixel_dim[3] = {50,50,50};
// uint3 cube_pixel_dim= {30,30,30};
// Density density;
// Density::from_bounds(density, &coord_dim_outer[0],&pixel_dim_outer[0]);
//// Density density = Density::from_mrc("test/tauA_Brf1_TBP_negstain_30A.mrc");
// density.init_device();
// density.to_device();
// density.mid_cube_with_surface(cube_pixel_dim,1,1.0,1.5);
//
// Density binarized_map;
// density.cut_and_binarize(binarized_map,1.0,above_value,below_value,{20,20,20});
//
// Density to_fit_density;
// Density::from_bounds(to_fit_density, &coord_dim[0],&pixel_dim[0]);
// to_fit_density.mid_cube_with_surface(cube_pixel_dim,1,1.0,1.5);
//
// //every pixel a particle, for test reasons
// thrust::device_vector<uint> td_on_surface_to_fit;
// size_t on_surface_num_to_fit;
// to_fit_density.indices_in_range(td_on_surface_to_fit,true,1.25,2,on_surface_num_to_fit);
//
// Particles pixel_coords = to_fit_density.particles_from_density(td_on_surface_to_fit);
// pixel_coords.origin_to_center();
//
// float memory_usage_factor = 0.5;
// size_t density_memory_usage = pixel_coords.particle_count()*sizeof(uint) + binarized_map.h_pixel_vol()*sizeof(float);
// unsigned long int max = 100;
// uint work_spaces = std::min(max,((uint)(((float)memory_info.free)*memory_usage_factor))/density_memory_usage);
//
// //set up TransformationGrid
// float tau_in = 1.0;
// std::pair<float3,float3> bounding_box_outer = density.get_bounding_box();
// std::pair<float3,float3> bounding_box_inner = pixel_coords.get_rotationally_safe_bounding_box();
// std::pair<float3,float3> effective_translational_shift;
// effective_translational_shift.first = bounding_box_outer.first - bounding_box_inner.first;
// effective_translational_shift.second = bounding_box_outer.second - bounding_box_inner.second;
// TransformationGrid transformation_grid(effective_translational_shift,tau_in,binarized_map.h_mid_point);
// CudaCheckError();
// printf("Transformation num: %u\n",transformation_grid.h_num_transformations[0]);
// printf("Translation num: %u\n",transformation_grid.h_num_translations[0]);
// printf("Rotation num: %u\n",transformation_grid.h_num_rotations[0]);
//
//// TransformationGrid transformation_grid(binarized_map.h_mid_point);
// const size_t envelope_score_block_dim = 256;
// size_t envelope_score_grid_dim = work_spaces;
// size_t envelope_workspace_replaced_indices_volume = work_spaces*pixel_coords.particle_count()*sizeof(uint);
// size_t envelope_workspace_replaced_values_buffer_volume = work_spaces*pixel_coords.particle_count()*sizeof(float);
// //allocate workspace resources
//
// float * d_replaced_value_buffer;
// cudaMalloc((void **)&d_replaced_value_buffer,envelope_workspace_replaced_values_buffer_volume);
// uint * d_replaced_linear_indices;
// cudaMalloc((void **)&d_replaced_linear_indices,envelope_workspace_replaced_indices_volume);
// CudaCheckError();
// float * d_workspaces;
// cudaMalloc((void **)&d_workspaces,work_spaces*binarized_map.h_pixel_vol()*sizeof(float));
// printf("Allocating %u workspaces for %u pixels each occupying %u bytes of memory ...\n",work_spaces,binarized_map.h_pixel_vol(),envelope_workspace_replaced_indices_volume + envelope_workspace_replaced_values_buffer_volume);
// CudaCheckError();
// //(Density binarized_map, float * d_workspaces, uint * d_linear_offsets, float4 * d_coords,size_t num_particles , TransformationGrid grid)
// envelope_score<envelope_score_block_dim><<<envelope_score_grid_dim,envelope_score_block_dim>>>
// (binarized_map, d_workspaces,d_replaced_value_buffer, d_replaced_linear_indices,pixel_coords.d_data,pixel_coords.particle_count(),transformation_grid);
//// transformation_grid.write_to_csv("test/env_score.csv");
// float * d_normalized_scores;
// cudaMalloc((void **)&d_normalized_scores,transformation_grid.h_num_transformations[0]*sizeof(*d_normalized_scores));
// float * d_min;
// cudaMalloc((void **)&d_min,sizeof(*d_min));
// float * d_max;
// cudaMalloc((void **)&d_max,sizeof(*d_max));
// transformation_grid.shift_and_scale_envelope_score(transformation_grid.d_scores,d_normalized_scores,d_min,d_max,transformation_grid.h_num_transformations[0]);
// transformation_grid.write_to_csv("test/env_score_5880.csv");
// printf("Finished testing envelope score ... \n\n");
//}
//
BOOST_AUTO_TEST_CASE(translational_quadrature)
{
printf(" ============================= Testing translational quadrature ===========================\n");
printf(" ============================= ++++++++++++++++++++++++++++++++ ===========================\n");
std::pair<float3,float3> effective_translational_shift;
effective_translational_shift.first = bounding_box_outer.first - bounding_box_inner.first;
effective_translational_shift.second = bounding_box_outer.second - bounding_box_inner.second;
TransformationGrid transformation_grid(effective_translational_shift,tau_in,binarized_map.h_mid_point);
effective_translational_shift.first = {0,0,0};
effective_translational_shift.second = {100,100,100};
float trans_offset[3] = {50,50,50};
float tau = 3;
float * quadrature_result = (float *) malloc(sizeof(*quadrature_result));
TransformationGrid transformation_grid(effective_translational_shift,tau,&trans_offset[0]);
transformation_grid.gaussian();
transformation_grid.quadrature(quadrature_result);
CudaCheckError();
// transformation_grid.print_quadrature_weights();
printf("Transformation num: %u\n",transformation_grid.h_num_transformations[0]);
printf("Translation num: %u\n",transformation_grid.h_num_translations[0]);
printf("Rotation num: %u\n",transformation_grid.h_num_rotations[0]);
// TransformationGrid transformation_grid(binarized_map.h_mid_point);
const size_t envelope_score_block_dim = 256;
size_t envelope_score_grid_dim = work_spaces;
size_t envelope_workspace_replaced_indices_volume = work_spaces*pixel_coords.particle_count()*sizeof(uint);
size_t envelope_workspace_replaced_values_buffer_volume = work_spaces*pixel_coords.particle_count()*sizeof(float);
//allocate workspace resources
float * d_replaced_value_buffer;
cudaMalloc((void **)&d_replaced_value_buffer,envelope_workspace_replaced_values_buffer_volume);
uint * d_replaced_linear_indices;
cudaMalloc((void **)&d_replaced_linear_indices,envelope_workspace_replaced_indices_volume);
CudaCheckError();
float * d_workspaces;
cudaMalloc((void **)&d_workspaces,work_spaces*binarized_map.h_pixel_vol()*sizeof(float));
printf("Allocating %u workspaces for %u pixels each occupying %u bytes of memory ...\n",work_spaces,binarized_map.h_pixel_vol(),envelope_workspace_replaced_indices_volume + envelope_workspace_replaced_values_buffer_volume);
CudaCheckError();
//(Density binarized_map, float * d_workspaces, uint * d_linear_offsets, float4 * d_coords,size_t num_particles , TransformationGrid grid)
envelope_score<envelope_score_block_dim><<<envelope_score_grid_dim,envelope_score_block_dim>>>
(binarized_map, d_workspaces,d_replaced_value_buffer, d_replaced_linear_indices,pixel_coords.d_data,pixel_coords.particle_count(),transformation_grid);
// transformation_grid.write_to_csv("test/env_score.csv");
float * d_normalized_scores;
cudaMalloc((void **)&d_normalized_scores,transformation_grid.h_num_transformations[0]*sizeof(*d_normalized_scores));
float * d_min;
cudaMalloc((void **)&d_min,sizeof(*d_min));
float * d_max;
cudaMalloc((void **)&d_max,sizeof(*d_max));
transformation_grid.shift_and_scale_envelope_score(transformation_grid.d_scores,d_normalized_scores,d_min,d_max,transformation_grid.h_num_transformations[0]);
transformation_grid.write_to_csv("test/env_score_5880.csv");
printf("Finished testing envelope score ... \n\n");
printf("Test result:%e\n",quadrature_result[0]);
}
//
//BOOST_AUTO_TEST_CASE(chamfer_test_simpler)
//{
// printf(" ============================= Testing Chamfer score ===========================\n");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment