Commit 40019b19 authored by karius's avatar karius

towards entropy

parent fa740b14
......@@ -1556,136 +1556,3 @@ void Density::indices_in_range(thrust::device_vector<uint> & td_indices,bool ins
size = thrust::distance(td_indices.begin(),end_it);
td_indices.resize(size);
}
//int main(int argc,char * argv[]){
// CUresult cu_res;
// cu_res = cuInit(0);
//
// size_t n = 100000000;
// float * h_test_data = (float *) malloc(n*sizeof(float));
// float * d_test_data;
// cudaMalloc((void **) &d_test_data,n*sizeof(float));
// CudaCheckError();
//
// for (int i=0;i<n;i++){
// h_test_data[i] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
// }
// cudaMemcpy(d_test_data,h_test_data,n*sizeof(float),cudaMemcpyHostToDevice);
// CudaCheckError();
//
// const size_t blockSize = 128;
// unsigned int gridSize = 256;
// printf("Calling kernel\n");
//
// min_max_reduce<blockSize><<<gridSize,blockSize,2*blockSize*sizeof(float)>>>
// (d_test_data,d_test_data + n/2,n/2);
// cudaDeviceSynchronize();
// CudaCheckError();
// min_max_reduce<32><<<1,32,4*32*sizeof(float)>>>
// (d_test_data,d_test_data + n/2,gridSize);
// CudaCheckError();
// printf("Called kernel\n");
//
// std::exit(0);
// Density density = Density::from_mrc("./test/191018_job057_run_class001.mrc");
// uint * d_indices;
// float rho0 = 0.07;
// float rho1 = 0.0;
// size_t inside_indices_size;
// size_t outside_indices_size;
// thrust::device_vector<uint> td_inside_indices;
// thrust::device_vector<uint> td_outside_indices;
//
// //calculate index sets
// density.indices_in_range(td_inside_indices,true,rho0,rho1,inside_indices_size);
// density.indices_in_range(td_outside_indices,false,rho0,rho1,outside_indices_size);
// printf("Inside:%u\nOutside:%u\nSum:%u\n",inside_indices_size,outside_indices_size,outside_indices_size+inside_indices_size);
// uint * d_inside_indices = thrust::raw_pointer_cast(&td_inside_indices[0]);
// uint * d_outside_indices = thrust::raw_pointer_cast(&td_outside_indices[0]);
//
// //prepare distance mapp kernel parameters and resources
// size_t max_tmp_mem = 2*one_gb;
// size_t dm_kernel_block_dim = 1024;
// size_t dm_kernel_shared_mem = sizeof(float4) + dm_kernel_block_dim*sizeof(float);
// size_t blocks_per_from = inside_indices_size/(2*dm_kernel_block_dim) + 1;
// size_t per_from_iter_vol = max_tmp_mem/(blocks_per_from*sizeof(float)) + 1;
// size_t per_from_iter_num = outside_indices_size/per_from_iter_vol + 1;
// size_t dm_kernel_grid_dim = per_from_iter_vol;
// size_t d_tmp_mem_size = dm_kernel_grid_dim * blocks_per_from * sizeof(float) + sizeof(float4);
// float * d_tmp_mem; //for intermediate reduction results
// cudaMalloc((void **) &d_tmp_mem,d_tmp_mem_size);
// CudaCheckError();
// printf("Distance map kernel parameters:\n"
// "dm_kernel_grid_dim: %u \n"
// "dm_kernel_block_dim %u \n"
// "dm_kernel_shared_mem %u \n"
// "d_tmp_mem_size %u \n"
// "blocks_per_from %u \n"
// "Kernel iter volume %u\n"
// "Kernel iter num %u \n",
// dm_kernel_grid_dim,dm_kernel_block_dim,dm_kernel_shared_mem,d_tmp_mem_size,
// blocks_per_from, per_from_iter_vol, per_from_iter_num);
//
// //prepare min in place strided reduction
// size_t mrs_kernel_block_dim = 1024;
// size_t mrs_kernel_partition_size = blocks_per_from;
// size_t mrs_kernel_iter_num = ((size_t) log(mrs_kernel_partition_size)/log(2*mrs_kernel_block_dim)) + 1;
// size_t mrs_kernel_shared_mem_vol = mrs_kernel_block_dim*sizeof(float);
// size_t mrs_kernel_grid_dim = 0;//varies with iteration
// size_t mrs_kernel_blocks_per_partition; //varies with iteration
//
// printf("Min reduce strided in place kernel parameters ...\n"
// "mrs_kernel_block_dim: %u \n"
// "mrs_kernel_partition_size: %u \n"
// "mrs_kernel_iter_num: %u \n"
// "mrs_kernel_shared_mem_vol: %u \n"
// "mrs_kernel_grid_dim: %u \n"
// "mrs_kernel_blocks_per_partition: %u \n",
// mrs_kernel_block_dim,mrs_kernel_partition_size,mrs_kernel_iter_num,mrs_kernel_shared_mem_vol,
// mrs_kernel_grid_dim,mrs_kernel_blocks_per_partition);
//
// //prepare copy_result kernel
// size_t cr_kernel_block_dim = 1024;
// size_t cr_kernel_grid_dim = per_from_iter_vol/cr_kernel_block_dim + 1;
// printf("Copy result kernel parameters ...\n"
// "cr_kernel_block_dim: %u \n"
// "cr_kernel_grid_dim: %u \n",
// cr_kernel_block_dim,cr_kernel_grid_dim);
//
// //prepare distance map density
// Density dm_density = density.empty_copy();
// dm_density.fill_with(0.0);
//
// for (int i=0;i < per_from_iter_num;i++){
// dm_kernel_grid_dim = std::min(per_from_iter_vol,outside_indices_size - i*per_from_iter_vol);
// printf("Entering iteration %i with grid_dim %u\n",i,dm_kernel_grid_dim);
// //distance_map(Density density,uint * d_indices_from, uint * d_indices_to,
// // size_t to_pixel_num, float rho0,float rho1,float * d_tmp_mem,
// // size_t blocks_per_from)
// distance_map<<<dm_kernel_grid_dim,dm_kernel_block_dim,dm_kernel_shared_mem>>>
// (density, d_outside_indices + i*per_from_iter_vol, d_inside_indices,
// inside_indices_size,rho0,rho1,d_tmp_mem,
// blocks_per_from);
// cudaDeviceSynchronize();
// //min reduce the tempmem in place, with stride
// for (int j=0; j < mrs_kernel_iter_num;j++){
// mrs_kernel_blocks_per_partition = mrs_kernel_partition_size/ipow(2*mrs_kernel_block_dim,j+1)+1;
// mrs_kernel_grid_dim = mrs_kernel_blocks_per_partition*per_from_iter_vol;
// printf("Entering reduction iteration %i with grid_dim %u and blocks per partition %u...\n",j,mrs_kernel_grid_dim,mrs_kernel_blocks_per_partition);
// min_reduce_strided<<<mrs_kernel_grid_dim,mrs_kernel_block_dim,mrs_kernel_shared_mem_vol>>>
// (d_tmp_mem,mrs_kernel_partition_size,j);
// cudaDeviceSynchronize();
// }
//
// //void copy_result(float * d_tmp_mem, uint * d_indices, size_t index_vol,
// //Density distance_map, int iteration_index,
// //size_t partition_size,size_t per_from_iter_vol)
// copy_result<<<cr_kernel_grid_dim,cr_kernel_block_dim>>>
// (d_tmp_mem,d_outside_indices,outside_indices_size,
// dm_density,i,
// mrs_kernel_partition_size,per_from_iter_vol);
// cudaDeviceSynchronize();
// }
//
// printf("");
//}
......@@ -19,7 +19,8 @@ eval: GpuDelaunay.o KerDivision.o Splaying.o Star.o PredWrapper.o RandGen.o Inpu
$(NVCC) --gpu-architecture=sm_61 --include-path=./,./gDel3D/GPU/ CRotationGrid.o InputCreator.o RandGen.o DelaunayChecker.o KerPredicates.o Splaying.o Star.o predicates.o ThrustWrapper.o PredWrapper.o KerDivision.o GpuDelaunay.o TransformationGrid.cu -o eval
ccl: CMrcReader.o
$(NVCC) --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c Labeler.cu Density.cu Kernels.cu Particles.cu TransformationGrid.cu CRotationGrid.cu ccl_test.cu -g -G
# $(NVCC) --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c Labeler.cu Density.cu Kernels.cu Particles.cu TransformationGrid.cu CRotationGrid.cu ccl_test.cu -g -G
$(NVCC) --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c TransformationGrid.cu ccl_test.cu -g -G
$(NVCC) --gpu-architecture=sm_61 --device-link Density.o ccl_test.o Kernels.o Labeler.o Particles.o PdbReader.o TransformationGrid.o CRotationGrid.o --output-file link.o
g++ Density.o Particles.o PdbReader.o CMrcReader.o Kernels.o TransformationGrid.o Labeler.o CRotationGrid.o ccl_test.o link.o -o ccl_test -L. $(CUDA_LINKS) $(BOOST_LINKS) -lpng -lnvidia-ml
......
......@@ -12,7 +12,7 @@ void threeaxisrot(double r11, double r12, double r21, double r31, double r32, fl
res.y = (float) asin ( r21 );
res.z = (float) atan2( r11, r12 );
}
__host__
void TransformationGrid::h_quat_to_euler(const float4 & q,float3 &e){
//body 3-2-1 convention
threeaxisrot( 2*(q.w*q.y + q.x*q.z),
......@@ -22,6 +22,51 @@ void TransformationGrid::h_quat_to_euler(const float4 & q,float3 &e){
q.x*q.x - q.w*q.w - q.y*q.y + q.z*q.z,
e);
}
// Sign of val: -1 if negative, 0 if zero, +1 if positive.
template <typename T> int sgn(T val) {
    if (val < T(0)) return -1;
    if (T(0) < val) return 1;
    return 0;
}
// Returns |x| carrying the sign of y, mirroring copysignf semantics.
// Fixes vs. previous version: uses fabsf instead of the (possibly integer)
// abs overload, and y == 0 now yields +|x| like copysignf does — the old
// sgn-based form returned 0, wiping out the magnitude entirely whenever the
// sign argument happened to be zero.
float copy_sign(float x,float y){
    return (y < 0.0f) ? -fabsf(x) : fabsf(x);
}
__host__
void TransformationGrid::h_matrix_to_quat(float * const& m,float4 & q){
    //https://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
    // Converts a row-major 3x3 rotation matrix m (m[3*row+col]) into a unit
    // quaternion q (w = scalar part).
    // Fixes vs. previous version: the trace is the sum of the diagonal
    // entries themselves (m00+m11+m22), NOT their squares, and each vector
    // component is 0.5*sqrt(...) — the old code multiplied by 0.5 twice.
    // (Check: 180-degree rotation about x, diag(1,-1,-1): old code gave
    // q.x = 0.5; correct value is 1.)
    float t = m[0] + m[4] + m[8];
    float r = sqrtf(1.0f + t);
    q.w = 0.5f*r;
    // fmaxf guards against tiny negative radicands from round-off;
    // the sign is taken from the matching off-diagonal difference.
    q.x = copysignf(0.5f*sqrtf(fmaxf(0.0f, 1.0f + m[0] - m[4] - m[8])), m[7] - m[5]);
    q.y = copysignf(0.5f*sqrtf(fmaxf(0.0f, 1.0f - m[0] + m[4] - m[8])), m[2] - m[6]);
    q.z = copysignf(0.5f*sqrtf(fmaxf(0.0f, 1.0f - m[0] - m[4] + m[8])), m[3] - m[1]);
}
__host__
void TransformationGrid::read_matrix_SO3_sampling(const char * file_name,float4 *& h_rotations, uint *& h_num_rotations){
    // Reads an SO(3) sampling file: line 1 is a header (skipped), line 2
    // holds the number of rotation samples, and each following line holds a
    // 3x3 rotation matrix as 9 whitespace-separated floats (row-major).
    // Each matrix is converted to a quaternion and stored into h_rotations.
    // Precondition: h_num_rotations points at caller-allocated storage;
    // h_rotations is allocated here and owned by the caller afterwards.
    std::ifstream fh(file_name);
    if (!fh.is_open()){
        // previously a missing file silently produced garbage counts
        fprintf(stderr,"read_matrix_SO3_sampling: could not open %s\n",file_name);
        *h_num_rotations = 0;
        h_rotations = NULL;
        return;
    }
    std::string line;
    //skip first (header) line
    std::getline(fh,line);
    //second line: number of rotation samples
    std::getline(fh,line);
    std::istringstream iss(line);
    float num_rot = 0.0f;
    iss >> num_rot;
    *h_num_rotations = (uint) num_rot;
    h_rotations = (float4 *) malloc(h_num_rotations[0]*sizeof(*h_rotations));
    float curr_matrix[9]; // stack buffer; the old heap buffer was leaked
    float4 curr_q;
    uint c = 0;
    // guard on c so a file with extra rows cannot overflow h_rotations
    while (c < h_num_rotations[0] && std::getline(fh, line))
    {
        std::istringstream row(line);
        for (int i = 0;i<9;i++) row >> curr_matrix[i];
        TransformationGrid::h_matrix_to_quat(curr_matrix,curr_q);
        h_rotations[c] = curr_q;
        c++;
    }
    // NOTE(review): if the file holds fewer rows than announced, trailing
    // entries of h_rotations remain uninitialized — TODO confirm inputs.
}
__device__
void TransformationGrid::d_euler_to_quat(const float3 &euler,float4 &quat){
//body 3-2-1 convention
......@@ -105,7 +150,6 @@ TransformationGrid::TransformationGrid(float * const& translation_offset){ // @s
cudaMemcpy(d_TEST,h_TEST,sizeof(bool),cudaMemcpyHostToDevice);
}
TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
const std::pair<float3,float3> & bounding_box_inner,float * const& translation_offset,
const float4 & angle_box, const float & alpha, const float & tau_in){
......@@ -232,35 +276,145 @@ TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding
h_scores = thrust::raw_pointer_cast(&th_scores[0]);
d_scores = thrust::raw_pointer_cast(&td_scores[0]);
};
// Check the result of a CUDA runtime call and, on failure, print a
// diagnostic with file/line; by default the process exits with the error
// code. Wrap calls as: gpuErrchk(cudaMalloc(...));
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess) return;  // fast path: nothing to report
    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
}
// Constructs a grid with a single (identity) translation and rotations read
// from an SO(3) sampling file. Translation metadata is still allocated and
// initialized so the indexing helpers work; scores are allocated as raw
// host/device buffers (the thrust-vector variant is commented out below).
TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
const std::pair<float3,float3> & bounding_box_inner,float * const& translation_offset,float tau_in,const char * rotation_samples_file){
// debug/test flag, mirrored on host and device
h_TEST = (bool *) malloc(sizeof(bool));
*h_TEST = true;
cudaMalloc((void **) &d_TEST,sizeof(bool));
cudaMemcpy(d_TEST,h_TEST,sizeof(bool),cudaMemcpyHostToDevice);
//allocate host- and device-side scalars for the translation grid metadata
h_translation_index_dim = (int3 *) malloc(sizeof(*h_translation_index_dim));
cudaMalloc((void **) &d_translation_index_dim,sizeof(*d_translation_index_dim));
h_translation_total_shift = (float3 *) malloc(sizeof(*h_translation_total_shift));
cudaMalloc((void **) &d_translation_total_shift,sizeof(*d_translation_total_shift));
// NOTE(review): sizeof(*h_translation_total_shift) below — harmless since
// both are float3, but presumably meant sizeof(*h_translation_step_shift)
h_translation_step_shift = (float3 *) malloc(sizeof(*h_translation_total_shift));
cudaMalloc((void **) &d_translation_step_shift,sizeof(*d_translation_step_shift));
h_translation_offset = (float4 *) malloc(sizeof(*h_translation_offset));
cudaMalloc((void **) &d_translation_offset,sizeof(*d_translation_offset));
h_num_translations = (uint *) malloc(sizeof(*h_num_translations));
cudaMalloc((void **) &d_num_translations,sizeof(*h_num_translations));
h_num_rotations = (uint *) malloc(sizeof(*h_num_rotations));
cudaMalloc((void **) &d_num_rotations,sizeof(*d_num_rotations));
h_num_transformations = (uint *) malloc(sizeof(*h_num_transformations));
cudaMalloc((void **) &d_num_transformations,sizeof(*d_num_transformations));
tau = tau_in;
CudaCheckError();
printf("Initiating TransformationGrid with bounding boxes: ...\n");
printf("Inner bounding box: %f,%f,%f - %f,%f,%f ...\n",bounding_box_inner.first.x,bounding_box_inner.first.y,bounding_box_inner.first.z,bounding_box_inner.second.x,bounding_box_inner.second.y,bounding_box_inner.second.z);
printf("Outer bounding box: %f,%f,%f - %f,%f,%f ...\n",bounding_box_outer.first.x,bounding_box_outer.first.y,bounding_box_outer.first.z,bounding_box_outer.second.x,bounding_box_outer.second.y,bounding_box_outer.second.z);
// total translational freedom = how much the inner box can move inside the outer box
float3 bounding_box_inner_vol = bounding_box_inner.second - bounding_box_inner.first;
float3 bounding_box_outer_vol = bounding_box_outer.second - bounding_box_outer.first;
*h_translation_total_shift = bounding_box_outer_vol - bounding_box_inner_vol;
cudaMemcpy(d_translation_total_shift,h_translation_total_shift,sizeof(*h_translation_total_shift),cudaMemcpyHostToDevice);
CudaCheckError();
//just try the middle: zero index dims + one translation = identity only
h_translation_index_dim->x = 0;
h_translation_index_dim->y = 0;
h_translation_index_dim->z = 0;
cudaMemcpy(d_translation_index_dim,h_translation_index_dim,sizeof(*h_translation_index_dim),cudaMemcpyHostToDevice);
CudaCheckError();
*h_num_translations = 1;
cudaMemcpy(d_num_translations,h_num_translations,sizeof(uint),cudaMemcpyHostToDevice);
CudaCheckError();
// full translation sweep kept for later re-enabling:
// h_translation_index_dim->x = (int) (h_translation_total_shift->x/tau);
// h_translation_index_dim->y = (int) (h_translation_total_shift->y/tau);
// h_translation_index_dim->z = (int) (h_translation_total_shift->z/tau);
// cudaMemcpy(d_translation_index_dim,h_translation_index_dim,sizeof(*h_translation_index_dim),cudaMemcpyHostToDevice);
// *h_num_translations = h_translation_index_dim->x + h_translation_index_dim->y + h_translation_index_dim->z;
// cudaMemcpy(d_num_translations,h_num_translations,sizeof(uint),cudaMemcpyHostToDevice);
//option for anisotrope tau
h_translation_step_shift->x = tau;
h_translation_step_shift->y = tau;
h_translation_step_shift->z = tau;
cudaMemcpy(d_translation_step_shift,h_translation_step_shift,sizeof(*h_translation_step_shift),cudaMemcpyHostToDevice);
CudaCheckError();
// translation offset (w unused, kept 0)
h_translation_offset->x = translation_offset[0];
h_translation_offset->y = translation_offset[1];
h_translation_offset->z = translation_offset[2];
h_translation_offset->w = 0.0;
cudaMemcpy(d_translation_offset,h_translation_offset,sizeof(*h_translation_offset),cudaMemcpyHostToDevice);
CudaCheckError();
//rotations: read quaternions from the sampling file, then mirror to device
TransformationGrid::read_matrix_SO3_sampling(rotation_samples_file,h_rotations,h_num_rotations);
cudaMemcpy(d_num_rotations,h_num_rotations,sizeof(uint),cudaMemcpyHostToDevice);
CudaCheckError();
cudaMalloc((void**)&d_rotations,h_num_rotations[0]*sizeof(float4));
cudaMemcpy(d_rotations,h_rotations,h_num_rotations[0]*sizeof(float4),cudaMemcpyHostToDevice);
CudaCheckError();
// every (translation, rotation) pair is one transformation
*h_num_transformations = h_num_translations[0]*h_num_rotations[0];
cudaMemcpy(d_num_transformations,h_num_transformations,sizeof(uint),cudaMemcpyHostToDevice);
CudaCheckError();
// thrust-vector score storage replaced by raw buffers in this variant:
// td_scores.resize(h_num_transformations[0]);
// th_scores.resize(h_num_transformations[0]);
// h_scores = thrust::raw_pointer_cast(&th_scores[0]);
// d_scores = thrust::raw_pointer_cast(&td_scores[0]);
h_scores = (float *) malloc(h_num_transformations[0]*sizeof(*h_scores));
cudaMalloc((void **)&d_scores,h_num_transformations[0]*sizeof(*d_scores));
CudaCheckError();
};
__device__
int4 TransformationGrid::d_transformation_index_from_linear(const uint& linear_index){
// Splits a linear transformation index into translation grid indices
// (x,y,z) and a rotation index (w). Layout is rotation-major:
// linear_index = w*num_translations + (z*dimx*dimy + y*dimx + x).
int4 ret;
//number of rotations consumed before this index
ret.w = (int) (linear_index/d_num_translations[0]);
if (d_num_translations[0] > 1){
int plane = d_translation_index_dim->x*d_translation_index_dim->y;
int rem = (int) (linear_index - ret.w*d_num_translations[0]);
ret.z = rem/plane;
ret.y = (rem - ret.z*plane)/(d_translation_index_dim->x);
ret.x = rem - ret.z*plane - ret.y*d_translation_index_dim->x;
//inner bounding box and outer bounding box are assumed to coincide in their respective centers
ret.x -= d_translation_index_dim->x/2;
ret.y -= d_translation_index_dim->y/2;
ret.z -= d_translation_index_dim->z/2;
} else {
//single (identity) translation: index dims may be zero here, so the
//unguarded divisions the previous version performed before this check
//would divide by zero
ret.x = 0;
ret.y = 0;
ret.z = 0;
}
return ret;
}
__device__
__host__
int4 TransformationGrid::h_transformation_index_from_linear(const uint& linear_index){
// Host-side twin of d_transformation_index_from_linear: splits a linear
// transformation index into translation grid indices (x,y,z) and a
// rotation index (w). Layout is rotation-major:
// linear_index = w*num_translations + (z*dimx*dimy + y*dimx + x).
int4 ret;
//number of rotations consumed before this index
ret.w = (int) (linear_index/h_num_translations[0]);
if (h_num_translations[0] > 1){
int plane = h_translation_index_dim->x*h_translation_index_dim->y;
int rem = (int) (linear_index - ret.w*h_num_translations[0]);
ret.z = rem/plane;
ret.y = (rem - ret.z*plane)/(h_translation_index_dim->x);
ret.x = rem - ret.z*plane - ret.y*h_translation_index_dim->x;
//inner bounding box and outer bounding box are assumed to coincide in their respective centers
//(fixed: the previous version subtracted the half-dimension twice)
ret.x -= h_translation_index_dim->x/2;
ret.y -= h_translation_index_dim->y/2;
ret.z -= h_translation_index_dim->z/2;
}
else {
//corresponds to the case of just one translation, the identity; index
//dims may be zero here, so the unguarded divisions the previous version
//performed before this check could divide by zero
ret.x = 0;
ret.y = 0;
ret.z = 0;
}
return ret;
}
......@@ -341,7 +495,7 @@ void TransformationGrid::d_transformation_to_memory(const uint& linear_index, fl
__host__
void TransformationGrid::h_transformation_to_memory(const uint& linear_index, float4 * const& rotation, float4 * const& translation, float4 * const& translation_offset){
if (!h_TEST[0]){
if (true){
// int r = i/(*d_n_rotations);
// int t =
int4 transformation_index = h_transformation_index_from_linear(linear_index);
......@@ -672,6 +826,20 @@ struct normalize_chamfer: public thrust::unary_function<float,float>
}
};
// Thrust functor mapping a raw envelope score u onto [0,1] via the linear
// rescale (u - min)/(max - min). The caller must set min/max to the batch's
// score range before use.
struct normalize_envelope: public thrust::unary_function<float,float>
{
// thrust::pair<thrust::device_vector<float>::iterator,thrust::device_vector<float>::iterator> d_minmax;
float min; // smallest score in the batch (set by caller)
float max; // largest score in the batch (set by caller)
__device__
//norm = lambda u: 1/(_max - _min)*u - _min/(_max - _min)
//since the chamfer score maps bad solutions to high scores and good solutions to low scores
//exchange min and max
//NOTE(review): the body below is the plain (u - min)/(max - min) rescale —
//no min/max exchange actually happens here. If the inversion described
//above is intended, the caller must swap the fields; TODO confirm. Also
//undefined when max == min (division by zero).
float operator()(const float& u) const {
return u/(max - min) - min/(max - min);
}
};
__host__
void TransformationGrid::normalize_chamfer_score(){
//currently, fast enough. host <--> device transfer is unneccessary
......@@ -685,6 +853,93 @@ void TransformationGrid::normalize_chamfer_score(){
d_scores = thrust::raw_pointer_cast(&td_scores[0]);
}
// Reduces d_data[0..num_data) to a single minimum and maximum, stored in
// d_min[0] / d_max[0]. block_size must be a power of two and equal to the
// launch's blockDim.x; dynamic shared memory is not used (the buffer is
// statically sized from the template parameter).
// Shared layout: reduce[0..block_size) per-thread minima,
//                reduce[block_size..2*block_size) per-thread maxima.
template <size_t block_size>
__global__
void float_min_max(float * d_data,size_t num_data, float * d_min, float * d_max){
    __shared__ float reduce[2*block_size];
    float * _min = &reduce[0];
    float * _max = &reduce[block_size];
    //initiate: shared memory is per-block, so EVERY block must initialize
    //its own copy (the previous version only initialized block 0, leaving
    //all other blocks reducing over uninitialized shared memory)
    _min[threadIdx.x] = FLT_MAX;
    _max[threadIdx.x] = -FLT_MAX;
    __syncthreads();
    //gather with a grid-stride loop so each element is read exactly once
    //(replaces the nested loops that re-scanned the array tail per block
    //and the per-element debug printf)
    for (size_t t = (size_t)blockIdx.x*blockDim.x + threadIdx.x;
         t < num_data;
         t += (size_t)gridDim.x*blockDim.x){
        _min[threadIdx.x] = fminf(_min[threadIdx.x],d_data[t]);
        _max[threadIdx.x] = fmaxf(_max[threadIdx.x],d_data[t]);
    }
    __syncthreads();
    //tree reduction in shared memory; valid for any power-of-two block_size
    //(the previous version hardcoded the 128-thread unrolling)
    for (unsigned int s = block_size/2; s > 0; s >>= 1){
        if (threadIdx.x < s){
            _min[threadIdx.x] = fminf(_min[threadIdx.x],_min[threadIdx.x + s]);
            _max[threadIdx.x] = fmaxf(_max[threadIdx.x],_max[threadIdx.x + s]);
        }
        __syncthreads();
    }
    if (threadIdx.x == 0){
        //fixed: the previous version combined the final maxima with fminf
        //and let every thread perform the store
        //NOTE(review): with gridDim.x > 1 the blocks still race on these two
        //cells, so the result is only well-defined for single-block launches
        //— TODO confirm the intended launch configuration, or combine the
        //per-block partials atomically / in a second pass
        d_min[0] = _min[0];
        d_max[0] = _max[0];
    }
}
// WIP: intended to normalize d_score into d_scaled_score using the global
// score min/max; currently it only launches the min/max child kernel and
// discards the result (d_scaled_score is never written).
// NOTE(review): _min/_max are __shared__, but CUDA dynamic parallelism
// forbids passing pointers to the parent's shared memory to a child kernel
// — the child cannot access it. These need to be global-memory buffers.
// NOTE(review): device-side cudaDeviceSynchronize() after a child launch is
// deprecated/removed in newer CUDA releases; the trailing printf("") looks
// like a leftover debug no-op.
template<size_t block_size>
__global__
void shift_and_scale_envelope_score_kernel(float * d_score, float * d_scaled_score, size_t num_scores){
__shared__ float _min[1];
__shared__ float _max[1];
if (threadIdx.x == 0 && blockIdx.x == 0){
float_min_max<block_size><<<32,block_size>>>(d_score,num_scores,_min,_max);
}
__syncthreads();
cudaDeviceSynchronize();
printf("");
}
__host__
void TransformationGrid::shift_and_scale_envelope_score(float * d_score, float * d_normalized_score, size_t num_scores){
// Host wrapper: launches the (single-thread) normalization kernel, which in
// turn launches the min/max reduction via dynamic parallelism.
// d_score/d_normalized_score are device pointers to num_scores floats.
shift_and_scale_envelope_score_kernel<128><<<1,1>>>(d_score,d_normalized_score,num_scores);
// launch errors were previously dropped silently; check them like the rest
// of the file does
CudaCheckError();
}
TransformationGrid::~TransformationGrid() {
// TODO Auto-generated destructor stub
// NOTE(review): the constructors malloc/cudaMalloc numerous members
// (h_/d_translation_*, h_/d_num_*, h_rotations/d_rotations,
// h_scores/d_scores, h_TEST/d_TEST) that are never released here, so every
// TransformationGrid instance leaks host and device memory. TODO add the
// matching free/cudaFree calls — and define or delete copy semantics first
// so the raw pointers cannot be double-freed.
}
......
......@@ -35,6 +35,10 @@
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <util.h>
//#include "gDel3D/GpuDelaunay.h"
//
......@@ -47,12 +51,17 @@ public:
__device__ static void d_euler_to_quat(const float3 &euler,float4 &quat);
__host__ static void h_euler_to_quat(const float3 &euler,float4 &quat);
__host__ static void h_quat_to_euler(const float4 & rotations_quat,float3 &rotations_euler);
__host__ static void h_matrix_to_quat(float * const& m,float4 & quaternion);
__host__ static void read_matrix_SO3_sampling(const char * file_name,float4 *& h_rotations, uint *& h_num_rotations);
__host__ static void shift_and_scale_envelope_score(float * d_score, float * d_normalized_score,size_t num_scores);
TransformationGrid(const std::pair<float3,float3> & bounding_box_outer,const std::pair<float3,float3> & bounding_box_inner,
float * const& translation_offset,const float4 & angle_box, const float & alpha, const float & tau_in);
//Testgrid constructor
TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
const std::pair<float3,float3> & bounding_box_inner,float * const& translation_offset,float tau_in, uint n_testrots);
//empty grid with only one transformation (identity) for test purposes
TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
const std::pair<float3,float3> & bounding_box_inner,float * const& translation_offset,float tau_in,const char * rotation_samples_file);
TransformationGrid(float * const& translation_offset);
virtual ~TransformationGrid();
__host__ void print_transformations(void);
......
No preview for this file type
......@@ -403,116 +403,116 @@ void kai_score(float4 * d_coords, int * d_segmentations, uint * d_labels, float
}
}
BOOST_AUTO_TEST_CASE(kai_test)
{
printf(" ============================= Testing Kai score ===============================\n");
printf(" ============================= +++++++++++++++++ ===============================\n");
//memory management
nvmlInit_v2();
//checkout free memory
nvmlMemory_t memory_info;
nvmlDevice_t device_handle;
nvmlDeviceGetHandleByIndex_v2(0,&device_handle);
nvmlDeviceGetMemoryInfo(device_handle, &memory_info);
float coord_dim_outer[3] = {70,70,70};
float coord_dim[3] = {50,50,50};
float distance_threshold = 0.5;
uint pixel_dim_outer[3] = {70,70,70};
uint pixel_dim[3] = {50,50,50};
uint3 cube_pixel_dim= {30,30,30};
Density density;
Density::from_bounds(density,&coord_dim_outer[0],&pixel_dim_outer[0]);
density.init_device();
density.to_device();
density.mid_cube_with_surface(cube_pixel_dim,1,1.0,1.5);
Density to_fit_density;
Density::from_bounds(to_fit_density,&coord_dim[0],&pixel_dim[0]);
to_fit_density.mid_cube_with_surface(cube_pixel_dim,1,1.0,1.5);
thrust::device_vector<uint> td_on_surface;
size_t on_surface_num;
density.indices_in_range(td_on_surface,true,1.25,2,on_surface_num);
thrust::device_vector<uint> td_on_surface_to_fit;
size_t on_surface_num_to_fit;
to_fit_density.indices_in_range(td_on_surface_to_fit,true,1.25,2,on_surface_num_to_fit);
Particles pixel_coords = to_fit_density.particles_from_density(td_on_surface_to_fit);
pixel_coords.origin_to_center();
Density difference_map;
density.difference_map_density_threshold(difference_map,1.4,1.6,td_on_surface);
CudaCheckError();
float tau_in = 1.0;
uint n_rot_test = 30;
std::pair<float3,float3> bounding_box_outer = density.get_bounding_box();
std::pair<float3,float3> bounding_box_inner = pixel_coords.get_rotationally_safe_bounding_box();
TransformationGrid transformation_grid(bounding_box_outer,bounding_box_inner,difference_map.h_mid_point,tau_in,n_rot_test);
printf("Transformation num: %u\n",transformation_grid.h_num_transformations[0]);
printf("Translation num: %u\n",transformation_grid.h_num_translations[0]);
printf("Translational shift: %f %f %f\n",transformation_grid.h_translation_step_shift->x,transformation_grid.h_translation_step_shift->y,transformation_grid.h_translation_step_shift->z);
printf("Rotation num: %u\n",transformation_grid.h_num_rotations[0]);
//determine padding
uint max_dim = std::max(difference_map.h_pixel_dim[0],density.h_pixel_dim[1]);
max_dim = std::max(max_dim,difference_map.h_pixel_dim[2]);
uint log_2_dim = ipow(2,(int)(std::log((float) max_dim)/std::log(2))+1);
uint * h_pixel_dim_padding = (uint *) malloc(3*sizeof(*h_pixel_dim_padding));
uint * d_pixel_dim_padding;
cudaMalloc((void **)&d_pixel_dim_padding,sizeof(*d_pixel_dim_padding)*3);
for (int i=0;i<3;i++)h_pixel_dim_padding[i] = log_2_dim;