Commit b325d5f1 authored by karius's avatar karius

added transformation grid

parent 366c9c43
......@@ -16,6 +16,18 @@
#include <util/row_reader.hpp>
#include <util/row_reader_char_delimited.hpp>
//enum EAtomType {
// H,C,N,O,S,Se
//}
//
//std::map<EAtomType,float> atomic_number = {{EAtomType::H,1.0},
// {EAtomType::C,6.0},
// {EAtomType::N,7.0},
// {EAtomType::O,8.0},
// {EAtomType::S,8.0},
// {EAtomType::Se,34.0}}
//
enum RecordType {
UNKNOWN,
ANISOU,
......
......@@ -107,13 +107,13 @@ void CRotationGrid::reset(){
}
}
void CRotationGrid::find_nearest_rotation_set(float alpha_approx, float *& rotations,int & n_rotations, float & alpha)
void CRotationGrid::find_nearest_rotation_set(float alpha_approx, float *& rotations,size_t & n_rotations, float & alpha)
{
std::ifstream filestream;
std::vector<std::string> strs;
std::string current_line;
std::map<float,std::string> alpha_to_path;
std::map<std::string,int> path_to_rot_sample_volume;
std::map<std::string,size_t> path_to_rot_sample_volume;
int rotation_num;
//TODO: IMPORTANT MAKE THIS ENVIRONEMNTAL VARIABLE
const char * base_dir = "/data/eclipse-workspace/fitter/cffk-orientation-dc8bb42/data/";
......@@ -133,7 +133,7 @@ void CRotationGrid::find_nearest_rotation_set(float alpha_approx, float *& rotat
std::getline(filestream, current_line);
boost::split(strs, current_line, boost::is_any_of(" \t"),
boost::token_compress_on);
path_to_rot_sample_volume[_file] = std::stoi(strs[0]);
path_to_rot_sample_volume[_file] = static_cast<size_t>(std::stoi(strs[0]));
alpha_to_path[std::stof(strs[1])] = _file;
break;
}
......@@ -151,7 +151,7 @@ void CRotationGrid::find_nearest_rotation_set(float alpha_approx, float *& rotat
it++;
}
int N = path_to_rot_sample_volume[alpha_to_path[alpha]];
size_t N = path_to_rot_sample_volume[alpha_to_path[alpha]];
rotations = (float *) malloc(4*sizeof(float)*N);
filestream.open(alpha_to_path[alpha],std::fstream::in);
//jump header
......
......@@ -37,7 +37,7 @@ public:
void reset();
void print_current();
static void random_close_to_unity(float * &matrix,size_t offset=0);
static void find_nearest_rotation_set(float alpha_approx, float *& rotations,int & N,float & alpha);
static void find_nearest_rotation_set(float alpha_approx, float *& rotations,size_t & N,float & alpha);
private:
size_t m_chunksize;
ERotationFormat m_rotation_format;
......
No preview for this file type
No preview for this file type
/*
* CRotationGrid.cpp
*
* Created on: Sep 17, 2019
* Author: kkarius
*/
#include <TransformationGrid.h>
TransformationGrid::TransformationGrid(const std::pair<float4,float4> & bounding_box_in,
const float4 & angle_box,
const float & alpha, const float4 & tau_in){
float4 dim;
tau = tau_in;
bounding_box = bounding_box_in;
dim.x = bounding_box.second.x - bounding_box.first.x;
dim.y = bounding_box.second.y - bounding_box.first.y;
dim.z = bounding_box.second.z - bounding_box.first.z;
l_index.x = static_cast<int>(bounding_box.first.x/tau.x);
l_index.y = static_cast<int>(bounding_box.first.y/tau.y);
l_index.z = static_cast<int>(bounding_box.first.z/tau.z);
u_index.x = l_index.x + static_cast<int>(dim.x/tau.x);
u_index.y = l_index.y + static_cast<int>(dim.y/tau.y);
u_index.z = l_index.z + static_cast<int>(dim.z/tau.z);
index_dim.x = u_index.x-l_index.x;
index_dim.y = u_index.y-l_index.y;
index_dim.z = u_index.z-l_index.z;
n_translations = static_cast<size_t>((index_dim.x)*(index_dim.y)*(index_dim.z));
CRotationGrid::find_nearest_rotation_set(alpha, rotations,n_rotations,alpha_actual);
translations = (float *) malloc(n_translations*sizeof(float)*4);
generate_translations(bounding_box);
};
void TransformationGrid::generate_translations(const std::pair<float4,float4> & bounding_box){
size_t layer_vol = index_dim.x*index_dim.y;
size_t bar_vol = index_dim.y;
size_t layer_offset;
size_t bar_offset;
size_t point_offset;
for (int z = 0; z < index_dim.z;z++){
layer_offset = z*layer_vol;
for (int y = 0; y < index_dim.y; y++){
bar_offset = y*bar_vol;
for (int x = 0; x < index_dim.x; x++){
translations[4*(layer_offset + bar_offset + x)] = bounding_box.first.x + x*tau.x;
translations[4*(layer_offset + bar_offset + x) + 1] = bounding_box.first.y + y*tau.y;
translations[4*(layer_offset + bar_offset + x) + 2] = bounding_box.first.z + z*tau.z;
}
}
}
}
void TransformationGrid::write_to_GPU(void * d_gpu_memory){
//populate gpu memory with transformation grid
//apparently the void has size 1
cudaMalloc(&d_gpu_memory,2*sizeof(size_t) + n_translations*4*sizeof(float) + n_rotations*4*sizeof(float));
cudaMemcpy(d_gpu_memory,&n_translations,sizeof(size_t),cudaMemcpyHostToDevice);
cudaMemcpy(d_gpu_memory + sizeof(size_t),&n_rotations,sizeof(size_t),cudaMemcpyHostToDevice);
cudaMemcpy(d_gpu_memory + 2*sizeof(size_t),translations,n_translations*sizeof(float)*4,cudaMemcpyHostToDevice);
cudaMemcpy(d_gpu_memory + 2*sizeof(size_t) + n_translations*sizeof(float)*4,rotations,n_rotations*sizeof(float)*4,cudaMemcpyHostToDevice);
};
void TransformationGrid::print(){
printf("Number of translations: %u, rotations:%u \n",n_translations,n_rotations);
for (int i=0; i<n_translations;i++){
printf("Translations #%i: %f %f %f\n",i,translations[4*i],translations[4*i+1],translations[4*i+2]);
}
for (int i=0; i<n_rotations;i++){
printf("Rotations #%i: %f %f %f %f\n",i,rotations[4*i],rotations[4*i+1],rotations[4*i+2],rotations[4*i+3]);
}
}
TransformationGrid::~TransformationGrid() {
// TODO Auto-generated destructor stub
}
int main(int argc, char * argv[]){
// TransformationGrid(const std::pair<float4,float4,float4> & bounding_box,
// const float4 & angle_box,const float & alpha, const float4 & tau)
TransformationGrid tg({{-50,-50,50},{100,100,100}},{0,0,0},float(40.0),{7,7,7,0});
tg.print();}
/*
* CRotationGrid.h
*
* Created on: Sep 17, 2019
* Author: kkarius
*/
#ifndef TRANSFORMATIONGRID_H_
#define TRANSFORMATIONGRID_H_
#include <cuda_util_include/cutil_math.h>
#include <cstdlib>
#include <utility>
#include <cuda_runtime.h>
#include <CRotationGrid.h>
class TransformationGrid{
public:
TransformationGrid(const std::pair<float4,float4> & bounding_box,const float4 & angle_box,
const float & alpha, const float4 & tau);
virtual ~TransformationGrid();
size_t n_rotations;
size_t n_translations;
void write_to_GPU(void * d_gpu_memory);
void print(void);
private:
std::pair<float4,float4> bounding_box;
float4 tau;
int4 l_index;
int4 u_index;
int4 index_dim;
float * rotations;
float * translations;
float alpha_actual;
void generate_translations(const std::pair<float4,float4> &);
};
#endif /* TRANSFORMATIONGRID_H_ */
No preview for this file type
No preview for this file type
......@@ -1658,7 +1658,7 @@ bool contains(uint4 & index, uint4 lower_left,uint4 upper_right){
return ret;
}
__global__
void cut_and_binarize(float * d_density, size_t num_pixels,char * d_subdensity,
void cut_and_binarize(float * d_density, size_t num_pixels, char * d_subdensity,
uint4 pixel_dim, uint4 sub_pixel_dim, uint sub_pixel_offset,
uint4 * d_density_lower_left,uint4 * d_density_upper_right,
float threshold,char above_value,char below_value){
......@@ -2097,6 +2097,82 @@ void imprint(float4 * d_atoms_4, size_t num_atoms, char * d_chunk, float4 coord_
}
}
__global__
void reduce_char(char * d_chunk, size_t num_pixels, uint * d_scores){
extern __shared__ uint rcshared[];
int tid = threadIdx.x;
//TODO: magic number 1024
if (tid >= num_pixels)
return;
rcshared[tid] = 0;
if (tid + 1024 >= num_pixels) {
rcshared[tid] = (uint) d_chunk[tid];
} else {
rcshared[tid] = (uint) d_chunk[tid] + (uint) d_chunk[tid + 1024];
}
__syncthreads();
if (tid < 512){rcshared[tid] += rcshared[tid + 512];}
__syncthreads();
if (tid < 256){rcshared[tid] += rcshared[tid + 256];}
__syncthreads();
if (tid < 128){rcshared[tid] += rcshared[tid + 128];}
__syncthreads();
if (tid < 64){rcshared[tid] += rcshared[tid + 64];}
__syncthreads();
//TODO: unwrap last thread
if (tid < 32){rcshared[tid] += rcshared[tid + 32];}
__syncthreads();
if (tid < 16){rcshared[tid] += rcshared[tid + 16];}
__syncthreads();
if (tid < 8){rcshared[tid] += rcshared[tid + 8];}
__syncthreads();
if (tid < 4){rcshared[tid] += rcshared[tid + 4];}
__syncthreads();
if (tid < 2){rcshared[tid] += rcshared[tid + 2];}
__syncthreads();
if (tid < 1){rcshared[tid] += rcshared[tid + 1];}
__syncthreads();
if (tid == 0){d_scores[blockIdx.x] = rcshared[0];}
}
template <class T>
__global__
void reduce(T * d_data, size_t num, T * result){
extern __shared__ T rshared[];
int tid = threadIdx.x;
if (tid >= num)
return;
rshared[tid] = (T) 0;
if (tid + 1024 >= num) {
rshared[tid] = d_data[tid];
} else {
rshared[tid] = d_data[tid] + d_data[tid + 1024];
}
__syncthreads();
if (tid < 512){rshared[tid] += rshared[tid + 512];}
__syncthreads();
if (tid < 256){rshared[tid] += rshared[tid + 256];}
__syncthreads();
if (tid < 128){rshared[tid] += rshared[tid + 128];}
__syncthreads();
if (tid < 64){rshared[tid] += rshared[tid + 64];}
__syncthreads();
//TODO: unwrap last thread
if (tid < 32){rshared[tid] += rshared[tid + 32];}
__syncthreads();
if (tid < 16){rshared[tid] += rshared[tid + 16];}
__syncthreads();
if (tid < 8){rshared[tid] += rshared[tid + 8];}
__syncthreads();
if (tid < 4){rshared[tid] += rshared[tid + 4];}
__syncthreads();
if (tid < 2){rshared[tid] += rshared[tid + 2];}
__syncthreads();
if (tid < 1){rshared[tid] += rshared[tid + 1];}
__syncthreads();
if (tid == 0){*result = rshared[0];}
}
void score_envelope(boost::filesystem::path in_kai_file_path, boost::filesystem::path in_mrc, float tau){
//general algorithm:
......@@ -2321,11 +2397,19 @@ void score_envelope(boost::filesystem::path in_kai_file_path, boost::filesystem:
"Rotation alpha shift: %f\n"
"Rotation number: %i\n",
alpha,n_rotations);
//float * d_subdensity;
//allocate rotations on device
cudaMalloc((void **) &d_rotations_4, sizeof(float4)*n_rotations);
cudaMemcpy(d_rotations_4,h_rotations,sizeof(float4)*n_rotations,cudaMemcpyHostToDevice);
cudaDeviceSynchronize();
CudaCheckError();
//allocate scores on device
uint * h_scores = (uint *) malloc(n_translations*n_rotations*sizeof(uint));
uint * d_scores;
cudaMalloc((void **) &d_scores,n_translations*n_rotations*sizeof(uint));
cudaDeviceSynchronize();
CudaCheckError();
printf("Allocated scores for %u transformations ...\n",n_translations*n_rotations);
//obtain padding parameters
float atom_diagonal = euclidean_distance(atoms_bounding_box_volume);
......@@ -2375,42 +2459,65 @@ void score_envelope(boost::filesystem::path in_kai_file_path, boost::filesystem:
printf("Not enough processing memory ... \n");
std::exit(0);
}
char * d_chunks;
cudaMalloc((void **) d_chunks,chunk_num*total_binarized_density_vol*sizeof(char));
char * d_chunks[chunk_num];
//create streams and allocate handles
uint sid = 0;
cudaStream_t streams[chunk_num];
for (int i = 0; i<chunk_num;i++)
for (int i = 0; i<chunk_num;i++){
cudaStreamCreate(&streams[i]);
cudaMalloc(&d_chunks[i], total_binarized_density_vol*sizeof(char));
}
printf("%u streams created ... \n",chunk_num);
//determine imprint kernel parameters
size_t imp_kernel_block_dim = ::deviceMaxThreadsPerBlock;
size_t imp_kernel_block_num = num_particles/imp_kernel_block_dim+1;
size_t imp_kernel_block_shared_mem = sizeof(float4)*(1024 + 1);
printf("Imprint kernel launched with %u blocks of %u threads ...\n",
imp_kernel_block_num, imp_kernel_block_dim);
float t_shift;
//issue kernel calls for transformations
//determine reduce char kernel parameters
size_t rc_kernel_block_dim = ::deviceMaxThreadsPerBlock;
size_t rc_kernel_block_num = total_binarized_density_vol/rc_kernel_block_dim+1;
size_t rc_kernel_block_shared_mem = sizeof(uint)*1024;
printf("Reduce char kernel launched with %u blocks of %u threads ...\n",
rc_kernel_block_num, rc_kernel_block_dim);
//determine reduce kernel parameters (the one after the reduce char kernel)
size_t r_kernel_block_dim = ::deviceMaxThreadsPerBlock;
size_t r_kernel_block_num = rc_kernel_block_num/r_kernel_block_dim+1;
size_t r_kernel_block_shared_mem = sizeof(uint)*1024;
printf("Reduce char kernel launched with %u blocks of %u threads ...\n",
rc_kernel_block_num, rc_kernel_block_dim);
//create an array of temporary memories for each redution in a stream
uint * d_chunk_reduce[chunk_num];
for (int i=0;i<chunk_num;i++)
cudaMalloc(&d_chunk_reduce[i],rc_kernel_block_num*sizeof(uint));
//issue kernel calls for transformations
float t_shift;
for (int r = 0; r < n_rotations; r++){
printf("Processing rotation %i ...\n",r);
for (int t = 0; t < n_translations;t++){
sid = (r*n_translations + t)/chunk_num;
//prepare chunk
cudaMemcpyAsync(d_chunks + total_binarized_density_vol*sid,d_total_binarized_density,total_binarized_density_vol*sizeof(char),cudaMemcpyDeviceToDevice,streams[sid]);
cudaMemcpyAsync(d_chunks[sid],d_total_binarized_density,total_binarized_density_vol*sizeof(char),cudaMemcpyDeviceToDevice,streams[sid]);
//calculate shift
t_shift = translation_search_offset + t*tau;
//call kernel
//void imprint(float4 * d_atoms_4, size_t num_atoms, char * d_chunk,
//float4 coord_dim,uint4 pixel_dim,
//float4 * d_rotation, float trans_shift)
imprint<<<imp_kernel_block_num,imp_kernel_block_dim,imp_kernel_block_shared_mem,streams[sid]>>>(d_coords_4,num_particles,d_chunks + total_binarized_density_vol*sid,total_binarized_density_dim,total_binarized_density_pixel_dim,d_rotations_4 + r,t_shift);
imprint<<<imp_kernel_block_num,imp_kernel_block_dim,imp_kernel_block_shared_mem,streams[sid]>>>(d_coords_4,num_particles,d_chunks[sid],total_binarized_density_dim,total_binarized_density_pixel_dim,d_rotations_4 + r,t_shift);
cudaStreamSynchronize(streams[sid]);
//reduce
//void reduce_char(char * d_chunk, size_t num_pixels, uint * d_scores, size_t trans_id)
reduce_char<<<rc_kernel_block_num,rc_kernel_block_dim,rc_kernel_block_shared_mem,streams[sid]>>>(d_chunks[sid],total_binarized_density_vol,d_chunk_reduce[sid]);
cudaStreamSynchronize(streams[sid]);
// void reduce(T * d_data, size_t num, T * result)
reduce<uint><<<r_kernel_block_num,r_kernel_block_dim,r_kernel_block_shared_mem,streams[sid]>>>(d_chunk_reduce[sid],rc_kernel_block_num,&d_scores[r*n_translations + t]);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment