Commit f80d09e5 authored by karius's avatar karius

safety commit

parent 8d73932c
......@@ -23,21 +23,32 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <cusolverDn.h>
#include <cmath>
#pragma pack(1)
struct M_alpha {
float m00;
float m01;
float m02;
float m03;
float m11;
float m12;
float m13;
float m22;
float m23;
float m33;
char padding;
double m00;
double m01;
double m02;
double m03;
double m10;
double m11;
double m12;
double m13;
double m20;
double m21;
double m22;
double m23;
double m30;
double m31;
double m32;
double m33;
__device__
struct M_alpha& operator+=(const M_alpha& rhs) {
//matrix is supposed to be symmetric, other additions unneccessary
m00 += rhs.m00;
m01 += rhs.m01;
m02 += rhs.m02;
......@@ -51,22 +62,61 @@ struct M_alpha {
return *this;}
};
__host__ __device__
void rotate(float4 * position_0, float4 * position_r, float4 * rotation){
// rot_quat = [a,b,c,d]
// pos_quat = [0,x,y,z] - in memory as [x,y,z,0]
// (a^2*x + 2*a*c*z - 2*a*d*y + b^2*x + 2*b*c*y + 2*b*d*z - c^2*x - d^2*x)*i
//+ (a^2*y - 2*a*b*z + 2*a*d*x - b^2*y + 2*b*c*x + 2*c*d*z + c^2*y - d^2*y)*j
//+ (a^2*z + 2*a*b*y - 2*a*c*x - b^2*z + 2*b*d*x - c^2*z + 2*c*d*y + d^2*z)*k
//x (a^2*x + 2*a*c*z - 2*a*d*y + b^2*x + 2*b*c*y + 2*b*d*z - c^2*x - d^2*x)
position_r->x = rotation->x*rotation->x*position_0->x
+ 2*rotation->x*rotation->z*position_0->z
- 2*rotation->x*rotation->w*position_0->y
+ rotation->y*rotation->y*position_0->x
+ 2*rotation->y*rotation->z*position_0->y
+ 2*rotation->y*rotation->w*position_0->z
- rotation->z*rotation->z*position_0->x
- rotation->w*rotation->w*position_0->x;
//y (a^2*y - 2*a*b*z + 2*a*d*x - b^2*y + 2*b*c*x + 2*c*d*z + c^2*y - d^2*y)
position_r->y = rotation->x*rotation->x*position_0->y
- 2*rotation->x*rotation->y*position_0->z
+ 2*rotation->x*rotation->w*position_0->x
- rotation->y*rotation->y*position_0->y
+ 2*rotation->y*rotation->z*position_0->x
+ 2*rotation->z*rotation->w*position_0->z
+ rotation->z*rotation->z*position_0->y
- rotation->w*rotation->w*position_0->y;
//z (a^2*z + 2*a*b*y - 2*a*c*x - b^2*z + 2*b*d*x + 2*c*d*y - c^2*z + d^2*z)
position_r->z = rotation->x*rotation->x*position_0->z
+ 2*rotation->x*rotation->y*position_0->y
- 2*rotation->x*rotation->z*position_0->x
- rotation->y*rotation->y*position_0->z
+ 2*rotation->y*rotation->w*position_0->x
+ 2*rotation->z*rotation->w*position_0->y
- rotation->z*rotation->z*position_0->z
+ rotation->w*rotation->w*position_0->z;
// printf("%f %f\n",position_0->x*position_0->x+position_0->y*position_0->y+position_0->z*position_0->z,position_r->x*position_r->x+position_r->y*position_r->y+position_r->z*position_r->z);
// printf("%f %f %f -> %f %f %f |%f %f %f %f\n",position_0->x,position_0->y,position_0->z,position_r->x,position_r->y,position_r->z,rotation->x,rotation->y,rotation->z,rotation->w);
}
template <int BLOCKDIM>
__global__
void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alphas, int num_frames, int num_particles){
void center_and_m_alpha(float * d_particles, M_alpha * d_m_alphas, int num_frames, int num_particles){
//holds intermediates for center of mass and M_alpha calculations --> size = thread_number*(|M_alpha| + 3*float)
extern __shared__ float buffer[];
float3 * coms = (float3 *) &buffer[0];
M_alpha * malphas = (M_alpha *) &buffer[3*BLOCKDIM];
extern __shared__ float3 buffer[];
float3 * coms = &buffer[0];
M_alpha * malphas = (M_alpha *) &buffer[0];
//frames
int f = 0;
//frames -- first frame is zeroframe
int f = blockIdx.x+1;
//particles
int p;
//threads
int t;
//pointer for current frame
float * d_frame;
float * d_frame0 = &d_particles[0];
//pointer for current particles, plus d for efficiency
float4 * p0;
float4 * p1;
......@@ -75,24 +125,19 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
while (f < num_frames){
d_frame = &d_particles[4*f*num_particles];
//init coms and malphas to zero
t = 0;
while (t < BLOCKDIM){
coms[t] = {0};
malphas[t] = {0};
t +=1;
}
coms[threadIdx.x] = {0};
p = threadIdx.x;
//looping over particles, calculating center of mass
while (p < num_particles){
coms[threadIdx.x].x += d_frame[4*p];
coms[threadIdx.x].y += d_frame[4*p + 1];
coms[threadIdx.x].z += d_frame[4*p + 2];
p += BLOCKDIM;
p += blockDim.x;
}
__syncthreads();
// if (threadIdx.x < 128){coms[threadIdx.x] += coms[threadIdx.x + 128];}__syncthreads();
// if (threadIdx.x < 64){coms[threadIdx.x] += coms[threadIdx.x + 128];}__syncthreads();
//here, in effect, BLOCKDIM is forced to be 32. Add statements of the above kind to change this.
//here, in effect, BLOCKDIM is forced to be 64. Add statements of the above kind to change this.
if (threadIdx.x < 32){
coms[threadIdx.x] += coms[threadIdx.x+ 32];
coms[threadIdx.x] += coms[threadIdx.x+ 16];
......@@ -110,15 +155,30 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
d_frame[4*p] -= coms[0].x;
d_frame[4*p + 1] -= coms[0].y;
d_frame[4*p + 2] -= coms[0].z;
p += BLOCKDIM;
p += blockDim.x;
}
__syncthreads();
// __syncthreads();
// if (threadIdx.x == 0 ){
// float3 s = {0};
// for (int i=0;i<num_particles;i++){
// s.x += d_frame[4*i];
// s.y += d_frame[4*i + 1];
// s.z += d_frame[4*i + 2];
// }
// printf("Frame % i - coms: %f %f %f\n",f,s.x,s.y,s.z);
// }
//init __shared__ for M_alpha calculation
malphas[threadIdx.x] = {0};
p = threadIdx.x;
while (p < num_particles){
p0 = (float4 *) &d_frame0[p];
p1 = (float4 *) &d_frame[p];
p0 = (float4 *) &d_frame0[4*p];
p1 = (float4 *) &d_frame[4*p];
d = p1->x * p1->x + p1->y * p1->y + p1->z * p1->z + p0->x * p0->x + p0->y * p0->y
+ p0->z * p0->z;
if (f == 1 && p == 1){
printf("%i %i| %f %f %f | %f %f %f\n",threadIdx.x,p,p0->x,p0->y,p0->z,p1->x,p1->y,p1->z);
}
malphas[threadIdx.x].m00 = d - 2 * p1->x * p0->x - 2 * p1->y * p0->y - 2 * p1->z * p0->z;
malphas[threadIdx.x].m01 = 2 * (p1->y * p0->z - p1->z * p0->y);
malphas[threadIdx.x].m02 = 2 * (-p1->x * p0->z + p1->z * p0->x);
......@@ -129,11 +189,11 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
malphas[threadIdx.x].m22 = d + 2 * p1->x * p0->x - 2 * p1->y * p0->y + 2 * p1->z * p0->z;
malphas[threadIdx.x].m23 = -2 * (p1->y * p0->z + p1->z * p0->y);
malphas[threadIdx.x].m33 = d + 2 * p1->x * p0->x + 2 * p1->y * p0->y - 2 * p1->z * p0->z;
p += BLOCKDIM;
p += blockDim.x;
}
__syncthreads();
//reduce malphas
//here, in effect, BLOCKDIM is forced to be 32. See remark above.
//here, in effect, BLOCKDIM is forced to be 64. See remark above.
if (threadIdx.x < 32){
malphas[threadIdx.x] += malphas[threadIdx.x + 32];
malphas[threadIdx.x] += malphas[threadIdx.x + 16];
......@@ -142,11 +202,322 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
malphas[threadIdx.x] += malphas[threadIdx.x + 2];
malphas[threadIdx.x] += malphas[threadIdx.x + 1];
}
d_m_alphas[f] = malphas[0];
__syncthreads();
//symmetrize, spread out the work a little
if (threadIdx.x ==0){
malphas[0].m10 =malphas[0].m01;
malphas[0].m20 =malphas[0].m02;
malphas[0].m30 =malphas[0].m03;
} else if (threadIdx.x == 1){
malphas[0].m21 =malphas[0].m12;
malphas[0].m31 =malphas[0].m13;
malphas[0].m32 =malphas[0].m23;
}
__syncthreads();
if (threadIdx.x == 0)
d_m_alphas[f-1] = malphas[0];
f += gridDim.x;
}
}
__global__
void rotate_frames_in_place(float4 * d_coords,double * d_U,int num_particles, int num_frames){
// rot_quat = [a,b,c,d] [1,0,0,0] is identity
// pos_quat = [0,x,y,z] - in memory as [x,y,z,0]
// (a^2*x + 2*a*c*z - 2*a*d*y + b^2*x + 2*b*c*y + 2*b*d*z - c^2*x - d^2*x)*i
//+ (a^2*y - 2*a*b*z + 2*a*d*x - b^2*y + 2*b*c*x + 2*c*d*z + c^2*y - d^2*y)*j
//+ (a^2*z + 2*a*b*y - 2*a*c*x - b^2*z + 2*b*d*x + 2*c*d*y - c^2*z + d^2*z)*k
extern __shared__ float4 buffer2[];
float4 * rot_quat = &buffer2[0];
float4 * orig_coords = &buffer2[1];
//frame-- first frame is zeroframe
int f = blockIdx.x +1;
//particle
int p;
while (f < num_frames){
//get appropriate rot_quat
if (threadIdx.x == 0){
rot_quat->x = (float) d_U[(f-1)* 16 + 12];
rot_quat->y = -(float) d_U[(f-1)* 16 + 13];
rot_quat->z = -(float) d_U[(f-1)* 16 + 14];
rot_quat->w = -(float) d_U[(f-1)* 16 + 15];
// printf("Frame:%i Worker:%i -- %f %f %f %f\n",f,blockIdx.x,rot_quat->x,rot_quat->y,rot_quat->z,rot_quat->w);
}
__syncthreads();
p= threadIdx.x;
while (p<num_particles){
orig_coords[threadIdx.x] = d_coords[f*num_particles + p];
rotate(&orig_coords[threadIdx.x],&d_coords[f*num_particles + p],rot_quat);
p += blockDim.x;
}
f += gridDim.x;
}
}
inline float scaled_rand(int _rand_max, float bound) {
return (float) (rand() % _rand_max) / (float) _rand_max * bound;
}
inline float4 random_quat(){
float4 ret;
float u = scaled_rand(10000,1.0);
float v = scaled_rand(10000,1.0);
float w = scaled_rand(10000,1.0);
ret.x = sqrtf(1-u)*(sinf(2*M_PI*v));
ret.y = sqrtf(1-u)*(cosf(2*M_PI*v));
ret.z = sqrtf(u)*(sinf(2*M_PI*w));
ret.w = sqrtf(u)*(cosf(2*M_PI*w));
return ret;
}
void AlignmentProtocol::align(float4 * h_particles_4,int frames,int particle_num){
//alignment steps
//0. center zero frame and copy data to GPU
//1. calculate m_alphas
//2. find EVs
//3. rotate coordinates
float4 coms = {0};
for (int i=0;i<particle_num;i++)
coms += h_particles_4[i];
coms /= particle_num;
for (int i=0;i<particle_num;i++)
h_particles_4[i] -= coms;
float4 * d_particles_4;
cudaMalloc((void **)& d_particles_4,frames*particle_num*sizeof(*d_particles_4));
cudaMemcpy(d_particles_4,h_particles_4,frames*particle_num*sizeof(*d_particles_4),cudaMemcpyHostToDevice);
//gpu frame num
int gpu_cycle_volume = frames;
//particle num
int P = particle_num;
M_alpha * d_m_alphas;
cudaMalloc((void **) &d_m_alphas,gpu_cycle_volume*sizeof(*d_m_alphas));
//recast for cusolver use
double * d_m_alpha_matrices = reinterpret_cast<double *>(d_m_alphas);
//recast for kernel use
float * d_particles = reinterpret_cast<float *>(d_particles_4);
//#######################################################################
// setup - cusolver library
//#######################################################################
//cusolver resources and parameters
double *d_U; /* ldu-by-m-by-batchSize */
double *d_V; /* ldv-by-n-by-batchSize */
double *d_S; /* -by-batchSizee */
//initate resources
//cusolver "-1" since the first frame stays fixed and the data elements signify rotations
cudaMalloc((void**) &d_U, sizeof(double) * 4 * 4 * (gpu_cycle_volume-1));
cudaMalloc((void**) &d_V, sizeof(double) * 4 * 4 * (gpu_cycle_volume-1));
cudaMalloc((void**) &d_S, sizeof(double) * 4 * (gpu_cycle_volume-1));
cusolverDnHandle_t cusolverH = NULL;
cudaStream_t stream = NULL;
gesvdjInfo_t gesvdj_params = NULL;
cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
cudaError_t cudaStat1 = cudaSuccess;
cudaError_t cudaStat2 = cudaSuccess;
cudaError_t cudaStat3 = cudaSuccess;
cudaError_t cudaStat4 = cudaSuccess;
cudaError_t cudaStat5 = cudaSuccess;
const int batchSize = (gpu_cycle_volume - 1);
int info[batchSize]; /* info = [info0 ; info1] */
//
// double *d_A = d_m_alpha_matrices; /* lda-by-n-by-batchSize */
int* d_info = NULL; /* batchSize */
int lwork = 0; /* size of workspace */
double *d_work = NULL; /* d_coords workspace for gesvdjBatched */
//
//configure
/* step 1: create cusolver handle, bind a stream */
status = cusolverDnCreate(&cusolverH);
assert(CUSOLVER_STATUS_SUCCESS == status);
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
assert(cudaSuccess == cudaStat1);
status = cusolverDnSetStream(cusolverH, stream);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* step 2: configuration of gesvdj */
status = cusolverDnCreateGesvdjInfo(&gesvdj_params);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* default value of tolerance is machine zero */
status = cusolverDnXgesvdjSetTolerance(gesvdj_params, 1.e-7);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* default value of max. sweeps is 100 */
status = cusolverDnXgesvdjSetMaxSweeps(gesvdj_params, 15);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* disable sorting */
status = cusolverDnXgesvdjSetSortEig(gesvdj_params, 1);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* step 3: copy A to d_coords */
cudaStat5 = cudaMalloc((void**) &d_info, sizeof(int) * batchSize);
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
assert(cudaSuccess == cudaStat3);
assert(cudaSuccess == cudaStat4);
assert(cudaSuccess == cudaStat5);
/* step 4: query working space of gesvdjBatched */
status = cusolverDnDgesvdjBatched_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, 4, 4, d_m_alpha_matrices,
4, d_S, d_U, 4, d_V, 4, &lwork, gesvdj_params, batchSize);
assert(CUSOLVER_STATUS_SUCCESS == status);
cudaStat1 = cudaMalloc((void**) &d_work, sizeof(double) * lwork);
assert(cudaSuccess == cudaStat1);
cublasHandle_t handle;
cublasStatus_t cublas_status;
cublas_status = cublasCreate(&handle);
//#######################################################################
// end setup - cusolver library
//#######################################################################
//step 1 - center and calculate M_alphas
int num_workers = 16;
const int block_dim = 64;
int shared_mem_bytes = block_dim*sizeof(M_alpha);
int frames_per_worker = max(frames/num_workers,1);
center_and_m_alpha<block_dim><<<num_workers,block_dim,shared_mem_bytes>>>(d_particles,d_m_alphas,frames,P);
cudaDeviceSynchronize();
//step 2 - determine rotation quat by EVs
cusolverDnDgesvdjBatched(cusolverH, CUSOLVER_EIG_MODE_VECTOR, 4, 4, d_m_alpha_matrices, 4, d_S, d_U,
4, d_V, 4, d_work, lwork, d_info, gesvdj_params, batchSize);
cudaDeviceSynchronize();
//step 3 - rotate in device memory
//shared mem - one for rot_quat, others for tempary memory of coords
shared_mem_bytes = (1+block_dim)*(sizeof(float4));
rotate_frames_in_place<<<num_workers,block_dim,shared_mem_bytes>>>(d_particles_4,d_U,P,frames);
cudaDeviceSynchronize();
cudaMemcpy(h_particles_4,d_particles_4,frames*particle_num*sizeof(*d_particles_4),cudaMemcpyDeviceToHost);
}
void AlignmentProtocol::concatRMF(std::string outfile_name){
RMF::FileHandle orh = RMF::create_rmf_file(outfile_name);
orh.set_producer("Le Kai");
bool first = true;
for (std::string rmf_path: _rmf_file_paths){
//TODO: check for filetype
RMF::FileConstHandle rh = RMF::open_rmf_file_read_only(rmf_path);
if (first) {
RMF::clone_file_info(rh, orh); // creator etc. (not essential)
RMF::clone_hierarchy(rh, orh);
RMF::clone_static_frame(rh, orh);
first = false;
}
orh.set_description(orh.get_description() + "\n" + rh.get_description());
RMF_FOREACH(RMF::FrameID ni, rh.get_frames()){
rh.set_current_frame(ni);
orh.add_frame(rh.get_name(ni), rh.get_type(ni));
RMF::clone_loaded_frame(rh, orh);
}
}
}
void AlignmentProtocol::create_test_cube(float4 *& h_particle_set,float4 *& d_particle_set,int & num_particles, int & num_frames){
num_particles = 8;
num_frames = 24;
h_particle_set = (float4 *) malloc(num_particles*num_frames*sizeof(*h_particle_set));
cudaMalloc((void **) &d_particle_set,num_particles*num_frames*sizeof(*h_particle_set));
h_particle_set[0] = {1,1,1,1};
h_particle_set[1] = {-1,1,1,1};
h_particle_set[2] = {-1,-1,1,1};
h_particle_set[3] = {1,-1,1,1};
h_particle_set[4] = {1,-1,-1,1};
h_particle_set[5] = {1,1,-1,1};
h_particle_set[6] = {-1,1,-1,1};
h_particle_set[7] = {-1,-1,-1,1};
float4 * h_rot_quats = (float4 *) malloc(num_frames*sizeof(*h_rot_quats));
float4 * d_rot_quats;
h_rot_quats[0]= {1.000000,0.000000,0.000000,0.000000};
h_rot_quats[1]= {0.000000,1.000000,0.000000,0.000000};
h_rot_quats[2]= {0.000000,0.000000,1.000000,0.000000};
h_rot_quats[3]= {0.000000,0.000000,0.000000,1.000000};
h_rot_quats[4]= {0.500000,0.500000,0.500000,0.500000};
h_rot_quats[5]= {0.500000,0.500000,0.500000,-0.500000};
h_rot_quats[6]= {0.500000,0.500000,-0.500000,0.500000};
h_rot_quats[7]= {0.500000,0.500000,-0.500000,-0.500000};
h_rot_quats[8]= {0.500000,-0.500000,0.500000,0.500000};
h_rot_quats[9]= {0.500000,-0.500000,0.500000,-0.500000};
h_rot_quats[10]= {0.500000,-0.500000,-0.500000,0.500000};
h_rot_quats[11]= {0.500000,-0.500000,-0.500000,-0.500000};
h_rot_quats[12]= {0.707107,0.707107,0.000000,0.000000};
h_rot_quats[13]= {0.707107,-0.707107,0.000000,0.000000};
h_rot_quats[14]= {0.707107,0.000000,0.707107,0.000000};
h_rot_quats[15]= {0.707107,0.000000,-0.707107,0.000000};
h_rot_quats[16]= {0.707107,0.000000,0.000000,0.707107};
h_rot_quats[17]= {0.707107,0.000000,0.000000,-0.707107};
h_rot_quats[18]= {0.000000,0.707107,0.707107,0.000000};
h_rot_quats[19]= {0.000000,0.707107,-0.707107,0.000000};
h_rot_quats[20]= {0.000000,0.707107,0.000000,0.707107};
h_rot_quats[21]= {0.000000,0.707107,0.000000,-0.707107};
h_rot_quats[22]= {0.000000,0.000000,0.707107,0.707107};
h_rot_quats[23]= {0.000000,0.000000,0.707107,-0.707107};
for (int f = 1; f < num_frames; f++) {
for (int p = 0; p < num_particles; p++) {
rotate(&h_particle_set[p], &h_particle_set[f*num_particles + p], &h_rot_quats[f]);
h_particle_set[f*num_particles + p].w = 1;
// if (p==0)
// printf("%f %f %f\n",h_particle_set[f*num_particles + p].x,h_particle_set[f*num_particles + p].y,h_particle_set[f*num_particles + p].z);
}
}
cudaMemcpy(d_particle_set,h_particle_set,num_frames*num_particles*sizeof(*d_particle_set),cudaMemcpyHostToDevice);
// cudaMalloc((void **)&d_rot_quats,24*sizeof(*d_rot_quats));
// cudaMemcpy(d_rot_quats,h_rot_quats,24*sizeof(*d_rot_quats),cudaMemcpyHostToDevice)
}
void AlignmentProtocol::create_random_test_frames(float4 *& particle_set,const int num_particles, const int num_frames, const float * bounding_box) {
srand(time(NULL));
particle_set = (float4 *)malloc(sizeof(float4) * num_particles * num_frames);
//file encodes 24 rotations
float bound_x = bounding_box[0];
float bound_y = bounding_box[1];
float bound_z = bounding_box[2];
int _rand_max = 10000;
printf("Generating %i particles in zero frame.\n", num_particles);
for (int p = 0; p < num_particles; p++) {
particle_set[p].x = scaled_rand(_rand_max, bound_x);
particle_set[p].y = scaled_rand(_rand_max, bound_y);
particle_set[p].z = scaled_rand(_rand_max, bound_z);
}
float4 rotation_quat;
float4 rotated_particle;
printf("Rotating them randomly: \n", num_particles);
for (int f = 1; f < num_frames; f++) {
printf("Rotating frame %i ...\n", f);
rotation_quat = random_quat();
for (int p = 0; p < num_particles; p++) {
rotate(&particle_set[p], &particle_set[f*num_particles + p], &rotation_quat);
}
}
}
void AlignmentProtocol::_set_reference_frame(void){
// if (_rmf_file_paths.size()>0){
// RMF::FileConstHandle fh = RMF::open_rmf_file_read_only(_rmf_file_paths[0]);
......
......@@ -10,6 +10,7 @@
#include<vector>
#include<string>
#include <cuda_runtime.h>
//#include<Particles.h>
class AlignmentProtocol {
......@@ -19,6 +20,10 @@ public:
void prepare(void);
void prepareGPU(int gpu_index);
void runOnGPU(int gpu_index);
void create_random_test_frames(float4 *& particle_set,const int num_particles, const int num_frames, const float * bounding_box);
void create_test_cube(float4 *& h_particle_set,float4 *& d_particle_set,int & num_particles, int & num_frames);
void align(float4 * h_particles_4,int frames,int particle_num);
void concatRMF(std::string outfile_name);
virtual ~AlignmentProtocol();
private:
std::string _search_dir;
......
No preview for this file type
......@@ -21,9 +21,9 @@ ifndef DEVICE_ARCH
endif
align:
nvcc --gpu-architecture=$(DEVICE_ARCH) --include-path=./ $(IMP_INCLUDE) $(EIGEN_INCLUDE) --device-c AlignmentProtocol.cu test.cu -g -G
nvcc --gpu-architecture=$(DEVICE_ARCH) --device-link AlignmentProtocol.o test.o --output-file link.o
g++ AlignmentProtocol.o test.o link.o -o align -lnvidia-ml -L/usr/local/cuda-10.0/lib64/ -lcudart -lcudadevrt -pthread $(IMP_LINK)
nvcc --gpu-architecture=$(DEVICE_ARCH) --include-path=./ $(IMP_INCLUDE) $(EIGEN_INCLUDE) --device-c AlignmentProtocol.cu test.cu -g -G -D_GLIBCXX_USE_CXX11_ABI=0
nvcc --gpu-architecture=$(DEVICE_ARCH) --device-link AlignmentProtocol.o test.o --output-file link.o $(IMP_LINK)
g++ AlignmentProtocol.o test.o link.o -o align -lnvidia-ml -L/usr/local/cuda-10.0/lib64/ -lcudart -lcudadevrt -lcublas -lcusolver -pthread $(IMP_LINK)
eval: GpuDelaunay.o KerDivision.o Splaying.o Star.o PredWrapper.o RandGen.o InputCreator.o predicates.o ThrustWrapper.o KerPredicates.o CRotationGrid.o DelaunayChecker.o
......
......@@ -39,6 +39,10 @@
#include<AlignmentProtocol.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
using namespace std::chrono_literals;
BOOST_AUTO_TEST_CASE(general_engine_test)
{
......@@ -46,6 +50,26 @@ BOOST_AUTO_TEST_CASE(general_engine_test)
printf(" ============================= ++++++++++++++++++++++++++++++++ ===========================\n");
AlignmentProtocol align_protocol("/data/tmp/test_rmfs");
align_protocol.prepare();
//test with random frames
int P = 1000;
int F = 1000;
float4 * h_particle_sets;
float4 * d_particle_sets;
float bb[3] = {100,100,100};
align_protocol.create_random_test_frames(h_particle_sets,P,F,&bb[0]);
cudaDeviceSynchronize();
align_protocol.align(h_particle_sets,F,P);
//test with regular cube
// int P;
// int F;
// float4 * h_particle_sets;
// float4 * d_particle_sets;
// align_protocol.create_test_cube(h_particle_sets,d_particle_sets,P, F);
// cudaDeviceSynchronize();
// align_protocol.align(d_particle_sets,F,P);
// printf("");
// Engine engine;
// engine.startWorkers();
// engine.submitTask(std::shared_ptr<Task>(new Task("Yay0")),0);
......
......@@ -28,23 +28,6 @@ std::map<std::string,std::list<size_t>> subunit_map;
std::list<IMP::core::RigidBody> unique_rbs;
std::list<size_t> rigid_particle_indices;
float * getRotationMatrix(float * q) {
float * matrix;
matrix = (float *) malloc(9 * sizeof(float));
// 0 1 2
// 3 4 5 --> 0 1 2 3 4 5 6 7 8
// 6 7 8
matrix[0] = 1 - 2 * q[2] * q[2] - 2 * q[3] * q[3];
matrix[1] = 2 * q[1] * q[2] - 2 * q[0] * q[3];
matrix[2] = 2 * q[1] * q[3] + 2 * q[0] * q[2];