Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Kai Karius
Fitter
Commits
f80d09e5
Commit
f80d09e5
authored
Jan 20, 2021
by
karius
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
safety commit
parent
8d73932c
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
440 additions
and
224 deletions
+440
-224
AlignmentProtocol.cu
AlignmentProtocol.cu
+405
-34
AlignmentProtocol.h
AlignmentProtocol.h
+5
-0
AlignmentProtocol.o
AlignmentProtocol.o
+0
-0
Makefile.test
Makefile.test
+3
-3
test.cu
test.cu
+24
-0
testareal.cu
testareal.cu
+3
-187
No files found.
AlignmentProtocol.cu
View file @
f80d09e5
...
...
@@ -23,21 +23,32 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <cusolverDn.h>
#include <cmath>
#pragma pack(1)
struct
M_alpha
{
float
m00
;
float
m01
;
float
m02
;
float
m03
;
float
m11
;
float
m12
;
float
m13
;
float
m22
;
float
m23
;
float
m33
;
char
padding
;
double
m00
;
double
m01
;
double
m02
;
double
m03
;
double
m10
;
double
m11
;
double
m12
;
double
m13
;
double
m20
;
double
m21
;
double
m22
;
double
m23
;
double
m30
;
double
m31
;
double
m32
;
double
m33
;
__device__
struct
M_alpha
&
operator
+=
(
const
M_alpha
&
rhs
)
{
//matrix is supposed to be symmetric, other additions unneccessary
m00
+=
rhs
.
m00
;
m01
+=
rhs
.
m01
;
m02
+=
rhs
.
m02
;
...
...
@@ -51,22 +62,61 @@ struct M_alpha {
return
*
this
;}
};
__host__
__device__
void
rotate
(
float4
*
position_0
,
float4
*
position_r
,
float4
*
rotation
){
// rot_quat = [a,b,c,d]
// pos_quat = [0,x,y,z] - in memory as [x,y,z,0]
// (a^2*x + 2*a*c*z - 2*a*d*y + b^2*x + 2*b*c*y + 2*b*d*z - c^2*x - d^2*x)*i
//+ (a^2*y - 2*a*b*z + 2*a*d*x - b^2*y + 2*b*c*x + 2*c*d*z + c^2*y - d^2*y)*j
//+ (a^2*z + 2*a*b*y - 2*a*c*x - b^2*z + 2*b*d*x - c^2*z + 2*c*d*y + d^2*z)*k
//x (a^2*x + 2*a*c*z - 2*a*d*y + b^2*x + 2*b*c*y + 2*b*d*z - c^2*x - d^2*x)
position_r
->
x
=
rotation
->
x
*
rotation
->
x
*
position_0
->
x
+
2
*
rotation
->
x
*
rotation
->
z
*
position_0
->
z
-
2
*
rotation
->
x
*
rotation
->
w
*
position_0
->
y
+
rotation
->
y
*
rotation
->
y
*
position_0
->
x
+
2
*
rotation
->
y
*
rotation
->
z
*
position_0
->
y
+
2
*
rotation
->
y
*
rotation
->
w
*
position_0
->
z
-
rotation
->
z
*
rotation
->
z
*
position_0
->
x
-
rotation
->
w
*
rotation
->
w
*
position_0
->
x
;
//y (a^2*y - 2*a*b*z + 2*a*d*x - b^2*y + 2*b*c*x + 2*c*d*z + c^2*y - d^2*y)
position_r
->
y
=
rotation
->
x
*
rotation
->
x
*
position_0
->
y
-
2
*
rotation
->
x
*
rotation
->
y
*
position_0
->
z
+
2
*
rotation
->
x
*
rotation
->
w
*
position_0
->
x
-
rotation
->
y
*
rotation
->
y
*
position_0
->
y
+
2
*
rotation
->
y
*
rotation
->
z
*
position_0
->
x
+
2
*
rotation
->
z
*
rotation
->
w
*
position_0
->
z
+
rotation
->
z
*
rotation
->
z
*
position_0
->
y
-
rotation
->
w
*
rotation
->
w
*
position_0
->
y
;
//z (a^2*z + 2*a*b*y - 2*a*c*x - b^2*z + 2*b*d*x + 2*c*d*y - c^2*z + d^2*z)
position_r
->
z
=
rotation
->
x
*
rotation
->
x
*
position_0
->
z
+
2
*
rotation
->
x
*
rotation
->
y
*
position_0
->
y
-
2
*
rotation
->
x
*
rotation
->
z
*
position_0
->
x
-
rotation
->
y
*
rotation
->
y
*
position_0
->
z
+
2
*
rotation
->
y
*
rotation
->
w
*
position_0
->
x
+
2
*
rotation
->
z
*
rotation
->
w
*
position_0
->
y
-
rotation
->
z
*
rotation
->
z
*
position_0
->
z
+
rotation
->
w
*
rotation
->
w
*
position_0
->
z
;
// printf("%f %f\n",position_0->x*position_0->x+position_0->y*position_0->y+position_0->z*position_0->z,position_r->x*position_r->x+position_r->y*position_r->y+position_r->z*position_r->z);
// printf("%f %f %f -> %f %f %f |%f %f %f %f\n",position_0->x,position_0->y,position_0->z,position_r->x,position_r->y,position_r->z,rotation->x,rotation->y,rotation->z,rotation->w);
}
template
<
int
BLOCKDIM
>
__global__
void
center_and_m_alpha
(
float
*
d_particles
,
float
*
d_frame0
,
M_alpha
*
d_m_alphas
,
int
num_frames
,
int
num_particles
){
void
center_and_m_alpha
(
float
*
d_particles
,
M_alpha
*
d_m_alphas
,
int
num_frames
,
int
num_particles
){
//holds intermediates for center of mass and M_alpha calculations --> size = thread_number*(|M_alpha| + 3*float)
extern
__shared__
float
buffer
[];
float3
*
coms
=
(
float3
*
)
&
buffer
[
0
];
M_alpha
*
malphas
=
(
M_alpha
*
)
&
buffer
[
3
*
BLOCKDIM
];
extern
__shared__
float
3
buffer
[];
float3
*
coms
=
&
buffer
[
0
];
M_alpha
*
malphas
=
(
M_alpha
*
)
&
buffer
[
0
];
//frames
int
f
=
0
;
//frames
-- first frame is zeroframe
int
f
=
blockIdx
.
x
+
1
;
//particles
int
p
;
//threads
int
t
;
//pointer for current frame
float
*
d_frame
;
float
*
d_frame0
=
&
d_particles
[
0
];
//pointer for current particles, plus d for efficiency
float4
*
p0
;
float4
*
p1
;
...
...
@@ -75,24 +125,19 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
while
(
f
<
num_frames
){
d_frame
=
&
d_particles
[
4
*
f
*
num_particles
];
//init coms and malphas to zero
t
=
0
;
while
(
t
<
BLOCKDIM
){
coms
[
t
]
=
{
0
};
malphas
[
t
]
=
{
0
};
t
+=
1
;
}
coms
[
threadIdx
.
x
]
=
{
0
};
p
=
threadIdx
.
x
;
//looping over particles, calculating center of mass
while
(
p
<
num_particles
){
coms
[
threadIdx
.
x
].
x
+=
d_frame
[
4
*
p
];
coms
[
threadIdx
.
x
].
y
+=
d_frame
[
4
*
p
+
1
];
coms
[
threadIdx
.
x
].
z
+=
d_frame
[
4
*
p
+
2
];
p
+=
BLOCKDIM
;
p
+=
blockDim
.
x
;
}
__syncthreads
();
// if (threadIdx.x < 128){coms[threadIdx.x] += coms[threadIdx.x + 128];}__syncthreads();
// if (threadIdx.x < 64){coms[threadIdx.x] += coms[threadIdx.x + 128];}__syncthreads();
//here, in effect, BLOCKDIM is forced to be
32
. Add statements of the above kind to change this.
//here, in effect, BLOCKDIM is forced to be
64
. Add statements of the above kind to change this.
if
(
threadIdx
.
x
<
32
){
coms
[
threadIdx
.
x
]
+=
coms
[
threadIdx
.
x
+
32
];
coms
[
threadIdx
.
x
]
+=
coms
[
threadIdx
.
x
+
16
];
...
...
@@ -110,15 +155,30 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
d_frame
[
4
*
p
]
-=
coms
[
0
].
x
;
d_frame
[
4
*
p
+
1
]
-=
coms
[
0
].
y
;
d_frame
[
4
*
p
+
2
]
-=
coms
[
0
].
z
;
p
+=
BLOCKDIM
;
p
+=
blockDim
.
x
;
}
__syncthreads
();
// __syncthreads();
// if (threadIdx.x == 0 ){
// float3 s = {0};
// for (int i=0;i<num_particles;i++){
// s.x += d_frame[4*i];
// s.y += d_frame[4*i + 1];
// s.z += d_frame[4*i + 2];
// }
// printf("Frame % i - coms: %f %f %f\n",f,s.x,s.y,s.z);
// }
//init __shared__ for M_alpha calculation
malphas
[
threadIdx
.
x
]
=
{
0
};
p
=
threadIdx
.
x
;
while
(
p
<
num_particles
){
p0
=
(
float4
*
)
&
d_frame0
[
p
];
p1
=
(
float4
*
)
&
d_frame
[
p
];
p0
=
(
float4
*
)
&
d_frame0
[
4
*
p
];
p1
=
(
float4
*
)
&
d_frame
[
4
*
p
];
d
=
p1
->
x
*
p1
->
x
+
p1
->
y
*
p1
->
y
+
p1
->
z
*
p1
->
z
+
p0
->
x
*
p0
->
x
+
p0
->
y
*
p0
->
y
+
p0
->
z
*
p0
->
z
;
if
(
f
==
1
&&
p
==
1
){
printf
(
"%i %i| %f %f %f | %f %f %f
\n
"
,
threadIdx
.
x
,
p
,
p0
->
x
,
p0
->
y
,
p0
->
z
,
p1
->
x
,
p1
->
y
,
p1
->
z
);
}
malphas
[
threadIdx
.
x
].
m00
=
d
-
2
*
p1
->
x
*
p0
->
x
-
2
*
p1
->
y
*
p0
->
y
-
2
*
p1
->
z
*
p0
->
z
;
malphas
[
threadIdx
.
x
].
m01
=
2
*
(
p1
->
y
*
p0
->
z
-
p1
->
z
*
p0
->
y
);
malphas
[
threadIdx
.
x
].
m02
=
2
*
(
-
p1
->
x
*
p0
->
z
+
p1
->
z
*
p0
->
x
);
...
...
@@ -129,11 +189,11 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
malphas
[
threadIdx
.
x
].
m22
=
d
+
2
*
p1
->
x
*
p0
->
x
-
2
*
p1
->
y
*
p0
->
y
+
2
*
p1
->
z
*
p0
->
z
;
malphas
[
threadIdx
.
x
].
m23
=
-
2
*
(
p1
->
y
*
p0
->
z
+
p1
->
z
*
p0
->
y
);
malphas
[
threadIdx
.
x
].
m33
=
d
+
2
*
p1
->
x
*
p0
->
x
+
2
*
p1
->
y
*
p0
->
y
-
2
*
p1
->
z
*
p0
->
z
;
p
+=
BLOCKDIM
;
p
+=
blockDim
.
x
;
}
__syncthreads
();
//reduce malphas
//here, in effect, BLOCKDIM is forced to be
32
. See remark above.
//here, in effect, BLOCKDIM is forced to be
64
. See remark above.
if
(
threadIdx
.
x
<
32
){
malphas
[
threadIdx
.
x
]
+=
malphas
[
threadIdx
.
x
+
32
];
malphas
[
threadIdx
.
x
]
+=
malphas
[
threadIdx
.
x
+
16
];
...
...
@@ -142,11 +202,322 @@ void center_and_m_alpha(float * d_particles, float * d_frame0, M_alpha * d_m_alp
malphas
[
threadIdx
.
x
]
+=
malphas
[
threadIdx
.
x
+
2
];
malphas
[
threadIdx
.
x
]
+=
malphas
[
threadIdx
.
x
+
1
];
}
d_m_alphas
[
f
]
=
malphas
[
0
];
__syncthreads
();
//symmetrize, spread out the work a little
if
(
threadIdx
.
x
==
0
){
malphas
[
0
].
m10
=
malphas
[
0
].
m01
;
malphas
[
0
].
m20
=
malphas
[
0
].
m02
;
malphas
[
0
].
m30
=
malphas
[
0
].
m03
;
}
else
if
(
threadIdx
.
x
==
1
){
malphas
[
0
].
m21
=
malphas
[
0
].
m12
;
malphas
[
0
].
m31
=
malphas
[
0
].
m13
;
malphas
[
0
].
m32
=
malphas
[
0
].
m23
;
}
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
d_m_alphas
[
f
-
1
]
=
malphas
[
0
];
f
+=
gridDim
.
x
;
}
}
__global__
void
rotate_frames_in_place
(
float4
*
d_coords
,
double
*
d_U
,
int
num_particles
,
int
num_frames
){
// rot_quat = [a,b,c,d] [1,0,0,0] is identity
// pos_quat = [0,x,y,z] - in memory as [x,y,z,0]
// (a^2*x + 2*a*c*z - 2*a*d*y + b^2*x + 2*b*c*y + 2*b*d*z - c^2*x - d^2*x)*i
//+ (a^2*y - 2*a*b*z + 2*a*d*x - b^2*y + 2*b*c*x + 2*c*d*z + c^2*y - d^2*y)*j
//+ (a^2*z + 2*a*b*y - 2*a*c*x - b^2*z + 2*b*d*x + 2*c*d*y - c^2*z + d^2*z)*k
extern
__shared__
float4
buffer2
[];
float4
*
rot_quat
=
&
buffer2
[
0
];
float4
*
orig_coords
=
&
buffer2
[
1
];
//frame-- first frame is zeroframe
int
f
=
blockIdx
.
x
+
1
;
//particle
int
p
;
while
(
f
<
num_frames
){
//get appropriate rot_quat
if
(
threadIdx
.
x
==
0
){
rot_quat
->
x
=
(
float
)
d_U
[(
f
-
1
)
*
16
+
12
];
rot_quat
->
y
=
-
(
float
)
d_U
[(
f
-
1
)
*
16
+
13
];
rot_quat
->
z
=
-
(
float
)
d_U
[(
f
-
1
)
*
16
+
14
];
rot_quat
->
w
=
-
(
float
)
d_U
[(
f
-
1
)
*
16
+
15
];
// printf("Frame:%i Worker:%i -- %f %f %f %f\n",f,blockIdx.x,rot_quat->x,rot_quat->y,rot_quat->z,rot_quat->w);
}
__syncthreads
();
p
=
threadIdx
.
x
;
while
(
p
<
num_particles
){
orig_coords
[
threadIdx
.
x
]
=
d_coords
[
f
*
num_particles
+
p
];
rotate
(
&
orig_coords
[
threadIdx
.
x
],
&
d_coords
[
f
*
num_particles
+
p
],
rot_quat
);
p
+=
blockDim
.
x
;
}
f
+=
gridDim
.
x
;
}
}
inline
float
scaled_rand
(
int
_rand_max
,
float
bound
)
{
return
(
float
)
(
rand
()
%
_rand_max
)
/
(
float
)
_rand_max
*
bound
;
}
inline
float4
random_quat
(){
float4
ret
;
float
u
=
scaled_rand
(
10000
,
1.0
);
float
v
=
scaled_rand
(
10000
,
1.0
);
float
w
=
scaled_rand
(
10000
,
1.0
);
ret
.
x
=
sqrtf
(
1
-
u
)
*
(
sinf
(
2
*
M_PI
*
v
));
ret
.
y
=
sqrtf
(
1
-
u
)
*
(
cosf
(
2
*
M_PI
*
v
));
ret
.
z
=
sqrtf
(
u
)
*
(
sinf
(
2
*
M_PI
*
w
));
ret
.
w
=
sqrtf
(
u
)
*
(
cosf
(
2
*
M_PI
*
w
));
return
ret
;
}
void
AlignmentProtocol
::
align
(
float4
*
h_particles_4
,
int
frames
,
int
particle_num
){
//alignment steps
//0. center zero frame and copy data to GPU
//1. calculate m_alphas
//2. find EVs
//3. rotate coordinates
float4
coms
=
{
0
};
for
(
int
i
=
0
;
i
<
particle_num
;
i
++
)
coms
+=
h_particles_4
[
i
];
coms
/=
particle_num
;
for
(
int
i
=
0
;
i
<
particle_num
;
i
++
)
h_particles_4
[
i
]
-=
coms
;
float4
*
d_particles_4
;
cudaMalloc
((
void
**
)
&
d_particles_4
,
frames
*
particle_num
*
sizeof
(
*
d_particles_4
));
cudaMemcpy
(
d_particles_4
,
h_particles_4
,
frames
*
particle_num
*
sizeof
(
*
d_particles_4
),
cudaMemcpyHostToDevice
);
//gpu frame num
int
gpu_cycle_volume
=
frames
;
//particle num
int
P
=
particle_num
;
M_alpha
*
d_m_alphas
;
cudaMalloc
((
void
**
)
&
d_m_alphas
,
gpu_cycle_volume
*
sizeof
(
*
d_m_alphas
));
//recast for cusolver use
double
*
d_m_alpha_matrices
=
reinterpret_cast
<
double
*>
(
d_m_alphas
);
//recast for kernel use
float
*
d_particles
=
reinterpret_cast
<
float
*>
(
d_particles_4
);
//#######################################################################
// setup - cusolver library
//#######################################################################
//cusolver resources and parameters
double
*
d_U
;
/* ldu-by-m-by-batchSize */
double
*
d_V
;
/* ldv-by-n-by-batchSize */
double
*
d_S
;
/* -by-batchSizee */
//initate resources
//cusolver "-1" since the first frame stays fixed and the data elements signify rotations
cudaMalloc
((
void
**
)
&
d_U
,
sizeof
(
double
)
*
4
*
4
*
(
gpu_cycle_volume
-
1
));
cudaMalloc
((
void
**
)
&
d_V
,
sizeof
(
double
)
*
4
*
4
*
(
gpu_cycle_volume
-
1
));
cudaMalloc
((
void
**
)
&
d_S
,
sizeof
(
double
)
*
4
*
(
gpu_cycle_volume
-
1
));
cusolverDnHandle_t
cusolverH
=
NULL
;
cudaStream_t
stream
=
NULL
;
gesvdjInfo_t
gesvdj_params
=
NULL
;
cusolverStatus_t
status
=
CUSOLVER_STATUS_SUCCESS
;
cudaError_t
cudaStat1
=
cudaSuccess
;
cudaError_t
cudaStat2
=
cudaSuccess
;
cudaError_t
cudaStat3
=
cudaSuccess
;
cudaError_t
cudaStat4
=
cudaSuccess
;
cudaError_t
cudaStat5
=
cudaSuccess
;
const
int
batchSize
=
(
gpu_cycle_volume
-
1
);
int
info
[
batchSize
];
/* info = [info0 ; info1] */
//
// double *d_A = d_m_alpha_matrices; /* lda-by-n-by-batchSize */
int
*
d_info
=
NULL
;
/* batchSize */
int
lwork
=
0
;
/* size of workspace */
double
*
d_work
=
NULL
;
/* d_coords workspace for gesvdjBatched */
//
//configure
/* step 1: create cusolver handle, bind a stream */
status
=
cusolverDnCreate
(
&
cusolverH
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
cudaStat1
=
cudaStreamCreateWithFlags
(
&
stream
,
cudaStreamNonBlocking
);
assert
(
cudaSuccess
==
cudaStat1
);
status
=
cusolverDnSetStream
(
cusolverH
,
stream
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
/* step 2: configuration of gesvdj */
status
=
cusolverDnCreateGesvdjInfo
(
&
gesvdj_params
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
/* default value of tolerance is machine zero */
status
=
cusolverDnXgesvdjSetTolerance
(
gesvdj_params
,
1.e-7
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
/* default value of max. sweeps is 100 */
status
=
cusolverDnXgesvdjSetMaxSweeps
(
gesvdj_params
,
15
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
/* disable sorting */
status
=
cusolverDnXgesvdjSetSortEig
(
gesvdj_params
,
1
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
/* step 3: copy A to d_coords */
cudaStat5
=
cudaMalloc
((
void
**
)
&
d_info
,
sizeof
(
int
)
*
batchSize
);
assert
(
cudaSuccess
==
cudaStat1
);
assert
(
cudaSuccess
==
cudaStat2
);
assert
(
cudaSuccess
==
cudaStat3
);
assert
(
cudaSuccess
==
cudaStat4
);
assert
(
cudaSuccess
==
cudaStat5
);
/* step 4: query working space of gesvdjBatched */
status
=
cusolverDnDgesvdjBatched_bufferSize
(
cusolverH
,
CUSOLVER_EIG_MODE_VECTOR
,
4
,
4
,
d_m_alpha_matrices
,
4
,
d_S
,
d_U
,
4
,
d_V
,
4
,
&
lwork
,
gesvdj_params
,
batchSize
);
assert
(
CUSOLVER_STATUS_SUCCESS
==
status
);
cudaStat1
=
cudaMalloc
((
void
**
)
&
d_work
,
sizeof
(
double
)
*
lwork
);
assert
(
cudaSuccess
==
cudaStat1
);
cublasHandle_t
handle
;
cublasStatus_t
cublas_status
;
cublas_status
=
cublasCreate
(
&
handle
);
//#######################################################################
// end setup - cusolver library
//#######################################################################
//step 1 - center and calculate M_alphas
int
num_workers
=
16
;
const
int
block_dim
=
64
;
int
shared_mem_bytes
=
block_dim
*
sizeof
(
M_alpha
);
int
frames_per_worker
=
max
(
frames
/
num_workers
,
1
);
center_and_m_alpha
<
block_dim
><<<
num_workers
,
block_dim
,
shared_mem_bytes
>>>
(
d_particles
,
d_m_alphas
,
frames
,
P
);
cudaDeviceSynchronize
();
//step 2 - determine rotation quat by EVs
cusolverDnDgesvdjBatched
(
cusolverH
,
CUSOLVER_EIG_MODE_VECTOR
,
4
,
4
,
d_m_alpha_matrices
,
4
,
d_S
,
d_U
,
4
,
d_V
,
4
,
d_work
,
lwork
,
d_info
,
gesvdj_params
,
batchSize
);
cudaDeviceSynchronize
();
//step 3 - rotate in device memory
//shared mem - one for rot_quat, others for tempary memory of coords
shared_mem_bytes
=
(
1
+
block_dim
)
*
(
sizeof
(
float4
));
rotate_frames_in_place
<<<
num_workers
,
block_dim
,
shared_mem_bytes
>>>
(
d_particles_4
,
d_U
,
P
,
frames
);
cudaDeviceSynchronize
();
cudaMemcpy
(
h_particles_4
,
d_particles_4
,
frames
*
particle_num
*
sizeof
(
*
d_particles_4
),
cudaMemcpyDeviceToHost
);
}
void
AlignmentProtocol
::
concatRMF
(
std
::
string
outfile_name
){
RMF
::
FileHandle
orh
=
RMF
::
create_rmf_file
(
outfile_name
);
orh
.
set_producer
(
"Le Kai"
);
bool
first
=
true
;
for
(
std
::
string
rmf_path
:
_rmf_file_paths
){
//TODO: check for filetype
RMF
::
FileConstHandle
rh
=
RMF
::
open_rmf_file_read_only
(
rmf_path
);
if
(
first
)
{
RMF
::
clone_file_info
(
rh
,
orh
);
// creator etc. (not essential)
RMF
::
clone_hierarchy
(
rh
,
orh
);
RMF
::
clone_static_frame
(
rh
,
orh
);
first
=
false
;
}
orh
.
set_description
(
orh
.
get_description
()
+
"
\n
"
+
rh
.
get_description
());
RMF_FOREACH
(
RMF
::
FrameID
ni
,
rh
.
get_frames
()){
rh
.
set_current_frame
(
ni
);
orh
.
add_frame
(
rh
.
get_name
(
ni
),
rh
.
get_type
(
ni
));
RMF
::
clone_loaded_frame
(
rh
,
orh
);
}
}
}
void
AlignmentProtocol
::
create_test_cube
(
float4
*&
h_particle_set
,
float4
*&
d_particle_set
,
int
&
num_particles
,
int
&
num_frames
){
num_particles
=
8
;
num_frames
=
24
;
h_particle_set
=
(
float4
*
)
malloc
(
num_particles
*
num_frames
*
sizeof
(
*
h_particle_set
));
cudaMalloc
((
void
**
)
&
d_particle_set
,
num_particles
*
num_frames
*
sizeof
(
*
h_particle_set
));
h_particle_set
[
0
]
=
{
1
,
1
,
1
,
1
};
h_particle_set
[
1
]
=
{
-
1
,
1
,
1
,
1
};
h_particle_set
[
2
]
=
{
-
1
,
-
1
,
1
,
1
};
h_particle_set
[
3
]
=
{
1
,
-
1
,
1
,
1
};
h_particle_set
[
4
]
=
{
1
,
-
1
,
-
1
,
1
};
h_particle_set
[
5
]
=
{
1
,
1
,
-
1
,
1
};
h_particle_set
[
6
]
=
{
-
1
,
1
,
-
1
,
1
};
h_particle_set
[
7
]
=
{
-
1
,
-
1
,
-
1
,
1
};
float4
*
h_rot_quats
=
(
float4
*
)
malloc
(
num_frames
*
sizeof
(
*
h_rot_quats
));
float4
*
d_rot_quats
;
h_rot_quats
[
0
]
=
{
1.000000
,
0.000000
,
0.000000
,
0.000000
};
h_rot_quats
[
1
]
=
{
0.000000
,
1.000000
,
0.000000
,
0.000000
};
h_rot_quats
[
2
]
=
{
0.000000
,
0.000000
,
1.000000
,
0.000000
};
h_rot_quats
[
3
]
=
{
0.000000
,
0.000000
,
0.000000
,
1.000000
};
h_rot_quats
[
4
]
=
{
0.500000
,
0.500000
,
0.500000
,
0.500000
};
h_rot_quats
[
5
]
=
{
0.500000
,
0.500000
,
0.500000
,
-
0.500000
};
h_rot_quats
[
6
]
=
{
0.500000
,
0.500000
,
-
0.500000
,
0.500000
};
h_rot_quats
[
7
]
=
{
0.500000
,
0.500000
,
-
0.500000
,
-
0.500000
};
h_rot_quats
[
8
]
=
{
0.500000
,
-
0.500000
,
0.500000
,
0.500000
};
h_rot_quats
[
9
]
=
{
0.500000
,
-
0.500000
,
0.500000
,
-
0.500000
};
h_rot_quats
[
10
]
=
{
0.500000
,
-
0.500000
,
-
0.500000
,
0.500000
};
h_rot_quats
[
11
]
=
{
0.500000
,
-
0.500000
,
-
0.500000
,
-
0.500000
};
h_rot_quats
[
12
]
=
{
0.707107
,
0.707107
,
0.000000
,
0.000000
};
h_rot_quats
[
13
]
=
{
0.707107
,
-
0.707107
,
0.000000
,
0.000000
};
h_rot_quats
[
14
]
=
{
0.707107
,
0.000000
,
0.707107
,
0.000000
};
h_rot_quats
[
15
]
=
{
0.707107
,
0.000000
,
-
0.707107
,
0.000000
};
h_rot_quats
[
16
]
=
{
0.707107
,
0.000000
,
0.000000
,
0.707107
};
h_rot_quats
[
17
]
=
{
0.707107
,
0.000000
,
0.000000
,
-
0.707107
};
h_rot_quats
[
18
]
=
{
0.000000
,
0.707107
,
0.707107
,
0.000000
};
h_rot_quats
[
19
]
=
{
0.000000
,
0.707107
,
-
0.707107
,
0.000000
};
h_rot_quats
[
20
]
=
{
0.000000
,
0.707107
,
0.000000
,
0.707107
};
h_rot_quats
[
21
]
=
{
0.000000
,
0.707107
,
0.000000
,
-
0.707107
};
h_rot_quats
[
22
]
=
{
0.000000
,
0.000000
,
0.707107
,
0.707107
};
h_rot_quats
[
23
]
=
{
0.000000
,
0.000000
,
0.707107
,
-
0.707107
};
for
(
int
f
=
1
;
f
<
num_frames
;
f
++
)
{
for
(
int
p
=
0
;
p
<
num_particles
;
p
++
)
{
rotate
(
&
h_particle_set
[
p
],
&
h_particle_set
[
f
*
num_particles
+
p
],
&
h_rot_quats
[
f
]);
h_particle_set
[
f
*
num_particles
+
p
].
w
=
1
;
// if (p==0)
// printf("%f %f %f\n",h_particle_set[f*num_particles + p].x,h_particle_set[f*num_particles + p].y,h_particle_set[f*num_particles + p].z);
}
}
cudaMemcpy
(
d_particle_set
,
h_particle_set
,
num_frames
*
num_particles
*
sizeof
(
*
d_particle_set
),
cudaMemcpyHostToDevice
);
// cudaMalloc((void **)&d_rot_quats,24*sizeof(*d_rot_quats));
// cudaMemcpy(d_rot_quats,h_rot_quats,24*sizeof(*d_rot_quats),cudaMemcpyHostToDevice)
}
void
AlignmentProtocol
::
create_random_test_frames
(
float4
*&
particle_set
,
const
int
num_particles
,
const
int
num_frames
,
const
float
*
bounding_box
)
{
srand
(
time
(
NULL
));
particle_set
=
(
float4
*
)
malloc
(
sizeof
(
float4
)
*
num_particles
*
num_frames
);
//file encodes 24 rotations
float
bound_x
=
bounding_box
[
0
];
float
bound_y
=
bounding_box
[
1
];
float
bound_z
=
bounding_box
[
2
];
int
_rand_max
=
10000
;
printf
(
"Generating %i particles in zero frame.
\n
"
,
num_particles
);
for
(
int
p
=
0
;
p
<
num_particles
;
p
++
)
{
particle_set
[
p
].
x
=
scaled_rand
(
_rand_max
,
bound_x
);
particle_set
[
p
].
y
=
scaled_rand
(
_rand_max
,
bound_y
);
particle_set
[
p
].
z
=
scaled_rand
(
_rand_max
,
bound_z
);
}
float4
rotation_quat
;
float4
rotated_particle
;
printf
(
"Rotating them randomly:
\n
"
,
num_particles
);
for
(
int
f
=
1
;
f
<
num_frames
;
f
++
)
{
printf
(
"Rotating frame %i ...
\n
"
,
f
);
rotation_quat
=
random_quat
();
for
(
int
p
=
0
;
p
<
num_particles
;
p
++
)
{
rotate
(
&
particle_set
[
p
],
&
particle_set
[
f
*
num_particles
+
p
],
&
rotation_quat
);
}
}
}
void
AlignmentProtocol
::
_set_reference_frame
(
void
){
// if (_rmf_file_paths.size()>0){
// RMF::FileConstHandle fh = RMF::open_rmf_file_read_only(_rmf_file_paths[0]);
...
...
AlignmentProtocol.h
View file @
f80d09e5
...
...
@@ -10,6 +10,7 @@
#include<vector>
#include<string>
#include <cuda_runtime.h>
//#include<Particles.h>
class
AlignmentProtocol
{
...
...
@@ -19,6 +20,10 @@ public:
void
prepare
(
void
);
void
prepareGPU
(
int
gpu_index
);
void
runOnGPU
(
int
gpu_index
);
void
create_random_test_frames
(
float4
*&
particle_set
,
const
int
num_particles
,
const
int
num_frames
,
const
float
*
bounding_box
);
void
create_test_cube
(
float4
*&
h_particle_set
,
float4
*&
d_particle_set
,
int
&
num_particles
,
int
&
num_frames
);
void
align
(
float4
*
h_particles_4
,
int
frames
,
int
particle_num
);
void
concatRMF
(
std
::
string
outfile_name
);
virtual
~
AlignmentProtocol
();
private:
std
::
string
_search_dir
;
...
...
AlignmentProtocol.o
View file @
f80d09e5
No preview for this file type
Makefile.test
View file @
f80d09e5
...
...
@@ -21,9 +21,9 @@ ifndef DEVICE_ARCH
endif
align
:
nvcc
--gpu-architecture
=
$(DEVICE_ARCH)
--include-path
=
./
$(IMP_INCLUDE)
$(EIGEN_INCLUDE)
--device-c
AlignmentProtocol.cu test.cu
-g
-G
nvcc
--gpu-architecture
=
$(DEVICE_ARCH)
--device-link
AlignmentProtocol.o test.o
--output-file
link.o
g++ AlignmentProtocol.o test.o link.o
-o
align
-lnvidia-ml
-L
/usr/local/cuda-10.0/lib64/
-lcudart
-lcudadevrt
-pthread
$(IMP_LINK)
nvcc
--gpu-architecture
=
$(DEVICE_ARCH)
--include-path
=
./
$(IMP_INCLUDE)
$(EIGEN_INCLUDE)
--device-c
AlignmentProtocol.cu test.cu
-g
-G
-D_GLIBCXX_USE_CXX11_ABI
=
0
nvcc
--gpu-architecture
=
$(DEVICE_ARCH)
--device-link
AlignmentProtocol.o test.o
--output-file
link.o
$(IMP_LINK)
g++ AlignmentProtocol.o test.o link.o
-o
align
-lnvidia-ml
-L
/usr/local/cuda-10.0/lib64/
-lcudart
-lcudadevrt
-lcublas
-lcusolver
-pthread
$(IMP_LINK)
eval
:
GpuDelaunay.o KerDivision.o Splaying.o Star.o PredWrapper.o RandGen.o InputCreator.o predicates.o ThrustWrapper.o KerPredicates.o CRotationGrid.o DelaunayChecker.o
...
...
test.cu
View file @
f80d09e5
...
...
@@ -39,6 +39,10 @@
#include<AlignmentProtocol.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
using
namespace
std
::
chrono_literals
;
BOOST_AUTO_TEST_CASE
(
general_engine_test
)
{
...
...
@@ -46,6 +50,26 @@ BOOST_AUTO_TEST_CASE(general_engine_test)
printf
(
" ============================= ++++++++++++++++++++++++++++++++ ===========================
\n
"
);
AlignmentProtocol
align_protocol
(
"/data/tmp/test_rmfs"
);
align_protocol
.
prepare
();
//test with random frames
int
P
=
1000
;
int
F
=
1000
;
float4
*
h_particle_sets
;
float4
*
d_particle_sets
;
float
bb
[
3
]
=
{
100
,
100
,
100
};
align_protocol
.
create_random_test_frames
(
h_particle_sets
,
P
,
F
,
&
bb
[
0
]);
cudaDeviceSynchronize
();
align_protocol
.
align
(
h_particle_sets
,
F
,
P
);
//test with regular cube
// int P;
// int F;
// float4 * h_particle_sets;
// float4 * d_particle_sets;
// align_protocol.create_test_cube(h_particle_sets,d_particle_sets,P, F);
// cudaDeviceSynchronize();
// align_protocol.align(d_particle_sets,F,P);
// printf("");
// Engine engine;
// engine.startWorkers();
// engine.submitTask(std::shared_ptr<Task>(new Task("Yay0")),0);
...
...
testareal.cu
View file @
f80d09e5
...
...
@@ -28,23 +28,6 @@ std::map<std::string,std::list<size_t>> subunit_map;
std
::
list
<
IMP
::
core
::
RigidBody
>
unique_rbs
;
std
::
list
<
size_t
>
rigid_particle_indices
;
float
*
getRotationMatrix
(
float
*
q
)
{
float
*
matrix
;
matrix
=
(
float
*
)
malloc
(
9
*
sizeof
(
float
));
// 0 1 2
// 3 4 5 --> 0 1 2 3 4 5 6 7 8
// 6 7 8
matrix
[
0
]
=
1
-
2
*
q
[
2
]
*
q
[
2
]
-
2
*
q
[
3
]
*
q
[
3
];
matrix
[
1
]
=
2
*
q
[
1
]
*
q
[
2
]
-
2
*
q
[
0
]
*
q
[
3
];
matrix
[
2
]
=
2
*
q
[
1
]
*
q
[
3
]
+
2
*
q
[
0
]
*
q
[
2
];