Kai Karius / Fitter / Commits

Commit 40019b19, authored Oct 21, 2020 by karius
towards entropy
parent fa740b14
Showing 8 changed files with 469 additions and 328 deletions (+469, -328)
Density.cu              +0    -133
Makefile.test           +2    -1
TransformationGrid.cu   +274  -19
TransformationGrid.h    +9    -0
TransformationGrid.o    +0    -0
ccl_test.cu             +184  -175
so3_sampling.blend      +0    -0
so3_sampling.blend1     +0    -0
Density.cu
@@ -1556,136 +1556,3 @@ void Density::indices_in_range(thrust::device_vector<uint> & td_indices,bool ins
	size = thrust::distance(td_indices.begin(), end_it);
	td_indices.resize(size);
}
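The indices_in_range tail shown above is the usual Thrust stream-compaction idiom: compact into an over-allocated device vector, measure the surviving range with thrust::distance, then shrink the vector. A minimal self-contained sketch of that pattern (the predicate and data here are illustrative, not taken from Density.cu):

#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/sequence.h>
#include <thrust/distance.h>

struct is_even {
	__host__ __device__ bool operator()(unsigned int x) const { return (x & 1u) == 0u; }
};

int main(){
	thrust::device_vector<unsigned int> td_input(100);
	thrust::sequence(td_input.begin(), td_input.end());            // 0,1,...,99
	// Over-allocate the output, compact, then trim to the compacted size.
	thrust::device_vector<unsigned int> td_indices(td_input.size());
	auto end_it = thrust::copy_if(td_input.begin(), td_input.end(),
	                              td_indices.begin(), is_even());
	size_t size = thrust::distance(td_indices.begin(), end_it);
	td_indices.resize(size);                                        // 50 even values remain
	return 0;
}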
//int main(int argc,char * argv[]){
// CUresult cu_res;
// cu_res = cuInit(0);
//
// size_t n = 100000000;
// float * h_test_data = (float *) malloc(n*sizeof(float));
// float * d_test_data;
// cudaMalloc((void **) &d_test_data,n*sizeof(float));
// CudaCheckError();
//
// for (int i=0;i<n;i++){
// h_test_data[i] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
// }
// cudaMemcpy(d_test_data,h_test_data,n*sizeof(float),cudaMemcpyHostToDevice);
// CudaCheckError();
//
// const size_t blockSize = 128;
// unsigned int gridSize = 256;
// printf("Calling kernel\n");
//
// min_max_reduce<blockSize><<<gridSize,blockSize,2*blockSize*sizeof(float)>>>
// (d_test_data,d_test_data + n/2,n/2);
// cudaDeviceSynchronize();
// CudaCheckError();
// min_max_reduce<32><<<1,32,4*32*sizeof(float)>>>
// (d_test_data,d_test_data + n/2,gridSize);
// CudaCheckError();
// printf("Called kernel\n");
//
// std::exit(0);
// Density density = Density::from_mrc("./test/191018_job057_run_class001.mrc");
// uint * d_indices;
// float rho0 = 0.07;
// float rho1 = 0.0;
// size_t inside_indices_size;
// size_t outside_indices_size;
// thrust::device_vector<uint> td_inside_indices;
// thrust::device_vector<uint> td_outside_indices;
//
// //calculate index sets
// density.indices_in_range(td_inside_indices,true,rho0,rho1,inside_indices_size);
// density.indices_in_range(td_outside_indices,false,rho0,rho1,outside_indices_size);
// printf("Inside:%u\nOutside:%u\nSum:%u\n",inside_indices_size,outside_indices_size,outside_indices_size+inside_indices_size);
// uint * d_inside_indices = thrust::raw_pointer_cast(&td_inside_indices[0]);
// uint * d_outside_indices = thrust::raw_pointer_cast(&td_outside_indices[0]);
//
// //prepare distance mapp kernel parameters and resources
// size_t max_tmp_mem = 2*one_gb;
// size_t dm_kernel_block_dim = 1024;
// size_t dm_kernel_shared_mem = sizeof(float4) + dm_kernel_block_dim*sizeof(float);
// size_t blocks_per_from = inside_indices_size/(2*dm_kernel_block_dim) + 1;
// size_t per_from_iter_vol = max_tmp_mem/(blocks_per_from*sizeof(float)) + 1;
// size_t per_from_iter_num = outside_indices_size/per_from_iter_vol + 1;
// size_t dm_kernel_grid_dim = per_from_iter_vol;
// size_t d_tmp_mem_size = dm_kernel_grid_dim * blocks_per_from * sizeof(float) + sizeof(float4);
// float * d_tmp_mem; //for intermediate reduction results
// cudaMalloc((void **) &d_tmp_mem,d_tmp_mem_size);
// CudaCheckError();
// printf("Distance map kernel parameters:\n"
// "dm_kernel_grid_dim: %u \n"
// "dm_kernel_block_dim %u \n"
// "dm_kernel_shared_mem %u \n"
// "d_tmp_mem_size %u \n"
// "blocks_per_from %u \n"
// "Kernel iter volume %u\n"
// "Kernel iter num %u \n",
// dm_kernel_grid_dim,dm_kernel_block_dim,dm_kernel_shared_mem,d_tmp_mem_size,
// blocks_per_from, per_from_iter_vol, per_from_iter_num);
//
// //prepare min in place strided reduction
// size_t mrs_kernel_block_dim = 1024;
// size_t mrs_kernel_partition_size = blocks_per_from;
// size_t mrs_kernel_iter_num = ((size_t) log(mrs_kernel_partition_size)/log(2*mrs_kernel_block_dim)) + 1;
// size_t mrs_kernel_shared_mem_vol = mrs_kernel_block_dim*sizeof(float);
// size_t mrs_kernel_grid_dim = 0;//varies with iteration
// size_t mrs_kernel_blocks_per_partition; //varies with iteration
//
// printf("Min reduce strided in place kernel parameters ...\n"
// "mrs_kernel_block_dim: %u \n"
// "mrs_kernel_partition_size: %u \n"
// "mrs_kernel_iter_num: %u \n"
// "mrs_kernel_shared_mem_vol: %u \n"
// "mrs_kernel_grid_dim: %u \n"
// "mrs_kernel_blocks_per_partition: %u \n",
// mrs_kernel_block_dim,mrs_kernel_partition_size,mrs_kernel_iter_num,mrs_kernel_shared_mem_vol,
// mrs_kernel_grid_dim,mrs_kernel_blocks_per_partition);
//
// //prepare copy_result kernel
// size_t cr_kernel_block_dim = 1024;
// size_t cr_kernel_grid_dim = per_from_iter_vol/cr_kernel_block_dim + 1;
// printf("Copy result kernel parameters ...\n"
// "cr_kernel_block_dim: %u \n"
// "cr_kernel_grid_dim: %u \n",
// cr_kernel_block_dim,cr_kernel_grid_dim);
//
// //prepare distance map density
// Density dm_density = density.empty_copy();
// dm_density.fill_with(0.0);
//
// for (int i=0;i < per_from_iter_num;i++){
// dm_kernel_grid_dim = std::min(per_from_iter_vol,outside_indices_size - i*per_from_iter_vol);
// printf("Entering iteration %i with grid_dim %u\n",i,dm_kernel_grid_dim);
// //distance_map(Density density,uint * d_indices_from, uint * d_indices_to,
// // size_t to_pixel_num, float rho0,float rho1,float * d_tmp_mem,
// // size_t blocks_per_from)
// distance_map<<<dm_kernel_grid_dim,dm_kernel_block_dim,dm_kernel_shared_mem>>>
// (density, d_outside_indices + i*per_from_iter_vol, d_inside_indices,
// inside_indices_size,rho0,rho1,d_tmp_mem,
// blocks_per_from);
// cudaDeviceSynchronize();
// //min reduce the tempmem in place, with stride
// for (int j=0; j < mrs_kernel_iter_num;j++){
// mrs_kernel_blocks_per_partition = mrs_kernel_partition_size/ipow(2*mrs_kernel_block_dim,j+1)+1;
// mrs_kernel_grid_dim = mrs_kernel_blocks_per_partition*per_from_iter_vol;
// printf("Entering reduction iteration %i with grid_dim %u and blocks per partition %u...\n",j,mrs_kernel_grid_dim,mrs_kernel_blocks_per_partition);
// min_reduce_strided<<<mrs_kernel_grid_dim,mrs_kernel_block_dim,mrs_kernel_shared_mem_vol>>>
// (d_tmp_mem,mrs_kernel_partition_size,j);
// cudaDeviceSynchronize();
// }
//
// //void copy_result(float * d_tmp_mem, uint * d_indices, size_t index_vol,
// //Density distance_map, int iteration_index,
// //size_t partition_size,size_t per_from_iter_vol)
// copy_result<<<cr_kernel_grid_dim,cr_kernel_block_dim>>>
// (d_tmp_mem,d_outside_indices,outside_indices_size,
// dm_density,i,
// mrs_kernel_partition_size,per_from_iter_vol);
// cudaDeviceSynchronize();
// }
//
// printf("");
//}
Makefile.test
@@ -19,7 +19,8 @@ eval: GpuDelaunay.o KerDivision.o Splaying.o Star.o PredWrapper.o RandGen.o Inpu
	$(NVCC) --gpu-architecture=sm_61 --include-path=./,./gDel3D/GPU/ CRotationGrid.o InputCreator.o RandGen.o DelaunayChecker.o KerPredicates.o Splaying.o Star.o predicates.o ThrustWrapper.o PredWrapper.o KerDivision.o GpuDelaunay.o TransformationGrid.cu -o eval

ccl: CMrcReader.o
	$(NVCC) --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c Labeler.cu Density.cu Kernels.cu Particles.cu TransformationGrid.cu CRotationGrid.cu ccl_test.cu -g -G
#	$(NVCC) --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c Labeler.cu Density.cu Kernels.cu Particles.cu TransformationGrid.cu CRotationGrid.cu ccl_test.cu -g -G
	$(NVCC) --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c TransformationGrid.cu ccl_test.cu -g -G
	$(NVCC) --gpu-architecture=sm_61 --device-link Density.o ccl_test.o Kernels.o Labeler.o Particles.o PdbReader.o TransformationGrid.o CRotationGrid.o --output-file link.o
	g++ Density.o Particles.o PdbReader.o CMrcReader.o Kernels.o TransformationGrid.o Labeler.o CRotationGrid.o ccl_test.o link.o -o ccl_test -L. $(CUDA_LINKS) $(BOOST_LINKS) -lpng -lnvidia-ml
TransformationGrid.cu
@@ -12,7 +12,7 @@ void threeaxisrot(double r11, double r12, double r21, double r31, double r32, fl
	res.y = (float) asin(r21);
	res.z = (float) atan2(r11, r12);
}
__host__ void TransformationGrid::h_quat_to_euler(const float4 & q, float3 & e){
	//body 3-2-1 convention
	threeaxisrot(2*(q.w*q.y + q.x*q.z),
@@ -22,6 +22,51 @@ void TransformationGrid::h_quat_to_euler(const float4 & q,float3 &e){
			q.x*q.x - q.w*q.w - q.y*q.y + q.z*q.z,
			e);
}
template <typename T> int sgn(T val) {
	return (T(0) < val) - (val < T(0));
}
float copy_sign(float x, float y){
	return sgn<float>(y)*abs(x);
}
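Aside: for host-only use the standard library already provides this operation; std::copysign from <cmath> returns the magnitude of its first argument with the sign of its second (the one behavioural difference is that copy_sign above returns 0 when y == 0). A sketch of the equivalent helper, given only for comparison:

#include <cmath>

// Host-side equivalent of copy_sign using the standard library.
inline float copy_sign_std(float x, float y){
	return std::copysign(x, y);   // |x| with the sign of y
}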
__host__ void TransformationGrid::h_matrix_to_quat(float * const & m, float4 & q){
	//https://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
	float t = m[0]*m[0] + m[4]*m[4] + m[8]*m[8];
	float r = sqrt(1 + t);
	q.w = 0.5*r;
	q.x = 0.5*(copy_sign(0.5*sqrt(1 + m[0]*m[0] - m[4]*m[4] - m[8]*m[8]), m[7] - m[5]));
	q.y = 0.5*(copy_sign(0.5*sqrt(1 - m[0]*m[0] + m[4]*m[4] + m[8]*m[8]), m[2] - m[6]));
	q.z = 0.5*(copy_sign(0.5*sqrt(1 - m[0]*m[0] - m[4]*m[4] + m[8]*m[8]), m[3] - m[1]));
}
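For comparison, the formula on the cited Wikipedia page uses the plain diagonal entries (the trace) rather than their squares and a single 0.5 factor per component; whether the variant above is intentional is not clear from the diff. A sketch of that reference version, assuming the same row-major 3x3 layout m[0..8], offered only for cross-checking and not as the author's method:

#include <cmath>
#include <vector_types.h>   // float4

// Reference quaternion-from-rotation-matrix (Wikipedia convention), row-major m[0..8].
void matrix_to_quat_reference(const float * m, float4 & q){
	q.w = 0.5f*sqrtf(1.0f + m[0] + m[4] + m[8]);                           // 1 + trace
	q.x = copysignf(0.5f*sqrtf(1.0f + m[0] - m[4] - m[8]), m[7] - m[5]);
	q.y = copysignf(0.5f*sqrtf(1.0f - m[0] + m[4] - m[8]), m[2] - m[6]);
	q.z = copysignf(0.5f*sqrtf(1.0f - m[0] - m[4] + m[8]), m[3] - m[1]);
}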
__host__ void TransformationGrid::read_matrix_SO3_sampling(const char * file_name, float4 *& h_rotations, uint *& h_num_rotations){
	std::ifstream fh(file_name);
	std::string line;
	//skip first line
	std::getline(fh, line);
	std::getline(fh, line);
	std::istringstream iss(line);
	float num_rot;
	iss >> num_rot;
	*h_num_rotations = num_rot;
	h_rotations = (float4 *) malloc(h_num_rotations[0]*sizeof(*h_rotations));
	float * curr_matrix = (float *) malloc(9*sizeof(*curr_matrix));
	float4 curr_q;
	int c = 0;
	while (std::getline(fh, line)){
		std::istringstream iss(line);
		for (int i = 0; i < 9; i++) iss >> curr_matrix[i];
		TransformationGrid::h_matrix_to_quat(curr_matrix, curr_q);
		h_rotations[c] = curr_q;
		c++;
	}
}
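From the parsing above, the sampling file is expected to contain a header line, then a line with the number of rotations, then one row-major 3x3 matrix (nine floats) per line. A small sketch that writes such a file with identity matrices, purely for testing; the file layout is inferred from the reader and the helper itself is not part of the commit:

#include <fstream>

// Write a dummy SO(3) sampling file in the layout read_matrix_SO3_sampling expects:
// header line, rotation count, then nine row-major floats per line.
void write_dummy_so3_file(const char * file_name, int num_rot){
	std::ofstream out(file_name);
	out << "# SO3 sampling (dummy)\n";
	out << num_rot << "\n";
	for (int i = 0; i < num_rot; i++)
		out << "1 0 0 0 1 0 0 0 1\n";   // identity rotation
}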
__device__ void TransformationGrid::d_euler_to_quat(const float3 & euler, float4 & quat){
	//body 3-2-1 convention
@@ -105,7 +150,6 @@ TransformationGrid::TransformationGrid(float * const& translation_offset){ // @s
	cudaMemcpy(d_TEST, h_TEST, sizeof(bool), cudaMemcpyHostToDevice);
}
TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
		const std::pair<float3,float3> & bounding_box_inner,
		float * const & translation_offset,
		const float4 & angle_box,
		const float & alpha,
		const float & tau_in){
@@ -232,35 +276,145 @@ TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding
	h_scores = thrust::raw_pointer_cast(&th_scores[0]);
	d_scores = thrust::raw_pointer_cast(&td_scores[0]);
};
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char * file, int line, bool abort=true)
{
	if (code != cudaSuccess)
	{
		fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
		if (abort) exit(code);
	}
}
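gpuErrchk is the common wrap-every-CUDA-call error-checking pattern; a minimal usage sketch (the allocation below is illustrative only):

float * d_buffer;
gpuErrchk(cudaMalloc((void **) &d_buffer, 1024*sizeof(float)));
gpuErrchk(cudaMemset(d_buffer, 0, 1024*sizeof(float)));
// Kernel launches return void, so check the launch and the sync separately:
// gpuErrchk(cudaGetLastError());
// gpuErrchk(cudaDeviceSynchronize());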
TransformationGrid::TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
		const std::pair<float3,float3> & bounding_box_inner,
		float * const & translation_offset,
		float tau_in,
		const char * rotation_samples_file){
	h_TEST = (bool *) malloc(sizeof(bool));
	*h_TEST = true;
	cudaMalloc((void **) &d_TEST, sizeof(bool));
	cudaMemcpy(d_TEST, h_TEST, sizeof(bool), cudaMemcpyHostToDevice);
	//allocate host and device buffers
	h_translation_index_dim = (int3 *) malloc(sizeof(*h_translation_index_dim));
	cudaMalloc((void **) &d_translation_index_dim, sizeof(*d_translation_index_dim));
	h_translation_total_shift = (float3 *) malloc(sizeof(*h_translation_total_shift));
	cudaMalloc((void **) &d_translation_total_shift, sizeof(*d_translation_total_shift));
	h_translation_step_shift = (float3 *) malloc(sizeof(*h_translation_total_shift));
	cudaMalloc((void **) &d_translation_step_shift, sizeof(*d_translation_step_shift));
	h_translation_offset = (float4 *) malloc(sizeof(*h_translation_offset));
	cudaMalloc((void **) &d_translation_offset, sizeof(*d_translation_offset));
	h_num_translations = (uint *) malloc(sizeof(*h_num_translations));
	cudaMalloc((void **) &d_num_translations, sizeof(*h_num_translations));
	h_num_rotations = (uint *) malloc(sizeof(*h_num_rotations));
	cudaMalloc((void **) &d_num_rotations, sizeof(*d_num_rotations));
	h_num_transformations = (uint *) malloc(sizeof(*h_num_transformations));
	cudaMalloc((void **) &d_num_transformations, sizeof(*d_num_transformations));
	tau = tau_in;
	CudaCheckError();
	printf("Initiating TransformationGrid with bounding boxes: ...\n");
	printf("Inner bounding box: %f,%f,%f - %f,%f,%f ...\n",
			bounding_box_inner.first.x, bounding_box_inner.first.y, bounding_box_inner.first.z,
			bounding_box_inner.second.x, bounding_box_inner.second.y, bounding_box_inner.second.z);
	printf("Outer bounding box: %f,%f,%f - %f,%f,%f ...\n",
			bounding_box_outer.first.x, bounding_box_outer.first.y, bounding_box_outer.first.z,
			bounding_box_outer.second.x, bounding_box_outer.second.y, bounding_box_outer.second.z);
	float3 bounding_box_inner_vol = bounding_box_inner.second - bounding_box_inner.first;
	float3 bounding_box_outer_vol = bounding_box_outer.second - bounding_box_outer.first;
	*h_translation_total_shift = bounding_box_outer_vol - bounding_box_inner_vol;
	cudaMemcpy(d_translation_total_shift, h_translation_total_shift, sizeof(*h_translation_total_shift), cudaMemcpyHostToDevice);
	CudaCheckError();
	//just try the middle
	h_translation_index_dim->x = 0;
	h_translation_index_dim->y = 0;
	h_translation_index_dim->z = 0;
	cudaMemcpy(d_translation_index_dim, h_translation_index_dim, sizeof(*h_translation_index_dim), cudaMemcpyHostToDevice);
	CudaCheckError();
	*h_num_translations = 1;
	cudaMemcpy(d_num_translations, h_num_translations, sizeof(uint), cudaMemcpyHostToDevice);
	CudaCheckError();
//	h_translation_index_dim->x = (int) (h_translation_total_shift->x/tau);
//	h_translation_index_dim->y = (int) (h_translation_total_shift->y/tau);
//	h_translation_index_dim->z = (int) (h_translation_total_shift->z/tau);
//	cudaMemcpy(d_translation_index_dim,h_translation_index_dim,sizeof(*h_translation_index_dim),cudaMemcpyHostToDevice);
//	*h_num_translations = h_translation_index_dim->x + h_translation_index_dim->y + h_translation_index_dim->z;
//	cudaMemcpy(d_num_translations,h_num_translations,sizeof(uint),cudaMemcpyHostToDevice);
	//option for anisotropic tau
	h_translation_step_shift->x = tau;
	h_translation_step_shift->y = tau;
	h_translation_step_shift->z = tau;
	cudaMemcpy(d_translation_step_shift, h_translation_step_shift, sizeof(*h_translation_step_shift), cudaMemcpyHostToDevice);
	CudaCheckError();
	h_translation_offset->x = translation_offset[0];
	h_translation_offset->y = translation_offset[1];
	h_translation_offset->z = translation_offset[2];
	h_translation_offset->w = 0.0;
	cudaMemcpy(d_translation_offset, h_translation_offset, sizeof(*h_translation_offset), cudaMemcpyHostToDevice);
	CudaCheckError();
	//rotations
	TransformationGrid::read_matrix_SO3_sampling(rotation_samples_file, h_rotations, h_num_rotations);
	cudaMemcpy(d_num_rotations, h_num_rotations, sizeof(uint), cudaMemcpyHostToDevice);
	CudaCheckError();
	cudaMalloc((void **) &d_rotations, h_num_rotations[0]*sizeof(float4));
	cudaMemcpy(d_rotations, h_rotations, h_num_rotations[0]*sizeof(float4), cudaMemcpyHostToDevice);
	CudaCheckError();
	*h_num_transformations = h_num_translations[0]*h_num_rotations[0];
	cudaMemcpy(d_num_transformations, h_num_transformations, sizeof(uint), cudaMemcpyHostToDevice);
	CudaCheckError();
//	td_scores.resize(h_num_transformations[0]);
//	th_scores.resize(h_num_transformations[0]);
//	h_scores = thrust::raw_pointer_cast(&th_scores[0]);
//	d_scores = thrust::raw_pointer_cast(&td_scores[0]);
	h_scores = (float *) malloc(h_num_transformations[0]*sizeof(*h_scores));
	cudaMalloc((void **) &d_scores, h_num_transformations[0]*sizeof(*d_scores));
	CudaCheckError();
};
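A usage fragment for this new constructor, assuming the Density and Particles objects set up in the kai_test case further below and a rotation-sample file in the format parsed above; the file name is made up and this is not code from the commit:

float offset[3] = {0.0f, 0.0f, 0.0f};
std::pair<float3,float3> bb_outer = density.get_bounding_box();
std::pair<float3,float3> bb_inner = pixel_coords.get_rotationally_safe_bounding_box();
TransformationGrid grid(bb_outer, bb_inner, &offset[0], 1.0f /*tau_in*/, "so3_sampling.txt");
printf("Transformation num: %u\n", grid.h_num_transformations[0]);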
__device__ int4 TransformationGrid::d_transformation_index_from_linear(const uint & linear_index){
	int4 ret;
	//number of rotations
	ret.w = (int) linear_index/d_num_translations[0];
	ret.z = (int) (linear_index - ret.w*d_num_translations[0])/(d_translation_index_dim->x*d_translation_index_dim->y);
	ret.y = (int) (linear_index - ret.w*d_num_translations[0] - ret.z*d_translation_index_dim->x*d_translation_index_dim->y)/(d_translation_index_dim->x);
	ret.x = (int) (linear_index - ret.w*d_num_translations[0] - ret.z*d_translation_index_dim->x*d_translation_index_dim->y - ret.y*d_translation_index_dim->x);
	//inner bounding box and outer bounding box are assumed to coincide in their respective centers
	ret.x -= d_translation_index_dim->x/2;
	ret.y -= d_translation_index_dim->y/2;
	ret.z -= d_translation_index_dim->z/2;
	if (d_num_translations[0] > 1){
		ret.z = (int) (linear_index - ret.w*d_num_translations[0])/(d_translation_index_dim->x*d_translation_index_dim->y);
		ret.y = (int) (linear_index - ret.w*d_num_translations[0] - ret.z*d_translation_index_dim->x*d_translation_index_dim->y)/(d_translation_index_dim->x);
		ret.x = (int) (linear_index - ret.w*d_num_translations[0] - ret.z*d_translation_index_dim->x*d_translation_index_dim->y - ret.y*d_translation_index_dim->x);
		//inner bounding box and outer bounding box are assumed to coincide in their respective centers
		ret.x -= d_translation_index_dim->x/2;
		ret.y -= d_translation_index_dim->y/2;
		ret.z -= d_translation_index_dim->z/2;
	}
	else {
		ret.x = 0;
		ret.y = 0;
		ret.z = 0;
	}
	return ret;
}
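The decode above is a mixed-radix decomposition of the linear transformation index into a rotation index (w) and a translation triple (x,y,z). A small host-side sketch of the same arithmetic with made-up dimensions, handy for sanity-checking the index math; it is not part of the commit:

#include <cstdio>

int main(){
	// Assumed dimensions (illustrative): 4 x 3 x 2 translation grid, so 24 translations.
	const unsigned int dim_x = 4, dim_y = 3, dim_z = 2;
	const unsigned int num_translations = dim_x*dim_y*dim_z;
	const unsigned int linear_index = 57;                        // arbitrary transformation index

	unsigned int w = linear_index/num_translations;              // rotation index
	unsigned int rem = linear_index - w*num_translations;
	unsigned int z = rem/(dim_x*dim_y);
	unsigned int y = (rem - z*dim_x*dim_y)/dim_x;
	unsigned int x = rem - z*dim_x*dim_y - y*dim_x;
	printf("rotation %u, translation (%u,%u,%u)\n", w, x, y, z); // rotation 2, translation (1,2,0)
	return 0;
}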
__device__ __host__ int4 TransformationGrid::h_transformation_index_from_linear(const uint & linear_index){
	int4 ret;
	//number of rotations
	ret.w = (int) linear_index/h_num_translations[0];
	ret.z = (int) (linear_index - ret.w*h_num_translations[0])/(h_translation_index_dim->x*h_translation_index_dim->y);
	ret.y = (int) (linear_index - ret.w*h_num_translations[0] - ret.z*h_translation_index_dim->x*h_translation_index_dim->y)/(h_translation_index_dim->x);
	ret.x = (int) (linear_index - ret.w*h_num_translations[0] - ret.z*h_translation_index_dim->x*h_translation_index_dim->y - ret.y*h_translation_index_dim->x);
	//corresponds to the case of just one translation, the identity
	if (h_num_translations[0] > 1){
		ret.z = (int) (linear_index - ret.w*h_num_translations[0])/(h_translation_index_dim->x*h_translation_index_dim->y);
		ret.y = (int) (linear_index - ret.w*h_num_translations[0] - ret.z*h_translation_index_dim->x*h_translation_index_dim->y)/(h_translation_index_dim->x);
		ret.x = (int) (linear_index - ret.w*h_num_translations[0] - ret.z*h_translation_index_dim->x*h_translation_index_dim->y - ret.y*h_translation_index_dim->x);
		//inner bounding box and outer bounding box are assumed to coincide in their respective centers
		ret.x -= h_translation_index_dim->x/2;
		ret.y -= h_translation_index_dim->y/2;
		ret.z -= h_translation_index_dim->z/2;
		ret.x -= h_translation_index_dim->x/2;
		ret.y -= h_translation_index_dim->y/2;
		ret.z -= h_translation_index_dim->z/2;
	}
	else {
		ret.x = 0;
		ret.y = 0;
		ret.z = 0;
	}
//	int r = i/(*d_n_rotations);
	return ret;
}
@@ -341,7 +495,7 @@ void TransformationGrid::d_transformation_to_memory(const uint& linear_index, fl
__host__ void TransformationGrid::h_transformation_to_memory(const uint & linear_index, float4 * const & rotation, float4 * const & translation, float4 * const & translation_offset){
	if (!h_TEST[0]){
	if (true){
//		int r = i/(*d_n_rotations);
//		int t =
		int4 transformation_index = h_transformation_index_from_linear(linear_index);
@@ -672,6 +826,20 @@ struct normalize_chamfer: public thrust::unary_function<float,float>
	}
};
struct normalize_envelope: public thrust::unary_function<float,float>
{
//	thrust::pair<thrust::device_vector<float>::iterator,thrust::device_vector<float>::iterator> d_minmax;
	float min;
	float max;
	__device__
	//norm = lambda u: 1/(_max - _min)*u - _min/(_max - _min)
	//since the chamfer score maps bad solutions to high scores and good solutions to low scores
	//exchange min and max
	float operator()(const float & u) const
	{
		return u/(max - min) - min/(max - min);
	}
};
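A sketch of how such a min/max normalization functor is typically applied with Thrust; the minmax_element call and the wrapper function are illustrative and not taken from normalize_chamfer_score:

#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <thrust/transform.h>

// Assumes the normalize_envelope functor defined above.
void normalize_scores(thrust::device_vector<float> & td_scores){
	auto mm = thrust::minmax_element(td_scores.begin(), td_scores.end());
	normalize_envelope op;
	op.min = *mm.first;    // dereferencing a device iterator copies the value to the host
	op.max = *mm.second;
	thrust::transform(td_scores.begin(), td_scores.end(), td_scores.begin(), op);
}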
__host__ void TransformationGrid::normalize_chamfer_score(){
	//currently, fast enough. host <--> device transfer is unnecessary
@@ -685,6 +853,93 @@ void TransformationGrid::normalize_chamfer_score(){
	d_scores = thrust::raw_pointer_cast(&td_scores[0]);
}
template <size_t block_size>
__global__ void float_min_max(float * d_data, size_t num_data, float * d_min, float * d_max){
	__shared__ float reduce[2*block_size];
	float * _min = &reduce[0];
	float * _max = &reduce[block_size];
	int num_blocks = num_data/blockDim.x + 1;
	//initiate
	if (blockIdx.x == 0){
		_min[threadIdx.x] = FLT_MAX;
		_max[threadIdx.x] = -FLT_MAX;
	}
	__syncthreads();
	//gather
	int b = blockIdx.x;
	while (b < num_blocks){
		int t = b*blockDim.x + threadIdx.x;
		while (t < num_data){
			printf("%i\n", t);
			_min[threadIdx.x] = fminf(_min[threadIdx.x], d_data[t]);
			_max[threadIdx.x] = fmaxf(_max[threadIdx.x], d_data[t]);
			t += blockDim.x;
		}
		b += gridDim.x;
	}
	__syncthreads();
	//reduce - max blocksize 128 hardcoded
	if (threadIdx.x < 64){
		_min[threadIdx.x] = fminf(_min[threadIdx.x], _min[threadIdx.x + 64]);
		_max[threadIdx.x] = fmaxf(_max[threadIdx.x], _max[threadIdx.x + 64]);
	}
	__syncthreads();
	if (threadIdx.x < 32){
		_min[threadIdx.x] = fminf(_min[threadIdx.x], _min[threadIdx.x + 32]);
		_max[threadIdx.x] = fmaxf(_max[threadIdx.x], _max[threadIdx.x + 32]);
	}
	__syncthreads();
	if (threadIdx.x < 16){
		_min[threadIdx.x] = fminf(_min[threadIdx.x], _min[threadIdx.x + 16]);
		_max[threadIdx.x] = fmaxf(_max[threadIdx.x], _max[threadIdx.x + 16]);
	}
	__syncthreads();
	if (threadIdx.x < 8){
		_min[threadIdx.x] = fminf(_min[threadIdx.x], _min[threadIdx.x + 8]);
		_max[threadIdx.x] = fmaxf(_max[threadIdx.x], _max[threadIdx.x + 8]);
	}
	__syncthreads();
	if (threadIdx.x < 4){
		_min[threadIdx.x] = fminf(_min[threadIdx.x], _min[threadIdx.x + 4]);
		_max[threadIdx.x] = fmaxf(_max[threadIdx.x], _max[threadIdx.x + 4]);
	}
	__syncthreads();
	if (threadIdx.x < 2){
		_min[threadIdx.x] = fminf(_min[threadIdx.x], _min[threadIdx.x + 2]);
		_max[threadIdx.x] = fmaxf(_max[threadIdx.x], _max[threadIdx.x + 2]);
	}
	__syncthreads();
	d_min[0] = fminf(_min[1], _min[0]);
	d_max[0] = fmaxf(_max[1], _max[0]);
}
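For reference, the same global min/max can be obtained without a hand-rolled shared-memory reduction by letting Thrust operate directly on the raw device pointer; a host-side sketch that is not part of this commit:

#include <thrust/device_ptr.h>
#include <thrust/extrema.h>

// Global min/max over a raw device array via Thrust (host-callable alternative).
void device_min_max(float * d_data, size_t num_data, float & h_min, float & h_max){
	thrust::device_ptr<float> p(d_data);
	auto mm = thrust::minmax_element(p, p + num_data);
	h_min = *mm.first;
	h_max = *mm.second;
}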
template <size_t block_size>
__global__ void shift_and_scale_envelope_score_kernel(float * d_score, float * d_scaled_score, size_t num_scores){
	__shared__ float _min[1];
	__shared__ float _max[1];
	if (threadIdx.x == 0 && blockIdx.x == 0){
		float_min_max<block_size><<<32, block_size>>>(d_score, num_scores, _min, _max);
	}
	__syncthreads();
	cudaDeviceSynchronize();
	printf("");
}
__host__ void TransformationGrid::shift_and_scale_envelope_score(float * d_score, float * d_normalized_score, size_t num_scores){
	shift_and_scale_envelope_score_kernel<128><<<1,1>>>(d_score, d_normalized_score, num_scores);
}
TransformationGrid::~TransformationGrid() {
	// TODO Auto-generated destructor stub
}
TransformationGrid.h
@@ -35,6 +35,10 @@
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <util.h>
//#include "gDel3D/GpuDelaunay.h"
//
@@ -47,12 +51,17 @@ public:
	__device__ static void d_euler_to_quat(const float3 & euler, float4 & quat);
	__host__ static void h_euler_to_quat(const float3 & euler, float4 & quat);
	__host__ static void h_quat_to_euler(const float4 & rotations_quat, float3 & rotations_euler);
	__host__ static void h_matrix_to_quat(float * const & m, float4 & quaternion);
	__host__ static void read_matrix_SO3_sampling(const char * file_name, float4 *& h_rotations, uint *& h_num_rotations);
	__host__ static void shift_and_scale_envelope_score(float * d_score, float * d_normalized_score, size_t num_scores);
	TransformationGrid(const std::pair<float3,float3> & bounding_box_outer,
			const std::pair<float3,float3> & bounding_box_inner,
			float * const & translation_offset,
			const float4 & angle_box,
			const float & alpha,
			const float & tau_in);
	//Testgrid constructor
	TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
			const std::pair<float3,float3> & bounding_box_inner,
			float * const & translation_offset,
			float tau_in,
			uint n_testrots);
	//empty grid with only one transformation (identity) for test purposes
	TransformationGrid(const std::pair<float3,float3> & bounding_box_outer, // @suppress("Class members should be properly initialized")
			const std::pair<float3,float3> & bounding_box_inner,
			float * const & translation_offset,
			float tau_in,
			const char * rotation_samples_file);
	TransformationGrid(float * const & translation_offset);
	virtual ~TransformationGrid();
	__host__ void print_transformations(void);
TransformationGrid.o
No preview for this file type
ccl_test.cu
@@ -403,116 +403,116 @@ void kai_score(float4 * d_coords, int * d_segmentations, uint * d_labels, float
}
}
BOOST_AUTO_TEST_CASE(kai_test){
	printf(" ============================= Testing Kai score ===============================\n");
	printf(" ============================= +++++++++++++++++ ===============================\n");
	//memory management
	nvmlInit_v2();
	//checkout free memory
	nvmlMemory_t memory_info;
	nvmlDevice_t device_handle;
	nvmlDeviceGetHandleByIndex_v2(0, &device_handle);
	nvmlDeviceGetMemoryInfo(device_handle, &memory_info);
	float coord_dim_outer[3] = {70,70,70};
	float coord_dim[3] = {50,50,50};
	float distance_threshold = 0.5;
	uint pixel_dim_outer[3] = {70,70,70};
	uint pixel_dim[3] = {50,50,50};
	uint3 cube_pixel_dim = {30,30,30};
	Density density;
	Density::from_bounds(density, &coord_dim_outer[0], &pixel_dim_outer[0]);
	density.init_device();
	density.to_device();
	density.mid_cube_with_surface(cube_pixel_dim, 1, 1.0, 1.5);
	Density to_fit_density;
	Density::from_bounds(to_fit_density, &coord_dim[0], &pixel_dim[0]);
	to_fit_density.mid_cube_with_surface(cube_pixel_dim, 1, 1.0, 1.5);
	thrust::device_vector<uint> td_on_surface;
	size_t on_surface_num;
	density.indices_in_range(td_on_surface, true, 1.25, 2, on_surface_num);
	thrust::device_vector<uint> td_on_surface_to_fit;
	size_t on_surface_num_to_fit;
	to_fit_density.indices_in_range(td_on_surface_to_fit, true, 1.25, 2, on_surface_num_to_fit);
	Particles pixel_coords = to_fit_density.particles_from_density(td_on_surface_to_fit);
	pixel_coords.origin_to_center();
	Density difference_map;
	density.difference_map_density_threshold(difference_map, 1.4, 1.6, td_on_surface);
	CudaCheckError();
	float tau_in = 1.0;
	uint n_rot_test = 30;
	std::pair<float3,float3> bounding_box_outer = density.get_bounding_box();
	std::pair<float3,float3> bounding_box_inner = pixel_coords.get_rotationally_safe_bounding_box();
	TransformationGrid transformation_grid(bounding_box_outer, bounding_box_inner,
			difference_map.h_mid_point, tau_in, n_rot_test);
	printf("Transformation num: %u\n", transformation_grid.h_num_transformations[0]);
	printf("Translation num: %u\n", transformation_grid.h_num_translations[0]);
	printf("Translational shift: %f %f %f\n",
			transformation_grid.h_translation_step_shift->x,
			transformation_grid.h_translation_step_shift->y,
			transformation_grid.h_translation_step_shift->z);
	printf("Rotation num: %u\n", transformation_grid.h_num_rotations[0]);
	//determine padding
	uint max_dim = std::max(difference_map.h_pixel_dim[0], density.h_pixel_dim[1]);
	max_dim = std::max(max_dim, difference_map.h_pixel_dim[2]);
	uint log_2_dim = ipow(2, (int)(std::log((float) max_dim)/std::log(2)) + 1);
	uint * h_pixel_dim_padding = (uint *) malloc(3*sizeof(*h_pixel_dim_padding));
	uint * d_pixel_dim_padding;
	cudaMalloc((void **) &d_pixel_dim_padding, sizeof(*d_pixel_dim_padding)*3);
	for (int i = 0; i < 3; i++) h_pixel_dim_padding[i] = log_2_dim;