Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Kai Karius
Fitter
Commits
e059d41d
Commit
e059d41d
authored
Nov 25, 2020
by
karius
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
first complete 6d quadrature
parent
52d6f660
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
57 additions
and
38 deletions
+57
-38
TransformationGrid.cu
TransformationGrid.cu
+57
-38
TransformationGrid.o
TransformationGrid.o
+0
-0
No files found.
TransformationGrid.cu
View file @
e059d41d
...
...
@@ -160,7 +160,7 @@ TransformationGrid::TransformationGrid(const std::pair<float3,float3> bounding_b
cudaMemcpy
(
d_translation_offset
,
h_translation_offset
,
sizeof
(
*
h_translation_offset
),
cudaMemcpyHostToDevice
);
CudaCheckError
();
//rotations
TransformationGrid
::
read_matrix_SO3_sampling
(
"rot_samples/N
14_M126
0_IcoC7.dat"
,
h_rotations
,
h_rotations_euler
,
h_num_rotations
);
TransformationGrid
::
read_matrix_SO3_sampling
(
"rot_samples/N
23_M588
0_IcoC7.dat"
,
h_rotations
,
h_rotations_euler
,
h_num_rotations
);
cudaMemcpy
(
d_num_rotations
,
h_num_rotations
,
sizeof
(
uint
),
cudaMemcpyHostToDevice
);
//allocate and populate rotational nodes
cudaMalloc
((
void
**
)
&
d_rotations
,
h_num_rotations
[
0
]
*
sizeof
(
float4
));
...
...
@@ -177,13 +177,18 @@ TransformationGrid::TransformationGrid(const std::pair<float3,float3> bounding_b
__host__
void
TransformationGrid
::
quadrature_test_function
(
void
){
//being the volume of the search space
float
translation_normalization
=
h_translation_total_shift
->
x
*
h_translation_total_shift
->
y
*
h_translation_total_shift
->
z
;
float
translation_normalization
=
1
/
(
h_translation_total_shift
->
x
*
h_translation_total_shift
->
y
*
h_translation_total_shift
->
z
)
;
//being 8*pi^2 for the euler parametrisation of SO(3)
float
rotation_normalization
=
2
/
M_PI
;
//normalization for 6D space
// float six_d_normalization_inv = 1/(translation_normalization*rotation_normalization);
float
six_d_normalization_inv
=
rotation_normalization
;
for
(
uint
i
=
0
;
i
<
h_num_transformations
[
0
];
i
++
){
h_scores
[
i
]
=
six_d_normalization_inv
/
sinf
(
h_rotations_euler
[
i
%
h_num_rotations
[
0
]].
y
);}
float
six_d_normalization
=
translation_normalization
*
rotation_normalization
;
// float six_d_normalization_inv = rotation_normalization;
int
rot_index
;
int
trans_index
;
for
(
uint
i
=
0
;
i
<
h_num_transformations
[
0
];
i
++
){
rot_index
=
i
/
h_num_translations
[
0
];
h_scores
[
i
]
=
six_d_normalization
/
sinf
(
h_rotations_euler
[
rot_index
].
y
);
}
cudaMemcpy
(
d_scores
,
h_scores
,
h_num_transformations
[
0
]
*
sizeof
(
*
h_scores
),
cudaMemcpyHostToDevice
);
CudaCheckError
();
}
...
...
@@ -337,17 +342,23 @@ void TransformationGrid::print_quadrature_weights(void){
}
template
<
uint
blockSize
>
template
<
uint
blockSize
,
bool
secondary
>
__global__
void
translation_quadrature
(
TransformationGrid
grid
,
float
*
d_tmp_quadrature
){
__shared__
float
reduce
[
blockSize
];
uint
num_scores
=
grid
.
d_num_transformations
[
0
];
if
(
secondary
){
num_scores
=
grid
.
d_num_translations
[
0
];
}
uint
grid_dim
=
blockDim
.
x
*
gridDim
.
x
;
uint
num_blocks
=
num_scores
/
blockDim
.
x
+
1
;
float
*
d_scores
=
grid
.
d_scores
;
float
*
d_scores
;
if
(
secondary
){
d_scores
=
grid
.
d_tmp_rot_quadrature_scores
;
}
else
{
d_scores
=
grid
.
d_scores
;
}
uint
t
;
int4
transformation_index
;
float
weight
;
...
...
@@ -360,6 +371,8 @@ void translation_quadrature(TransformationGrid grid, float * d_tmp_quadrature){
weight
*=
grid
.
d_chebychev_weights
[
grid
.
d_translation_refinement_dim
[
0
]
+
transformation_index
.
y
];
weight
*=
grid
.
d_chebychev_weights
[
grid
.
d_translation_refinement_dim
[
0
]
+
grid
.
d_translation_refinement_dim
[
1
]
+
transformation_index
.
z
];
reduce
[
threadIdx
.
x
]
+=
weight
*
d_scores
[
t
];
// printf("%f\n",d_scores[t]);
// reduce[threadIdx.x] += weight*0.9966911009097003;
t
+=
grid_dim
;
}
__syncthreads
();
...
...
@@ -368,10 +381,10 @@ void translation_quadrature(TransformationGrid grid, float * d_tmp_quadrature){
if
(
threadIdx
.
x
<
64
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
64
];
__syncthreads
();
if
(
threadIdx
.
x
<
32
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
32
];
__syncthreads
();
if
(
threadIdx
.
x
<
16
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
16
];
__syncthreads
();
if
(
threadIdx
.
x
<
8
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
8
];
__syncthreads
();
if
(
threadIdx
.
x
<
4
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
4
];
__syncthreads
();
if
(
threadIdx
.
x
<
2
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
2
];
__syncthreads
();
if
(
threadIdx
.
x
<
1
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
1
];
__syncthreads
();
if
(
threadIdx
.
x
<
8
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
8
];
__syncthreads
();
if
(
threadIdx
.
x
<
4
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
4
];
__syncthreads
();
if
(
threadIdx
.
x
<
2
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
2
];
__syncthreads
();
if
(
threadIdx
.
x
<
1
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
1
];
__syncthreads
();
//safe block result in tmp mem, supposed to hold grid_dim values
d_tmp_quadrature
[
blockIdx
.
x
]
=
reduce
[
0
];
__syncthreads
();
...
...
@@ -384,17 +397,15 @@ void rotation_quadrature(TransformationGrid grid){
uint
num_translations
=
grid
.
d_num_translations
[
0
];
uint
num_rotations
=
grid
.
d_num_rotations
[
0
];
//valid for t-designs
float
normalize_f
=
1
/
(
__uint2float_rn
(
grid
.
d_num_rotations
[
0
]));
float
*
d_scores
=
grid
.
d_scores
;
float
*
d_reduced_scores
=
grid
.
d_tmp_rot_quadrature_scores
;
uint
t
;
uint
b
=
blockIdx
.
x
;
reduce
[
threadIdx
.
x
]
=
0.0
;
__syncthreads
();
t
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
b
<
num_translations
){
t
=
threadIdx
.
x
;
reduce
[
threadIdx
.
x
]
=
0.0
;
__syncthreads
();
while
(
t
<
num_rotations
){
//order of scores is: (r0,t0);(r0,t1);(r0,t2); ... ;(r0,t_{N_t-1});
// (r1,t0);(r1,t1);(r1,t2); ... ;(r1,t_{N_t-1});
...
...
@@ -402,9 +413,13 @@ void rotation_quadrature(TransformationGrid grid){
// . .
// . .
// (r_{N_r-1},t0);(r_{N_r-1},t1);(r_{N_r-1},t2); ... ;(r_{N_r-1},t_{N_t-1});
reduce
[
threadIdx
.
x
]
+=
normalize_f
*
d_scores
[
t
*
num_translations
];
reduce
[
threadIdx
.
x
]
+=
d_scores
[
t
*
num_translations
+
b
];
// if (b ==1 && threadIdx.x ==0){
// printf("%i,%i,%f\n",t,t*num_translations,d_scores[t*num_translations]);
// }
t
+=
blockDim
.
x
;
}
__syncthreads
();
//reduction, blocksize max 128 hard coded
if
(
threadIdx
.
x
<
64
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
64
];
__syncthreads
();
if
(
threadIdx
.
x
<
32
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
32
];
__syncthreads
();
...
...
@@ -415,33 +430,37 @@ void rotation_quadrature(TransformationGrid grid){
if
(
threadIdx
.
x
<
1
)
reduce
[
threadIdx
.
x
]
+=
reduce
[
threadIdx
.
x
+
1
];
__syncthreads
();
//safe block result in tmp mem, supposed to hold grid_dim values
d_reduced_scores
[
b
]
=
reduce
[
0
];
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
d_reduced_scores
[
b
]
=
reduce
[
0
]
/
num_rotations
;
__syncthreads
();
// if (threadIdx.x == 0){
// printf("%f\n",reduce[0]/num_rotations);
// }
b
+=
gridDim
.
x
;
}
}
__host__
void
TransformationGrid
::
quadrature
(
float
*
const
&
result
){
const
uint
blockSize
=
128
;
uint
grid_dim
=
h_num_transformations
[
0
]
/
blockSize
+
1
;
uint
rot_quat_grid_dim
=
64
;
float
*
h_tmp_quadrature
=
(
float
*
)
malloc
(
grid_dim
*
sizeof
(
*
h_tmp_quadrature
));
float
*
d_tmp_quadrature
;
cudaMalloc
((
void
**
)
&
d_tmp_quadrature
,
grid_dim
*
sizeof
(
*
d_tmp_quadrature
));
const
uint
block_size
=
128
;
// template <uint blockSize>
// __global__
// void rotation_quadrature(TransformationGrid grid){
rotation_quadrature
<
block
S
ize
><<<
rot_quat_grid_dim
,
block
S
ize
>>>
(
*
this
);
rotation_quadrature
<
block
_s
ize
><<<
rot_quat_grid_dim
,
block
_s
ize
>>>
(
*
this
);
cudaDeviceSynchronize
();
// template <int blockSize>
uint
secondary_quadrature_grid_dim
=
h_num_translations
[
0
]
/
block_size
+
1
;
float
*
h_quadrature_result
=
(
float
*
)
malloc
(
secondary_quadrature_grid_dim
*
sizeof
(
*
h_quadrature_result
));
float
*
d_quadrature_result
;
cudaMalloc
((
void
**
)
&
d_quadrature_result
,
secondary_quadrature_grid_dim
*
sizeof
(
*
d_quadrature_result
));
float
*
d_tmp_quadrature
;
// __global__
// void translation_quadrature(TransformationGrid grid, float * d_result, float * d_tmp_quadrature)
// translation_quadrature<blockSize><<<grid_dim,blockSize>>>(*this,d_tmp_quadrature);
// void translation_secondary_quadrature(TransformationGrid grid, float * d_result){
translation_quadrature
<
block_size
,
true
><<<
secondary_quadrature_grid_dim
,
block_size
>>>
(
*
this
,
d_quadrature_result
);
cudaDeviceSynchronize
();
cudaMemcpy
(
h_quadrature_result
,
d_quadrature_result
,
secondary_quadrature_grid_dim
*
sizeof
(
*
result
),
cudaMemcpyDeviceToHost
);
cudaDeviceSynchronize
();
cudaMemcpy
(
h_tmp_quadrature
,
d_tmp_quadrature
,
grid_dim
*
sizeof
(
*
h_tmp_quadrature
),
cudaMemcpyDeviceToHost
);
result
[
0
]
=
0.0
;
for
(
int
i
=
0
;
i
<
grid_dim
;
i
++
)
result
[
0
]
+=
h_
tmp_
quadrature
[
i
];
for
(
int
i
=
0
;
i
<
secondary_quadrature_
grid_dim
;
i
++
)
result
[
0
]
+=
h_quadrature
_result
[
i
];
}
__host__
...
...
@@ -497,13 +516,13 @@ void TransformationGrid::print(){
printf
(
"Number of transformations: %u
\n
"
,
h_num_transformations
[
0
]);
printf
(
"Translation offsets: %f,%f,%f
\n
"
,
h_translation_offset
[
0
].
x
,
h_translation_offset
[
0
].
y
,
h_translation_offset
[
0
].
z
);
}
__host__
float3
TransformationGrid
::
transform
(
const
float4
&
t
){
//basis: 0,0,1 "north"
return
make_float3
(
2
*
(
t
.
y
*
t
.
w
+
t
.
x
*
t
.
z
),
2
*
(
t
.
z
*
t
.
w
-
t
.
x
*
t
.
y
),
1
-
2
*
(
t
.
y
*
t
.
y
+
t
.
z
*
t
.
z
));
}
//
//
//
__host__
//
float3 TransformationGrid::transform(const float4& t){
//
//basis: 0,0,1 "north"
//
return make_float3(2*(t.y*t.w + t.x*t.z),2*(t.z*t.w - t.x*t.y),1-2*(t.y*t.y + t.z*t.z));
//
}
__host__
thrust
::
device_vector
<
thrust
::
tuple
<
float4
,
float3
,
float
>>
TransformationGrid
::
n_best_from_file
(
const
char
*
file_name
,
uint
n
){
...
...
TransformationGrid.o
View file @
e059d41d
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment