Commit f0e07011 authored by karius

added engine logic

parent ddb4c200
@@ -36,6 +36,7 @@
#include <nvml.h>
#include <GPUObject.h>
#include <GPUManager.h>
#include <MemoryResources.h>
inline size_t vol(uint4 vec){
@@ -44,41 +45,34 @@ inline size_t vol(uint4 vec){
class Density: public GPUObject {
public:
Density();
Density(GPUManager * manager) : GPUObject(manager){};
Density(float * const & h_coord_dim, const float & h_pixel_size);
Density(float * const & coord_dim, uint * const & padding_dim, const float & h_pixel_size);
virtual ~Density();
static Density from_mrc(const char * mrc_path);
static Density from_mrc(GPUManager * manager,const char * mrc_path,int gpu_index);
static void from_bounds(Density & density, float * coord_dim, uint * pixel_dim);
float * h_coord_dim;
float * d_coord_dim;
float * h_lower_left;
float * d_lower_left;
float * h_upper_right;
float * d_upper_right;
float * h_mid_point;
float * d_mid_point;
uint * h_pixel_dim;
uint * d_pixel_dim;
uint * h_padding_dim;
uint * d_padding_dim;
TDensity * h_data;
TDensity * d_data;
float * no_padding;
float * h_pixel_size;
float * d_pixel_size;
float3 * h_coord_dim = nullptr;
float3 * d_coord_dim = nullptr;
float3 * h_lower_left = nullptr;
float3 * d_lower_left = nullptr;
float3 * h_upper_right = nullptr;
float3 * d_upper_right = nullptr;
float3 * h_mid_point = nullptr;
float3 * d_mid_point = nullptr;
uint3 * h_pixel_dim = nullptr;
uint3 * d_pixel_dim = nullptr;
TDensity * h_data = nullptr;
TDensity * d_data = nullptr;
float * h_pixel_size = nullptr;
float * d_pixel_size = nullptr;
bool padding = false;
//offsets give the position of the first real (non-padding) element inside the padded volume
uint padding_linear_offset{0};
uint3 padding_index_offset = {0,0,0};
void to_mrc(const char * mrc_path);
void to_mrc_async(const char * mrc_path, cudaStream_t &stream);
__host__ __device__ static uint3 linear_to_index(const uint &linear_index, const uint3 &pixel_dim);
__host__ __device__ static uint index_to_linear(const uint3 &pixel_index, const uint3 &pixel_dim);
__host__ size_t h_pixel_vol(void);
__device__ size_t d_pixel_vol(void);
__host__ size_t h_layer_vol(void);
__device__ size_t d_layer_vol(void);
__host__ __device__ static uint3 linear_to_index(const uint &linear_index, uint3 * const& pixel_dim);
__host__ __device__ static uint index_to_linear(const uint3 &pixel_index, uint3 * const& pixel_dim);
__host__ uint4 pixel_from_coord_dim(float * h_coord_dim);
__host__ void indices_in_range(thrust::device_vector<uint> & td_indices, bool inside, const float & rho0, const float & rho1, size_t & size);
__host__ float4 h_pixel_linear_index_to_coord(const uint & pixel_index);
......
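For orientation, the index arithmetic used in DensityThresholdFilter.cu below implies a row-major layout for these two static helpers. A minimal sketch of that assumed behaviour (the real definitions live in Density.cu, which is not part of this diff; the helper names here are hypothetical):
__host__ __device__ inline uint3 linear_to_index_sketch(const uint &linear, const uint3 &dim){
    //x varies fastest, then y, then z
    uint3 idx;
    idx.z = linear/(dim.x*dim.y);
    idx.y = (linear - idx.z*dim.x*dim.y)/dim.x;
    idx.x = linear - idx.y*dim.x - idx.z*dim.x*dim.y;
    return idx;
}
__host__ __device__ inline uint index_to_linear_sketch(const uint3 &idx, const uint3 &dim){
    return idx.x + idx.y*dim.x + idx.z*dim.x*dim.y;
}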
/*
* DensityThresholdFilter.cu
*
* Created on: Dec 7, 2020
* Author: kkarius
*/
#include <DensityThresholdFilter.h>
//DensityThresholdFilter::DensityThresholdFilter() {}
DensityThresholdFilter::DensityThresholdFilter(Parameter parameter)
//the base class only exposes a ParameterSet constructor, so build the set in the initializer list
: Filter([&parameter]{ ParameterSet parameters; parameters.addParameter(parameter); return parameters; }()) {}
DensityThresholdFilter::DensityThresholdFilter(TDensity from, TDensity to, TDensity init)
//delegate to the single-parameter constructor above
: DensityThresholdFilter(Parameter("threshold",from,to,init)) {}
//unary functor used as the transform step of a thrust::transform_reduce
//operation: it turns linear indexes into triple pixel indexes for the
//subsequent min/max reduction; values below the threshold are mapped to
//UINT32_MAX and 0 so that they do not affect the reductions
struct conditional_to_index : public thrust::unary_function<thrust::tuple<uint,TDensity>,thrust::pair<uint3,uint3>>
{
uint3 * d_pixel_dim;
TDensity threshold;
__device__
thrust::pair<uint3,uint3> operator()(const thrust::tuple<uint,TDensity>& u) const {
thrust::pair<uint3,uint3> ret;
if (thrust::get<1>(u) < threshold){
//default for min
ret.first.x = UINT32_MAX;
ret.first.y = UINT32_MAX;
ret.first.z = UINT32_MAX;
//default for max
ret.second.x = 0;
ret.second.y = 0;
ret.second.z = 0;
} else {
ret.first.z = thrust::get<0>(u)/(d_pixel_dim->x*d_pixel_dim->y);
ret.first.y = (thrust::get<0>(u) - ret.first.z*d_pixel_dim->x*d_pixel_dim->y)/d_pixel_dim->x;
ret.first.x = thrust::get<0>(u) - ret.first.y*d_pixel_dim->x - ret.first.z*d_pixel_dim->x*d_pixel_dim->y;
ret.second.x = ret.first.x;
ret.second.y = ret.first.y;
ret.second.z = ret.first.z;
}
return ret;
}
};
//binary functor used as the reduce step of the thrust::transform_reduce
//operation: it accumulates the component-wise min and max simultaneously
struct uint_bounding_box_pair : public thrust::binary_function<thrust::pair<uint3,uint3>,thrust::pair<uint3,uint3>,thrust::pair<uint3,uint3>>
{
__device__
thrust::pair<uint3,uint3> operator()(const thrust::pair<uint3,uint3>& u, const thrust::pair<uint3,uint3>& v) const {
thrust::pair<uint3,uint3> ret;
//uint3 has no min/max overloads, so reduce component-wise
ret.first = {min(u.first.x,v.first.x),min(u.first.y,v.first.y),min(u.first.z,v.first.z)};
ret.second = {max(u.second.x,v.second.x),max(u.second.y,v.second.y),max(u.second.z,v.second.z)};
return ret;
}
};
//copy kernel that "cuts out" a subvolume
__global__
void cut_and_copy(TDensity * d_source_data, TDensity * d_sub_data, uint3 offset, uint num_pixels_sub, uint3 pixel_dim, uint3 sub_pixel_dim){
uint t = blockDim.x*blockIdx.x + threadIdx.x;
uint3 pixel_index_sub;
uint3 pixel_index;
if (t < num_pixels_sub){
pixel_index_sub = Density::linear_to_index(t,sub_pixel_dim);
pixel_index = {offset.x + pixel_index_sub.x,offset.y + pixel_index_sub.y,offset.z + pixel_index_sub.z};
d_sub_data[Density::index_to_linear(pixel_index_sub,sub_pixel_dim)] = d_source_data[Density::index_to_linear(pixel_index,pixel_dim)];
}
}
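The kernel is not launched anywhere in this diff; a minimal host-side launch sketch, assuming d_source and d_sub are device buffers already sized for src_dim and sub_dim and offset comes from the bounding-box step below:
uint num_pixels_sub = sub_dim.x*sub_dim.y*sub_dim.z;
uint block = 256;                                 //arbitrary block size
uint grid = (num_pixels_sub + block - 1)/block;   //round up so every sub-voxel gets a thread
cut_and_copy<<<grid, block>>>(d_source, d_sub, offset, num_pixels_sub, src_dim, sub_dim);
cudaDeviceSynchronize();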
DensityThresholdFilter::~DensityThresholdFilter() {}
void DensityThresholdFilter::filter(Density * source, Density * sink){
//typedef these iterators for shorthand
typedef thrust::device_vector<uint>::iterator UIntIterator;
typedef thrust::device_vector<TDensity>::iterator TDensityIterator;
// typedef a tuple of these iterators
typedef thrust::tuple<UIntIterator, TDensityIterator> IteratorTuple;
// typedef the zip_iterator of this tuple
typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
// finally, create the zip_iterator
thrust::device_vector<uint> linear_mem_offsets(source->h_pixel_vol());
//linear pixel indices
thrust::sequence(linear_mem_offsets.begin(),linear_mem_offsets.end());
ZipIterator iter_begin(thrust::make_tuple(linear_mem_offsets.begin(), thrust::device_pointer_cast(source->d_data)));
//the end iterator points one past the last element
ZipIterator iter_end(thrust::make_tuple(linear_mem_offsets.end(), thrust::device_pointer_cast(source->d_data + source->h_pixel_vol())));
thrust::pair<uint3,uint3> init;
//default for min,max
init.first = {UINT32_MAX,UINT32_MAX,UINT32_MAX};
init.second = {0,0,0};
conditional_to_index to_index;
to_index.threshold = _parameters.getParameter("threshold").continuousValue();
to_index.d_pixel_dim = source->d_pixel_dim;
thrust::pair<uint3,uint3> bounding_box = thrust::transform_reduce(iter_begin, iter_end, to_index, init, uint_bounding_box_pair());
uint3 tolerance = {5,5,5};
//uint3 has no arithmetic operators, so expand component-wise and clamp the lower corner against underflow
uint3 index_offset = {bounding_box.first.x > tolerance.x ? bounding_box.first.x - tolerance.x : 0,
bounding_box.first.y > tolerance.y ? bounding_box.first.y - tolerance.y : 0,
bounding_box.first.z > tolerance.z ? bounding_box.first.z - tolerance.z : 0};
uint3 pixel_dim = {bounding_box.second.x - index_offset.x + tolerance.x,
bounding_box.second.y - index_offset.y + tolerance.y,
bounding_box.second.z - index_offset.z + tolerance.z};
//set the sink properties
sink->h_pixel_dim->x = pixel_dim.x;
sink->h_pixel_dim->y = pixel_dim.y;
sink->h_pixel_dim->z = pixel_dim.z;
sink->h_coord_dim->x = (((float) pixel_dim.x)/((float) source->h_pixel_dim->x))*source->h_coord_dim->x;
sink->h_coord_dim->y = (((float) pixel_dim.y)/((float) source->h_pixel_dim->y))*source->h_coord_dim->y;
sink->h_coord_dim->z = (((float) pixel_dim.z)/((float) source->h_pixel_dim->z))*source->h_coord_dim->z;
//TODO: request resources!
// if (pixel_dim.x*pixel_dim.y*pixel_dim.z>0){
// Density sub_density;
// Density::from_bounds(sub_density,h_sub_coord_dim,h_sub_pixel_dim);
// sub_density.fill_with(0.0,true);
// uint3 pixel_index_sub;
// uint3 pixel_index;
// for (uint x = 0; x < h_sub_pixel_dim[0]; x++){
// for (uint y = 0; y < h_sub_pixel_dim[1]; y++){
// for (uint z = 0; z < h_sub_pixel_dim[2]; z++){
// pixel_index = {x + index_offset.x,y + index_offset.y,z + index_offset.z};
// pixel_index_sub = {x,y,z};
// cudaMemcpyAsync(sub_density.d_data + index_to_linear_space(pixel_index_sub,h_sub_pixel_dim),d_data + index_to_linear_space(pixel_index,h_pixel_dim),sizeof(float),cudaMemcpyDeviceToDevice);
// }
// }
// }
// return sub_density;
// } else {
// throw std::length_error("Resulting density seems invalid");
// }
//}
}
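A usage sketch for the whole filter, assuming TDensity is a float-like type and that Density::from_mrc and to_mrc behave as declared in Density.h above (driver code only, not part of this commit; file names are placeholders):
Density source = Density::from_mrc("input.mrc");
Density cropped;
DensityThresholdFilter threshold_filter(0.0f, 1.0f, 0.1f);   //from, to, initial threshold value
threshold_filter.filter(&source, &cropped);
cropped.to_mrc("cropped.mrc");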
/*
* DensityThresholdFilter.h
*
* Created on: Dec 7, 2020
* Author: kkarius
*/
#ifndef DENSITYTHRESHOLDFILTER_H_
#define DENSITYTHRESHOLDFILTER_H_
#include <Filter.h>
#include <Density.h>
#include <Parameter.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/functional.h>
#include <thrust/transform_reduce.h>
class DensityThresholdFilter:public Filter<Density,Density>{
public:
DensityThresholdFilter(ParameterSet parameters):Filter(parameters){};
DensityThresholdFilter(Parameter parameter);
DensityThresholdFilter(TDensity from, TDensity to, TDensity init);
void filter(Density * source, Density * sink);
virtual ~DensityThresholdFilter();
};
#endif /* DENSITYTHRESHOLDFILTER_H_ */
/*
* Engine.cu
*
* Created on: Dec 15, 2020
* Author: kkarius
*/
#include <Engine.h>
#include <string>
#include <iostream>
Engine::Engine() {
_workerthreads.resize(_gpumanager.numGPUS());
for (int i = 0;i<_gpumanager.numGPUS();i++){
std::string worker_name = "gpu";
worker_name.append(std::to_string(i));
worker_name.append("_worker");
_workerthreads[i] = new WorkerThread(worker_name.c_str());
}
std::cout << "Created Worker threads for " << _gpumanager.numGPUS() << " gpus ..." << std::endl;
}
void Engine::submitTask(std::shared_ptr<Task> task,int gpu_index){
if (gpu_index >= 0 && gpu_index < static_cast<int>(_workerthreads.size()))
_workerthreads[gpu_index]->PostTask(task);
}
Engine::~Engine() {
//shut the workers down when the engine goes away, not in the constructor
for (size_t i = 0; i < _workerthreads.size(); i++){
std::cout << "Killing worker " << i << "..." << std::endl;
_workerthreads[i]->ExitThread();
delete _workerthreads[i];
}
}
/*
* Engine.h
*
* Created on: Dec 15, 2020
* Author: kkarius
*/
#ifndef ENGINE_H_
#define ENGINE_H_
#include <vector>
#include <memory>
#include <GPUManager.h>
#include <WorkerThread.h>
#include <Task.h>
class Engine {
public:
Engine();
void submitTask(std::shared_ptr<Task> task,int gpu_index);
virtual ~Engine();
private:
GPUManager _gpumanager;
std::vector<WorkerThread *> _workerthreads;
};
#endif /* ENGINE_H_ */
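A usage sketch for the engine, assuming Task stays the plain base class with a TASK_TYPE tag defined later in this commit and that WorkerThread::PostTask queues the shared_ptr (driver code only, not part of this commit):
#include <Engine.h>
#include <Task.h>
#include <memory>
int main(){
    Engine engine;                          //spawns one WorkerThread per visible GPU
    auto task = std::make_shared<Task>();
    task->type = SCORING;
    engine.submitTask(task, 0);             //post to the worker bound to GPU 0
    return 0;                               //~Engine() shuts the workers down
}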
/*
* Filter.cu
*
* Created on: Dec 7, 2020
* Author: kkarius
*/
//#include <Filter.h>
//
//template <typename SOURCE, typename SINK>
//Filter<SOURCE,SINK>::Filter() {}
//
//template <typename SOURCE, typename SINK>
//Filter<SOURCE,SINK>::~Filter() {}
//
/*
* Filter.h
*
* Created on: Dec 7, 2020
* Author: kkarius
*/
#ifndef FILTER_H_
#define FILTER_H_
#include <MemoryResources.h>
#include <ParameterSet.h>
template <typename SOURCE, typename SINK>
class Filter {
public:
Filter(ParameterSet parameters):_parameters(parameters){};
void filter(SOURCE * source, SINK * sink);
virtual ~Filter(){};
protected:
ParameterSet _parameters;
};
#endif /* FILTER_H_ */
@@ -26,6 +26,10 @@ void GPUManager::_determine_gpu_resources(void){
CudaCheckError();
}
int GPUManager::numGPUS(){
return _num_devices;
}
bool GPUManager::_check_memory(int gpu_index,size_t bytes){
nvmlDeviceGetMemoryInfo(_device_handles[gpu_index], &_device_memory_infos[gpu_index]);
if (_device_memory_infos[gpu_index].free > bytes){
@@ -46,17 +50,17 @@ bool GPUManager::blockMemory(GPUObject * gpu_object,int gpu_index, void * gpu_me
return ret;
}
bool initGPUMemory(memory_unit _memory_unit){
bool GPUManager::initGPUMemory(memory_unit _memory_unit){
assert(cudaSuccess == cudaSetDevice(_memory_unit.gpu_index));
return bool(cudaSuccess == cudaMalloc(&_memory_unit.gpu_ptr,_memory_unit.bytes)); //true on success
}
bool updateGPUMemory(memory_unit _memory_unit){
bool GPUManager::CPUToGPU(memory_unit _memory_unit){
assert(cudaSuccess == cudaSetDevice(_memory_unit.gpu_index));
return bool(cudaSuccess == cudaMemcpy(_memory_unit.gpu_ptr,_memory_unit.cpu_ptr,_memory_unit.bytes,cudaMemcpyHostToDevice)); //true on success
}
bool updateCPUMemory(memory_unit _memory_unit){
bool GPUManager::GPUToCPU(memory_unit _memory_unit){
assert(cudaSuccess == cudaSetDevice(_memory_unit.gpu_index));
return bool(cudaSuccess == cudaMemcpy(_memory_unit.cpu_ptr,_memory_unit.gpu_ptr,_memory_unit.bytes,cudaMemcpyDeviceToHost)); //device-to-host: destination is the host pointer
}
......
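The intended host/device round trip through these calls, sketched with the memory_unit field names inferred from the code above (the struct itself lives in MemoryResources.h, which is not shown in this diff; assumes <vector>, <cuda_runtime.h> and GPUManager.h are included, and allocates the buffers directly instead of via blockMemory):
std::vector<float> host(1024, 1.0f);
memory_unit unit;
unit.gpu_index = 0;
unit.bytes = host.size()*sizeof(float);
unit.cpu_ptr = host.data();
cudaSetDevice(unit.gpu_index);
cudaMalloc(&unit.gpu_ptr, unit.bytes);   //allocate the device side by hand for this sketch
GPUManager manager;
manager.CPUToGPU(unit);                  //host -> device copy
//... kernels operate on unit.gpu_ptr ...
manager.GPUToCPU(unit);                  //device -> host copy
cudaFree(unit.gpu_ptr);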
@@ -17,9 +17,10 @@ public:
GPUManager();
bool blockMemory(GPUObject * gpu_object,int gpu_index, void * mem_ptr,size_t bytes,void * cpu_mem_ptr = nullptr,bool mirrored = false);
bool initGPUMemory(memory_unit _memory_unit);
bool udpateGPUMemory(memory_unit _memory_unit);
bool udpateCPUMemory(memory_unit _memory_unit);
bool CPUToGPU(memory_unit _memory_unit);
bool GPUToCPU(memory_unit _memory_unit);
bool freeAllMemory(GPUObject * gpu_object);
int numGPUS();
virtual ~GPUManager();
private:
void _determine_gpu_resources(void);
......
No preview for this file type
@@ -30,12 +30,12 @@ bool GPUObject::initGPUMemory(bool all){
return ret;
}
bool GPUObject::updateGPUMemory(bool all){
bool GPUObject::CPUToGPU(bool all){
bool ret = true;
for ( auto const& _memory_unit_map_entry: _memory_units){
for (memory_unit const& _memory_unit: _memory_unit_map_entry.second){
if (_memory_unit.gpu_initiated && _memory_unit.cpu_initiated && _memory_unit.mirrored){
if (_manager->udpateGPUMemory(_memory_unit) == true){
if (_manager->CPUToGPU(_memory_unit) == true){
ret &= true;
} else {
ret &= false;
}
@@ -45,12 +45,12 @@ bool GPUObject::updateGPUMemory(bool all){
return ret;
}
bool GPUObject::updateCPUMemory(bool all){
bool GPUObject::GPUToCPU(bool all){
bool ret = true;
for ( auto const& _memory_unit_map_entry: _memory_units){
for (memory_unit const& _memory_unit: _memory_unit_map_entry.second){
if (_memory_unit.gpu_initiated && _memory_unit.cpu_initiated && _memory_unit.mirrored){
if (_manager->udpateCPUMemory(_memory_unit) == true){
if (_manager->GPUToCPU(_memory_unit) == true){
ret &= true;
} else {
ret &= false;
}
......
@@ -18,8 +18,9 @@ public:
GPUObject(GPUManager * manager):_manager(manager){};
bool blockMemory(GPUObject * gpu_object,int gpu_index, void * gpu_mem_ptr, size_t bytes,void * cpu_mem_ptr,bool mirrored);
bool initGPUMemory(bool all = true);
bool updateGPUMemory(bool all = true);
bool updateCPUMemory(bool all = true);
bool CPUToGPU(bool all = true);
bool GPUToCPU(bool all = true);
bool ToCPU(bool all = true);
virtual ~GPUObject();
private:
friend class GPUManager;
......
No preview for this file type
@@ -28,8 +28,15 @@ ccl: CMrcReader.o
g++ Density.o Particles.o PdbReader.o CMrcReader.o Kernels.o TransformationGrid.o Labeler.o CRotationGrid.o ccl_test.o link.o -o ccl_test -L. $(CUDA_LINKS) $(BOOST_LINKS) -lpng -lnvidia-ml
manager:
nvcc --gpu-architecture=$(DEVICE_ARCH) --include-path=./ --device-c GPUManager.cu GPUObject.cu test.cu -g -G
nvcc --gpu-architecture=$(DEVICE_ARCH) --include-path=./ --device-c GPUManager.cu GPUObject.cu Density.cu CMrcReader.cu test.cu -g -G
nvcc --gpu-architecture=$(DEVICE_ARCH) --device-link GPUManager.o GPUObject.o Density.o CMrcReader.o test.o --output-file link.o
g++ GPUManager.o GPUObject.o Density.o CMrcReader.o test.o link.o -o manager -lnvidia-ml -L/usr/local/cuda-10.0/lib64/ -lcudart -lcudadevrt
engine:
nvcc --gpu-architecture=$(DEVICE_ARCH) --include-path=./ --device-c Engine.cu Task.cu WorkerThread.cu GPUManager.cu GPUObject.cu Density.cu CMrcReader.cu test.cu -g -G
nvcc --gpu-architecture=$(DEVICE_ARCH) --device-link Engine.o Task.o WorkerThread.o GPUManager.o GPUObject.o Density.o CMrcReader.o test.o --output-file link.o
g++ Engine.o Task.o WorkerThread.o GPUManager.o GPUObject.o Density.o CMrcReader.o test.o link.o -o engine -lnvidia-ml -L/usr/local/cuda-10.0/lib64/ -lcudart -lcudadevrt -pthread
protocol:
# sm_70 Volta support, V100s
# nvcc --gpu-architecture=sm_61 --include-path=./ $(CUB_INCLUDE) --device-c Particles.cu PdbReader.cu CMrcReader.cu Target.cu FittingProtocol.cu Density.cu ccl_test.cu -g -G
......
/*
* ParameterSet.cu
*
* Created on: Dec 7, 2020
* Author: kkarius
*/
#include <ParameterSet.h>
ParameterSet::ParameterSet() {
// TODO Auto-generated constructor stub
}
ParameterSet::~ParameterSet() {
// TODO Auto-generated destructor stub
}
/*
* ParameterSet.h
*
* Created on: Dec 7, 2020
* Author: kkarius
*/
#ifndef PARAMETERSET_H_
#define PARAMETERSET_H_
#include <Parameter.h>
#include <map>
#include <string>
class ParameterSet {
public:
ParameterSet();
void addParameter(Parameter parameter){_parameters[parameter.name()] = parameter;}
Parameter getParameter(const char * name){return _parameters[std::string(name)];}
virtual ~ParameterSet();
private:
std::map<std::string,Parameter> _parameters;
};
#endif /* PARAMETERSET_H_ */
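A small usage sketch; the Parameter constructor and continuousValue() call are taken from DensityThresholdFilter.cu above, and their exact semantics live in Parameter.h, which is not part of this diff:
ParameterSet parameters;
parameters.addParameter(Parameter("threshold", 0.0f, 1.0f, 0.1f));
float threshold = parameters.getParameter("threshold").continuousValue();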
/*
* SpatialData.cu
*
* Created on: Dec 11, 2020
* Author: kkarius
*/
#include <SpatialData.h>
SpatialData::SpatialData() {
// TODO Auto-generated constructor stub
}
SpatialData::~SpatialData() {
// TODO Auto-generated destructor stub
}
SpatialData SpatialData::fromMrc(const char * fileName){
SpatialData data;
//from_mrc returns by value, so keep a heap copy; taking the address of the temporary would dangle
data._density = new Density(Density::from_mrc(fileName));
return data;
}
SpatialData SpatialData::fromPdb(const char * fileName){
SpatialData data;
data._particles = new Particles(Particles::from_pdb(fileName));
return data;
}
/*
* SpatialData.h
*
* Created on: Dec 11, 2020
* Author: kkarius
*/
#ifndef SPATIALDATA_H_
#define SPATIALDATA_H_
#include <GPUObject.h>
#include <Density.h>
#include <Particles.h>
#include <MemoryResources.h>
class SpatialData: public GPUObject {
public:
SpatialData();
static SpatialData fromMrc(const char * fileName);
static SpatialData fromPdb(const char * fileName);
virtual ~SpatialData();
private:
Density * _density = nullptr;
Particles * _particles = nullptr;
size_t _data_byte_vol = 0;
bool _cpu_initiated = false;
bool _gpu_initiated = false;
};
#endif /* SPATIALDATA_H_ */
/*
* Task.cu
*
* Created on: Dec 15, 2020
* Author: kkarius
*/
#include <Task.h>
Task::Task() {
// TODO Auto-generated constructor stub
}
Task::~Task() {
// TODO Auto-generated destructor stub
}
/*
* Task.h
*
* Created on: Dec 15, 2020
* Author: kkarius
*/
#ifndef TASK_H_
#define TASK_H_
enum TASK_TYPE {SCORING,KILL};
class Task {
public:
Task();
virtual ~Task();
TASK_TYPE type = KILL;
private:
};
#endif /* TASK_H_ */
/*
* WorkerThread.cu