Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Balint Balazs
B3D
Commits
3e70a0ba
Commit
3e70a0ba
authored
Mar 15, 2018
by
Balint Balazs
Browse files
removed dwtLevels from GPU/CPU Resources
parent
36d7d9e9
Changes
5
Hide whitespace changes
Inline
Side-by-side
source/src/B3D_HDF5_plugin/GPUResources.cpp
View file @
3e70a0ba
#include "GPUResources.h"
#include <algorithm>
#include <cassert>
#include "cudaUtil.h"
Resources
::
Config
::
Config
()
:
cudaDevice
(
-
1
),
blockCountMax
(
0
),
elemCountPerBlockMax
(
0
),
codingBlockSize
(
0
),
log2HuffmanDistinctSymbolCountMax
(
0
),
bufferSize
(
0
)
{
}
void
Resources
::
Config
::
merge
(
const
Resources
::
Config
&
other
)
{
if
(
cudaDevice
==
-
1
)
{
cudaDevice
=
other
.
cudaDevice
;
}
if
(
blockCountMax
==
0
)
{
blockCountMax
=
other
.
blockCountMax
;
}
else
{
blockCountMax
=
std
::
max
(
blockCountMax
,
other
.
blockCountMax
);
}
if
(
elemCountPerBlockMax
==
0
)
{
elemCountPerBlockMax
=
other
.
elemCountPerBlockMax
;
}
else
{
elemCountPerBlockMax
=
std
::
max
(
elemCountPerBlockMax
,
other
.
elemCountPerBlockMax
);
}
if
(
codingBlockSize
==
0
)
{
codingBlockSize
=
other
.
codingBlockSize
;
}
else
{
codingBlockSize
=
std
::
min
(
codingBlockSize
,
other
.
codingBlockSize
);
}
if
(
log2HuffmanDistinctSymbolCountMax
==
0
)
{
log2HuffmanDistinctSymbolCountMax
=
other
.
log2HuffmanDistinctSymbolCountMax
;
}
else
{
log2HuffmanDistinctSymbolCountMax
=
std
::
max
(
log2HuffmanDistinctSymbolCountMax
,
other
.
log2HuffmanDistinctSymbolCountMax
);
}
if
(
bufferSize
==
0
)
{
bufferSize
=
other
.
bufferSize
;
}
else
{
bufferSize
=
std
::
max
(
bufferSize
,
other
.
bufferSize
);
}
}
Resources
::
Resources
()
:
m_pCuCompInstance
(
nullptr
)
,
m_dpBuffer
(
nullptr
)
,
m_bufferOffset
(
0
)
{
}
Resources
::~
Resources
()
{
assert
(
m_pCuCompInstance
==
nullptr
);
assert
(
m_dpBuffer
==
nullptr
);
}
byte
*
Resources
::
getByteBuffer
(
size_t
bytes
)
{
assert
(
m_bufferOffset
+
bytes
<=
m_config
.
bufferSize
);
if
(
m_bufferOffset
+
bytes
>
m_config
.
bufferSize
)
{
printf
(
"ERROR: Resources::getByteBuffer: out of memory!
\n
"
);
return
nullptr
;
}
byte
*
dpResult
=
m_dpBuffer
+
m_bufferOffset
;
m_allocatedSizes
.
push_back
(
bytes
);
m_bufferOffset
+=
getAlignedSize
(
bytes
,
128
);
return
dpResult
;
}
void
Resources
::
releaseBuffer
()
{
assert
(
!
m_allocatedSizes
.
empty
());
if
(
m_allocatedSizes
.
empty
())
{
printf
(
"ERROR: Resources::releaseBuffer: no more buffers to release
\n
"
);
return
;
}
size_t
lastSize
=
m_allocatedSizes
.
back
();
m_allocatedSizes
.
pop_back
();
m_bufferOffset
-=
getAlignedSize
(
lastSize
,
128
);
assert
(
m_bufferOffset
%
128
==
0
);
}
void
Resources
::
releaseBuffers
(
uint
bufferCount
)
{
for
(
uint
i
=
0
;
i
<
bufferCount
;
i
++
)
{
releaseBuffer
();
}
}
GPUResources
::
GPUResources
(
uint
sizeX
,
uint
sizeY
,
uint
sizeZ
,
uint
levels
,
int
cudaDevice
)
{
uint
blockSizeX
=
sizeX
/
pow
(
2.0
,
(
double
)
levels
);
uint
blockSizeY
=
sizeY
/
pow
(
2.0
,
(
double
)
levels
);
uint
blockCount
=
1
;
uint
elemCount
=
sizeX
*
sizeY
*
sizeZ
;
uint
elemCountPerBlock
=
elemCount
/
blockCount
;
// accumulate GPU buffer size
size_t
size
=
0
;
// dpBuffer + dpScratch1 + dpScratch2
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
// dpSymbolStreams
size
+=
getAlignedSize
(
blockCount
*
elemCountPerBlock
*
sizeof
(
uint16_t
),
128
);
// dpImage
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
short
),
128
);
// build Resources config
Resources
::
Config
config
;
config
.
blockCountMax
=
blockCount
;
config
.
elemCountPerBlockMax
=
elemCount
*
2
;
config
.
bufferSize
=
size
;
config
.
log2HuffmanDistinctSymbolCountMax
=
LOG2_SYMBOL_COUNT_MAX
;
config
.
cudaDevice
=
cudaDevice
;
this
->
create
(
config
);
}
bool
GPUResources
::
create
(
const
Config
&
config
)
{
m_config
=
config
;
assert
(
m_pCuCompInstance
==
nullptr
);
m_pCuCompInstance
=
cudaCompress
::
createInstance
(
m_config
.
cudaDevice
,
m_config
.
blockCountMax
,
m_config
.
elemCountPerBlockMax
,
m_config
.
codingBlockSize
,
m_config
.
log2HuffmanDistinctSymbolCountMax
);
if
(
!
m_pCuCompInstance
)
{
return
false
;
}
//TODO don't use cudaSafeCall, but manually check for out of memory?
assert
(
m_dpBuffer
==
nullptr
);
cudaSafeCall
(
cudaMalloc
(
&
m_dpBuffer
,
m_config
.
bufferSize
));
return
true
;
}
void
GPUResources
::
destroy
()
{
cudaSafeCall
(
cudaFree
(
m_dpBuffer
));
m_dpBuffer
=
nullptr
;
cudaCompress
::
destroyInstance
(
m_pCuCompInstance
);
m_pCuCompInstance
=
nullptr
;
}
CPUResources
::
CPUResources
(
uint
sizeX
,
uint
sizeY
,
uint
sizeZ
,
uint
levels
,
int
cudaDevice
)
{
uint
blockSizeX
=
sizeX
/
pow
(
2.0
,
(
double
)
levels
);
uint
blockSizeY
=
sizeY
/
pow
(
2.0
,
(
double
)
levels
);
uint
blockCount
=
1
;
uint
elemCount
=
sizeX
*
sizeY
*
sizeZ
;
uint
elemCountPerBlock
=
elemCount
/
blockCount
;
// accumulate GPU buffer size
size_t
size
=
0
;
// dpBuffer + dpScratch1 + dpScratch2
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
// dpSymbolStreams
size
+=
getAlignedSize
(
blockCount
*
elemCountPerBlock
*
sizeof
(
uint16_t
),
128
);
// dpImage
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
short
),
128
);
// build Resources config
Resources
::
Config
config
;
config
.
blockCountMax
=
blockCount
;
config
.
elemCountPerBlockMax
=
elemCount
*
2
;
config
.
bufferSize
=
size
;
config
.
log2HuffmanDistinctSymbolCountMax
=
LOG2_SYMBOL_COUNT_MAX
;
config
.
cudaDevice
=
cudaDevice
;
this
->
create
(
config
);
}
bool
CPUResources
::
create
(
const
Config
&
config
)
{
m_config
=
config
;
/*assert(m_pCuCompInstance == nullptr);
m_pCuCompInstance = cudaCompress::createInstance(m_config.cudaDevice, m_config.blockCountMax, m_config.elemCountPerBlockMax, m_config.codingBlockSize, m_config.log2HuffmanDistinctSymbolCountMax);
if (!m_pCuCompInstance) {
return false;
}*/
m_pCuCompInstance
=
nullptr
;
assert
(
m_dpBuffer
==
nullptr
);
//cudaSafeCall(cudaMalloc(&m_dpBuffer, m_config.bufferSize));
m_dpBuffer
=
(
byte
*
)
malloc
(
m_config
.
bufferSize
);
return
true
;
}
void
CPUResources
::
destroy
()
{
free
(
m_dpBuffer
);
m_dpBuffer
=
nullptr
;
//cudaCompress::destroyInstance(m_pCuCompInstance);
m_pCuCompInstance
=
nullptr
;
}
#include "GPUResources.h"
#include <algorithm>
#include <cassert>
#include "cudaUtil.h"
Resources
::
Config
::
Config
()
:
cudaDevice
(
-
1
),
blockCountMax
(
0
),
elemCountPerBlockMax
(
0
),
codingBlockSize
(
0
),
log2HuffmanDistinctSymbolCountMax
(
0
),
bufferSize
(
0
)
{
}
void
Resources
::
Config
::
merge
(
const
Resources
::
Config
&
other
)
{
if
(
cudaDevice
==
-
1
)
{
cudaDevice
=
other
.
cudaDevice
;
}
if
(
blockCountMax
==
0
)
{
blockCountMax
=
other
.
blockCountMax
;
}
else
{
blockCountMax
=
std
::
max
(
blockCountMax
,
other
.
blockCountMax
);
}
if
(
elemCountPerBlockMax
==
0
)
{
elemCountPerBlockMax
=
other
.
elemCountPerBlockMax
;
}
else
{
elemCountPerBlockMax
=
std
::
max
(
elemCountPerBlockMax
,
other
.
elemCountPerBlockMax
);
}
if
(
codingBlockSize
==
0
)
{
codingBlockSize
=
other
.
codingBlockSize
;
}
else
{
codingBlockSize
=
std
::
min
(
codingBlockSize
,
other
.
codingBlockSize
);
}
if
(
log2HuffmanDistinctSymbolCountMax
==
0
)
{
log2HuffmanDistinctSymbolCountMax
=
other
.
log2HuffmanDistinctSymbolCountMax
;
}
else
{
log2HuffmanDistinctSymbolCountMax
=
std
::
max
(
log2HuffmanDistinctSymbolCountMax
,
other
.
log2HuffmanDistinctSymbolCountMax
);
}
if
(
bufferSize
==
0
)
{
bufferSize
=
other
.
bufferSize
;
}
else
{
bufferSize
=
std
::
max
(
bufferSize
,
other
.
bufferSize
);
}
}
Resources
::
Resources
()
:
m_pCuCompInstance
(
nullptr
)
,
m_dpBuffer
(
nullptr
)
,
m_bufferOffset
(
0
)
{
}
Resources
::~
Resources
()
{
assert
(
m_pCuCompInstance
==
nullptr
);
assert
(
m_dpBuffer
==
nullptr
);
}
byte
*
Resources
::
getByteBuffer
(
size_t
bytes
)
{
assert
(
m_bufferOffset
+
bytes
<=
m_config
.
bufferSize
);
if
(
m_bufferOffset
+
bytes
>
m_config
.
bufferSize
)
{
printf
(
"ERROR: Resources::getByteBuffer: out of memory!
\n
"
);
return
nullptr
;
}
byte
*
dpResult
=
m_dpBuffer
+
m_bufferOffset
;
m_allocatedSizes
.
push_back
(
bytes
);
m_bufferOffset
+=
getAlignedSize
(
bytes
,
128
);
return
dpResult
;
}
void
Resources
::
releaseBuffer
()
{
assert
(
!
m_allocatedSizes
.
empty
());
if
(
m_allocatedSizes
.
empty
())
{
printf
(
"ERROR: Resources::releaseBuffer: no more buffers to release
\n
"
);
return
;
}
size_t
lastSize
=
m_allocatedSizes
.
back
();
m_allocatedSizes
.
pop_back
();
m_bufferOffset
-=
getAlignedSize
(
lastSize
,
128
);
assert
(
m_bufferOffset
%
128
==
0
);
}
void
Resources
::
releaseBuffers
(
uint
bufferCount
)
{
for
(
uint
i
=
0
;
i
<
bufferCount
;
i
++
)
{
releaseBuffer
();
}
}
GPUResources
::
GPUResources
(
uint
sizeX
,
uint
sizeY
,
uint
sizeZ
,
int
cudaDevice
)
{
uint
blockCount
=
1
;
uint
elemCount
=
sizeX
*
sizeY
*
sizeZ
;
uint
elemCountPerBlock
=
elemCount
/
blockCount
;
// accumulate GPU buffer size
size_t
size
=
0
;
// dpBuffer + dpScratch1 + dpScratch2
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
// dpSymbolStreams
size
+=
getAlignedSize
(
blockCount
*
elemCountPerBlock
*
sizeof
(
uint16_t
),
128
);
// dpImage
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
short
),
128
);
// build Resources config
Resources
::
Config
config
;
config
.
blockCountMax
=
blockCount
;
config
.
elemCountPerBlockMax
=
elemCount
*
2
;
config
.
bufferSize
=
size
;
config
.
log2HuffmanDistinctSymbolCountMax
=
LOG2_SYMBOL_COUNT_MAX
;
config
.
cudaDevice
=
cudaDevice
;
this
->
create
(
config
);
}
bool
GPUResources
::
create
(
const
Config
&
config
)
{
m_config
=
config
;
assert
(
m_pCuCompInstance
==
nullptr
);
m_pCuCompInstance
=
cudaCompress
::
createInstance
(
m_config
.
cudaDevice
,
m_config
.
blockCountMax
,
m_config
.
elemCountPerBlockMax
,
m_config
.
codingBlockSize
,
m_config
.
log2HuffmanDistinctSymbolCountMax
);
if
(
!
m_pCuCompInstance
)
{
return
false
;
}
//TODO don't use cudaSafeCall, but manually check for out of memory?
assert
(
m_dpBuffer
==
nullptr
);
cudaSafeCall
(
cudaMalloc
(
&
m_dpBuffer
,
m_config
.
bufferSize
));
return
true
;
}
void
GPUResources
::
destroy
()
{
cudaSafeCall
(
cudaFree
(
m_dpBuffer
));
m_dpBuffer
=
nullptr
;
cudaCompress
::
destroyInstance
(
m_pCuCompInstance
);
m_pCuCompInstance
=
nullptr
;
}
CPUResources
::
CPUResources
(
uint
sizeX
,
uint
sizeY
,
uint
sizeZ
,
int
cudaDevice
)
{
uint
blockCount
=
1
;
uint
elemCount
=
sizeX
*
sizeY
*
sizeZ
;
uint
elemCountPerBlock
=
elemCount
/
blockCount
;
// accumulate GPU buffer size
size_t
size
=
0
;
// dpBuffer + dpScratch1 + dpScratch2
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
float
),
128
);
// dpSymbolStreams
size
+=
getAlignedSize
(
blockCount
*
elemCountPerBlock
*
sizeof
(
uint16_t
),
128
);
// dpImage
size
+=
getAlignedSize
(
elemCount
*
sizeof
(
short
),
128
);
// build Resources config
Resources
::
Config
config
;
config
.
blockCountMax
=
blockCount
;
config
.
elemCountPerBlockMax
=
elemCount
*
2
;
config
.
bufferSize
=
size
;
config
.
log2HuffmanDistinctSymbolCountMax
=
LOG2_SYMBOL_COUNT_MAX
;
config
.
cudaDevice
=
cudaDevice
;
this
->
create
(
config
);
}
bool
CPUResources
::
create
(
const
Config
&
config
)
{
m_config
=
config
;
/*assert(m_pCuCompInstance == nullptr);
m_pCuCompInstance = cudaCompress::createInstance(m_config.cudaDevice, m_config.blockCountMax, m_config.elemCountPerBlockMax, m_config.codingBlockSize, m_config.log2HuffmanDistinctSymbolCountMax);
if (!m_pCuCompInstance) {
return false;
}*/
m_pCuCompInstance
=
nullptr
;
assert
(
m_dpBuffer
==
nullptr
);
//cudaSafeCall(cudaMalloc(&m_dpBuffer, m_config.bufferSize));
m_dpBuffer
=
(
byte
*
)
malloc
(
m_config
.
bufferSize
);
return
true
;
}
void
CPUResources
::
destroy
()
{
free
(
m_dpBuffer
);
m_dpBuffer
=
nullptr
;
//cudaCompress::destroyInstance(m_pCuCompInstance);
m_pCuCompInstance
=
nullptr
;
}
source/src/B3D_HDF5_plugin/GPUResources.h
View file @
3e70a0ba
#ifndef __TUM3D__GPU_RESOURCES_H__
#define __TUM3D__GPU_RESOURCES_H__
#include "global.h"
#include <vector>
#include <inttypes.h>
#include <cudaCompress/Instance.h>
inline
size_t
getAlignedSize
(
size_t
size
,
size_t
numBytes
=
128
)
{
return
(
size
+
numBytes
-
1
)
/
numBytes
*
numBytes
;
}
template
<
typename
T
>
inline
void
align
(
T
*&
pData
,
size_t
numBytes
=
128
)
{
pData
=
(
T
*
)
getAlignedSize
(
size_t
(
pData
),
numBytes
);
}
// Helper class to manage shared GPU resources (scratch buffers and cudaCompress instance).
// Usage: Fill a Config (use merge to build max required sizes etc over all clients), then call create() to initialize.
class
Resources
{
public:
Resources
();
//Resources(uint sizeX, uint sizeY, uint sizeZ, uint levels, int cudaDevice=-1);
~
Resources
();
struct
Config
{
Config
();
int
cudaDevice
;
// set to -1 (default) to use the current one
uint
blockCountMax
;
uint
elemCountPerBlockMax
;
uint
codingBlockSize
;
uint
log2HuffmanDistinctSymbolCountMax
;
size_t
bufferSize
;
void
merge
(
const
Config
&
other
);
};
virtual
bool
create
(
const
Config
&
config
)
=
0
;
virtual
void
destroy
()
=
0
;
const
Config
&
getConfig
()
const
{
return
m_config
;
}
cudaCompress
::
Instance
*
m_pCuCompInstance
;
// get a buffer of the specified size in GPU memory
byte
*
getByteBuffer
(
size_t
bytes
);
template
<
typename
T
>
T
*
getBuffer
(
size_t
count
)
{
return
(
T
*
)
getByteBuffer
(
count
*
sizeof
(
T
));
}
// release the last buffer(s) returned from getBuffer
void
releaseBuffer
();
void
releaseBuffers
(
uint
bufferCount
);
protected:
Config
m_config
;
byte
*
m_dpBuffer
;
size_t
m_bufferOffset
;
std
::
vector
<
size_t
>
m_allocatedSizes
;
};
class
GPUResources
:
public
Resources
{
public:
GPUResources
(
uint
sizeX
,
uint
sizeY
,
uint
sizeZ
,
uint
levels
,
int
cudaDevice
=
-
1
);
bool
create
(
const
Config
&
config
);
void
destroy
();
};
class
CPUResources
:
public
Resources
{
public:
CPUResources
(
uint
sizeX
,
uint
sizeY
,
uint
sizeZ
,
uint
levels
,
int
cudaDevice
=
-
1
);
bool
create
(
const
Config
&
config
);
void
destroy
();
};
#endif
#ifndef __TUM3D__GPU_RESOURCES_H__
#define __TUM3D__GPU_RESOURCES_H__
#include "global.h"
#include <vector>
#include <inttypes.h>
#include <cudaCompress/Instance.h>
inline
size_t
getAlignedSize
(
size_t
size
,
size_t
numBytes
=
128
)
{
return
(
size
+
numBytes
-
1
)
/
numBytes
*
numBytes
;
}
template
<
typename
T
>
inline
void
align
(
T
*&
pData
,
size_t
numBytes
=
128
)
{
pData
=
(
T
*
)
getAlignedSize
(
size_t
(
pData
),
numBytes
);
}
// Helper class to manage shared GPU resources (scratch buffers and cudaCompress instance).
// Usage: Fill a Config (use merge to build max required sizes etc over all clients), then call create() to initialize.
class
Resources
{
public:
Resources
();
//Resources(uint sizeX, uint sizeY, uint sizeZ, uint levels, int cudaDevice=-1);
~
Resources
();
struct
Config
{
Config
();
int
cudaDevice
;
// set to -1 (default) to use the current one
uint
blockCountMax
;
uint
elemCountPerBlockMax
;
uint
codingBlockSize
;
uint
log2HuffmanDistinctSymbolCountMax
;
size_t
bufferSize
;
void
merge
(
const
Config
&
other
);
};
virtual
bool
create
(
const
Config
&
config
)
=
0
;
virtual
void
destroy
()
=
0
;
const
Config
&
getConfig
()
const
{
return
m_config
;
}