Commit ddb4c200 authored by karius's avatar karius

Density clean up, added GPUObjects now really

parent e8fd0a7f
/*
Author: Cao Thanh Tung, Ashwin Nanjappa
Date: 05-Aug-2014
===============================================================================
Copyright (c) 2011, School of Computing, National University of Singapore.
All rights reserved.
Project homepage: http://www.comp.nus.edu.sg/~tants/gdel3d.html
If you use gDel3D and you like it or have comments on its usefulness etc., we
would love to hear from you at <tants@comp.nus.edu.sg>. You may share with us
your experience and any possibilities that we may improve the work/code.
===============================================================================
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
Neither the name of the National University of University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission from the National University of Singapore.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/
#pragma once
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
// Define this to turn on error checking
#define CUDA_ERROR_CHECK
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
inline void __cudaSafeCall( cudaError err, const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
if ( cudaSuccess != err )
{
fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
inline void __cudaCheckError( const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if ( cudaSuccess != err )
{
fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
// More careful checking. However, this will affect performance.
// Comment away if needed.
err = cudaDeviceSynchronize();
if( cudaSuccess != err )
{
fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
#if __CUDA_ARCH__ >= 200 && defined( CUDA_ERROR_CHECK )
#define CudaAssert(X) \
if ( !(X) ) \
{ \
printf( "!!!Thread %d:%d failed assert at %s:%d!!!\n", \
blockIdx.x, threadIdx.x, __FILE__, __LINE__ ); \
}
#else
#define CudaAssert(X)
#endif
template< typename T >
T* cuNew( int num )
{
T* loc = NULL;
const size_t space = num * sizeof( T );
CudaSafeCall( cudaMalloc( &loc, space ) );
return loc;
}
template< typename T >
void cuDelete( T** loc )
{
CudaSafeCall( cudaFree( *loc ) );
*loc = NULL;
return;
}
template< typename T >
__forceinline__ __device__ void cuSwap( T& v0, T& v1 )
{
const T tmp = v0;
v0 = v1;
v1 = tmp;
return;
}
inline void cuPrintMemory( const char* inStr )
{
const int MegaByte = ( 1 << 20 );
size_t free;
size_t total;
CudaSafeCall( cudaMemGetInfo( &free, &total ) );
printf( "[%s] Memory used: %d MB\n", inStr, (int) ( total - free ) / MegaByte );
return;
}
// Obtained from: C:\ProgramData\NVIDIA Corporation\GPU SDK\C\common\inc\cutil_inline_runtime.h
// This function returns the best GPU (with maximum GFLOPS)
inline int cutGetMaxGflopsDeviceId()
{
int current_device = 0, sm_per_multiproc = 0;
int max_compute_perf = 0, max_perf_device = 0;
int device_count = 0, best_SM_arch = 0;
int arch_cores_sm[3] = { 1, 8, 32 };
cudaDeviceProp deviceProp;
cudaGetDeviceCount( &device_count );
// Find the best major SM Architecture GPU device
while ( current_device < device_count ) {
cudaGetDeviceProperties( &deviceProp, current_device );
if (deviceProp.major > 0 && deviceProp.major < 9999)
{
if ( deviceProp.major > best_SM_arch )
best_SM_arch = deviceProp.major;
}
current_device++;
}
// Find the best CUDA capable GPU device
current_device = 0;
while( current_device < device_count ) {
cudaGetDeviceProperties( &deviceProp, current_device );
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
sm_per_multiproc = 1;
} else if (deviceProp.major <= 2) {
sm_per_multiproc = arch_cores_sm[deviceProp.major];
} else {
sm_per_multiproc = arch_cores_sm[2];
}
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
if( compute_perf > max_compute_perf ) {
// If we find GPU with SM major > 2, search only these
if ( best_SM_arch > 2 ) {
// If our device==dest_SM_arch, choose this, or else pass
if (deviceProp.major == best_SM_arch) {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
} else {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
++current_device;
}
return max_perf_device;
}
/*
Author: Cao Thanh Tung, Ashwin Nanjappa
Date: 05-Aug-2014
===============================================================================
Copyright (c) 2011, School of Computing, National University of Singapore.
All rights reserved.
Project homepage: http://www.comp.nus.edu.sg/~tants/gdel3d.html
If you use gDel3D and you like it or have comments on its usefulness etc., we
would love to hear from you at <tants@comp.nus.edu.sg>. You may share with us
your experience and any possibilities that we may improve the work/code.
===============================================================================
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
Neither the name of the National University of University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission from the National University of Singapore.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/
#pragma once
#include "../CommonTypes.h"
#include "HostToKernel.h"
class DPredWrapper
{
private:
Point3 *_pointArr;
int* _orgPointIdx;
int _pointNum;
RealType* _predConsts;
RealType* _predData;
__forceinline__ __device__ Orient doOrient3DFastAdaptExact( Point3 p0, Point3 p1, Point3 p2, Point3 p3 ) const;
__forceinline__ __device__ RealType doOrient2DFastExact(
const RealType* p0,
const RealType* p1,
const RealType* p2 ) const;
__forceinline__ __device__ Orient doOrient3DSoSOnly(
int v0, int v1, int v2, int v3,
Point3 p0, Point3 p1, Point3 p2, Point3 p3 ) const;
__forceinline__ __device__ Orient doOrient1DExact_Lifted(
const RealType* p0,
const RealType* p1,
bool lifted
) const;
__forceinline__ __device__ Orient doOrient2DExact_Lifted
(
RealType* curPredData,
const RealType* p0,
const RealType* p1,
const RealType* p2,
bool lifted
) const;
__forceinline__ __device__ Orient doOrient3DFastExact_Lifted
(
RealType* curPredData,
const RealType* p0,
const RealType* p1,
const RealType* p2,
const RealType* p3,
bool lifted
) const;
__device__ Side doInSphereSoSOnly
(
RealType* curPredData,
int pi0, int pi1, int pi2, int pi3, int pi4,
Point3 p0, Point3 p1, Point3 p2, Point3 p3, Point3 p4
) const;
__forceinline__ __device__ Side doInSphereFastAdaptExact(
RealType *curPredData, Point3 p0, Point3 p1, Point3 p2, Point3 p3, Point3 p4 ) const;
public:
int _infIdx;
void init(
Point3* pointArr,
int pointNum,
int* orgPointIdx,
int infIdx,
int PredTotalThreadNum
);
void cleanup();
__forceinline__ __device__ __host__ int pointNum() const;
__forceinline__ __device__ const Point3& getPoint( int idx ) const;
__forceinline__ __device__ Orient doOrient3DFast(
int v0, int v1, int v2, int v3,
Point3 p0, Point3 p1, Point3 p2, Point3 p3 ) const;
__forceinline__ __device__ Orient doOrient3DFast(
int v0, int v1, int v2, int v3 ) const;
__forceinline__ __device__ Orient doOrient3DSoS(
int v0, int v1, int v2, int v3,
Point3 p0, Point3 p1, Point3 p2, Point3 p3 ) const;
__forceinline__ __device__ Orient doOrient3DSoS(
int v0, int v1, int v2, int v3 ) const;
__forceinline__ __device__ Side doInSphereFast(
Tet tet, int vert, const Point3 pt[], Point3 ptVert ) const;
__forceinline__ __device__ Side doInSphereSoS(
Tet tet, int vert, const Point3 pt[], Point3 ptVert ) const;
__forceinline__ __device__ float distToCentroid( Tet tet, int v ) const;
__forceinline__ __device__ float inSphereDet( Tet tet, int v ) const;
__forceinline__ __device__ float inDist( Tet tet, int v ) const;
__forceinline__ __device__ float maxDist( Tet tet, int v ) const;
__forceinline__ __device__ float splitSphere( Tet tet, int v ) const;
};
/*
Author: Cao Thanh Tung, Ashwin Nanjappa
Date: 05-Aug-2014
===============================================================================
Copyright (c) 2011, School of Computing, National University of Singapore.
All rights reserved.
Project homepage: http://www.comp.nus.edu.sg/~tants/gdel3d.html
If you use gDel3D and you like it or have comments on its usefulness etc., we
would love to hear from you at <tants@comp.nus.edu.sg>. You may share with us
your experience and any possibilities that we may improve the work/code.
===============================================================================
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
Neither the name of the National University of University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission from the National University of Singapore.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/
#pragma once
#include "../CommonTypes.h"
////////////////////////////////////////////////////////////////////// GPU Exact predicates
enum DPredicateBounds
{
Splitter, /* = 2^ceiling(p / 2) + 1. Used to split floats in half. */
Epsilon, /* = 2^(-p). Used to estimate roundoff errors. */
/* A set of coefficients used to calculate maximum roundoff errors. */
Resulterrbound,
CcwerrboundA,
CcwerrboundB,
CcwerrboundC,
O3derrboundA,
O3derrboundB,
O3derrboundC,
IccerrboundA,
IccerrboundB,
IccerrboundC,
IsperrboundA,
IsperrboundB,
IsperrboundC,
O3derrboundAlifted,
O2derrboundAlifted,
O1derrboundAlifted,
DPredicateBoundNum // Number of bounds in this enum
};
enum DPredicateSizes
{
// Size of each array
//AbcdSize = 96,
//BcdeSize = 96,
//CdeaSize = 96,
//DeabSize = 96,
//EabcSize = 96,
Temp96Size = 96,
//Temp192Size = 192,
Det384xSize = 384,
Det384ySize = 384,
//Det384zSize = 384,
DetxySize = 768,
AdetSize = 1152,
BdetSize = 1152,
//CdetSize = 1152,
//DdetSize = 1152,
//EdetSize = 1152,
AbdetSize = 2304,
//CddetSize = 2304,
CdedetSize = 3456,
//DeterSize = 5760,
// Total size
PredicateTotalSize = 0
//+ AbcdSize
//+ BcdeSize
//+ CdeaSize
//+ DeabSize
//+ EabcSize
+ Temp96Size
//+ Temp192Size
+ Det384xSize
+ Det384ySize
//+ Det384zSize
+ DetxySize
+ AdetSize
+ BdetSize
//+ CdetSize
//+ DdetSize
//+ EdetSize
+ AbdetSize
//+ CddetSize
+ CdedetSize
// + DeterSize
};
struct PredicateInfo
{
RealType* _consts;
RealType* _data;
};
////////////////////////////////////////////////////////////////////// Enums
enum CheckDelaunayMode
{
SphereFastOrientFast,
SphereExactOrientSoS
};
enum Counter {
CounterExact,
CounterFlip,
CounterNum
};
enum ActTetMode
{
ActTetMarkCompact,
ActTetCollectCompact
};
////////////////////////////////////////////////////////////////////// Constants
__device__ const int Flip32NewFaceVi[3][2] = {
{ 2, 1 }, // newTetIdx[0]'s vi, newTetIdx[1]'s vi
{ 1, 2 }, // -"-
{ 0, 0 } // -"-
};
__device__ const int Flip23IntFaceOpp[3][4] = {
{ 0, 1, 1, 2 },
{ 0, 2, 2, 1 },
{ 1, 1, 2, 2 }
};
// Adjacency between 6 internal faces of 4 new tetra
__device__ const int IntSplitFaceOpp[4][6] = {
{ 1, 0, 3, 0, 2, 0 },
{ 0, 0, 2, 2, 3, 1 },
{ 0, 2, 3, 2, 1, 1 },
{ 0, 1, 1, 2, 2, 1 } };
__device__ const int SplitFaces[11][3] = {
/*0*/ { 0, 1, 4 },
/*1*/ { 0, 3, 4 }, /*2*/ { 0, 2, 4 },
/*3*/ { 2, 3, 4 }, /*4*/ { 1, 3, 4 }, /*5*/ { 1, 2, 4 }, /*6*/ { 2, 3, 4 },
/*7*/ { 1, 3, 2 }, /*8*/ { 0, 2, 3 }, /*9*/ { 0, 3, 1 }, /*10*/ { 0, 1, 2 }
};
__device__ const int SplitNext[11][2] = {
{ 1, 2 },
{ 3, 4 }, { 5, 6 },
{ 7, 8 }, { 9, 7 }, { 7, 10 }, { 7, 8 },
{ 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }
};
////////////////////////////////////////////////////////////////// DevVector //
template< typename T >
class DevVector
{
public:
// Types
typedef typename thrust::device_ptr< T > iterator;
// Properties
thrust::device_ptr< T > _ptr;
size_t _size;
size_t _capacity;
bool _owned;
DevVector( ) : _size( 0 ), _capacity( 0 ) {}
DevVector( size_t n ) : _size( 0 ), _capacity( 0 )
{
resize( n );
return;
}
DevVector( size_t n, T value ) : _size( 0 ), _capacity( 0 )
{
assign( n, value );
return;
}
// Reuse the storage space
DevVector( const DevVector<T> &clone ) : _size( 0 ), _owned( false )
{
_ptr = clone._ptr;
_capacity = clone._capacity;
}
template< typename T1 >
DevVector( const DevVector<T1> &clone ) : _size( 0 ), _owned( false )
{
_ptr = thrust::device_ptr< T >( ( T* ) clone._ptr.get() );
_capacity = clone._capacity * sizeof( T1 ) / sizeof( T );
}
~DevVector()
{
free();
return;
}
void free()
{
if ( _capacity > 0 && _owned )
CudaSafeCall( cudaFree( _ptr.get() ) );
_size = 0;
_capacity = 0;
return;
}
// Use only for cases where new size is within capacity
// So, old data remains in-place
void expand( size_t n )
{
assert( ( _capacity >= n ) && "New size not within current capacity! Use resize!" );
_size = n;
}
// Resize with data remains
void grow( size_t n )
{
assert( ( n >= _size ) && "New size not larger than old size." );
if ( _capacity >= n )
{
_size = n;
return;
}
DevVector< T > tempVec( n );
thrust::copy( begin(), end(), tempVec.begin() );
swapAndFree( tempVec );
}
void resize( size_t n )
{
if ( _capacity >= n )
{
_size = n;
return;
}
free();
_size = n;
_capacity = ( n == 0 ) ? 1 : n;
_owned = true;
try
{
_ptr = thrust::device_malloc< T >( _capacity );