Saxum/extern/bullet/Extras/sph/fluids/fluid_system_kern.cu
Fabian Klemp aeb6218d2d Renaming.
2014-10-24 11:49:46 +02:00

403 lines
14 KiB
Plaintext

/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "fluid_system_host.cuh"
#define TOTAL_THREADS 65536
#define BLOCK_THREADS 256
#define MAX_NBR 80
__constant__ FluidParams simData; // simulation data (on device)
__device__ int bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];
#define COLOR(r,g,b) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )
#define NULL_HASH 333333
#define OFFSET_CLR 12
#define OFFSET_NEXT 16
#define OFFSET_VEL 20
#define OFFSET_VEVAL 32
#define OFFSET_PRESS 48
#define OFFSET_DENS 52
#define OFFSET_FORCE 56
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
int gz = (pos->z - simData.min.z) * simData.delta.z ;
int gy = (pos->y - simData.min.y) * simData.delta.y ;
int gx = (pos->x - simData.min.x) * simData.delta.x ;
if ( ndx >= numPnt || gx < 0 || gz > simData.res.x-1 || gy < 0 || gy > simData.res.y-1 || gz < 0 || gz > simData.res.z-1 )
bufHash[ndx] = make_uint2( NULL_HASH, ndx );
else
bufHash[ndx] = make_uint2( __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx, ndx );
__syncthreads ();
}
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
{
uint grid_ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // grid cell index
bufPnts += OFFSET_NEXT;
bufGrid[grid_ndx] = -1;
for (int n=0; n < numPnt; n++) {
if ( bufHash[n].x == grid_ndx ) {
*(int*) (bufPnts + __mul24(bufHash[n].y, simData.stride)) = bufGrid[grid_ndx];
bufGrid[grid_ndx] = bufHash[n].y;
}
}
__syncthreads ();
}
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
uint2 bufHashSort = bufHash[ndx];
__shared__ uint sharedHash[257];
sharedHash[threadIdx.x+1] = bufHashSort.x;
if ( ndx > 0 && threadIdx.x == 0 ) {
volatile uint2 prevData = bufHash[ndx-1];
sharedHash[0] = prevData.x;
}
__syncthreads ();
if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
bufGrid [ bufHashSort.x ] = ndx;
}
if ( ndx < numPnt ) {
char* src = bufPnts + __mul24( bufHashSort.y, simData.stride );
char* dest = bufPntSort + __mul24( ndx, simData.stride );
*(float3*)(dest) = *(float3*)(src);
*(uint*) (dest + OFFSET_CLR) = *(uint*) (src + OFFSET_CLR);
*(float3*)(dest + OFFSET_VEL) = *(float3*)(src + OFFSET_VEL);
*(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);
*(float*) (dest + OFFSET_DENS) = 0.0;
*(float*) (dest + OFFSET_PRESS) = 0.0;
*(float3*) (dest + OFFSET_FORCE)= make_float3(0,0,0);
*(int*) (dest + OFFSET_NEXT) = bufHashSort.x;
}
__syncthreads ();
}
//__shared__ int ncount [ BLOCK_THREADS ];
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
float3* qpos;
float3 dist;
float dsq, c, sum;
float d = simData.sim_scale;
int nbr = __mul24(pndx, MAX_NBR);
sum = 0.0;
for ( ; qndx < simData.pnts; qndx++ ) {
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
if ( qndx != pndx ) {
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
dist.x = ( p->x - qpos->x )*d; // dist in cm
dist.y = ( p->y - qpos->y )*d;
dist.z = ( p->z - qpos->z )*d;
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
if ( dsq < simData.r2 ) {
c = simData.r2 - dsq;
sum += c * c * c;
if ( bufNeighbor[nbr] < MAX_NBR ) {
bufNeighbor[ nbr+bufNeighbor[nbr] ] = qndx;
bufNdist[ nbr+bufNeighbor[nbr] ] = sqrt(dsq);
bufNeighbor[nbr]++;
}
}
}
//curr = *(int*) (bufPnts + __mul24(curr, simData.stride) + OFFSET_NEXT);
}
return sum;
}
/*if ( ncount[threadIdx.x] < MAX_NBR ) {
bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
ncount[threadIdx.x]++;
}*/
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
//if ( ndx < 1024 ) {
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
// Find 2x2x2 grid cells
// - Use registers only, no arrays (local-memory too slow)
int3 cell;
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
float gs = simData.smooth_rad / simData.sim_scale;
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
gc1 = gc0 + 1;
gc2 = gc0 + simData.res.x;
gc3 = gc2 + 1;
if ( cell.z+1 < simData.res.z ) {
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
gc5 = gc4 + 1;
gc6 = gc4 + simData.res.x;
gc7 = gc6 + 1;
}
if ( cell.x+1 >= simData.res.x ) {
gc1 = -1; gc3 = -1;
gc5 = -1; gc7 = -1;
}
if ( cell.y+1 >= simData.res.y ) {
gc2 = -1; gc3 = -1;
gc6 = -1; gc7 = -1;
}
// Sum Pressure
float sum = 0.0;
bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;
if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
// Compute Density & Pressure
sum = sum * simData.pmass * simData.poly6kern;
if ( sum == 0.0 ) sum = 1.0;
*(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
*(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;
//}
//__syncthreads ();
}
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
float press = *(float*) ((char*)p + OFFSET_PRESS);
float dens = *(float*) ((char*)p + OFFSET_DENS);
float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
float3 qeval, dist;
float c, ndistj, dsq;
float pterm, dterm, vterm;
float3* qpos;
float d = simData.sim_scale;
vterm = simData.lapkern * simData.visc;
for ( ; qndx < simData.pnts; qndx++ ) {
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
if ( qndx != pndx ) {
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
dist.x = ( p->x - qpos->x )*d; // dist in cm
dist.y = ( p->y - qpos->y )*d;
dist.z = ( p->z - qpos->z )*d;
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
if ( dsq < simData.r2 ) {
ndistj = sqrt(dsq);
c = ( simData.smooth_rad - ndistj );
dist.x = ( p->x - qpos->x )*d; // dist in cm
dist.y = ( p->y - qpos->y )*d;
dist.z = ( p->z - qpos->z )*d;
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
}
}
}
}
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
//if ( ndx < numPnt ) {
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
// Find 2x2x2 grid cells
// - Use registers only, no arrays (local-memory too slow)
int3 cell;
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
float gs = simData.smooth_rad / simData.sim_scale;
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
gc1 = gc0 + 1;
gc2 = gc0 + simData.res.x;
gc3 = gc2 + 1;
if ( cell.z+1 < simData.res.z ) {
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
gc5 = gc4 + 1;
gc6 = gc4 + simData.res.x;
gc7 = gc6 + 1;
}
if ( cell.x+1 >= simData.res.x ) {
gc1 = -1; gc3 = -1;
gc5 = -1; gc7 = -1;
}
if ( cell.y+1 >= simData.res.y ) {
gc2 = -1; gc3 = -1;
gc6 = -1; gc7 = -1;
}
// Sum Pressure
float3 force = make_float3(0,0,0);
if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
// Update Force
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
//}
//__syncthreads ();
}
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
if ( ndx < numPnt ) {
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
float3* qpos;
float press = *(float*) ((char*)pos + OFFSET_PRESS);
float dens = *(float*) ((char*)pos + OFFSET_DENS);
float3 veval = *(float3*) ((char*)pos + OFFSET_VEVAL );
float3 qeval, dist, force;
float d = simData.sim_scale;
float c, ndistj;
float pterm, dterm, vterm;
vterm = simData.lapkern * simData.visc;
int nbr = __mul24(ndx, MAX_NBR);
int ncnt = bufNeighbor[ nbr ];
force = make_float3(0,0,0);
for (int j=1; j < ncnt; j++) { // base 1, n[0] = count
ndistj = bufNdist[ nbr+j ];
qpos = (float3*) (bufPntSort + __mul24( bufNeighbor[ nbr+j ], simData.stride) );
c = ( simData.smooth_rad - ndistj );
dist.x = ( pos->x - qpos->x )*d; // dist in cm
dist.y = ( pos->y - qpos->y )*d;
dist.z = ( pos->z - qpos->z )*d;
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
}
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
}
}
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
if ( ndx < numPnt ) {
// Get particle vars
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
float3* vel = (float3*) ((char*)pos + OFFSET_VEL );
float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
float3 vcurr, vnext;
// Leapfrog integration
accel.x *= 0.00020543; // NOTE - To do: SPH_PMASS should be passed in
accel.y *= 0.00020543;
accel.z *= 0.00020543;
accel.z -= 9.8;
vcurr = *vel;
vnext.x = accel.x*dt + vcurr.x;
vnext.y = accel.y*dt + vcurr.y;
vnext.z = accel.z*dt + vcurr.z; // v(t+1/2) = v(t-1/2) + a(t) dt
accel.x = (vcurr.x + vnext.x) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
accel.y = (vcurr.y + vnext.y) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
accel.z = (vcurr.z + vnext.z) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
*vel_eval = accel;
*vel = vnext;
dt /= simData.sim_scale;
vnext.x = pos->x + vnext.x*dt;
vnext.y = pos->y + vnext.y*dt;
vnext.z = pos->z + vnext.z*dt;
*pos = vnext; // p(t+1) = p(t) + v(t+1/2) dt
}
__syncthreads ();
}
#endif