403 lines
14 KiB
Plaintext
403 lines
14 KiB
Plaintext
/*
|
|
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
|
|
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
|
|
|
|
ZLib license
|
|
This software is provided 'as-is', without any express or implied
|
|
warranty. In no event will the authors be held liable for any damages
|
|
arising from the use of this software.
|
|
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it
|
|
freely, subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not
|
|
claim that you wrote the original software. If you use this software
|
|
in a product, an acknowledgment in the product documentation would be
|
|
appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be
|
|
misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _PARTICLES_KERNEL_H_
|
|
#define _PARTICLES_KERNEL_H_
|
|
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
|
|
#include "fluid_system_host.cuh"
|
|
|
|
#define TOTAL_THREADS 65536
|
|
#define BLOCK_THREADS 256
|
|
#define MAX_NBR 80
|
|
|
|
__constant__ FluidParams simData; // simulation data (on device)
|
|
|
|
__device__ int bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
|
|
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];
|
|
|
|
#define COLOR(r,g,b) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
|
|
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )
|
|
|
|
#define NULL_HASH 333333
|
|
|
|
#define OFFSET_CLR 12
|
|
#define OFFSET_NEXT 16
|
|
#define OFFSET_VEL 20
|
|
#define OFFSET_VEVAL 32
|
|
#define OFFSET_PRESS 48
|
|
#define OFFSET_DENS 52
|
|
#define OFFSET_FORCE 56
|
|
|
|
|
|
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
|
|
{
|
|
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
|
float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
|
|
int gz = (pos->z - simData.min.z) * simData.delta.z ;
|
|
int gy = (pos->y - simData.min.y) * simData.delta.y ;
|
|
int gx = (pos->x - simData.min.x) * simData.delta.x ;
|
|
if ( ndx >= numPnt || gx < 0 || gz > simData.res.x-1 || gy < 0 || gy > simData.res.y-1 || gz < 0 || gz > simData.res.z-1 )
|
|
bufHash[ndx] = make_uint2( NULL_HASH, ndx );
|
|
else
|
|
bufHash[ndx] = make_uint2( __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx, ndx );
|
|
|
|
__syncthreads ();
|
|
}
|
|
|
|
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
|
|
{
|
|
uint grid_ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // grid cell index
|
|
|
|
bufPnts += OFFSET_NEXT;
|
|
bufGrid[grid_ndx] = -1;
|
|
for (int n=0; n < numPnt; n++) {
|
|
if ( bufHash[n].x == grid_ndx ) {
|
|
*(int*) (bufPnts + __mul24(bufHash[n].y, simData.stride)) = bufGrid[grid_ndx];
|
|
bufGrid[grid_ndx] = bufHash[n].y;
|
|
}
|
|
}
|
|
__syncthreads ();
|
|
}
|
|
|
|
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
|
|
{
|
|
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
|
|
|
uint2 bufHashSort = bufHash[ndx];
|
|
|
|
__shared__ uint sharedHash[257];
|
|
sharedHash[threadIdx.x+1] = bufHashSort.x;
|
|
if ( ndx > 0 && threadIdx.x == 0 ) {
|
|
volatile uint2 prevData = bufHash[ndx-1];
|
|
sharedHash[0] = prevData.x;
|
|
}
|
|
__syncthreads ();
|
|
|
|
if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
|
|
bufGrid [ bufHashSort.x ] = ndx;
|
|
}
|
|
if ( ndx < numPnt ) {
|
|
char* src = bufPnts + __mul24( bufHashSort.y, simData.stride );
|
|
char* dest = bufPntSort + __mul24( ndx, simData.stride );
|
|
|
|
*(float3*)(dest) = *(float3*)(src);
|
|
*(uint*) (dest + OFFSET_CLR) = *(uint*) (src + OFFSET_CLR);
|
|
*(float3*)(dest + OFFSET_VEL) = *(float3*)(src + OFFSET_VEL);
|
|
*(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);
|
|
|
|
*(float*) (dest + OFFSET_DENS) = 0.0;
|
|
*(float*) (dest + OFFSET_PRESS) = 0.0;
|
|
*(float3*) (dest + OFFSET_FORCE)= make_float3(0,0,0);
|
|
*(int*) (dest + OFFSET_NEXT) = bufHashSort.x;
|
|
}
|
|
|
|
__syncthreads ();
|
|
|
|
}
|
|
|
|
//__shared__ int ncount [ BLOCK_THREADS ];
|
|
|
|
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
|
|
{
|
|
float3* qpos;
|
|
float3 dist;
|
|
float dsq, c, sum;
|
|
float d = simData.sim_scale;
|
|
int nbr = __mul24(pndx, MAX_NBR);
|
|
|
|
sum = 0.0;
|
|
for ( ; qndx < simData.pnts; qndx++ ) {
|
|
|
|
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
|
|
|
|
if ( qndx != pndx ) {
|
|
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
|
|
|
|
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
|
dist.y = ( p->y - qpos->y )*d;
|
|
dist.z = ( p->z - qpos->z )*d;
|
|
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
|
|
if ( dsq < simData.r2 ) {
|
|
c = simData.r2 - dsq;
|
|
sum += c * c * c;
|
|
if ( bufNeighbor[nbr] < MAX_NBR ) {
|
|
bufNeighbor[ nbr+bufNeighbor[nbr] ] = qndx;
|
|
bufNdist[ nbr+bufNeighbor[nbr] ] = sqrt(dsq);
|
|
bufNeighbor[nbr]++;
|
|
}
|
|
}
|
|
}
|
|
//curr = *(int*) (bufPnts + __mul24(curr, simData.stride) + OFFSET_NEXT);
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
/*if ( ncount[threadIdx.x] < MAX_NBR ) {
|
|
bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
|
|
bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
|
|
ncount[threadIdx.x]++;
|
|
}*/
|
|
|
|
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
|
|
{
|
|
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
|
|
|
//if ( ndx < 1024 ) {
|
|
|
|
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
|
|
|
// Find 2x2x2 grid cells
|
|
// - Use registers only, no arrays (local-memory too slow)
|
|
int3 cell;
|
|
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
|
|
float gs = simData.smooth_rad / simData.sim_scale;
|
|
|
|
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
|
|
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
|
|
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
|
|
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
|
|
gc1 = gc0 + 1;
|
|
gc2 = gc0 + simData.res.x;
|
|
gc3 = gc2 + 1;
|
|
if ( cell.z+1 < simData.res.z ) {
|
|
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
|
|
gc5 = gc4 + 1;
|
|
gc6 = gc4 + simData.res.x;
|
|
gc7 = gc6 + 1;
|
|
}
|
|
if ( cell.x+1 >= simData.res.x ) {
|
|
gc1 = -1; gc3 = -1;
|
|
gc5 = -1; gc7 = -1;
|
|
}
|
|
if ( cell.y+1 >= simData.res.y ) {
|
|
gc2 = -1; gc3 = -1;
|
|
gc6 = -1; gc7 = -1;
|
|
}
|
|
// Sum Pressure
|
|
float sum = 0.0;
|
|
bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;
|
|
if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
|
|
if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
|
|
if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
|
|
if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
|
|
if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
|
|
if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
|
|
if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
|
|
if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
|
|
|
|
// Compute Density & Pressure
|
|
sum = sum * simData.pmass * simData.poly6kern;
|
|
if ( sum == 0.0 ) sum = 1.0;
|
|
*(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
|
|
*(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;
|
|
|
|
//}
|
|
//__syncthreads ();
|
|
}
|
|
|
|
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
|
|
{
|
|
float press = *(float*) ((char*)p + OFFSET_PRESS);
|
|
float dens = *(float*) ((char*)p + OFFSET_DENS);
|
|
float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
|
|
float3 qeval, dist;
|
|
float c, ndistj, dsq;
|
|
float pterm, dterm, vterm;
|
|
float3* qpos;
|
|
float d = simData.sim_scale;
|
|
|
|
vterm = simData.lapkern * simData.visc;
|
|
|
|
for ( ; qndx < simData.pnts; qndx++ ) {
|
|
|
|
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
|
|
|
|
if ( qndx != pndx ) {
|
|
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
|
|
|
|
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
|
dist.y = ( p->y - qpos->y )*d;
|
|
dist.z = ( p->z - qpos->z )*d;
|
|
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
|
|
if ( dsq < simData.r2 ) {
|
|
ndistj = sqrt(dsq);
|
|
c = ( simData.smooth_rad - ndistj );
|
|
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
|
dist.y = ( p->y - qpos->y )*d;
|
|
dist.z = ( p->z - qpos->z )*d;
|
|
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
|
|
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
|
|
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
|
|
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
|
|
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
|
|
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
|
|
{
|
|
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
|
|
|
//if ( ndx < numPnt ) {
|
|
|
|
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
|
|
|
// Find 2x2x2 grid cells
|
|
// - Use registers only, no arrays (local-memory too slow)
|
|
int3 cell;
|
|
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
|
|
float gs = simData.smooth_rad / simData.sim_scale;
|
|
|
|
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
|
|
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
|
|
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
|
|
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
|
|
gc1 = gc0 + 1;
|
|
gc2 = gc0 + simData.res.x;
|
|
gc3 = gc2 + 1;
|
|
if ( cell.z+1 < simData.res.z ) {
|
|
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
|
|
gc5 = gc4 + 1;
|
|
gc6 = gc4 + simData.res.x;
|
|
gc7 = gc6 + 1;
|
|
}
|
|
if ( cell.x+1 >= simData.res.x ) {
|
|
gc1 = -1; gc3 = -1;
|
|
gc5 = -1; gc7 = -1;
|
|
}
|
|
if ( cell.y+1 >= simData.res.y ) {
|
|
gc2 = -1; gc3 = -1;
|
|
gc6 = -1; gc7 = -1;
|
|
}
|
|
// Sum Pressure
|
|
float3 force = make_float3(0,0,0);
|
|
if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
|
|
if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
|
|
if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
|
|
if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
|
|
if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
|
|
if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
|
|
if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
|
|
if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
|
|
|
|
// Update Force
|
|
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
|
|
|
|
//}
|
|
//__syncthreads ();
|
|
}
|
|
|
|
|
|
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
|
|
{
|
|
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
|
|
|
if ( ndx < numPnt ) {
|
|
|
|
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
|
|
|
float3* qpos;
|
|
float press = *(float*) ((char*)pos + OFFSET_PRESS);
|
|
float dens = *(float*) ((char*)pos + OFFSET_DENS);
|
|
float3 veval = *(float3*) ((char*)pos + OFFSET_VEVAL );
|
|
float3 qeval, dist, force;
|
|
float d = simData.sim_scale;
|
|
float c, ndistj;
|
|
float pterm, dterm, vterm;
|
|
vterm = simData.lapkern * simData.visc;
|
|
int nbr = __mul24(ndx, MAX_NBR);
|
|
|
|
int ncnt = bufNeighbor[ nbr ];
|
|
|
|
force = make_float3(0,0,0);
|
|
for (int j=1; j < ncnt; j++) { // base 1, n[0] = count
|
|
ndistj = bufNdist[ nbr+j ];
|
|
qpos = (float3*) (bufPntSort + __mul24( bufNeighbor[ nbr+j ], simData.stride) );
|
|
c = ( simData.smooth_rad - ndistj );
|
|
dist.x = ( pos->x - qpos->x )*d; // dist in cm
|
|
dist.y = ( pos->y - qpos->y )*d;
|
|
dist.z = ( pos->z - qpos->z )*d;
|
|
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
|
|
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
|
|
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
|
|
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
|
|
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
|
|
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
|
|
}
|
|
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
|
|
{
|
|
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
|
|
|
if ( ndx < numPnt ) {
|
|
|
|
// Get particle vars
|
|
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
|
float3* vel = (float3*) ((char*)pos + OFFSET_VEL );
|
|
float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
|
|
float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
|
|
float3 vcurr, vnext;
|
|
|
|
// Leapfrog integration
|
|
accel.x *= 0.00020543; // NOTE - To do: SPH_PMASS should be passed in
|
|
accel.y *= 0.00020543;
|
|
accel.z *= 0.00020543;
|
|
accel.z -= 9.8;
|
|
|
|
vcurr = *vel;
|
|
vnext.x = accel.x*dt + vcurr.x;
|
|
vnext.y = accel.y*dt + vcurr.y;
|
|
vnext.z = accel.z*dt + vcurr.z; // v(t+1/2) = v(t-1/2) + a(t) dt
|
|
|
|
accel.x = (vcurr.x + vnext.x) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
|
accel.y = (vcurr.y + vnext.y) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
|
accel.z = (vcurr.z + vnext.z) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
|
|
|
*vel_eval = accel;
|
|
*vel = vnext;
|
|
|
|
dt /= simData.sim_scale;
|
|
vnext.x = pos->x + vnext.x*dt;
|
|
vnext.y = pos->y + vnext.y*dt;
|
|
vnext.z = pos->z + vnext.z*dt;
|
|
*pos = vnext; // p(t+1) = p(t) + v(t+1/2) dt
|
|
}
|
|
|
|
__syncthreads ();
|
|
}
|
|
|
|
#endif
|