// // Test_mindot.cpp // BulletTest // // Copyright (c) 2011 Apple Inc. // #include "LinearMath/btScalar.h" #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON) #include "Test_mindot.h" #include "vector.h" #include "Utils.h" #include "main.h" #include #include #include // reference code for testing purposes static long mindot_ref( const btSimdFloat4 *vertices, float *vec, size_t count, float *dotResult ); #ifdef __arm__ #define MAX_LOG2_SIZE 9 #else #define MAX_LOG2_SIZE 9 #endif #define MAX_SIZE (1U << MAX_LOG2_SIZE) #define LOOPCOUNT 100 int Test_mindot(void) { // Init an array flanked by guard pages btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL ); float *fp = (float*) data; long correct, test; btVector3 localScaling( 0.1f, 0.2f, 0.3f); size_t size; // Init the data size_t i; for( i = 0; i < MAX_SIZE; i++ ) { fp[4*i] = (int32_t) RANDF_16; fp[4*i+1] = (int32_t) RANDF_16; fp[4*i+2] = (int32_t) RANDF_16; fp[4*i+3] = BT_NAN; // w channel NaN } float correctDot, testDot; fp = (float*) localScaling; float maxRelativeError = 0.f; for( size = 1; size <= MAX_SIZE; size++ ) { float *in = (float*)(data + MAX_SIZE - size); size_t position; for( position = 0; position < size; position++ ) { float *biggest = in + position * 4; float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] }; biggest[0] -= LARGE_FLOAT17; biggest[1] -= LARGE_FLOAT17; biggest[2] -= LARGE_FLOAT17; biggest[3] -= LARGE_FLOAT17; correctDot = BT_NAN; testDot = BT_NAN; correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot); test = localScaling.minDot( (btVector3*) in, size, testDot); if( test < 0 || test >= size ) { vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test); continue; } if( correct != test ) { vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test, fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2], fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] ); return 1; } if( test != position ) { vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test, fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2], fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] ); return 1; } if( correctDot != testDot ) { float relativeError = btFabs((testDot - correctDot) / correctDot); if (relativeError>1e6) { vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot, fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2], fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] ); return 1; } else { if (maxRelativeError < relativeError) { maxRelativeError = relativeError; } } } memcpy( biggest, old, 16 ); } } if (maxRelativeError) { printf("Warning: relative error = %e\n", maxRelativeError); } uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)]; uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)]; size_t j, k; float *in = (float*) data; for( size = 1; size <= 32; size++ ) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTimes[size] = 0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot); currentTime = ReadTicks() - startTime; scalarTimes[size] += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTimes[size] = bestTime; else scalarTimes[size] /= 100; } uint64_t *timep = &scalarTimes[33]; for( size = 64; size <= MAX_SIZE; size *= 2 ) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; timep[0] =0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot); currentTime = ReadTicks() - startTime; timep[0] += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) timep[0] = bestTime; else timep[0] /= 100; timep++; } for( size = 1; size <= 32; size++ ) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTimes[size] = 0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) test += localScaling.minDot( (btVector3*) in, size, testDot); currentTime = ReadTicks() - startTime; vectorTimes[size] += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTimes[size] = bestTime; else vectorTimes[size] /= 100; } timep = &vectorTimes[33]; for( size = 64; size <= MAX_SIZE; size *= 2 ) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; timep[0] =0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) test += localScaling.minDot( (btVector3*) in, size, testDot); currentTime = ReadTicks() - startTime; timep[0] += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) timep[0] = bestTime; else timep[0] /= 100; timep++; } vlog( "Timing:\n" ); vlog( " size\t scalar\t vector\n" ); for( size = 1; size <= 32; size++ ) vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT ); size_t index = 33; for( size = 64; size <= MAX_SIZE; size *= 2 ) { vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT ); index++; } // Useless check to make sure that the timing loops are not optimized away if( test != correct ) vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test); GuardFree(data); return 0; } static long mindot_ref( const btSimdFloat4 *vertices, float *vec, size_t count, float *dotResult ) { const float *dp = (const float*) vertices; float minDot = BT_INFINITY; long i = 0; long ptIndex = -1; for( i = 0; i < count; i++ ) { float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2]; dp += 4; if( dot < minDot ) { minDot = dot; ptIndex = i; } } *dotResult = minDot; return ptIndex; } #endif //BT_USE_SSE