//--------------------------------------------------------------------------------------
// DXUTLockFreePipe.h
//
// See the "Lockless Programming Considerations for Xbox 360 and Microsoft Windows"
// article in the DirectX SDK for more details.
//
// http://msdn2.microsoft.com/en-us/library/bb310595.aspx
//
// XNA Developer Connection
// Copyright (C) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------
#pragma once

#include <sal.h>

#ifdef _XBOX_VER
// Prevent the CPU from rearranging loads and stores,
// sufficiently for read-acquire and write-release.
#define DXUTImportBarrier __lwsync
#define DXUTExportBarrier __lwsync
#else
#pragma pack(push)
#pragma pack(8)
#include <windows.h>
#pragma pack(pop)

extern "C"
void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier)

// Prevent the compiler from rearranging loads and stores,
// sufficiently for read-acquire and write-release. This is
// sufficient on x86 and x64.
#define DXUTImportBarrier _ReadWriteBarrier
#define DXUTExportBarrier _ReadWriteBarrier
#endif
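
// Illustrative sketch (not part of the original DXUT code): the pattern these
// barriers support. A writer publishes data with a "write-release" and a
// reader consumes it with a "read-acquire". The names g_payload, g_ready, and
// Process are hypothetical and appear only in this example:
//
//     // Writer thread:
//     g_payload = 42;             // write the data
//     DXUTExportBarrier();        // write-release: the data is written before the flag
//     g_ready = 1;                // publish the flag
//
//     // Reader thread:
//     if( g_ready )               // read the flag
//     {
//         DXUTImportBarrier();    // read-acquire: the flag is read before the data
//         Process( g_payload );   // guaranteed to observe the value 42
//     }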

//
// Pipe class designed for use by at most two threads: one reader, one writer.
// Access by more than two threads isn't guaranteed to be safe.
//
// In order to provide efficient access, the log2 of the buffer size in bytes
// is passed as a template parameter and clamped to at most 31, so the buffer
// size is always a power of two.
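// For example, DXUTLockFreePipe<16> provides a 65536-byte (2^16) buffer.
//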

template <BYTE cbBufferSizeLog2> class DXUTLockFreePipe
{
public:
    DXUTLockFreePipe() : m_readOffset( 0 ),
                         m_writeOffset( 0 )
    {
    }

    DWORD GetBufferSize() const
    {
        return c_cbBufferSize;
    }

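    // Returns the number of bytes currently available to read. The unsigned
    // subtraction is correct even after the offsets wrap around; see the
    // comments in Read() below.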
    __forceinline unsigned long BytesAvailable() const
    {
        return m_writeOffset - m_readOffset;
    }

    bool __forceinline Read( void* pvDest, unsigned long cbDest )
    {
        // Store the read and write offsets into local variables--this is
        // essentially a snapshot of their values so that they stay constant
        // for the duration of the function (and so we don't end up with cache
        // misses due to false sharing).
        DWORD readOffset = m_readOffset;
        DWORD writeOffset = m_writeOffset;

        // Compare the two offsets to see if we have anything to read.
        // Note that we don't do anything to synchronize the offsets here.
        // Really there's not much we *can* do unless we're willing to completely
        // synchronize access to the entire object. We have to assume that as we
        // read, someone else may be writing, and the write offset we have now
        // may be out of date by the time we read it. Fortunately that's not a
        // very big deal. We might miss reading some data that was just written.
        // But the assumption is that we'll be back before long to grab more data
        // anyway.
        //
        // Note that this comparison works because we're careful to constrain
        // the total buffer size to be a power of 2, which means it will divide
        // evenly into ULONG_MAX+1. That, and the fact that the offsets are
        // unsigned, means that the calculation returns correct results even
        // when the values wrap around.
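        //
        // Worked example (hypothetical values): suppose the writer has wrapped,
        // so writeOffset == 0x00000002 while readOffset == 0xFFFFFFFE. Unsigned
        // subtraction gives 0x00000002 - 0xFFFFFFFE == 4, which is exactly the
        // number of unread bytes despite the wrap.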
        DWORD cbAvailable = writeOffset - readOffset;
        if( cbDest > cbAvailable )
        {
            return false;
        }

        // The data has been made available, but we need to make sure
        // that our view on the data is up to date -- at least as up to
        // date as the control values we just read. We need to prevent
        // the compiler or CPU from moving any of the data reads before
        // the control value reads. This import barrier serves that
        // purpose, on Xbox 360 and on Windows.
        //
        // Reading a control value and then having a barrier is known
        // as a "read-acquire."
        DXUTImportBarrier();

        unsigned char* pbDest = ( unsigned char* )pvDest;

        unsigned long actualReadOffset = readOffset & c_sizeMask;
        unsigned long bytesLeft = cbDest;

        //
        // Copy from the tail, then the head. Note that there's no explicit
        // check to see if the write offset comes between the read offset
        // and the end of the buffer--that particular condition is implicitly
        // checked by the comparison with cbAvailable, above. If copying
        // cbDest bytes off the tail would cause us to cross the write offset,
        // then that comparison would have failed, since it would imply that
        // there were fewer than cbDest bytes available to read.
        //
        unsigned long cbTailBytes = min( bytesLeft, c_cbBufferSize - actualReadOffset );
        memcpy( pbDest, m_pbBuffer + actualReadOffset, cbTailBytes );
        bytesLeft -= cbTailBytes;

        if( bytesLeft )
        {
            memcpy( pbDest + cbTailBytes, m_pbBuffer, bytesLeft );
        }

        // When we update the read offset we are, effectively, 'freeing' buffer
        // memory so that the writing thread can use it. We need to make sure that
        // we don't free the memory before we have finished reading it. That is,
        // we need to make sure that the write to m_readOffset can't get reordered
        // above the reads of the buffer data. The only way to guarantee this is to
        // have an export barrier to prevent both compiler and CPU rearrangements.
        DXUTExportBarrier();

        // Advance the read offset. From the CPU's point of view this is several
        // operations--read, modify, store--and we'd normally want to make sure that
        // all of the operations happened atomically. But in the case of a single
        // reader, only one thread updates this value, so the only operation that
        // must be atomic is the store. That's lucky, because 32-bit aligned stores
        // are atomic on all modern processors.
        //
        readOffset += cbDest;
        m_readOffset = readOffset;

        return true;
    }

    bool __forceinline Write( const void* pvSrc, unsigned long cbSrc )
    {
        // Reading the read offset here has the same caveats as reading
        // the write offset had in the Read() function above.
        DWORD readOffset = m_readOffset;
        DWORD writeOffset = m_writeOffset;

        // Compute the available write size. This calculation relies on
        // the fact that the buffer size is always a power of 2 and the
        // offsets are unsigned integers, so that when the write offset
        // wraps around, the subtraction still yields a value (assuming
        // we haven't messed up somewhere else) between 0 and c_cbBufferSize.
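        //
        // Worked example (hypothetical values, with c_cbBufferSize == 65536):
        // if writeOffset - readOffset == 100, then 100 bytes are pending and
        // cbAvailable == 65536 - 100 == 65436 bytes of free space.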
        DWORD cbAvailable = c_cbBufferSize - ( writeOffset - readOffset );
        if( cbSrc > cbAvailable )
        {
            return false;
        }

        // It is theoretically possible for the writes of the data to be reordered
        // above the reads that check whether space is available. Improbable,
        // perhaps, but possible. This barrier guarantees that the reordering
        // will not happen.
        DXUTImportBarrier();

        // Write the data
        const unsigned char* pbSrc = ( const unsigned char* )pvSrc;
        unsigned long actualWriteOffset = writeOffset & c_sizeMask;
        unsigned long bytesLeft = cbSrc;

        // See the explanation in the Read() function as to why we don't
        // explicitly check against the read offset here.
        unsigned long cbTailBytes = min( bytesLeft, c_cbBufferSize - actualWriteOffset );
        memcpy( m_pbBuffer + actualWriteOffset, pbSrc, cbTailBytes );
        bytesLeft -= cbTailBytes;

        if( bytesLeft )
        {
            memcpy( m_pbBuffer, pbSrc + cbTailBytes, bytesLeft );
        }

        // Now it's time to update the write offset, but since the updated position
        // of the write offset will imply that there's data to be read, we need to
        // make sure that the data all actually gets written before the update to
        // the write offset. The writes could be reordered by the compiler (on any
        // platform) or by the CPU (on Xbox 360). We need a barrier which prevents
        // the writes from being reordered past each other.
        //
        // Having a barrier and then writing a control value is called "write-release."
        DXUTExportBarrier();

        // See the comments in Read() as to why this operation isn't interlocked.
        writeOffset += cbSrc;
        m_writeOffset = writeOffset;

        return true;
    }

private:
    // Values derived from the buffer size template parameter
    //
    const static BYTE c_cbBufferSizeLog2 = min( cbBufferSizeLog2, 31 );
    // Use an unsigned literal so that a shift by 31 doesn't overflow a signed int.
    const static DWORD c_cbBufferSize = ( 1UL << c_cbBufferSizeLog2 );
    const static DWORD c_sizeMask = c_cbBufferSize - 1;

    // Leave these private and undefined to prevent their use
    DXUTLockFreePipe( const DXUTLockFreePipe& );
    DXUTLockFreePipe& operator =( const DXUTLockFreePipe& );

    // Member data
    //
    BYTE m_pbBuffer[c_cbBufferSize];
    // Note that these offsets are not clamped to the buffer size.
    // Instead the calculations rely on wrapping at ULONG_MAX+1.
    // See the comments in Read() for details.
    volatile DWORD __declspec( align( 4 ) ) m_readOffset;
    volatile DWORD __declspec( align( 4 ) ) m_writeOffset;
};
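
//
// Usage sketch (illustrative, not part of the original DXUT code): one thread
// calls Write() while another calls Read(). The names g_MsgPipe, ProducerThread,
// and ConsumerThread are hypothetical.
//
//     DXUTLockFreePipe<16> g_MsgPipe;     // 2^16 = 65536-byte buffer
//
//     void ProducerThread()               // the single writer
//     {
//         DWORD msg = 42;
//         while( !g_MsgPipe.Write( &msg, sizeof( msg ) ) )
//         {
//             // Buffer is full; retry later or do other work.
//         }
//     }
//
//     void ConsumerThread()               // the single reader
//     {
//         DWORD msg;
//         if( g_MsgPipe.Read( &msg, sizeof( msg ) ) )
//         {
//             // Process msg.
//         }
//     }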