/****************************************************************************
 *                                                                          *
 *  Module  :   _ppprtclsxbox.c                                              *
 *                                                                          *
 *  Purpose :   Xbox render pipelines and vertex buffers for particles.     *
 *                                                                          *
 ****************************************************************************/

/****************************************************************************
 Includes
 */
#include <xtl.h>
#include <d3d8.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/* RW includes */
#include "rpplugin.h"
#include <rpdbgerr.h>
#include <rwcore.h>
#include <rpworld.h>
#include <rprandom.h>

#include "prvprtcl.h"
#include <rpprtsys.h>
#include "ppprtclsxbox.h"

/* Pre compiled vertex shader */
#include "vertdefs.h"
#include "partshdr.h"

#ifdef RWMETRICS
#include "xbmetric.h"
#endif /* RWMETRICS */

/*
Per-vertex input of the particle vertex shader, which converts this initial
state into an animated particle system for rendering.
To render as quads each particle is replicated 4 times, the copies differing
only in their texture coordinates.
The member order mirrors the register list in ParticlesVertexShaderDecl, and
sizeof(ParticleVertex) is used as the stream stride, so do not reorder or pad.
*/
typedef struct ParticleVertex
{
    RwV3d       position;           /* Position at emission */
    RwV3d       velocity;           /* Velocity at emission */
    RwReal      startTime;          /* Time offset of this particle within the cycle */
    RwReal      u;                  /* Texture coordinate U */
    RwReal      v;                  /* Texture coordinate V */
} ParticleVertex;

/*
Vertex declaration handed to D3DDevice_CreateVertexShader: maps the members of
ParticleVertex, in order, from stream 0 onto the shader's input registers.
*/
static DWORD ParticlesVertexShaderDecl[] =
{
    D3DVSD_STREAM( 0 ),
    D3DVSD_REG( VSD_REG_POS,        D3DVSDT_FLOAT3 ),     /* Position */
    D3DVSD_REG( VSD_REG_VEL,        D3DVSDT_FLOAT3 ),     /* Velocity */
    D3DVSD_REG( VSD_REG_STARTTIME,  D3DVSDT_FLOAT1 ),     /* Start time */
    D3DVSD_REG( VSD_REG_TEXCOORDS,  D3DVSDT_FLOAT2 ),     /* Texture coordinates */
    D3DVSD_END()
};

/* Handle of the particle vertex shader; 0 until _rpParticlesSetupPipes creates it */
static DWORD    ParticlesVertexShader = 0;

/*
Build switch: define CPU_VERTEX_SHADER to animate the particles on the CPU
instead of in the GPU vertex shader. Disabled by default:

#define CPU_VERTEX_SHADER
*/

#ifdef CPU_VERTEX_SHADER
/*
The CPU version of the vertex shader converts each ParticleVertex
into one of these for rendering as quads.
*/
typedef struct RenderedVertex
{
    RwReal      x, y, z;        /* The un-transformed position for the vertex */
    RwRGBA      color;          /* The vertex color */
    RwReal      u;              /* Texture coordinate U */
    RwReal      v;              /* Texture coordinate V */
                                /* Padding to 32 bytes seems to make it a bit slower, so don't! */
} RenderedVertex;

/*
Flexible vertex format code describing RenderedVertex:
position, diffuse colour and one 2D texture coordinate set.
*/
#define RenderedVertexFVF (D3DFVF_XYZ |D3DFVF_DIFFUSE | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0))
#endif

/*
Per-geometry plugin data: the vertex buffer(s) holding the particle info
that the submit node feeds to the vertex shader. A pointer to one of these
is stored in every RpGeometry at GParticlesGeomDataOffset.
*/
typedef struct RpParticlesGeomData
{
    /* Vertex buffer for particles info to be passed to a shader */
    LPDIRECT3DVERTEXBUFFER8 particleVertexBuffer;

#ifdef CPU_VERTEX_SHADER
    /* Vertex buffer for software vertex shader to create quads for display */
    LPDIRECT3DVERTEXBUFFER8 renderedVertexBuffer;
#endif

} RpParticlesGeomData;

/* Byte offset of our plugin slot inside RpGeometry, set by _rpParticleExtendGeom */
static RwInt32 GParticlesGeomDataOffset;

/*
 * Locate the plugin slot of a geometry.
 *
 * Returns the address of the RpParticlesGeomData pointer that this plugin
 * reserved inside the geometry (not the data itself).
 */
static RpParticlesGeomData **
PARTICLESGEOMGETDATA( RpGeometry *geometry )
{
    RwUInt8 *pluginSlot = (RwUInt8 *)geometry;

    pluginSlot += GParticlesGeomDataOffset;

    return (RpParticlesGeomData **)pluginSlot;
}

/*
 * RefreshParticleVertices
 *
 * Regenerates the initial state (position, velocity, start time) of every
 * particle in the vertex buffer read by the particle vertex shader.
 *
 * particlesData        - emitter description; work is only done when its
 *                        rpPARTICLESDIRTY flag is set, and the flag is
 *                        cleared once regeneration has been committed to.
 * particleVertexBuffer - buffer of numParticles * 4 ParticleVertex entries.
 *                        May be NULL (buffer creation can fail), in which
 *                        case nothing is touched and the dirty flag stays
 *                        set so a later call can still regenerate.
 *
 * The texture coordinates stored in the buffer are not modified here.
 */
static void
RefreshParticleVertices( RpParticlesData *particlesData, LPDIRECT3DVERTEXBUFFER8 particleVertexBuffer )
{
    ParticleVertex *particleVertex;

    RwInt32 i;

    RwReal  width, length;

    RwReal  angle, speedInit, speedVar, dampening;

    RwReal  duration;

    if (!(particlesData->flags & rpPARTICLESDIRTY))
    {
        /* nothing to do */
        return;
    }

    if (NULL == particleVertexBuffer)
    {
        /* no buffer to fill: locking a NULL buffer would crash, so bail out
         * before the dirty flag is cleared */
        return;
    }

    /* something's changed, we need to lock vertex buffer & regenerate particle vertices */
    particlesData->flags = (RpParticlesFlag)
        (~rpPARTICLESDIRTY & particlesData->flags);

    D3DVertexBuffer_Lock( particleVertexBuffer, 0, 0, (RwUInt8**)&particleVertex, 0 );

    width = particlesData->emitterSize.x;
    length = particlesData->emitterSize.y;

    angle = particlesData->angle;
    speedInit = particlesData->speed;
    speedVar = particlesData->speedVariation;
    dampening = particlesData->dampening;

    duration = particlesData->duration;

    for (i = 0; i<particlesData->numParticles; i++)
    {
        RwInt32 j;
        RwInt32 dice;

        RwV3d   pos;

        RwV3d               vel;
        RwReal              randX, randY, randZ;
        RwReal              angleRangeX, angleRangeY;
        RwReal              speed;

        RwReal              startTime;

        /*
         * NOTE: the random numbers below are drawn in a fixed order
         * (pos.x, pos.y, randX, randY, randZ, startTime); keep it that way
         * so that a given random sequence yields the same particles.
         */

        /* initial position: uniformly spread over the emitter rectangle,
         * centred on the origin of the XY plane */
        dice = RpRandom();
        pos.x = ((((RwReal) (dice) * width) / (RwReal) (RPRANDMAX)) -
                  (width * ((RwReal) 0.5)));

        dice = RpRandom();
        pos.y = ((((RwReal) (dice) * length) / (RwReal) (RPRANDMAX)) -
                  (length * ((RwReal) 0.5)));

        pos.z = ((RwReal) 0);

        /* initial velocity: three random fractions in [0, 1] */
        dice = RpRandom();
        randX = (((RwReal) (dice)) / ((RwReal) (RPRANDMAX)));
        dice = RpRandom();
        randY = (((RwReal) (dice)) / ((RwReal) (RPRANDMAX)));
        dice = RpRandom();
        randZ = (((RwReal) (dice)) / ((RwReal) (RPRANDMAX)));

        /* remap the X/Y fractions to [-1, 1] */
        angleRangeX = ((randX * ((RwReal) 2)) - ((RwReal) 1));
        angleRangeY = ((randY * ((RwReal) 2)) - ((RwReal) 1));

        /* direction: deflected from +Z by up to 'angle' in X and Y */
        vel.x = angle * angleRangeX;
        vel.y = angle * angleRangeY;
        vel.z = ((RwReal) 1) - ((vel.x * vel.x) + (vel.y * vel.y));

        /* speed: speedInit +/- half of speedVar ... */
        speed = ((speedInit - (speedVar / ((RwReal) 2))) +
                 (speedVar * randZ));
        /* ... scaled by (1 + dampening * min(squared deflection, 1)) */
        speed *= (((RwReal) 1) -
                  (RwRealMin2((angleRangeX * angleRangeX) +
                              (angleRangeY * angleRangeY),
                              ((RwReal) 1)) * dampening * ((RwReal) -
                                                           1)));

        RwV3dScale(&vel, &vel, speed);

        /* start time: random offset within [0, duration] */
        dice = RpRandom();
        startTime = (((RwReal) dice * duration) / (RwReal) RPRANDMAX);

        /* the 4 corners of the particle's quad share the same state */
        for (j=0; j<4; j++)
        {
            particleVertex[i * 4 + j].position = pos;
            particleVertex[i * 4 + j].velocity = vel;
            particleVertex[i * 4 + j].startTime = startTime;
        }
    }

    D3DVertexBuffer_Unlock( particleVertexBuffer );
}

#ifdef CPU_VERTEX_SHADER
/*
 * CPUVertexShader
 *
 * Software replacement for the particle vertex shader (only built when
 * CPU_VERTEX_SHADER is defined). For every quad corner it evaluates the
 * particle's current position, colour and size from the initial state in
 * particleVertexBuffer, writes the result to renderedVertexBuffer, then
 * selects that buffer and the RenderedVertexFVF format for drawing.
 *
 * particlesData     - emitter parameters (time, flightTime, force, colours,
 *                     growth, particleSize, numParticles).
 * particlesGeomData - holds both vertex buffers.
 * up, right         - billboard axes used to expand each corner.
 */
static void
CPUVertexShader( RpParticlesData *particlesData, RpParticlesGeomData *particlesGeomData,
    RwV3d *up, RwV3d *right )
{
    ParticleVertex* particleVertices;
    RenderedVertex* pVertices;
    RwInt32 i;

    RwReal invFlightTime;

    D3DVertexBuffer_Lock( particlesGeomData->particleVertexBuffer, 0, 0, (RwUInt8**)&particleVertices, 0 );

    D3DVertexBuffer_Lock( particlesGeomData->renderedVertexBuffer,
        0, 0, (RwUInt8**)&pVertices, 0 );

    invFlightTime = ((RwReal) 1) / particlesData->flightTime;

    for (i=0; i<particlesData->numParticles * 4; i++)
    {
        RwReal              prtlcTime, tNorm, t2;
        RwRGBAReal          colS, colE, colReal;
        RwReal              growth;
        RwV2d               size;
        RwV3d               pos;

        RwReal              uScale, vScale;

        /* particle time: start offset plus system time, wrapped once */
        prtlcTime = particleVertices[i].startTime;
        prtlcTime += particlesData->time;
        if (prtlcTime > particlesData->flightTime)
        {
            prtlcTime -= particlesData->flightTime;
        }
        tNorm = prtlcTime * invFlightTime;

        /* particle position: velocity * t + force * t^2 + start position
         * (the force is applied as given, without a 0.5 factor) */
        t2 = prtlcTime * prtlcTime;
        pos.x = (particleVertices[i].velocity.x * prtlcTime) + (particlesData->force.x * t2) + particleVertices[i].position.x;
        pos.y = (particleVertices[i].velocity.y * prtlcTime) + (particlesData->force.y * t2) + particleVertices[i].position.y;
        pos.z = (particleVertices[i].velocity.z * prtlcTime) + (particlesData->force.z * t2) + particleVertices[i].position.z;

        /* particle colour: linear blend from startCol to endCol */
        RwRGBARealScaleMacro(&colS, &particlesData->startCol, ((RwReal) 1) - tNorm);
        RwRGBARealScaleMacro(&colE, &particlesData->endCol, tNorm);
        RwRGBARealAddMacro(&colReal, &colS, &colE);

        /*
         * Red and blue are stored swapped.
         * NOTE(review): presumably so the RwRGBA bytes match the byte order
         * D3D expects for a diffuse colour - confirm before changing.
         *
         * Every channel is converted through RwUInt8: converting a value
         * above 127 through a signed 8-bit type is out of range.
         */
        pVertices[i].color.red = (RwUInt8) colReal.blue;
        pVertices[i].color.green = (RwUInt8) colReal.green;
        pVertices[i].color.blue = (RwUInt8) colReal.red;
        pVertices[i].color.alpha = (RwUInt8) colReal.alpha;

        /* particle size: half-extent scaled from 1 to 'growth' over the flight */
        growth = 0.5f * (((particlesData->growth - 1.0f) * tNorm) + 1.0f);
        RwV2dScale(&size, &particlesData->particleSize, growth);

        /* figure out quad vertex coordinates from position, size & texture u v:
         * u/v of 0 or 1 select the +/- side along right/up */
        uScale = size.x * (1.0f - 2.0f * particleVertices[i].u);
        vScale = size.y * (1.0f - 2.0f * particleVertices[i].v);

        pVertices[i].x = pos.x + vScale * up->x + uScale * right->x;
        pVertices[i].y = pos.y + vScale * up->y + uScale * right->y;
        pVertices[i].z = pos.z + vScale * up->z + uScale * right->z;
    }

    D3DVertexBuffer_Unlock( particlesGeomData->renderedVertexBuffer );

    D3DVertexBuffer_Unlock( particlesGeomData->particleVertexBuffer );

    D3DDevice_SetStreamSource( 0, particlesGeomData->renderedVertexBuffer, sizeof(RenderedVertex) );

    RwXboxSetCurrentVertexShader( RenderedVertexFVF );
}
#endif

/*
 * GetWorldViewProjMatrix
 *
 * Builds the combined projection * view * world matrix for an atomic, using
 * the current camera, in the layout uploaded to the vertex shader constants.
 *
 * atomic     - atomic whose frame LTM supplies the world transform.
 * destMatrix - receives the combined matrix.
 */
static void
GetWorldViewProjMatrix( RpAtomic *atomic, D3DMATRIX *destMatrix )
{
    RwCamera    *curCamera;
    RwMatrix    *ltm;
    RwMatrix    camInverse;
    RwReal      zScale;

    D3DMATRIX   proj;
    D3DMATRIX   view;
    D3DMATRIX   world;
    D3DMATRIX   viewWorld;

    curCamera = RwCameraGetCurrentCamera();

    /*
     * Projection: start from all zeroes and fill in the five live entries
     */
    memset(&proj, 0, sizeof(proj));

    zScale = curCamera->farPlane / (curCamera->farPlane - curCamera->nearPlane);

    proj.m[0][0] = curCamera->recipViewWindow.x;
    proj.m[1][1] = curCamera->recipViewWindow.y;
    proj.m[2][2] = zScale;
    proj.m[2][3] = -zScale * curCamera->nearPlane;
    proj.m[3][2] = 1.0f;

    /*
     * View: inverse of the camera LTM, written one source vector at a time.
     * The first row (x components) is negated.
     */
    ltm = RwFrameGetLTM(RwCameraGetFrame(curCamera));

    RwMatrixSetIdentity(&camInverse);
    RwMatrixInvert(&camInverse, ltm);

    view.m[0][0] = -camInverse.right.x;
    view.m[1][0] = camInverse.right.y;
    view.m[2][0] = camInverse.right.z;
    view.m[3][0] = 0.0f;

    view.m[0][1] = -camInverse.up.x;
    view.m[1][1] = camInverse.up.y;
    view.m[2][1] = camInverse.up.z;
    view.m[3][1] = 0.0f;

    view.m[0][2] = -camInverse.at.x;
    view.m[1][2] = camInverse.at.y;
    view.m[2][2] = camInverse.at.z;
    view.m[3][2] = 0.0f;

    view.m[0][3] = -camInverse.pos.x;
    view.m[1][3] = camInverse.pos.y;
    view.m[2][3] = camInverse.pos.z;
    view.m[3][3] = 1.0f;

    /*
     * World: the atomic's LTM, same layout as the view matrix
     */
    ltm = RwFrameGetLTM(RpAtomicGetFrame(atomic));

    world.m[0][0] = ltm->right.x;
    world.m[1][0] = ltm->right.y;
    world.m[2][0] = ltm->right.z;
    world.m[3][0] = 0.0f;

    world.m[0][1] = ltm->up.x;
    world.m[1][1] = ltm->up.y;
    world.m[2][1] = ltm->up.z;
    world.m[3][1] = 0.0f;

    world.m[0][2] = ltm->at.x;
    world.m[1][2] = ltm->at.y;
    world.m[2][2] = ltm->at.z;
    world.m[3][2] = 0.0f;

    world.m[0][3] = ltm->pos.x;
    world.m[1][3] = ltm->pos.y;
    world.m[2][3] = ltm->pos.z;
    world.m[3][3] = 1.0f;

    /* dest = proj * (view * world) */
    D3DXMatrixMultiply(&viewWorld, &view, &world);
    D3DXMatrixMultiply(destMatrix, &proj, &viewWorld);
}

/*
 * GPUVertexShader
 *
 * Uploads the per-frame constants of the particle vertex shader, then selects
 * the particle vertex buffer and the shader itself for the following draw.
 *
 * particlesData     - emitter parameters copied into shader constants.
 * particlesGeomData - supplies the particle vertex buffer (stream 0).
 * up, right         - billboard axes.
 * atom              - atomic used to build the world-view-projection matrix.
 */
static void
GPUVertexShader(RpParticlesData *particlesData, RpParticlesGeomData *particlesGeomData,
    RwV3d *up, RwV3d *right, RpAtomic *atom )
{
    /* (time, flightTime, 1 / flightTime, growth - 1) */
    RwReal      timing[4] = { particlesData->time,
                              particlesData->flightTime,
                              1.0f / particlesData->flightTime,
                              particlesData->growth - 1.0f };

    /* (force.x, force.y, force.z, 0) */
    RwReal      accel[4] = { particlesData->force.x,
                             particlesData->force.y,
                             particlesData->force.z,
                             0.0f };

    /* (size.y, size.x, 0, 0) - note the y-then-x order */
    RwReal      quadSize[4] = { particlesData->particleSize.y,
                                particlesData->particleSize.x,
                                0.0f,
                                0.0f };

    /* handy numeric constants for the shader */
    RwReal      numbers[4] = { 1.0f, 0.5f, 2.0f, 0.0f };

    /* billboard axes padded to 4 components */
    RwReal      upVec[4] = { up->x, up->y, up->z, 0.0f };
    RwReal      rightVec[4] = { right->x, right->y, right->z, 0.0f };

    /* start colour scaled by 1/255 */
    RwReal      colFrom[4] = { particlesData->startCol.red / 255.0f,
                               particlesData->startCol.green / 255.0f,
                               particlesData->startCol.blue / 255.0f,
                               particlesData->startCol.alpha / 255.0f };

    /* (end colour - start colour) scaled by 1/255 */
    RwReal      colStep[4] = { (particlesData->endCol.red - particlesData->startCol.red) / 255.0f,
                               (particlesData->endCol.green - particlesData->startCol.green) / 255.0f,
                               (particlesData->endCol.blue - particlesData->startCol.blue) / 255.0f,
                               (particlesData->endCol.alpha - particlesData->startCol.alpha) / 255.0f };

    D3DMATRIX   wvp;

    /* one constant register each */
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_TIMEANDGROWTH, (void *)timing, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_FORCE, (void *)accel, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_STARTCOLOR, (void *)colFrom, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_DELCOLOR, (void *)colStep, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_PARTICLESIZE, (void *)quadSize, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_ONEANDHALFANDTWO, (void *)numbers, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_UP, (void *)upVec, 1);
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_RIGHT, (void *)rightVec, 1);

    /* combined transform, VSCONST_REG_TRANSFORM_SIZE registers wide */
    GetWorldViewProjMatrix( atom, &wvp );

    D3DDevice_SetVertexShaderConstant(VSCONST_REG_TRANSFORM_OFFSET,
                                      (void *)&wvp,
                                      VSCONST_REG_TRANSFORM_SIZE);

    /* feed the particle buffer through the particle shader */
    D3DDevice_SetStreamSource( 0, particlesGeomData->particleVertexBuffer, sizeof(ParticleVertex) );

    RwXboxSetCurrentVertexShader( ParticlesVertexShader );
}

/****************************************************************************
 _rwXboxSetWorldMatrix 
 
 NB: For the moment, this function is NOT API.  It's not particularly useful
 for external developers.

 On entry   : World matrix
 On exit    : Matrix pointer on success.
 */

const RwMatrix     *
_rwXboxSetWorldMatrix(const RwMatrix * matrix)
{
    RWFUNCTION(RWSTRING("_rwXboxSetWorldMatrix"));

    /* Initialise local to camera matrix */
    if (matrix)
    {
        static D3DMATRIX    d3dMatrix = { 0.0f, 0.0f, 0.0f, 0.0f,
            0.0f, 0.0f, 0.0f, 0.0f,
            0.0f, 0.0f, 0.0f, 0.0f,
            0.0f, 0.0f, 0.0f, 1.0f
        };

        d3dMatrix._11 = matrix->right.x;
        d3dMatrix._12 = matrix->right.y;
        d3dMatrix._13 = matrix->right.z;
        d3dMatrix._21 = matrix->up.x;
        d3dMatrix._22 = matrix->up.y;
        d3dMatrix._23 = matrix->up.z;
        d3dMatrix._31 = matrix->at.x;
        d3dMatrix._32 = matrix->at.y;
        d3dMatrix._33 = matrix->at.z;
        d3dMatrix._41 = matrix->pos.x;
        d3dMatrix._42 = matrix->pos.y;
        d3dMatrix._43 = matrix->pos.z;

        D3DDevice_SetTransform(D3DTS_WORLD, &d3dMatrix);
    }
    else
    {
        /* Identity matrix */
        static D3DMATRIX    d3dNativeIdentityMatrix =
          { 1.0f, 0.0f, 0.0f, 0.0f,
            0.0f, 1.0f, 0.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 0.0f, 1.0f
        };

        D3DDevice_SetTransform(D3DTS_WORLD, &d3dNativeIdentityMatrix);
    }

    RWRETURN(matrix);
}

/****************************************************************************
 SubmitNode

 Body of the particle submit pipeline node. Sets up texture and render
 state from the atomic's first material, refreshes the particle vertex
 buffer if the emitter is dirty, runs the (GPU or CPU) particle shader and
 draws every particle as a quad.

 The draw is skipped - but render state is still restored and the packet
 still dispatched - when the particle data, its vertex buffer(s) or the
 temporary matrix are not available.

 On entry   : self   - this pipeline node instance
            : params - node parameters; their data is the RpAtomic to render
 On exit    : TRUE
 */
static              RwBool
SubmitNode(RxPipelineNodeInstance * self,
           const RxPipelineNodeParam * params)
{
    RxPacket           *packet;
    RxCluster          *meshState;
    RxMeshStateVector  *meshData = NULL;

    RpAtomic           *atomic;
    RpGeometry         *geom;
    RpMaterial         *mat;
    RwTexture          *tex;

    RwBool              zWriteEnable = TRUE, shadeMode;

    RWFUNCTION(RWSTRING("SubmitNode"));

    packet = RxPacketFetch(self);
    RWASSERT(NULL != packet);

    /* cluster 1 of this node is the mesh state */
    meshState = RxClusterLockRead(packet, 1);
    RWASSERT(NULL != meshState);
    meshData = RxClusterGetCursorData(meshState, RxMeshStateVector);
    RWASSERT(NULL != meshData);

    atomic = (RpAtomic *) RxPipelineNodeParamGetData(params);
    RWASSERT(NULL != atomic);

    geom = RpAtomicGetGeometry(atomic);
    RWASSERT(NULL != geom);

    mat = RpGeometryGetMaterial( geom,  0 );
    RWASSERT(NULL != mat);

    tex = RpMaterialGetTexture( mat );

    if (tex)
    {
        RwRaster *ras = RwTextureGetRaster( tex );

        RwRenderStateSet(rwRENDERSTATETEXTURERASTER,        (void *)ras);
        RwRenderStateSet(rwRENDERSTATETEXTUREFILTER,        (void *)RwTextureGetFilterMode(tex));

        if (RwTextureGetAddressingU(tex) == RwTextureGetAddressingV(tex))
        {
            RwRenderStateSet(rwRENDERSTATETEXTUREADDRESS,   (void *)RwTextureGetAddressingU(tex));
        }
        else
        {
            RwRenderStateSet(rwRENDERSTATETEXTUREADDRESSU,  (void *)RwTextureGetAddressingU(tex));
            RwRenderStateSet(rwRENDERSTATETEXTUREADDRESSV,  (void *)RwTextureGetAddressingV(tex));
        }
    }
    else
    {
        RwRenderStateSet(rwRENDERSTATETEXTURERASTER,        (void *) NULL);
    }

    /* remember shade mode and Z-write so they can be put back afterwards */
    RwRenderStateSet(rwRENDERSTATEVERTEXALPHAENABLE,        (void *) TRUE);
    RwRenderStateGet(rwRENDERSTATESHADEMODE,                (void *)&shadeMode);
    RwRenderStateSet(rwRENDERSTATESHADEMODE,                (void *)rwSHADEMODEFLAT);
    RwRenderStateGet(rwRENDERSTATEZWRITEENABLE,             (void *)&zWriteEnable);
    RwRenderStateSet(rwRENDERSTATEZWRITEENABLE,             (void *)FALSE);

    /* Set up the D3D engine with the correct matrix though */
    if (!rwMatrixTestFlags
        (&meshData->Obj2World, rwMATRIXINTERNALIDENTITY))
    {
        _rwXboxSetWorldMatrix(&meshData->Obj2World);
    }

    {
        /* Variables to transform particles */
        RwCamera           *cam;
        RwFrame            *camFrame;
        RwMatrix           *camLTM, *particleMatrix;
        RwV3d               up, right;

        RpParticlesGeomData *particlesGeomData;
        RpParticlesData    *particlesData;

        RwBool              canDraw;

        particlesGeomData = *PARTICLESGEOMGETDATA(geom);
        RWASSERT(NULL != particlesGeomData);

        particlesData = *PARTICLESATOMICGETDATA(atomic);
        RWASSERT(NULL != particlesData);

        /* Vertex buffer creation can fail and leave a NULL buffer behind;
         * never hand that to D3D */
        canDraw = (NULL != particlesGeomData) &&
                  (NULL != particlesData) &&
                  (NULL != particlesGeomData->particleVertexBuffer);
        #ifdef CPU_VERTEX_SHADER
        canDraw = canDraw &&
                  (NULL != particlesGeomData->renderedVertexBuffer);
        #endif

        /* temporary matrix for the world-to-object transform */
        particleMatrix = canDraw ? RwMatrixCreate() : (RwMatrix *)NULL;

        if (NULL != particleMatrix)
        {
            cam = RwCameraGetCurrentCamera();
            RWASSERT(cam);

            camFrame = RwCameraGetFrame(cam);
            RWASSERT(camFrame);

            camLTM = RwFrameGetLTM(camFrame);
            RWASSERT(camLTM);

            /* camera up/right axes, brought into the particles' object space */
            up = *RwMatrixGetUp(camLTM);
            right = *RwMatrixGetRight(camLTM);

            RwMatrixInvert(particleMatrix, &meshData->Obj2World);

            RwV3dTransformVectors(&up, &up, 1, particleMatrix);
            RwV3dTransformVectors(&right, &right, 1, particleMatrix);

            RwMatrixDestroy(particleMatrix);

            /* half-length axes: corners lie half a size either side of centre */
            RwV3dNormalize(&up, &up);
            RwV3dNormalize(&right, &right);

            RwV3dScale(&up, &up, ((RwReal) 0.5));
            RwV3dScale(&right, &right, ((RwReal) 0.5));

            RefreshParticleVertices( particlesData, particlesGeomData->particleVertexBuffer );

            #ifdef CPU_VERTEX_SHADER
            CPUVertexShader( particlesData, particlesGeomData, &up, &right );
            #else
            GPUVertexShader(particlesData, particlesGeomData, &up, &right, atomic );
            #endif

            D3DDevice_DrawVertices( D3DPT_QUADLIST, 0, particlesData->numParticles * 4);

            #ifdef RWMETRICS
            _rwXbMetricsInc( particlesData->numParticles * 4, 0, D3DPT_QUADLIST, 0 );
            #endif /* RWMETRICS */
        }
    }

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numProcTriangles += 2 * meshData->NumVertices;
    RWSRCGLOBAL(metrics)->numTriangles += 2 * meshData->NumVertices;
    RWSRCGLOBAL(metrics)->numVertices += 4 * meshData->NumVertices;
#endif

    if (!rwMatrixTestFlags
        (&meshData->Obj2World, rwMATRIXINTERNALIDENTITY))
    {
        _rwXboxSetWorldMatrix(NULL);
    }

    RwRenderStateSet(rwRENDERSTATEZWRITEENABLE, (void *)zWriteEnable);
    RwRenderStateSet(rwRENDERSTATESHADEMODE,    (void *)shadeMode);

    /* Output the packet to the first (and only) output of this Node.
     * >X< (something like multi-pass rendering
     * could be done by subsequent Nodes)
     */
    RxPacketDispatch(packet, 0, self);

    RWRETURN(TRUE);
}

/*
 * RxNodeDefinitionGetXboxSubmitParticles
 *
 * Returns the (statically allocated) definition of the particle submit node:
 * its body is SubmitNode, it declares three clusters of interest and has a
 * single output, "SubmitOut", which leaves all clusters unchanged.
 *
 * The same pointer is returned on every call.
 */
RxNodeDefinition   *
RxNodeDefinitionGetXboxSubmitParticles(void)
{
    /* Clusters of interest; SubmitNode reads the mesh state as cluster 1,
     * so the order of these entries matters */
    static RxClusterRef N1clofinterest[] = { /* */
        {&RxClObjSpace3DVertices, rxCLALLOWABSENT, rxCLRESERVED},
        {&RxClMeshState, rxCLALLOWABSENT, rxCLRESERVED},
        {&RxClRenderState, rxCLALLOWABSENT, rxCLRESERVED}
    };

#define NUMCLUSTERSOFINTEREST \
        ((sizeof(N1clofinterest))/(sizeof(N1clofinterest[0])))

    /* Input requirements, one per cluster of interest (same order) */
    static RxClusterValidityReq N1inputreqs[NUMCLUSTERSOFINTEREST] = { /* */
        rxCLREQ_REQUIRED,
        rxCLREQ_REQUIRED,
        rxCLREQ_OPTIONAL
    };

    /* State of each cluster on the single output: all passed through */
    static RxClusterValid N1outcl1[NUMCLUSTERSOFINTEREST] = { /* */
        rxCLVALID_NOCHANGE,
        rxCLVALID_NOCHANGE,
        rxCLVALID_NOCHANGE
    };

    static RwChar       _SubmitOut[] = "SubmitOut";

    static RxOutputSpec N1outputs[] = { /* */
        {_SubmitOut,
         N1outcl1,
         rxCLVALID_NOCHANGE}
    };

#define NUMOUTPUTS \
        ((sizeof(N1outputs))/(sizeof(N1outputs[0])))

    static RwChar       _SubmitParticles_csl[] =
        "XboxSubmitParticles.csl";

    /* The node definition itself: name, node methods (only the body,
     * SubmitNode, is supplied), then the cluster and output tables above */
    static RxNodeDefinition nodeXboxSubmitParticles = { /* */
        _SubmitParticles_csl,
        {SubmitNode, NULL, NULL,
         NULL, NULL, NULL, NULL},
        {NUMCLUSTERSOFINTEREST, N1clofinterest, N1inputreqs,
         NUMOUTPUTS, N1outputs},
        0, FALSE, 0
    };

    RWAPIFUNCTION(RWSTRING("RxNodeDefinitionGetXboxSubmitParticles"));

    RWRETURN(&nodeXboxSubmitParticles);
}

/*
 * _rpParticleMaterialPipelineCreate
 *
 * Creates the material pipeline for particles: a single node that submits
 * the particles (instancing first if necessary).
 *
 * Returns the new pipeline, or NULL if any construction step fails, in which
 * case the partially built pipeline is destroyed.
 */
RxPipeline         *
_rpParticleMaterialPipelineCreate(void)
{
    RxPipeline *newPipe;
    RxPipeline *locked;

    RWFUNCTION(RWSTRING("_rpParticleMaterialPipelineCreate"));

    newPipe = RxPipelineCreate();
    if( !newPipe )
    {
        RWRETURN(NULL);
    }

    /* lock, add the submit node, unlock - stop at the first failure */
    locked = RxPipelineLock(newPipe);
    if( locked )
    {
        locked = RxLockedPipeAddFragment(locked,
                                         NULL,
                                         RxNodeDefinitionGetXboxSubmitParticles(),
                                         NULL);
    }

    if( locked && RxLockedPipeUnlock(locked) )
    {
        RWRETURN(newPipe);
    }

    /* something went wrong along the way */
    RxPipelineDestroy(newPipe);

    RWRETURN(NULL);
}

/*
 * _rpParticleObjectPipelineCreate
 *
 * Creates the object pipeline for particles. It only instances the atomic
 * and scatters to the material pipeline, because the real work is easier to
 * do in the render node.
 *
 * Returns the new pipeline, or NULL if any construction step fails, in which
 * case the partially built pipeline is destroyed.
 */
static RxPipeline *
_rpParticleObjectPipelineCreate(void)
{
    RxPipeline *newPipe;
    RxPipeline *locked;

    RWFUNCTION(RWSTRING("_rpParticleObjectPipelineCreate"));

    newPipe = RxPipelineCreate();
    if( !newPipe )
    {
        RWRETURN(NULL);
    }

    /* lock, add the two nodes, unlock - stop at the first failure */
    locked = RxPipelineLock(newPipe);
    if( locked )
    {
        locked = RxLockedPipeAddFragment(locked,
                                         NULL,
                                         RxNodeDefinitionGetAtomicInstance(),
                                         RxNodeDefinitionGetMaterialScatter(),
                                         NULL);
    }

    if( locked && RxLockedPipeUnlock(locked) )
    {
        RWRETURN(newPipe);
    }

    /* something went wrong along the way */
    RxPipelineDestroy(newPipe);

    RWRETURN(NULL);
}

RwBool
_rpParticlesSetupPipes(void)
{
    RWFUNCTION(RWSTRING("_rpParticlesSetupPipes"));

    GParticlesObjectPipe = _rpParticleObjectPipelineCreate();
    if (!GParticlesObjectPipe)
    {
        RWRETURN(FALSE);
    }

    GParticlesMaterialPipe = _rpParticleMaterialPipelineCreate();
    if (!GParticlesMaterialPipe)
    {
        RWRETURN(FALSE);
    }

    /*
    Load vertex shader
    */
    if (D3D_OK != D3DDevice_CreateVertexShader(ParticlesVertexShaderDecl,
                                               dwPartshdrVertexShader,
                                               &ParticlesVertexShader,
                                               0))
    {
        RWRETURN(FALSE);
    }

    RWRETURN(TRUE);
}

RwBool
_rpParticlesDestroyPipes(void)
{
    RWFUNCTION(RWSTRING("_rpParticlesDestroyPipes"));

    if (GParticlesMaterialPipe)
    {
        RxPipelineDestroy(GParticlesMaterialPipe);
        GParticlesMaterialPipe = NULL;
    }

    if (GParticlesObjectPipe)
    {
        RxPipelineDestroy(GParticlesObjectPipe);
        GParticlesObjectPipe = NULL;
    }

    /*
    Destroy vertex shader
    */
    D3DDevice_DeleteVertexShader(ParticlesVertexShader);

    RWRETURN(FALSE);
}

/* Largest particle count a single vertex buffer is sized for (4 vertices each) */
#define PARTICLESXBOXMAXPARTICLES 16383

/*
 * _rpParticleAddGeomData
 *
 * Allocates the vertex buffer(s) of a particle atomic's geometry and fills
 * in the constant per-corner texture coordinates.
 * (Called by RpParticlesAtomicCreate & ParticlesAtomicChunkReadCallBack)
 *
 * atomic       - particle atomic whose geometry receives the buffers.
 * numParticles - number of particles; values of PARTICLESXBOXMAXPARTICLES or
 *                more are clamped to that limit (and written back to the
 *                atomic's particle data).
 *
 * Returns the atomic, or NULL if it has no geometry or the geometry has no
 * particle plugin data. A buffer that cannot be created is reported with a
 * message and left NULL, but the atomic is still returned.
 *
 * Buffers left over from an earlier call are released before new ones are
 * created.
 */
RpAtomic           *
_rpParticleAddGeomData(RpAtomic * atomic, RwInt32 numParticles)
{
    RpParticlesGeomData *particlesGeomData;
    RpParticlesData    *particlesData;
    RpGeometry         *geometry;

    RWFUNCTION(RWSTRING("_rpParticleAddGeomData"));

    geometry = RpAtomicGetGeometry(atomic);
    if (!geometry)
    {
        RWRETURN(NULL);
    }

    particlesGeomData = *PARTICLESGEOMGETDATA(geometry);
    if (!particlesGeomData)
    {
        /* the geometry's plugin data was never allocated */
        RWRETURN(NULL);
    }

    /*
    Get a vertex buffer for particles data for vertex shader to animate.
    Need 4 vertices for each particle as they're rendered as quads!
    */

    /* Clamp it to a maximum to avoid crashing, vertex buffers are only so big */
    RWASSERT( numParticles < PARTICLESXBOXMAXPARTICLES );

    particlesData = *PARTICLESATOMICGETDATA(atomic);
    RWASSERT(NULL != particlesData);
    if (numParticles >= PARTICLESXBOXMAXPARTICLES)
    {
        numParticles = PARTICLESXBOXMAXPARTICLES;

        if (particlesData)
        {
            particlesData->numParticles = numParticles;
        }
    }

    /* Don't leak a buffer created by a previous call */
    if (particlesGeomData->particleVertexBuffer)
    {
        IDirect3DVertexBuffer8_Release(particlesGeomData->particleVertexBuffer);
        particlesGeomData->particleVertexBuffer = NULL;
    }

    if (D3DDevice_CreateVertexBuffer( numParticles * 4 * sizeof(ParticleVertex),
      D3DUSAGE_WRITEONLY,
      0,
      D3DPOOL_MANAGED, &particlesGeomData->particleVertexBuffer ) != D3D_OK)
    {
        /* Ooops */
        MESSAGE(RWSTRING("Unable to create vertex buffer"));
        particlesGeomData->particleVertexBuffer = NULL;
    }
    else
    {
        /* Initialize the texture coordinates now because they're constant */
        RwInt32             i;

        /*
        Fill the vertex buffer. To do this, we need to Lock() the VB to
        gain access to the vertices. This mechanism is required because vertex
        buffers may be in device memory.
        */

        ParticleVertex* pVertices;
        D3DVertexBuffer_Lock( particlesGeomData->particleVertexBuffer, 0, 0, (RwUInt8**)&pVertices, 0 );

        /* corners in order: (0,0) (0,1) (1,1) (1,0) */
        for (i = 0; i < numParticles; i++)
        {
            pVertices[i * 4].u = 0.0f;
            pVertices[i * 4].v = 0.0f;

            pVertices[i * 4 + 1].u = 0.0f;
            pVertices[i * 4 + 1].v = 1.0f;

            pVertices[i * 4 + 2].u = 1.0f;
            pVertices[i * 4 + 2].v = 1.0f;

            pVertices[i * 4 + 3].u = 1.0f;
            pVertices[i * 4 + 3].v = 0.0f;
        }

        D3DVertexBuffer_Unlock( particlesGeomData->particleVertexBuffer );
    }

#ifdef CPU_VERTEX_SHADER

    /*
    Get a vertex buffer for rendered vertices produced by software vertex shader
    */

    /* Don't leak a buffer created by a previous call */
    if (particlesGeomData->renderedVertexBuffer)
    {
        IDirect3DVertexBuffer8_Release(particlesGeomData->renderedVertexBuffer);
        particlesGeomData->renderedVertexBuffer = NULL;
    }

    if (D3DDevice_CreateVertexBuffer( numParticles * 4 * sizeof(RenderedVertex),
      D3DUSAGE_WRITEONLY,
      RenderedVertexFVF,
      D3DPOOL_MANAGED, &particlesGeomData->renderedVertexBuffer ) != D3D_OK)
    {
        /* Ooops */
        MESSAGE(RWSTRING("Unable to create vertex buffer"));
        particlesGeomData->renderedVertexBuffer = NULL;
    }
    else
    {
        /* Initialize the texture coordinates now because they're constant */
        RwInt32             i;

        /*
        Fill the vertex buffer. To do this, we need to Lock() the VB to
        gain access to the vertices. This mechanism is required because vertex
        buffers may be in device memory.
        */

        RenderedVertex* pVertices;
        D3DVertexBuffer_Lock( particlesGeomData->renderedVertexBuffer,
            0, 0, (RwUInt8**)&pVertices, 0 );

        /* corners in order: (0,0) (0,1) (1,1) (1,0) */
        for (i = 0; i < numParticles; i++)
        {
            pVertices[i * 4].u = 0.0f;
            pVertices[i * 4].v = 0.0f;

            pVertices[i * 4 + 1].u = 0.0f;
            pVertices[i * 4 + 1].v = 1.0f;

            pVertices[i * 4 + 2].u = 1.0f;
            pVertices[i * 4 + 2].v = 1.0f;

            pVertices[i * 4 + 3].u = 1.0f;
            pVertices[i * 4 + 3].v = 0.0f;
        }

        D3DVertexBuffer_Unlock( particlesGeomData->renderedVertexBuffer );
    }
#endif

    RWRETURN(atomic);
}

/*
 * ParticleGeomConstructor
 *
 * Geometry plugin constructor: allocates an empty RpParticlesGeomData (no
 * vertex buffers yet) and stores its address in the geometry's plugin slot.
 *
 * It deliberately does no more than that, because it can't be relied on to
 * be called after the number of particles is known (it is called before
 * ParticlesAtomicChunkReadCallBack, but during RpParticlesAtomicCreate!).
 *
 * Returns the object, or NULL if the allocation fails; in that case the
 * plugin slot is set to NULL so that the destructor has nothing to free.
 */
static void        *
ParticleGeomConstructor(void *object,
                        RwInt32 __RWUNUSED__ offset,
                        RwInt32 __RWUNUSED__ size)
{
    RpParticlesGeomData *pData;

    RWFUNCTION(RWSTRING("ParticleGeomConstructor"));

    RWASSERT(object);

    pData = (RpParticlesGeomData *)RwMalloc( sizeof(RpParticlesGeomData) );

    /* publish the result first: the slot must never hold garbage */
    *PARTICLESGEOMGETDATA(object) = pData;

    if (NULL == pData)
    {
        RWRETURN(NULL);
    }

    pData->particleVertexBuffer = 0;
#ifdef CPU_VERTEX_SHADER
    pData->renderedVertexBuffer = 0;
#endif

    RWRETURN(object);
}

/*
 * ParticleGeomDestructor
 *
 * Geometry plugin destructor: releases the vertex buffer(s) owned by the
 * geometry's RpParticlesGeomData, frees the structure and clears the plugin
 * slot. Does nothing when the slot is already empty.
 *
 * Returns the object.
 */
static void        *
ParticleGeomDestructor(void *object,
                       RwInt32 __RWUNUSED__ offset,
                       RwInt32 __RWUNUSED__ size)
{
    RpParticlesGeomData *geomData;

    RWFUNCTION(RWSTRING("ParticleGeomDestructor"));

    RWASSERT(object);

    geomData = *PARTICLESGEOMGETDATA(object);

    if (!geomData)
    {
        /* nothing was ever attached */
        RWRETURN(object);
    }

    /* detach from the geometry before tearing the data down */
    *PARTICLESGEOMGETDATA(object) = NULL;

    if (geomData->particleVertexBuffer)
    {
        IDirect3DVertexBuffer8_Release(geomData->particleVertexBuffer);
        geomData->particleVertexBuffer = NULL;
    }

#ifdef CPU_VERTEX_SHADER
    if (geomData->renderedVertexBuffer)
    {
        IDirect3DVertexBuffer8_Release(geomData->renderedVertexBuffer);
        geomData->renderedVertexBuffer = NULL;
    }
#endif

    RwFree( geomData );

    RWRETURN(object);
}

/*
 * ParticleGeomCopy
 *
 * Geometry plugin copy callback. Copying particle geometry data is NOT
 * implemented: the function asserts in debug builds and always returns 0
 * (NULL) without touching either object.
 *
 * The block under "#if 0" is a disabled, unfinished draft and is never
 * compiled.
 */
static void        *
ParticleGeomCopy(void *dstObject,
                 const void *srcObject,
                 RwInt32 __RWUNUSED__ offset, RwInt32 __RWUNUSED__ size)
{
    RWFUNCTION(RWSTRING("ParticleGeomCopy"));

    /* Copying particles is not supported: this function was never written */
    RWASSERT(0);

    RWRETURN(0);

#if 0
    /* Disabled draft implementation, kept for reference only */

    RpGeometry         *dstGeometry;
    RpParticlesGeomData *dstParticlesGeomData;
    RwInt32             numParticles;
    const RpGeometry   *srcGeometry;
    const RpParticlesGeomData *srcParticlesGeomData;

    RWFUNCTION(RWSTRING("ParticleGeomCopy"));

    srcGeometry = (const RpGeometry *) srcObject;
    srcParticlesGeomData = *PARTICLESGEOMGETCONSTDATA(srcGeometry);
    if (!srcParticlesGeomData)
    {
        RWRETURN(NULL);
    }

    numParticles = RpGeometryGetNumVertices(srcGeometry);

    dstGeometry = (RpGeometry *) dstObject;
    dstParticlesGeomData = *PARTICLESGEOMGETDATA(dstGeometry);
    if (!dstParticlesGeomData)
    {
        dstParticlesGeomData = ParticlesGeomDataCreate(numParticles);
        if (!dstParticlesGeomData)
        {
            RWRETURN(NULL);
        }

        *PARTICLESGEOMGETDATA(dstGeometry) = dstParticlesGeomData;
    }

    /* TODO: properly */
    *dstParticlesGeomData = *srcParticlesGeomData;
    RWRETURN(dstObject);
#endif
}

RwBool
_rpParticleExtendGeom(void)
{
    RWFUNCTION(RWSTRING("_rpParticleExtendGeom"));

    /* Add a pointer to our RpParticlesGeomData to all RpGeometries (!) */
    GParticlesGeomDataOffset =
        RpGeometryRegisterPlugin(sizeof(RpParticlesGeomData *),
                                 rwID_PARTICLESPLUGIN,
                                 ParticleGeomConstructor,
                                 ParticleGeomDestructor,
                                 ParticleGeomCopy);

    RWRETURN(TRUE);
}
