/****************************************************************************
 *                                                                          *
 *  Module  :   skinxbox.c                                                  *
 *                                                                          *
 *  Purpose :   Matrix-blending pipeline (xbox)                             *
 *                                                                          *
 ****************************************************************************/

/****************************************************************************
 Includes
 */

/* Tell D3D we've 16 byte aligned all our matrices
so 4x4 matrix mul is SSE optimized. */
#define _USE_XGMATH

#include <xtl.h>
#include <d3d8.h>

#include <rwcore.h>
#include <rpworld.h>

#include "rpplugin.h"
#include "rpdbgerr.h"

#include "rpskin.h"

#include "skin.h"

#include "shaderdesc.h"
#include "skindefs.h"

/*
 * Plugin-wide skin state shared across the skin plugin.
 * Every member starts out zeroed / NULL.
 */
SkinGlobals _rpSkinGlobals =
{
    0,
    0,
    0,
    { (RwMatrix *)NULL, NULL },
    (RwFreeList *)NULL,
    { 0, 0 },
    {                                 /* SkinGlobalPlatform  platform    */
        0
    }
};

/*
 * When defined, bones are remapped so that only the bones an atomic actually
 * uses (rather than the whole hierarchy) are uploaded to the GPU.
 * This allows more bones overall, at the cost of extra data rearranging,
 * which is slower.
 */
#define OPTIMIZEBONES

/* Revision identification string for this file; hidden from Doxygen. */
#if (!defined(DOXYGEN_SHOULD_SKIP_THIS))
static const char   rcsid[] __RWUNUSED__ =
    "@@(#)$Id: skinxbox.c,v 1.19 2001/10/04 16:58:00 antonk Exp antonk $";
#endif /* (!defined(DOXYGEN_SHOULD_SKIP_THIS)) */

/****************************************************************************
 Local Types
 */


/****************************************************************************
 Local (Static) Prototypes
 */

/****************************************************************************
 Local Defines
 */
/* The trailing 'x' leaves INSTANCESPEW undefined, so the
 * "#ifdef INSTANCESPEW" diagnostics in the instance callback are compiled
 * out. Rename to INSTANCESPEW to enable them. */
#define INSTANCESPEWx

/* Bone indices stored in the vertex stream are multiplied by this value
 * so that they address the bone's first shader constant register. */
#define SHADERCONSTS_PER_BONE   3

/* NOTE(review): not referenced in the visible part of this file; presumably
 * the number of vertex shader constant registers - confirm. */
#define NUM_CONSTANTS_CACHED    96

/* Placeholder value passed for the D3DDevice_CreateVertexBuffer arguments
 * that this file treats as ignored on Xbox. */
#define IGNORED_XBOX            0

/****************************************************************************
 Globals (across program)
 */

/****************************************************************************
 Local (static) Globals
 */

/* Skinning pipeline and its render callback; both start out NULL.
 * NOTE(review): presumably assigned by pipeline set-up code outside this
 * part of the file - confirm. */
static RxPipeline   *XboxSkinPipeline = NULL;
static RxXboxAllInOneRenderCallBack RenderPipeline = NULL;

/*
 * Transforms, light data etc., which change only per atomic, are stored
 * here and uploaded to the constant registers in one block.
 * Must be 16 byte aligned so we can do SSE matrix math in here.
 */
static _MM_ALIGN16 RwV4d    perAtomicConstants[96 - PER_ATOMIC_OFFSET];
/* First entry used for lighting data: the lighting callback resets
 * perAtomicCount to this value and writes the ambient colour there. */
enum { perAtomicCountStart = VSCONST_REG_TRANSFORM_SIZE + VSCONST_REG_SCREENSPACE_SIZE };
/* Index of the next free entry in perAtomicConstants. */
static RwInt32  perAtomicCount = perAtomicCountStart;

/*
 * Hash a set of vertex format properties into an array index of the
 * pre-generated list of vertex format descriptors.
 *
 * weights - number of weights per vertex (encoded as weights - 1)
 * normals - 1 if the vertices carry normals, else 0
 * tex     - number of texture coordinate sets; must be 0, 1 or 2 for the
 *           resulting index to be unique
 * color   - 1 if the vertices carry a pre-lit colour, else 0
 *
 * Returns tex + 3 * (color | normals << 1 | (weights - 1) << 2).
 *
 * The return type is spelled out explicitly; the implicit-int form is not
 * valid C from C99 onwards.
 */
__inline RwInt32 VertexFormatHash( RwInt32 weights, RwInt32 normals, RwInt32 tex, RwInt32 color )
{
    const RwInt32 formatBits = (color << 0) | (normals << 1) | ((weights - 1) << 2);

    return tex + 3 * formatBits;
}

/* Table of vertex shader handles, indexed by
 * formatIndex * numShaders + shaderIndex. An entry of 0 means the shader
 * has not been created yet. Allocated by InitShaders, released by
 * DestroyShaders. */
static DWORD    *vertexShaderHandles = 0;

static RwBool InitShaders()
{
    RwInt32 numVertexFormats, i;
    _rpSkinXboxShaderDesc **descAry; 
    RwInt32 numShaders;

    RWFUNCTION(RWSTRING("InitShaders"));

    numVertexFormats = _rpSkinXboxGetNumVertexFormats();

    numShaders = _rpSkinXboxGetNumShaderDesc();
    descAry = _rpSkinXboxGetShaderDesc();

    vertexShaderHandles = RwMalloc(numVertexFormats * numShaders * sizeof(DWORD));

    for (i=0; i<numVertexFormats * numShaders; i++)
    {
        vertexShaderHandles[i] = 0;
    }

    RWRETURN(TRUE);
}

static void DestroyShaders()
{
    RwInt32 numVertexFormats, i;
    RwInt32 numShaders;

    RWFUNCTION(RWSTRING("InitShaders"));

    numVertexFormats = _rpSkinXboxGetNumVertexFormats();

    numShaders = _rpSkinXboxGetNumShaderDesc();

    for (i=0; i<numVertexFormats * numShaders; i++)
    {
        if (vertexShaderHandles[i] != 0)
        {
            D3DDevice_DeleteVertexShader(vertexShaderHandles[i]);
        }
    }

    RwFree(vertexShaderHandles);

    RWRETURNVOID();
}

/* Counter bumped by SetShader when it creates a vertex shader on demand. */
static int numShadersCreated = 0;

/*
 * Make the vertex shader for the given (vertex format, shader description)
 * pair current, creating it first if it does not exist yet.
 *
 * formatIndex - index into the vertex format list
 * shaderIndex - index into the shader description array
 *
 * numShadersCreated counts successful creations only. If creation fails,
 * the table entry is forced back to 0 so that DestroyShaders never tries
 * to delete an invalid handle and a later call can retry.
 */
static void SetShader( RwInt32 formatIndex, RwInt32 shaderIndex )
{
    RwInt32 numVertexFormats, numShaders, i;
    DWORD **formatList;
    _rpSkinXboxShaderDesc **descAry;

    RWFUNCTION(RWSTRING("SetShader"));

    numVertexFormats = _rpSkinXboxGetNumVertexFormats();
    formatList = _rpSkinXboxGetVertexFormatList();

    numShaders = _rpSkinXboxGetNumShaderDesc();
    descAry = _rpSkinXboxGetShaderDesc();

    RWASSERT(0 <= formatIndex && formatIndex < numVertexFormats);
    RWASSERT(0 <= shaderIndex && shaderIndex < numShaders);

    /* The handle table is laid out format-major */
    i = formatIndex * numShaders + shaderIndex;

    RWASSERT(i < numShaders * numVertexFormats);

    if (vertexShaderHandles[i] == 0)
    {
        if (D3D_OK == D3DDevice_CreateVertexShader(formatList[formatIndex],
                                                   descAry[shaderIndex]->byteCode,
                                                   &vertexShaderHandles[i],
                                                   0))
        {
            numShadersCreated++;
        }
        else
        {
            RWASSERT(0);

            /* Keep the entry marked as "not created" */
            vertexShaderHandles[i] = 0;
        }
    }

    RwXboxSetCurrentVertexShader(vertexShaderHandles[i]);

    RWRETURNVOID();
}

/*
 * Find the smallest weight count, among all shader descriptions, that is
 * greater than or equal to desiredWeights.
 *
 * Returns that weight count. If no shader description qualifies, the
 * sentinel value 5 is returned (and an assertion fires in debug builds).
 */
static
RwInt32 GetMinWeightsSupportedGE( RwInt32 desiredWeights )
{
    const RwInt32   impossibleWeights = 5;
    RwInt32 shaderNum, numShaders, minWeights;

    _rpSkinXboxShaderDesc *desc;
    _rpSkinXboxShaderDesc **descAry;

    /* Trace name matches the function name, like everywhere else in the file */
    RWFUNCTION(RWSTRING("GetMinWeightsSupportedGE"));

    numShaders = _rpSkinXboxGetNumShaderDesc();
    descAry = _rpSkinXboxGetShaderDesc();

    minWeights = impossibleWeights;

    for (shaderNum = 0; shaderNum < numShaders; shaderNum++)
    {
        desc = descAry[shaderNum];

        /* Too few weights for the request */
        if (!(desc->numWeights >= desiredWeights))
        {
            continue;
        }

        /* Enough weights - keep it only if it beats the best so far */
        if (desc->numWeights < minWeights)
        {
            minWeights = desc->numWeights;
        }
    }

    RWASSERT(minWeights != impossibleWeights);

    RWRETURN(minWeights);
}

/*
 * Linear search for the first shader description that matches the request
 * (could be reorganized into an n-way tree structure).
 *
 * matfx, prelit and the weight count must match exactly; the shader must
 * support at least minDirLights directional and minPntLights point lights.
 * Weights must match exactly because the data has already been instanced
 * at this size.
 *
 * On success *index receives the position of the description and the
 * description is returned. If nothing suitable exists, *index is set to 0
 * and 0 is returned.
 */
static
_rpSkinXboxShaderDesc * FindBestShader(
    RwInt32 requiredMatfx,
    RwInt32 requiredPrelit,
    RwInt32 requiredWeights,
    RwInt32 minDirLights,
    RwInt32 minPntLights,
    RwInt32 *index
    )
{
    _rpSkinXboxShaderDesc **candidates;
    RwInt32 count, n;

    count = _rpSkinXboxGetNumShaderDesc();
    candidates = _rpSkinXboxGetShaderDesc();

    for (n = 0; n < count; n++)
    {
        _rpSkinXboxShaderDesc *candidate = candidates[n];

        if (candidate->matfx != requiredMatfx)
        {
            continue;
        }

        if (candidate->prelit != requiredPrelit)
        {
            continue;
        }

        if (candidate->numWeights != requiredWeights)
        {
            continue;
        }

        if (candidate->numDirect < minDirLights)
        {
            continue;
        }

        if (candidate->numPoint < minPntLights)
        {
            continue;
        }

        *index = n;
        return candidate;
    }

    /* no suitable vertex shader exists */
    *index = 0;
    return 0;
}

/* Scratch lists of the lights affecting the atomic currently being lit.
 * The counts are reset by the lighting callback; the lists are filled by
 * FindGlobalLights (directional) and FindLocalLights (point). */
static RwInt32 dirLightCount, pointLightCount;
enum { maxDirLights = 64, maxPointLights = 64 };
static RpLight *dirLights[maxDirLights], *pointLights[maxPointLights];

/*
 * Walk the current world's directional light list.
 *
 * Ambient lights are accumulated into perAtomicConstants[perAtomicCount]
 * and directional lights are appended to dirLights[]. Once the walk is
 * finished, each channel of the accumulated ambient colour is clamped
 * to 1.0.
 *
 * flags - geometry flags of the atomic being lit. Kept for interface
 *         compatibility only: lights are selected by their own
 *         rpLIGHTLIGHTATOMICS flag (as FindLocalLights does), not by
 *         testing geometry flag bits against the light's flags.
 *
 * The caller must have checked that there is a current world.
 */
static void FindGlobalLights( RpGeometryFlag flags )
{
    RwLLLink    *cur, *end;
    RpWorld     *world;

    RWFUNCTION(RWSTRING("FindGlobalLights"));

    /* Geometry flags say nothing about which lights apply */
    (void)flags;

    world = (RpWorld *)RWSRCGLOBAL(curWorld);

    cur = rwLinkListGetFirstLLLink(&world->directionalLightList);
    end = rwLinkListGetTerminator(&world->directionalLightList);
    while (cur != end)
    {
        RpLight *light;

        light = rwLLLinkGetData(cur, RpLight, inWorld);

        /* NB light may actually be a dummyTie from a enclosing ForAll */
        if (light && (rwObjectTestFlags(light, rpLIGHTLIGHTATOMICS)))
        {
            switch (RpLightGetType(light))
            {
                case rpLIGHTAMBIENT:
                    {
                        const RwRGBAReal    *color;

                        color = RpLightGetColor(light);

                        perAtomicConstants[perAtomicCount].x += color->red;
                        perAtomicConstants[perAtomicCount].y += color->green;
                        perAtomicConstants[perAtomicCount].z += color->blue;
                        perAtomicConstants[perAtomicCount].w += color->alpha;
                    }
                    break;

                case rpLIGHTDIRECTIONAL:
                    /* Check the bound before writing, so that release
                     * builds cannot overrun dirLights[] */
                    if (dirLightCount < maxDirLights)
                    {
                        dirLights[dirLightCount++] = light;
                    }
                    else
                    {
                        RWASSERT(0); /* too many directional lights */
                    }
                    break;

                default:
                    RWASSERT(0); /* unsupported light type */
                    break;
            }
        }

        /* Next */
        cur = rwLLLinkGetNext(cur);
    } /* while */

    /* clamp ambient to 1.0 */
    perAtomicConstants[perAtomicCount].x = min(perAtomicConstants[perAtomicCount].x, 1.0f);
    perAtomicConstants[perAtomicCount].y = min(perAtomicConstants[perAtomicCount].y, 1.0f);
    perAtomicConstants[perAtomicCount].z = min(perAtomicConstants[perAtomicCount].z, 1.0f);
    perAtomicConstants[perAtomicCount].w = min(perAtomicConstants[perAtomicCount].w, 1.0f);

    RWRETURNVOID();
}

/*
 * Collect the point lights found in every world sector the atomic lies in
 * and append them to pointLights[].
 *
 * The global light frame counter is advanced first and each accepted light
 * is stamped with it, so a light shared by several sectors is only
 * collected once. Only lights flagged rpLIGHTLIGHTATOMICS are considered.
 *
 * atomic - the atomic being lit
 */
static void FindLocalLights( RpAtomic *atomic )
{
    RwLLLink    *cur, *end;

    RWFUNCTION(RWSTRING("FindLocalLights"));

    /* don't light this atomic with the same light more than once! */
    RWSRCGLOBAL(lightFrame)++;

    /* For all sectors that this atomic lies in, apply all lights within */
    cur = rwLinkListGetFirstLLLink(&atomic->llWorldSectorsInAtomic);
    end = rwLinkListGetTerminator(&atomic->llWorldSectorsInAtomic);
    while (cur != end)
    {
        RpTie       *tpTie = rwLLLinkGetData(cur, RpTie, lWorldSectorInAtomic);
        RwLLLink    *curLight, *endLight;

        /* Lights in the sector */
        curLight = rwLinkListGetFirstLLLink(&tpTie->worldSector->lightsInWorldSector);
        endLight = rwLinkListGetTerminator(&tpTie->worldSector->lightsInWorldSector);

        while (curLight != endLight)
        {
            RpLightTie  *lightTie;
            RpLight *theLight;

            lightTie = rwLLLinkGetData(curLight, RpLightTie, lightInWorldSector);

            theLight = lightTie->light;

            /* NB lightTie may actually be a dummyTie from a enclosing ForAll */

            /* Check to see if the light has already been applied and is set to
             * light atomics
             */
            if (theLight
                && (theLight->lightFrame != RWSRCGLOBAL(lightFrame))
                && (rwObjectTestFlags(theLight, rpLIGHTLIGHTATOMICS)))
            {
                /* don't light this atomic with the same light again! */
                theLight->lightFrame = RWSRCGLOBAL(lightFrame);

                /* Only point lights are collected here; no test against
                 * the atomic's bounding sphere is performed */
                switch (RpLightGetType(theLight))
                {
                    case rpLIGHTPOINT:
                        /* Check the bound before writing, so that release
                         * builds cannot overrun pointLights[] */
                        if (pointLightCount < maxPointLights)
                        {
                            pointLights[pointLightCount++] = theLight;
                        }
                        else
                        {
                            RWASSERT(0); /* too many point lights */
                        }
                        break;

                    default:
                        RWASSERT(0); /* unsupported light type */
                        break;
                }
            }

            /* Next */
            curLight = rwLLLinkGetNext(curLight);
        }

        /* Next one */
        cur = rwLLLinkGetNext(cur);
    }

    RWRETURNVOID();
}

/*
 * Per-atomic lighting set-up for the skinning pipeline.
 *
 * Resets perAtomicCount, writes the ambient colour, gathers the world's
 * directional lights and the point lights of the sectors the atomic lies
 * in, picks a shader for every mesh with FindBestShader(), and then
 * appends the light constants to perAtomicConstants, padded with black
 * lights up to the number of lights the selected shader expects.
 *
 * resEntryHeader - supplies the number of meshes
 * instancedData  - first of numMeshes consecutive per-mesh records; only
 *                  the material of each record is read here
 * atomic         - the atomic being rendered
 * meshShaders    - out: one entry per mesh, holding the index of the
 *                  chosen shader description stored as a pointer value
 *
 * If no shader description is available once the meshes have been
 * processed (no meshes, or no match for the last mesh), the function
 * returns without writing any light constants instead of dereferencing a
 * NULL description.
 */
static void 
rxXbAtomicSkinLightingCallback(
    RxXboxResEntryHeader *resEntryHeader,
    RxXboxInstanceData   *instancedData,
    RpAtomic *atomic,
    DWORD **meshShaders
    )
{
    RpSkin          *skin;
    RpGeometryFlag  flags;
    RwInt32         i;
    RwMatrix        inverseAtomicLTM;
    _rpSkinXboxShaderDesc *shaderDesc = 0, *lastShaderDesc = 0;
    RwInt32         mesh;

    RWFUNCTION(RWSTRING("rxXbAtomicSkinLightingCallback"));

    perAtomicCount = perAtomicCountStart;

    flags = (RpGeometryFlag)RpGeometryGetFlags(RpAtomicGetGeometry(atomic));

    /*
    Need all the directionals & points in 1 contiguous block (and check there's not too many!)
    So go and count and organize them.
    */
    dirLightCount = 0;
    pointLightCount = 0;

    if (flags & rxGEOMETRY_LIGHT)
    {
        /* start ambient at black and add to it if we find any ambients */
        perAtomicConstants[perAtomicCount].x = 0.0f;
        perAtomicConstants[perAtomicCount].y = 0.0f;
        perAtomicConstants[perAtomicCount].z = 0.0f;
        perAtomicConstants[perAtomicCount].w = 1.0f;

        if (NULL != RWSRCGLOBAL(curWorld))
        {
            FindGlobalLights( flags );
            FindLocalLights( atomic );
        } /* if world */
    }
    else
    {
        /* object is "unlit" so should be fully bright ambient to make it self luminous */
        perAtomicConstants[perAtomicCount].x = 1.0f;
        perAtomicConstants[perAtomicCount].y = 1.0f;
        perAtomicConstants[perAtomicCount].z = 1.0f;
        perAtomicConstants[perAtomicCount].w = 1.0f;
    }

    /* ambient lighting is done */
    perAtomicCount++;

    skin  = RpSkinGeometryGetSkin(RpAtomicGetGeometry(atomic));

    /* determine shader for each mesh (as matfx might vary!), lights better all match! */
    for (mesh=0; mesh<resEntryHeader->numMeshes; mesh++)
    {
        RwInt32 index;

        shaderDesc = FindBestShader(
            _rpSkinXboxGetMaterialMatfxHash(instancedData->material, flags),
            (flags & rxGEOMETRY_PRELIT) != 0,
            skin->platformData.maxWeightsUsed,
            dirLightCount,
            pointLightCount,
            &index
            );

        RWASSERT( shaderDesc );

        /* The shader index travels in the pointer-typed slot */
        meshShaders[mesh] = (DWORD *)index;

        /* All meshes must agree on the light counts; skip the comparison
           when either description is missing */
        RWASSERT(!lastShaderDesc || !shaderDesc || (lastShaderDesc->numDirect == shaderDesc->numDirect)  );
        RWASSERT(!lastShaderDesc || !shaderDesc || (lastShaderDesc->numPoint == shaderDesc->numPoint)  );

        lastShaderDesc = shaderDesc;

        /* Move onto the next instancedData */
        instancedData++;
    }

    if (NULL == shaderDesc)
    {
        /* Without a shader description there is no light layout to fill in */
        RWRETURNVOID();
    }

    /* this really ought to be cached somewhere!!! */
    if (dirLightCount + pointLightCount > 0)
    {
        RwMatrixInvert(&inverseAtomicLTM, RwFrameGetLTM(RpAtomicGetFrame(atomic)));
    }

    /* collect directional light constants for shader */
    for (i=0; i<dirLightCount; i++)
    {
        RwV3d               *at;
        const RwRGBAReal    *color;
        RpLight             *light;

        light = dirLights[i];

        /* Set the lights direction, in object space */
        at = RwMatrixGetAt(RwFrameGetLTM(RpLightGetFrame(light)));

        RwV3dTransformVectors (
            (RwV3d *)&perAtomicConstants[perAtomicCount],
            at,
            1,
            &inverseAtomicLTM );

        perAtomicConstants[perAtomicCount].w = 0.0f; /* Use this for clamping */

        perAtomicCount++;

        /* Set the light color */
        color = RpLightGetColor(light);

        perAtomicConstants[perAtomicCount].x = color->red;
        perAtomicConstants[perAtomicCount].y = color->green;
        perAtomicConstants[perAtomicCount].z = color->blue;
        perAtomicConstants[perAtomicCount].w = color->alpha;

        perAtomicCount++;
    }

    /* need to pad with black directional lights if only shader available has too many */
    for (; i<shaderDesc->numDirect; i++)
    {
        /* direction */
        perAtomicConstants[perAtomicCount].x = 0.0f;
        perAtomicConstants[perAtomicCount].y = 0.0f;
        perAtomicConstants[perAtomicCount].z = 0.0f;
        perAtomicConstants[perAtomicCount].w = 0.0f;

        perAtomicCount++;

        /* color */
        perAtomicConstants[perAtomicCount].x = 0.0f;
        perAtomicConstants[perAtomicCount].y = 0.0f;
        perAtomicConstants[perAtomicCount].z = 0.0f;
        perAtomicConstants[perAtomicCount].w = 0.0f;

        perAtomicCount++;
    }

    /* point lights are laid out in groups of up to 4: one constant holding
       the packed inverse radii, followed by a position and a color constant
       for every light in the group */
    for (i=0; i<shaderDesc->numPoint; i+=4)
    {
        RwInt32 j;

        /* pack 4 radii into one constant for better vectorization of intensity calcs in shader */
        RwReal  *radiiConstant = (RwReal *)&perAtomicConstants[perAtomicCount++];

        for (j=0; j<min(shaderDesc->numPoint - i, 4); j++)
        {
            RwInt32 lightNum = i + j;

            if (lightNum < pointLightCount)
            {
                /* it's a real light */
                RpLight             *light;
                const RwRGBAReal    *color;
                RwV3d               *pos;

                light = pointLights[lightNum];

                /* Set the light's position, in object space */
                pos = RwMatrixGetPos(RwFrameGetLTM(RpLightGetFrame(light)));

                RwV3dTransformPoints(
                    (RwV3d *)&perAtomicConstants[perAtomicCount],
                    pos,
                    1,
                    &inverseAtomicLTM );

                perAtomicConstants[perAtomicCount].w = 0.0f; /* Use this for clamping */

                perAtomicCount++;

                /* Set the light color */
                color = RpLightGetColor(light);

                perAtomicConstants[perAtomicCount].x = color->red;
                perAtomicConstants[perAtomicCount].y = color->green;
                perAtomicConstants[perAtomicCount].z = color->blue;
                perAtomicConstants[perAtomicCount].w = color->alpha;

                perAtomicCount++;

                /* and the inverse of the radius */
                *radiiConstant++ = 1.0f / RpLightGetRadius(light);
            }
            else
            {
                /* it's pad light */
                perAtomicConstants[perAtomicCount].x = 0.0f;
                perAtomicConstants[perAtomicCount].y = 0.0f;
                perAtomicConstants[perAtomicCount].z = 0.0f;
                perAtomicConstants[perAtomicCount].w = 0.0f;

                perAtomicCount++;

                perAtomicConstants[perAtomicCount].x = 0.0f;
                perAtomicConstants[perAtomicCount].y = 0.0f;
                perAtomicConstants[perAtomicCount].z = 0.0f;
                perAtomicConstants[perAtomicCount].w = 0.0f;

                perAtomicCount++;

                /* need a huge radius ? - 1.0f / 0.0f */
                *radiiConstant++ = 10000000.0f;
            }
        }
    }

    /*
    Check we haven't used too many bones + lights for this atomic
    We start at constant register PER_ATOMIC_OFFSET,
    skip perAtomicCount constants for the transform & lighting set up
    and then require 3 * number of bones more constants,
    so this had all better stop before constant register 96 because it doesn't exist!
    */
#ifdef OPTIMIZEBONES
    RWASSERT( (perAtomicCount + (RwInt32)skin->platformData.numBonesUsed * SHADERCONSTS_PER_BONE + PER_ATOMIC_OFFSET) < 96 );
#else
    RWASSERT( (perAtomicCount + (RwInt32)skin->boneData.numBones * SHADERCONSTS_PER_BONE + PER_ATOMIC_OFFSET) < 96 );
#endif

    RWRETURNVOID();
}

/****************************************************************************
 rxXBAtomicSkinInstanceCallback

 Purpose:   To instance.

 On entry:

 On exit :
*/
/*
 * Pack a normal into a single 32-bit word: x in bits 0-10 and y in bits
 * 11-21 (each scaled by 1023), z in bits 22-31 (scaled by 511).
 *
 * normal - the normal to pack
 * Returns the packed word.
 */
__inline RwUInt32 Normal2PackedNormal(RwV3d *normal)
{
    const RwUInt32 packedX = ((RwInt32)(normal->x * 1023.0f)) & 0x7ff;
    const RwUInt32 packedY = ((RwInt32)(normal->y * 1023.0f)) & 0x7ff;
    const RwUInt32 packedZ = ((RwInt32)(normal->z * 511.0f)) & 0x3ff;

    return packedX | (packedY << 11) | (packedZ << 22);
}

/*
 * Convert a weight to a byte, scaling by 255 and rounding to nearest.
 *
 * weight - the weight; the instancing code asserts it is roughly in 0..1
 * Returns the low 8 bits of the rounded, scaled weight.
 *
 * The conversion goes through a 32-bit integer: the scaled value can reach
 * 255, which does not fit a signed 8-bit type, and converting an
 * out-of-range float to an integer type is undefined in C.
 */
__inline RwUInt8 Weight2PackedWeight( RwReal weight )
{
    return (RwUInt8)(((RwInt32)(weight * 255.0f + 0.5f)) & 0xff);
}

/*
 * Instance (or re-instance) the skinned geometry of an atomic into a
 * vertex buffer.
 *
 * Vertex layout, in order: position (RwV3d), one packed byte per weight,
 * one 16-bit constant register offset per weight, an optional packed
 * normal, an optional pre-lit colour, then 0, 1 or 2 sets of texture
 * coordinates. All data comes from morph target 0.
 *
 * object        - the RpAtomic to instance
 * instancedData - resource entry header; its stride, numVertices and
 *                 vertexBuffer are used, and every mesh in begin..end
 *                 receives the vertex format index
 * reinstance    - FALSE: compute the stride and create the vertex buffer;
 *                 TRUE: refill the existing buffer using the stored stride
 *
 * Returns TRUE on success, FALSE if the vertex buffer cannot be created.
 */
static RwBool
rxXBAtomicSkinInstanceCallback(void *object,
                               RxXboxResEntryHeader *instancedData,
                               RwBool reinstance)
{
    RpAtomic                *atomic;
    RpGeometry              *geometry;
    RpSkin                  *skin;
    RwUInt32                vbSize;
    RwUInt32                offset;
    RpGeometryFlag          flags;
    RwUInt32                numVertices;
    RwUInt8                 *lockedVertexBuffer;
    RwUInt8                 *vertexBuffer;
    RwV3d                   *pos;
    RwInt32                 weightsToInstance;
    RwInt32                 numUVs;
    RxXboxInstanceData      *mesh;

    RWFUNCTION(RWSTRING("rxXBAtomicSkinInstanceCallback"));

#ifdef INSTANCESPEW
    RWMESSAGE((RWSTRING("Instancing...")));
#endif

    atomic = (RpAtomic *)object;
    geometry = RpAtomicGetGeometry(atomic);
    flags = (RpGeometryFlag)RpGeometryGetFlags(geometry);

    skin = RpSkinGeometryGetSkin(RpAtomicGetGeometry(atomic));

    RWASSERT(skin->platformData.numBonesUsed > 0);
    RWASSERT(skin->boneData.numBones > 0);

    /* this is the absolute max you could ever pack into the constants slots,
    but lighting & c may reduce it further... */
#ifdef OPTIMIZEBONES
    RWASSERT(skin->platformData.numBonesUsed <= 64);
#else
    RWASSERT(skin->boneData.numBones <= 64);
#endif

    /*
    The number of weights instanced per vertex has to match what the vertex
    shader expects, otherwise the stream size is mismatched against the
    shader and we end up skinning garbage.
    NOTE(review): the value is used as is; it is not rounded up to a weight
    count supported by some shader here - confirm that maxWeightsUsed is
    already a supported count.
    */
    weightsToInstance = skin->platformData.maxWeightsUsed;
#ifdef INSTANCESPEW
    RWMESSAGE((RWSTRING("Instancing... %d weights"), weightsToInstance));
#endif

    /*
     * Calculate the stride of the vertex
     */

    if (!reinstance)
    {
        /* Position */
        instancedData->stride = sizeof(RwV3d);

        /* Weights */
        instancedData->stride += sizeof(RwUInt8) * weightsToInstance;

        /* Indices */
        instancedData->stride += sizeof(RwInt16) * weightsToInstance;

        /* Normals */
        if (flags & rxGEOMETRY_NORMALS)
        {
            instancedData->stride += sizeof(RwUInt32);
    #ifdef INSTANCESPEW
            RWMESSAGE((RWSTRING("Instancing... rxGEOMETRY_NORMALS")));
    #endif
        }

        /* Pre-lighting */
        if (flags & rxGEOMETRY_PRELIT)
        {
            instancedData->stride += sizeof(RwUInt32);
    #ifdef INSTANCESPEW
            RWMESSAGE((RWSTRING("Instancing... rxGEOMETRY_PRELIT")));
    #endif
        }

        /* Texture coordinates */
        if (flags & rpGEOMETRYTEXTURED2)
        {
            instancedData->stride += 2 * sizeof(RwTexCoords);
    #ifdef INSTANCESPEW
            RWMESSAGE((RWSTRING("Instancing... rpGEOMETRYTEXTURED2")));
    #endif
        }
        else if (flags & rxGEOMETRY_TEXTURED)
        {
            instancedData->stride += sizeof(RwTexCoords);
    #ifdef INSTANCESPEW
            RWMESSAGE((RWSTRING("Instancing... rxGEOMETRY_TEXTURED")));
    #endif
        }

    #ifdef INSTANCESPEW
        RWMESSAGE((RWSTRING("Instancing... stride %d"), instancedData->stride));
        RWMESSAGE((RWSTRING("Instancing... %d vertices"), instancedData->numVertices));
    #endif

        /*
         * Create the vertex buffer
         */
        vbSize = instancedData->stride * instancedData->numVertices;

        if (D3D_OK != D3DDevice_CreateVertexBuffer(vbSize,
                                                   IGNORED_XBOX,
                                                   IGNORED_XBOX,
                                                   IGNORED_XBOX,
                                                   (D3DVertexBuffer **)&instancedData->vertexBuffer))
        {
            RWRETURN(FALSE);
        }
    }
    else
    {
        /* The stride was worked out when the buffer was first instanced */
        vbSize = instancedData->stride * instancedData->numVertices;
    }

    /*
     * Lock the vertex buffer
     */
    D3DVertexBuffer_Lock((D3DVertexBuffer *)instancedData->vertexBuffer,
                         0, vbSize, &lockedVertexBuffer, 0);

    /*
     * Instance
     *
     * Each attribute is written in its own pass over the buffer: start at
     * the attribute's offset within the first vertex and step by the stride.
     */

    /* Positions */
    pos = geometry->morphTarget[0].verts;

    vertexBuffer = lockedVertexBuffer;
    numVertices = instancedData->numVertices;
    while (numVertices--)
    {
        *((RwV3d *)vertexBuffer) = *pos;
        pos++;

        vertexBuffer += instancedData->stride;
    }

    offset = sizeof(RwV3d);

    /*
     * Weights
     */
    {
        RwMatrixWeights *weights;
        weights = skin->vertexMaps.matrixWeights;

        vertexBuffer = lockedVertexBuffer + offset;
        numVertices = instancedData->numVertices;
        while (numVertices--)
        {
            RwInt32 i;
            RwReal  w;

            for (i=0; i<weightsToInstance; i++)
            {
                /* the weights of one vertex are read as consecutive floats */
                w = *(((float *)weights)+i);
                RWASSERT( w >= 0.0f );
                RWASSERT( w <= 1.01f );

                *(((RwUInt8 *)vertexBuffer) + i) =  Weight2PackedWeight(w);
            }

            weights++;

            vertexBuffer += instancedData->stride;
        }

        offset += sizeof(RwUInt8) * weightsToInstance;
    }

    /* Indices */
    {
        RwUInt32    *indices;

        indices = skin->vertexMaps.matrixIndices;

        vertexBuffer = lockedVertexBuffer + offset;
        numVertices = instancedData->numVertices;
        while (numVertices--)
        {
            RwInt32 i;

            for (i=0; i<weightsToInstance; i++)
            {
                RwUInt32    index;

                /* unpack index: one byte per weight */
                index = ((*indices) >> (i*8)) & 0xFF;

                RWASSERT( (RwUInt32)index < skin->boneData.numBones );

                #ifdef OPTIMIZEBONES
                /* remap it to match remapped matrices */
                index =skin->platformData.vertexIndexMap[index];
                RWASSERT( (RwInt32)index < skin->platformData.numBonesUsed );
                #endif

                /* point into constant registers & store */
                *(((RwInt16 *)vertexBuffer) + i) = (RwInt16)(index * SHADERCONSTS_PER_BONE);
            }
            indices++;

            vertexBuffer += instancedData->stride;
        }

        offset += sizeof(RwInt16) * weightsToInstance;
    }

    /* Normals */
    if (flags & rxGEOMETRY_NORMALS)
    {
        RwV3d       *normal;

        normal = geometry->morphTarget[0].normals;

        vertexBuffer = lockedVertexBuffer + offset;
        numVertices = instancedData->numVertices;
        while (numVertices--)
        {
            *((RwUInt32 *)vertexBuffer) = Normal2PackedNormal(normal);
            vertexBuffer += instancedData->stride;
            normal++;
        }

        offset += sizeof(RwUInt32);
    }

    /* Pre-lighting */
    if (flags & rxGEOMETRY_PRELIT)
    {
        RwRGBA  *color;

        color = geometry->preLitLum;

        vertexBuffer = lockedVertexBuffer + offset;
        numVertices = instancedData->numVertices;
        while (numVertices--)
        {
            /* Widen to unsigned before shifting: shifting a promoted
               (signed) channel value of 128 or more left by 24 overflows */
            *((RwUInt32 *)vertexBuffer) = (((RwUInt32)color->alpha << 24) |
                                           ((RwUInt32)color->red << 16) |
                                           ((RwUInt32)color->green << 8) |
                                           ((RwUInt32)color->blue));
            vertexBuffer += instancedData->stride;
            color++;
        }

        offset += sizeof(RwUInt32);
    }

    /* Texture coordinates */
    if ((flags & rxGEOMETRY_TEXTURED) || (flags & rpGEOMETRYTEXTURED2))
    {
        RwTexCoords *texCoord;

        texCoord = geometry->texCoords[0];

        vertexBuffer = lockedVertexBuffer + offset;
        numVertices = instancedData->numVertices;
        while (numVertices--)
        {
            *((RwTexCoords *)vertexBuffer) = *texCoord;
            texCoord++;

            vertexBuffer += instancedData->stride;
        }

        offset += sizeof(RwTexCoords);
    }

    if (flags & rpGEOMETRYTEXTURED2)
    {
        RwTexCoords *texCoord;

        texCoord = geometry->texCoords[1];

        vertexBuffer = lockedVertexBuffer + offset;
        numVertices = instancedData->numVertices;
        while (numVertices--)
        {
            *((RwTexCoords *)vertexBuffer) = *texCoord;
            texCoord++;

            vertexBuffer += instancedData->stride;
        }

        offset += sizeof(RwTexCoords);
    }

    /*
     * Unlock the vertex buffer
     */
    D3DVertexBuffer_Unlock((D3DVertexBuffer *)instancedData->vertexBuffer);

    /*
     * Set the vertex shader flags - these just give the vertex format
     * shader needs to be determined every frame
     */

    /* Number of texture coordinate sets actually instanced above.
       rpGEOMETRYTEXTURED2 takes precedence, exactly as in the stride
       calculation, so the count stays within the 0..2 range the vertex
       format hash can encode even when both flags are set. */
    if (flags & rpGEOMETRYTEXTURED2)
    {
        numUVs = 2;
    }
    else if (flags & rxGEOMETRY_TEXTURED)
    {
        numUVs = 1;
    }
    else
    {
        numUVs = 0;
    }

    for (mesh = instancedData->begin; mesh != instancedData->end; ++mesh)
    {
        mesh->vertexShader =
                VertexFormatHash(   weightsToInstance,
                                    (flags & rxGEOMETRY_NORMALS) != 0,
                                    numUVs,
                                    (flags & rxGEOMETRY_PRELIT) != 0 );
    }

    RWRETURN(TRUE);
}

/*
 * Re-instance callback: refills the existing vertex buffer by running the
 * instance callback with its reinstance argument set to TRUE.
 *
 * object          - the object to re-instance
 * instancedHeader - the resource entry header to refill
 *
 * Returns whatever the instance callback returns.
 */
static RwBool
rxXBAtomicSkinReInstanceCallback(void *object, RxXboxResEntryHeader *instancedHeader)
{
    RwBool  refilled;

    RWFUNCTION(RWSTRING("rxXBAtomicSkinReInstanceCallback"));

    /* morph targets won't quite work, but DMorph is okay. */
    refilled = rxXBAtomicSkinInstanceCallback(object, instancedHeader, TRUE);

    RWRETURN(refilled);
}

/****************************************************************************
 rpSkinMatrixUpdate
  
 Inputs :
                                 
 Outputs:
 */
 
/*
 * Write an RwMatrix into the start of a D3DMATRIX as 12 consecutive
 * floats: three rows of four, holding the x, y and z components of
 * (right, up, at, pos) respectively. The remaining 4 floats of the
 * destination are left untouched.
 *
 * m - source matrix
 * d - destination; only its first 12 floats are written
 *
 * Declared void: the function never returned a value, and the implicit-int
 * form is not valid C from C99 onwards.
 */
__inline void RwMatrix2D3DMATRIX(  RwMatrix *m, D3DMATRIX *d )
{
    RwReal *val = (RwReal *)d;

    /* x components */
    *val++ = m->right.x;
    *val++ = m->up.x;
    *val++ = m->at.x;
    *val++ = m->pos.x;

    /* y components */
    *val++ = m->right.y;
    *val++ = m->up.y;
    *val++ = m->at.y;
    *val++ = m->pos.y;

    /* z components */
    *val++ = m->right.z;
    *val++ = m->up.z;
    *val++ = m->at.z;
    *val++ = m->pos.z;
}

/****************************************************************************
 _rpSkinMatrixUpdating

 Builds the per-bone skinning matrices for an atomic and writes them to
 dest, SHADERCONSTS_PER_BONE RwV4d entries per bone (via RwMatrix2D3DMATRIX).

 Three cases, selected by the hierarchy flags:
   - rpHANIMHIERARCHYNOMATRICES:
         skinToBone * (LTM of the node's frame) * inverse(atomic LTM)
   - rpHANIMHIERARCHYLOCALSPACEMATRICES:
         skinToBone * hierarchy matrix
   - otherwise:
         skinToBone * hierarchy matrix * inverse(atomic LTM)

 With OPTIMIZEBONES defined, only the skin->platformData.numBonesUsed bones
 listed in skin->platformData.matrixIndexMap are processed, and slot i of
 dest receives bone matrixIndexMap[i]. Without it, every hierarchy node is
 processed in order.

 If the atomic has no hierarchy attached, nothing is written to dest.

 Inputs :  *atomic     - Pointer to the current atomic
           *skin       - Pointer to the current skin
 Outputs :
           *dest       - where to put the matrices
 */

static void _rpSkinMatrixUpdating(
    RpAtomic *atomic,
    RpSkin *skin,
    RwV4d *dest )
{
    RpHAnimHierarchy *hierarchy;
    SkinAtomicData *atomicData;

    RWFUNCTION(RWSTRING("_rpSkinMatrixUpdating"));
    RWASSERT(NULL != atomic);
    RWASSERT(NULL != skin);

    /* Get the atomic's extension data. */
    atomicData = RPSKINATOMICGETDATA(atomic);
    RWASSERT(NULL != atomicData);

    /* Then its hierarchy. */
    hierarchy = atomicData->hierarchy;

    if(NULL != hierarchy)
    {
        RwMatrix result;
        RwInt32 numNodes;
        RwMatrix *matrixArray;
        const RwMatrix *skinToBone;

        /* Get the number of nodes in the hierarchy and its matrix array.
           (numNodes is only read by the non-OPTIMIZEBONES loops.) */
        numNodes = hierarchy->numNodes;
        matrixArray = hierarchy->pMatrixArray;
        RWASSERT(NULL != matrixArray);

        /* Get the bone information. */
        skinToBone = RpSkinGetSkinToBoneMatrices(skin);
        RWASSERT(NULL != skinToBone);

        if(hierarchy->flags & rpHANIMHIERARCHYNOMATRICES)
        {
            /* Case 1: the hierarchy matrix array is not used; take each
               bone's matrix from the LTM of its frame instead. */
            RwFrame *frame;
            RwMatrix *ltm;

            RwMatrix inverseLtm;
            RwMatrix prodMatrix;
            RwInt32 iNode;

            RpHAnimNodeInfo *frameInfo;

            /* Get the hierarchy's frame information. */
            frameInfo = hierarchy->pNodeInfo;
            RWASSERT(NULL != frameInfo);

            /* Setup the matrix flags. */
            inverseLtm.flags = 0;
            prodMatrix.flags = 0;

            /* Invert the atomic's ltm. */
            frame = RpAtomicGetFrame(atomic);
            RWASSERT(NULL != frame);
            ltm = RwFrameGetLTM(frame);
            RWASSERT(NULL != ltm);
            RwMatrixInvert(&inverseLtm, ltm);

            #ifdef OPTIMIZEBONES
            /* Only the bones this skin uses; iNode is the destination slot,
               srcMat the bone's index in the hierarchy. */
            for (iNode = 0; iNode < skin->platformData.numBonesUsed; iNode++)
            {
                RwInt32 srcMat = skin->platformData.matrixIndexMap[iNode];
                /* NOTE: these shadow the outer frame/ltm (the atomic's). */
                RwFrame *frame;
                RwMatrix *ltm;

                frame = frameInfo[srcMat].pFrame;
                RWASSERT(NULL != frame);
                ltm = RwFrameGetLTM(frame);
                RWASSERT(NULL != ltm);

                /* skinToBone * bone LTM ... */
                RwMatrixMultiply( &prodMatrix,
                                           &skinToBone[srcMat],
                                           ltm );

                /* ... * inverse atomic LTM */
                RwMatrixMultiply( &result,
                                           &prodMatrix,
                                           &inverseLtm);

                RwMatrix2D3DMATRIX(&result, (D3DMATRIX *)&dest[iNode * SHADERCONSTS_PER_BONE]);
            }
            #else
            /* Every node of the hierarchy, in hierarchy order. */
            for( iNode = 0; iNode < numNodes; iNode++ )
            {
                /* NOTE: these shadow the outer frame/ltm (the atomic's). */
                RwFrame *frame;
                RwMatrix *ltm;

                frame = frameInfo[iNode].pFrame;
                RWASSERT(NULL != frame);
                ltm = RwFrameGetLTM(frame);
                RWASSERT(NULL != ltm);

                RwMatrixMultiply( &prodMatrix,
                                           &skinToBone[iNode],
                                           ltm );

                RwMatrixMultiply( &result,
                                           &prodMatrix,
                                           &inverseLtm);

                RwMatrix2D3DMATRIX(&result, (D3DMATRIX *)&dest[iNode * SHADERCONSTS_PER_BONE]);
            }
            #endif
        }
        else
        {
            if( hierarchy->flags & rpHANIMHIERARCHYLOCALSPACEMATRICES )
            {
                /* Case 2: no multiply by the inverse atomic LTM here,
                   only skinToBone * hierarchy matrix. */
                RwInt32 i;

                #ifdef OPTIMIZEBONES
                for (i = 0; i < skin->platformData.numBonesUsed; i++)
                {
                    RwInt32 srcMat = skin->platformData.matrixIndexMap[i];
                    RwMatrixMultiply(&result, &skinToBone[srcMat], &matrixArray[srcMat]);
                    RwMatrix2D3DMATRIX(&result, (D3DMATRIX *)&dest[i * SHADERCONSTS_PER_BONE]);
                }
                #else
                for (i = 0; i < numNodes; i++)
                {
                    RwMatrixMultiply(&result, &skinToBone[i], &matrixArray[i]);
                    RwMatrix2D3DMATRIX(&result, (D3DMATRIX *)&dest[i * SHADERCONSTS_PER_BONE]);
                }
                #endif
            }
            else
            {
                /* Case 3: hierarchy matrices are additionally multiplied by
                   the inverse of the atomic's LTM. */
                RwFrame *frame;
                RwMatrix *ltm;

                RwMatrix inverseLtm;
                RwMatrix tempMatrix;
                RwInt32 i;

                /* Setup the matrix flags. */
                inverseLtm.flags = 0;

                /* Invert the atomic's ltm. */
                frame = RpAtomicGetFrame(atomic);
                RWASSERT(NULL != frame);
                ltm = RwFrameGetLTM(frame);
                RWASSERT(NULL != ltm);
                RwMatrixInvert(&inverseLtm, ltm);

                #ifdef OPTIMIZEBONES
                for (i = 0; i < skin->platformData.numBonesUsed; i++)
                {
                    RwInt32 srcMat = skin->platformData.matrixIndexMap[i];
                    RwMatrixMultiply(&tempMatrix, &skinToBone[srcMat], &matrixArray[srcMat]);
                    RwMatrixMultiply(&result, &tempMatrix, &inverseLtm);
                    RwMatrix2D3DMATRIX(&result, (D3DMATRIX *)&dest[i * SHADERCONSTS_PER_BONE]);
                }
                #else
                for (i = 0; i < numNodes; i++)
                {
                    RwMatrixMultiply(&tempMatrix, &skinToBone[i], &matrixArray[i]);
                    RwMatrixMultiply(&result, &tempMatrix, &inverseLtm);
                    RwMatrix2D3DMATRIX(&result, (D3DMATRIX *)&dest[i * SHADERCONSTS_PER_BONE]);
                }
                #endif

            }
        }
    }

    RWRETURNVOID();
}

#define COLORSCALAR 0.003921568627450980392156862745098f    /* 1.0f / 255.0f */

/*
 * Uploads the material colour as a vertex shader constant.
 *
 * Each 8-bit channel of mat->color (alpha included) is multiplied by
 * COLORSCALAR and by the material's diffuse surface property, and the
 * resulting four floats are written to VSCONST_REG_MAT_COLOR_OFFSET.
 */
static void SetMaterialColor(RpMaterial *mat)
{
    const RwRGBA    *rgba;
    RwReal          scale;
    RwReal          shaderColor[4];

    RWFUNCTION(RWSTRING("SetMaterialColor"));

    rgba  = &mat->color;
    scale = mat->surfaceProps.diffuse * COLORSCALAR;

    shaderColor[0] = rgba->red   * scale;
    shaderColor[1] = rgba->green * scale;
    shaderColor[2] = rgba->blue  * scale;
    shaderColor[3] = rgba->alpha * scale;

    D3DDevice_SetVertexShaderConstant(VSCONST_REG_MAT_COLOR_OFFSET,
                                      shaderColor,
                                      VSCONST_REG_MAT_COLOR_SIZE);

    RWRETURNVOID();
}

/* recomputed every time we render a new atomic */
static _MM_ALIGN16 D3DXMATRIX cachedViewWorldMatrix;

/*
 * Computes the environment-map texture transform and uploads it as vertex
 * shader constants at VSCONST_REG_ENV_OFFSET.
 *
 * frame - frame of the environment map, or NULL. When non-NULL the
 *         transform is (inverse(frame LTM) * camera LTM, rotation part
 *         only, right vector negated) * texMat * cachedViewWorldMatrix;
 *         when NULL it is texMat * cachedViewWorldMatrix.
 *
 * Only elements [i][0] and [i][1] (i = 0..3) of the resulting matrix are
 * uploaded, packed as two groups of four floats.
 *
 * The intermediate RwMatrix values live on the stack (flags cleared, as
 * _rpSkinMatrixUpdating does) rather than coming from RwMatrixCreate(), so
 * there is no allocation that can fail and nothing to free.
 */
void
_rpSkinSetEnvMatrix( RwFrame *frame )
{
    static _MM_ALIGN16 D3DXMATRIX texMat =
    {
         0.5f, 0.0f, 0.0f, 0.0f,
         0.0f,-0.5f, 0.0f, 0.0f,
         0.0f, 0.0f, 1.0f, 0.0f,
         0.5f, 0.5f, 0.0f, 1.0f
    };

    _MM_ALIGN16 D3DXMATRIX result;

    RwReal constants[8];

    RwInt32 i;

    if (frame)
    {
        _MM_ALIGN16 D3DMATRIX   matrix;
        _MM_ALIGN16 D3DMATRIX   temp;
        RwMatrix    *camMtx;
        RwMatrix    *envMtx;
        RwMatrix    invMtx;
        RwMatrix    tmpMtx;

        /* Both are pure outputs of the calls below; just clear the flags. */
        invMtx.flags = 0;
        tmpMtx.flags = 0;

        /* Transform the normals back into world space */
        camMtx = RwFrameGetLTM(RwCameraGetFrame(RwCameraGetCurrentCamera()));

        /* Transform the normals by the inverse of the env map's frame */
        envMtx = RwFrameGetLTM(frame);

        RwMatrixInvert(&invMtx, envMtx);

        RwMatrixMultiply(&tmpMtx, &invMtx, camMtx);

        /* Rotation part only; the right vector is negated. */
        matrix.m[0][0] = -tmpMtx.right.x;
        matrix.m[0][1] = -tmpMtx.right.y;
        matrix.m[0][2] = -tmpMtx.right.z;
        matrix.m[0][3] = 0.0f;

        matrix.m[1][0] = tmpMtx.up.x;
        matrix.m[1][1] = tmpMtx.up.y;
        matrix.m[1][2] = tmpMtx.up.z;
        matrix.m[1][3] = 0.0f;

        matrix.m[2][0] = tmpMtx.at.x;
        matrix.m[2][1] = tmpMtx.at.y;
        matrix.m[2][2] = tmpMtx.at.z;
        matrix.m[2][3] = 0.0f;

        matrix.m[3][0] = 0.0f;
        matrix.m[3][1] = 0.0f;
        matrix.m[3][2] = 0.0f;
        matrix.m[3][3] = 1.0f;

        D3DXMatrixMultiply(&temp, &matrix, &texMat);
        D3DXMatrixMultiply(&result, &temp, &cachedViewWorldMatrix);
    }
    else
    {
        D3DXMatrixMultiply(&result, &texMat, &cachedViewWorldMatrix);
    }

    /* pack elements [i][0] and [i][1] of the result into two constant registers */
    for (i=0; i<4; i++)
    {
        constants[i] = ((RwReal *)&result)[4 * i];
        constants[i+4] = ((RwReal *)&result)[4 * i + 1];
    }

    D3DDevice_SetVertexShaderConstant(
       VSCONST_REG_ENV_OFFSET,
       constants,
       VSCONST_REG_ENV_SIZE );
}

/* World matrix of the atomic being drawn; the skin render callback has it
   rewritten for every atomic. */
static _MM_ALIGN16 D3DMATRIX   cachedWorldMatrix;

/*
 * Uploads the three groups of bump-mapping vertex shader constants:
 * the cached world matrix, then the caller-supplied position fudge and
 * shift vectors, each to its own VSCONST_REG_BUMP* register range.
 */
void
_rpSkinSetBumpConstants(RwV4d *bumpPosFudge, RwV4d *bumpShift)
{
    /* world matrix of the current atomic */
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_BUMPWORLD_OFFSET,
                                      &cachedWorldMatrix,
                                      VSCONST_REG_BUMPWORLD_SIZE);

    /* position fudge */
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_BUMPPOSFUDGE_OFFSET,
                                      bumpPosFudge,
                                      VSCONST_REG_BUMPPOSFUDGE_SIZE);

    /* shift */
    D3DDevice_SetVertexShaderConstant(VSCONST_REG_BUMPSHIFT_OFFSET,
                                      bumpShift,
                                      VSCONST_REG_BUMPSHIFT_SIZE);
}
 
static void
SkinXboxComputeProjViewWorld(   D3DMATRIX * worldMatrix,
                                D3DMATRIX * viewWorldMatrix,
                                D3DMATRIX * projViewWorldMatrix,
                                RpAtomic *atomic,
                                const RwV4d *screenSpaceScale )
{
    _MM_ALIGN16 D3DMATRIX   viewMatrix;
    _MM_ALIGN16 D3DMATRIX   projMatrix;
                                 
    RwCamera    *camera;
    RwMatrix    *camLTM;
    RwMatrix    invCamMtx;
    RwMatrix    *atomicLTM;
    RwV2d       vw;

    RWFUNCTION(RWSTRING("ComputeProjViewWorld"));
    RWASSERT( projViewWorldMatrix != 0 );
    RWASSERT( atomic != 0 );

    camera = RwCameraGetCurrentCamera();

    /*
     * Projection matrix
     */
    vw.x = camera->recipViewWindow.x * screenSpaceScale->x;
    vw.y = camera->recipViewWindow.y * screenSpaceScale->y;
    projMatrix.m[0][0] = vw.x;
    projMatrix.m[0][1] = 0.0f;
    projMatrix.m[0][2] = vw.x * camera->viewOffset.x;  /* Shear */
    projMatrix.m[0][3] = -camera->viewOffset.x * screenSpaceScale->x; /* Translate to shear origin */

    projMatrix.m[1][0] = 0.0f;
    projMatrix.m[1][1] = vw.y;
    projMatrix.m[1][2] = vw.y * camera->viewOffset.y; /* Shear */
    projMatrix.m[1][3] = -camera->viewOffset.y * screenSpaceScale->y; /* Translate to shear origin */

    if (camera->projectionType == rwPARALLEL)
    {
        projMatrix.m[2][0] = 0.0f;
        projMatrix.m[2][1] = 0.0f;
        projMatrix.m[2][2] = screenSpaceScale->z / (camera->farPlane - camera->nearPlane);
        projMatrix.m[2][3] = -projMatrix.m[2][2] * camera->nearPlane;
        
        projMatrix.m[3][0] = 0.0f;
        projMatrix.m[3][1] = 0.0f;
        projMatrix.m[3][2] = 0.0f;        
        projMatrix.m[3][3] = 1.0f;
    }
    else
    {
        projMatrix.m[2][0] = 0.0f;
        projMatrix.m[2][1] = 0.0f;
        projMatrix.m[2][2] = screenSpaceScale->z * camera->farPlane / (camera->farPlane - camera->nearPlane);
        projMatrix.m[2][3] = -projMatrix.m[2][2] * camera->nearPlane;

        projMatrix.m[3][0] = 0.0f;
        projMatrix.m[3][1] = 0.0f;
        projMatrix.m[3][2] = 1.0f;
        projMatrix.m[3][3] = 0.0f;
    }
    
    /*
     * View matrix - (camera matrix)
     */
    camLTM = RwFrameGetLTM(RwCameraGetFrame(camera));

    RwMatrixSetIdentity(&invCamMtx);
    RwMatrixInvert(&invCamMtx, camLTM);

    viewMatrix.m[0][0] = -invCamMtx.right.x;
    viewMatrix.m[0][1] = -invCamMtx.up.x;
    viewMatrix.m[0][2] = -invCamMtx.at.x;
    viewMatrix.m[0][3] = -invCamMtx.pos.x;

    viewMatrix.m[1][0] = invCamMtx.right.y;
    viewMatrix.m[1][1] = invCamMtx.up.y;
    viewMatrix.m[1][2] = invCamMtx.at.y;
    viewMatrix.m[1][3] = invCamMtx.pos.y;

    viewMatrix.m[2][0] = invCamMtx.right.z;
    viewMatrix.m[2][1] = invCamMtx.up.z;
    viewMatrix.m[2][2] = invCamMtx.at.z;
    viewMatrix.m[2][3] = invCamMtx.pos.z;

    viewMatrix.m[3][0] = 0.0f;
    viewMatrix.m[3][1] = 0.0f;
    viewMatrix.m[3][2] = 0.0f;
    viewMatrix.m[3][3] = 1.0f;

    /* 
     * World space transformation matrix
     */
    atomicLTM = RwFrameGetLTM(RpAtomicGetFrame(atomic));

    worldMatrix->m[0][0] = atomicLTM->right.x;
    worldMatrix->m[0][1] = atomicLTM->up.x;
    worldMatrix->m[0][2] = atomicLTM->at.x;
    worldMatrix->m[0][3] = atomicLTM->pos.x;

    worldMatrix->m[1][0] = atomicLTM->right.y;
    worldMatrix->m[1][1] = atomicLTM->up.y;
    worldMatrix->m[1][2] = atomicLTM->at.y;
    worldMatrix->m[1][3] = atomicLTM->pos.y;

    worldMatrix->m[2][0] = atomicLTM->right.z;
    worldMatrix->m[2][1] = atomicLTM->up.z;
    worldMatrix->m[2][2] = atomicLTM->at.z;
    worldMatrix->m[2][3] = atomicLTM->pos.z;

    worldMatrix->m[3][0] = 0.0f;
    worldMatrix->m[3][1] = 0.0f;
    worldMatrix->m[3][2] = 0.0f;
    worldMatrix->m[3][3] = 1.0f;

    D3DXMatrixMultiply(viewWorldMatrix, &viewMatrix, worldMatrix);
    D3DXMatrixMultiply(projViewWorldMatrix, &projMatrix, viewWorldMatrix);

    RWRETURNVOID();
}

/*
 * One row of the multisample lookup table used by
 * ComputeScreenSpaceTransform.
 */
typedef struct ScreenSpaceFudgeFactor
{
    RwUInt32    mode;       /* compared against the render target's MultiSampleType */
    RwReal      scale_x;    /* multiplier for viewport width and X */
    RwReal      scale_y;    /* multiplier for viewport height and Y */
    RwReal      offset_x;   /* added to the screen-space X offset */
    RwReal      offset_y;   /* added to the screen-space Y offset */
} ScreenSpaceFudgeFactor;

/*
 * Computes the clip-space to screen-space scale and offset for the current
 * render target, depth/stencil surface and viewport.
 *
 * scale  - out: x = half viewport width, y = -(half viewport height), both
 *          multiplied by the multisample scale; z = maximum depth value for
 *          the depth format; w = 1.0f (not read by the caller in this file,
 *          set so the vector is fully defined).
 * offset - out: x/y = viewport origin * multisample scale + half extent +
 *          multisample offset; z = 0.0f; w = 255.0f.
 *
 * If the multisample type or depth format is not in the lookup tables,
 * debug builds assert; release builds fall back to the first table entry
 * instead of indexing past the end of the table.
 */
static
void ComputeScreenSpaceTransform( RwV4d *scale, RwV4d *offset )
{
    /* these magic numbers all come from the "Understanding the Vertex Shader Processor" white paper */
    static const ScreenSpaceFudgeFactor screenScaleOffset[] = {
        {D3DMULTISAMPLE_NONE,                                       1.0f,   1.0f,   0.53125f,   0.53125f},
        {D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_LINEAR,               1.0f,   1.0f,   0.03125f,   0.03125f},
        {D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_QUINCUNX,             1.0f,   1.0f,   0.03125f,   0.03125f},
        {D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_HORIZONTAL_LINEAR,    2.0f,   1.0f,   0.53125f,   0.53125f},
        {D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_VERTICAL_LINEAR,      1.0f,   2.0f,   0.53125f,   0.53125f},
        {D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_LINEAR,               1.0f,   1.0f,   0.03125f,   0.03125f},
        {D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_GAUSSIAN,             1.0f,   1.0f,   0.03125f,   0.03125f},
        {D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_LINEAR,               2.0f,   2.0f,   0.53125f,   0.53125f},
        {D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_GAUSSIAN,             2.0f,   2.0f,   0.53125f,   0.53125f},
        {D3DMULTISAMPLE_9_SAMPLES_MULTISAMPLE_GAUSSIAN,             1.5f,   1.5f,   0.03125f,   0.03125f},
        {D3DMULTISAMPLE_9_SAMPLES_SUPERSAMPLE_GAUSSIAN,             3.0f,   3.0f,   0.53125f,   0.53125f}
    };

    const RwInt32 numModes = sizeof( screenScaleOffset ) / sizeof( screenScaleOffset[0] );

    /* Indexed by (depth format - D3DFMT_D24S8).
       NOTE(review): relies on the D3DFMT_* depth formats being numbered
       consecutively in this order - confirm against the D3D headers. */
    static const RwReal zFudge[] = {
        (RwReal)D3DZ_MAX_D24S8,
        (RwReal)D3DZ_MAX_F24S8,
        (RwReal)D3DZ_MAX_D16,
        (RwReal)D3DZ_MAX_F16,
        (RwReal)D3DZ_MAX_D24S8,
        (RwReal)D3DZ_MAX_F24S8,
        (RwReal)D3DZ_MAX_D16,
        (RwReal)D3DZ_MAX_F16
    };

    const RwInt32 numZFormats = sizeof( zFudge ) / sizeof( zFudge[0] );

    RwInt32 mode, zformat;
    RwReal halfWidth, halfHeight;

    D3DVIEWPORT8    viewport;

    D3DSurface      *renderTarget;
    D3DSURFACE_DESC colorDesc, depthStencilDesc;

    RWFUNCTION(RWSTRING("ComputeScreenSpaceTransform"));
    RWASSERT( NULL != scale );
    RWASSERT( NULL != offset );

    /* get multisample mode from current render target */
    D3DDevice_GetRenderTarget( &renderTarget );
    if (renderTarget)
    {
        D3DSurface_GetDesc(renderTarget, &colorDesc );
        D3DSurface_Release( renderTarget );
    }
    else
    {
        colorDesc.MultiSampleType = D3DMULTISAMPLE_NONE;
    }

    /* match current sample mode to our table... */
    for (mode = 0; mode<numModes; mode++)
    {
        if (screenScaleOffset[mode].mode == colorDesc.MultiSampleType)
        {
            break;
        }
    }

    RWASSERT(mode < numModes ); /* No mode found go check D3D docs for missing modes!!! */
    if (mode >= numModes)
    {
        /* release builds: use the no-multisample entry rather than
           reading past the end of the table */
        mode = 0;
    }

    /* get z/stencil buffer type from current render target */
    D3DDevice_GetDepthStencilSurface( &renderTarget );
    if (renderTarget)
    {
        /* we're using a z/stencil buffer */
        D3DSurface_GetDesc(renderTarget, &depthStencilDesc );
        D3DSurface_Release( renderTarget );
        zformat = (RwInt32)depthStencilDesc.Format - (RwInt32)D3DFMT_D24S8;
    }
    else
    {
        /* we're not using a z/stencil buffer */
        zformat = 0;
    }

    RWASSERT( zformat >= 0 && zformat < numZFormats); /* No mode found go check D3D docs for missing modes!!! */
    if ((zformat < 0) || (zformat >= numZFormats))
    {
        /* release builds: same fallback as when there is no depth buffer */
        zformat = 0;
    }

    /* work out constants from current viewport */
    D3DDevice_GetViewport( &viewport );

    halfWidth = 0.5f * (RwReal)viewport.Width * screenScaleOffset[mode].scale_x;
    halfHeight = 0.5f * (RwReal)viewport.Height * screenScaleOffset[mode].scale_y;
    scale->x = halfWidth;
    scale->y = - halfHeight;
    scale->z = zFudge[zformat];
    scale->w = 1.0f;

    offset->x = viewport.X * screenScaleOffset[mode].scale_x + halfWidth + screenScaleOffset[mode].offset_x;
    offset->y = viewport.Y * screenScaleOffset[mode].scale_y + halfHeight + screenScaleOffset[mode].offset_y;
    offset->z = 0.0f;
    offset->w = 255.0f;

    RWRETURNVOID();
}

/* Copy of NUM_CONSTANTS_CACHED constant registers, saved on entry to
   192-constant mode and written back on the way out. */
static RwV4d    ConstantDataCache[NUM_CONSTANTS_CACHED];
/* TRUE between SkinXboxTo192Constants() and SkinXboxTo96Constants(). */
static RwBool   in192ConstantMode = FALSE;

/*
 * Switches the device to 192-constant mode with no reserved constants and
 * saves NUM_CONSTANTS_CACHED registers, starting at register -96, into
 * ConstantDataCache so SkinXboxTo96Constants() can put them back.
 *
 * Declared with a (void) prototype so calls with arguments are diagnosed.
 */
static void SkinXboxTo192Constants(void)
{
    /*
     * Switch to 192 constant mode - do screen space transform ourselves no -37 -38!
     */

    D3DDevice_SetShaderConstantMode(D3DSCM_192CONSTANTS | D3DSCM_NORESERVEDCONSTANTS );

    D3DDevice_GetVertexShaderConstant(-96,
                                      ConstantDataCache,
                                      NUM_CONSTANTS_CACHED );

    in192ConstantMode = TRUE;
}

/*
 * Undoes SkinXboxTo192Constants(): writes the NUM_CONSTANTS_CACHED saved
 * registers back starting at register -96, then switches the device to
 * 96-constant mode.
 *
 * Declared with a (void) prototype so calls with arguments are diagnosed.
 */
static
void SkinXboxTo96Constants(void)
{
    /*
     * Restore the cached constant registers
     */

    D3DDevice_SetVertexShaderConstant(-96,
                                      ConstantDataCache,
                                      NUM_CONSTANTS_CACHED
                                      );
    /*
     * Switch to 96 constant mode
     */
    D3DDevice_SetShaderConstantMode(D3DSCM_96CONSTANTS);

    in192ConstantMode = FALSE;
}

/*
 * Draws every mesh of an instanced atomic.
 *
 * resEntryHeader - supplies the vertex buffer, stride, primitive type,
 *                  mesh count and vertexAlpha flag
 * instancedData  - first of resEntryHeader->numMeshes consecutive
 *                  per-mesh records
 * atomic         - only read in RWMETRICS builds (triangle count)
 * flags          - not used here
 * meshShaders    - one entry per mesh, passed to SetShader cast to RwInt32
 *
 * For each mesh: select the shader, upload the material colour, then run
 * material set-up / draw repeatedly, with the pass number starting at 1,
 * until _rpSkinXboxMaterialSetUp reports no further passes; finally tear
 * the material state down.
 */
static void
SubmitMeshes(
    RxXboxResEntryHeader    *resEntryHeader,
    RxXboxInstanceData      *instancedData,
    RpAtomic *atomic,
    RwUInt32 flags,
    DWORD **meshShaders
    )
{
    RxXboxInstanceData  *inst;
    RwInt32             meshIndex;

    RWFUNCTION(RWSTRING("SubmitMeshes"));

    D3DDevice_SetStreamSource(0,
                              (D3DVertexBuffer *)resEntryHeader->vertexBuffer,
                              resEntryHeader->stride);

    inst = instancedData;

    for (meshIndex = 0; meshIndex < resEntryHeader->numMeshes; meshIndex++, inst++)
    {
        RwInt32 passNumber;

        SetShader(inst->vertexShader, (RwInt32)meshShaders[meshIndex]);

        /* material set up */
        SetMaterialColor( inst->material );

        passNumber = 1;

        for (;;)
        {
            const RwInt32 morePasses =
                _rpSkinXboxMaterialSetUp( inst->material, passNumber );
            const RwBool needsAlpha =
                resEntryHeader->vertexAlpha ||
                (0xFF != inst->material->color.alpha);

            /* alpha blending is needed for vertex alpha or a translucent material */
            RwRenderStateSet(rwRENDERSTATEVERTEXALPHAENABLE,
                             (void *)(needsAlpha ? TRUE : FALSE));

            /* Render */
            D3DDevice_DrawIndexedVertices((D3DPRIMITIVETYPE)resEntryHeader->primType,
                                          inst->numIndices,
                                          inst->indexBuffer);

            if (morePasses <= 0)
            {
                break;
            }

            passNumber++;
        }

        _rpSkinXboxMaterialTearDown( inst->material );

        #ifdef RWMETRICS
        _rwXbMetricsInc( inst->numVertices, inst->numIndices, resEntryHeader->primType );
        #endif /* RWMETRICS */
    }

    /* Count up things drawn for metrics builds */
    #ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += RpGeometryGetNumTriangles( RpAtomicGetGeometry( atomic ) );
    #endif /* RWMETRICS */

    RWRETURNVOID();
}

/*
 * Scratch array with one slot per mesh, filled by the lighting callback and
 * read by SubmitMeshes. Grown on demand by the render callback (capacity in
 * lastMeshShaders) and released by _rpSkinPipelinesDestroy.
 */
static DWORD   **meshShaders = 0;
static RwInt32  lastMeshShaders = 0;

/*
 * Render callback for skinned atomics.
 *
 * resEntryHeader - instanced data for the atomic
 * object         - the RpAtomic being rendered
 * type           - not used here
 * flags          - forwarded to SubmitMeshes
 *
 * Steps: make sure the per-mesh shader scratch array is large enough,
 * switch to 192-constant mode, compute the screen-space and
 * proj/view/world transforms into perAtomicConstants, run the lighting
 * callback, append the bone matrices, upload all per-atomic constants,
 * draw the meshes and switch back to 96-constant mode.
 *
 * The atomic is skipped (nothing drawn, device state untouched) if it has
 * no skin or the scratch array cannot be allocated.
 */
static void
rxXBAtomicSkinRenderCallback(RxXboxResEntryHeader *resEntryHeader,
                             void *object,
                             RwUInt8 type,
                             RwUInt32 flags)
{
    RpAtomic    *atomic = (RpAtomic *)object;
    RpSkin      *skin;
    RwV4d       screenSpaceScale;

    RWFUNCTION(RWSTRING("rxXBAtomicSkinRenderCallback"));

    skin = RpSkinGeometryGetSkin(RpAtomicGetGeometry(atomic));
    if (NULL == skin)
    {
        /* skin->platformData is dereferenced below; nothing sensible to draw */
        RWRETURNVOID();
    }

    /* make some space to store shaders to be used for each mesh, determined by lighting.
       Done before any device state changes so a failed allocation can simply return. */
    if (resEntryHeader->numMeshes > lastMeshShaders)
    {
        if (meshShaders)
        {
            RwFree( meshShaders );
            meshShaders = NULL;
        }
        lastMeshShaders = 0;

        meshShaders = RwMalloc( resEntryHeader->numMeshes * sizeof(DWORD *) );
        if (NULL == meshShaders)
        {
            /* capacity stays 0 so the next call retries the allocation */
            RWRETURNVOID();
        }

        lastMeshShaders = resEntryHeader->numMeshes;
    }

    SkinXboxTo192Constants();

    ComputeScreenSpaceTransform( &screenSpaceScale,
        /* bung screenspace offset right into constant block */
        &perAtomicConstants[VSCONST_REG_SCREENSPACE_OFFSET - PER_ATOMIC_OFFSET] );

    SkinXboxComputeProjViewWorld(
        /* keep world matrix around in case we need some bump mapping */
        &cachedWorldMatrix,
        /* keep view world matrix around in case we need some env mapping */
        &cachedViewWorldMatrix,
        /* bung screenspacescale/proj/view/world matrix right into constant block */
        (D3DXMATRIX*)&perAtomicConstants[VSCONST_REG_TRANSFORM_OFFSET - PER_ATOMIC_OFFSET],
        atomic,
        &screenSpaceScale );

    /* figure out lighting, convert to shader constants
    and determine best shader for each mesh in atomic */
    rxXbAtomicSkinLightingCallback(resEntryHeader, resEntryHeader->begin, atomic, meshShaders);

    /* convert bone matrices to shader constants, appended after whatever
       perAtomicCount constants are already in the block
       (NOTE(review): perAtomicCount is presumably set by the lighting
       callback - confirm) */
    _rpSkinMatrixUpdating(atomic, skin, &perAtomicConstants[perAtomicCount]);

#ifdef OPTIMIZEBONES
    perAtomicCount += skin->platformData.numBonesUsed * SHADERCONSTS_PER_BONE;
#else
    perAtomicCount += skin->boneData.numBones * SHADERCONSTS_PER_BONE;
#endif

#ifdef RWDEBUG
    /* MS still (August Final QFE build 3926) have an assert in their code which wrongly fires
    when you set more than 135 constants at once. It's fine in release though.
    */
    #define BOGUS_ASSERT 135
    if (perAtomicCount <= BOGUS_ASSERT)
    {
        /* upload per atomic shader constants all at once */
        D3DDevice_SetVertexShaderConstant(PER_ATOMIC_OFFSET,
                                  perAtomicConstants,
                                  perAtomicCount);
    }
    else
    {
        /* upload per atomic shader constants in two gos :-P */
        D3DDevice_SetVertexShaderConstant(PER_ATOMIC_OFFSET,
                                  perAtomicConstants,
                                  BOGUS_ASSERT);

        D3DDevice_SetVertexShaderConstant(PER_ATOMIC_OFFSET + BOGUS_ASSERT,
                                  &perAtomicConstants[BOGUS_ASSERT],
                                  perAtomicCount - BOGUS_ASSERT);
    }
#else
    /* upload per atomic shader constants all at once */
    D3DDevice_SetVertexShaderConstant(PER_ATOMIC_OFFSET,
                              perAtomicConstants,
                              perAtomicCount);
#endif

    SubmitMeshes(resEntryHeader, resEntryHeader->begin, atomic, flags, meshShaders );

    SkinXboxTo96Constants();

    RWRETURNVOID();
}

/****************************************************************************
 _rpSkinPipelinesDestroy

 Purpose:   Destroys the skin pipeline (if one was created), releases the
            render callback's per-mesh shader scratch array, destroys the
            shaders and calls _rpSkinXboxPipelinesDestroyExtra().

 On entry:  nothing

 On exit:   always TRUE
 */
RwBool
_rpSkinPipelinesDestroy(void)
{
    RWFUNCTION(RWSTRING("_rpSkinPipelinesDestroy"));

    /* the pipeline is NULL if creation failed or destroy already ran */
    if (NULL != XboxSkinPipeline)
    {
        RxPipelineDestroy(XboxSkinPipeline);
        XboxSkinPipeline = NULL;
    }

    /* release the render callback's scratch array and reset its capacity
       so a later pipeline re-create starts from a clean state */
    if (NULL != meshShaders)
    {
        RwFree( meshShaders );
        meshShaders = NULL;
    }
    lastMeshShaders = 0;

    DestroyShaders();

    _rpSkinXboxPipelinesDestroyExtra();

    RWRETURN(TRUE);
}

/****************************************************************************
 rpAtomicSkinPipelineCreate

 Purpose:   Creates a pipeline consisting of the single Xbox atomic
            all-in-one node.

 On entry:  nothing

 On exit:   the new, unlocked pipeline, or NULL if creating, locking,
            adding the node or unlocking failed (the partially built
            pipeline is destroyed in that case).
 */
static RxPipeline *
rpAtomicSkinPipelineCreate(void)
{
    RxPipeline  *pipe;

    RWFUNCTION(RWSTRING("rpAtomicSkinPipelineCreate"));

    pipe = RxPipelineCreate();
    if (pipe)
    {
        RxLockedPipe    *lpipe;

        lpipe = RxPipelineLock(pipe);
        if (NULL != lpipe)
        {
            RxNodeDefinition    *instanceNode;

            /*
             * Get the instance node definition
             */
            instanceNode = RxNodeDefinitionGetXboxAtomicAllInOne();

            /*
             * Add the node to the pipeline
             */
            lpipe = RxLockedPipeAddFragment(lpipe, NULL, instanceNode, NULL);
            if (NULL != lpipe)
            {
                /*
                 * Unlock the pipeline; only a successfully unlocked
                 * pipeline is handed back to the caller
                 */
                lpipe = RxLockedPipeUnlock(lpipe);
                if (NULL != lpipe)
                {
                    RWRETURN(pipe);
                }
            }
        }

        /* any failure above ends up here */
        RxPipelineDestroy(pipe);
    }

    RWRETURN(NULL);
}

/*
 * Creates the shaders and the skin pipeline, then installs this file's
 * instance, re-instance and render callbacks on the pipeline's all-in-one
 * node. The node's previous render callback is kept in RenderPipeline and
 * its lighting callback is cleared.
 *
 * Returns FALSE if shader initialisation or pipeline creation fails;
 * otherwise returns the result of _rpSkinXboxPipelinesCreateExtra(pipes).
 */
RwBool
_rpSkinPipelinesCreate(RwUInt32 pipes)
{
    RxNodeDefinition    *allInOneDef;
    RxPipelineNode      *allInOneNode;
    RwBool              extraOk;

    RWFUNCTION(RWSTRING("_rpSkinPipelinesCreate"));

    /* shaders come first; without them there is nothing to render with */
    if (!InitShaders())
    {
        RWRETURN(FALSE);
    }

    /* build the atomic pipeline */
    XboxSkinPipeline = rpAtomicSkinPipelineCreate();
    if (!XboxSkinPipeline)
    {
        RWRETURN(FALSE);
    }

    /* locate the all-in-one node inside it by the definition's name */
    allInOneDef = RxNodeDefinitionGetXboxAtomicAllInOne();
    RWASSERT(NULL != allInOneDef);

    allInOneNode = RxPipelineFindNodeByName(XboxSkinPipeline,
                                            allInOneDef->name,
                                            NULL,
                                            NULL);
    RWASSERT(NULL != allInOneNode);

    /* skinning instance callback */
    _rxXboxAllInOneSetInstanceCallBack(allInOneNode,
                                       rxXBAtomicSkinInstanceCallback);

    /* Morph targets will completely destroy our skinning instance data
       if we don't overload this!!! */
    _rxXboxAllInOneSetReinstanceCallBack(allInOneNode,
                                         rxXBAtomicSkinReInstanceCallback);

    /* no lighting callback - we do ours from within the render callback
       because we need to know mesh info */
    RxXboxAllInOneSetLightingCallBack(allInOneNode, 0);

    /* remember the default render callback before replacing it */
    RenderPipeline = RxXboxAllInOneGetRenderCallBack(allInOneNode);

    /* skinning render callback */
    RxXboxAllInOneSetRenderCallBack(allInOneNode,
                                    rxXBAtomicSkinRenderCallback);

    extraOk = _rpSkinXboxPipelinesCreateExtra( pipes );

    RWRETURN(extraOk);
}

/*
 * Allocates the two 256-entry bone index maps of a skin's platform data and
 * zeroes its counters.
 *
 * The function cannot report failure, so on allocation failure it leaves
 * BOTH map pointers NULL (never one valid and one NULL); callers can test
 * either pointer.
 */
static void
XboxSkinPlatformDataCreate( RpSkin *skin )
{
    RWFUNCTION(RWSTRING("XboxSkinPlatformDataCreate"));

    skin->platformData.matrixIndexMap = RwMalloc(sizeof(RwInt32) * 256);
    skin->platformData.vertexIndexMap = RwMalloc(sizeof(RwInt32) * 256);
    skin->platformData.numBonesUsed = 0;
    skin->platformData.maxWeightsUsed = 0;

    if ((NULL == skin->platformData.matrixIndexMap) ||
        (NULL == skin->platformData.vertexIndexMap))
    {
        /* release whichever allocation did succeed */
        if (NULL != skin->platformData.matrixIndexMap)
        {
            RwFree( skin->platformData.matrixIndexMap );
            skin->platformData.matrixIndexMap = NULL;
        }
        if (NULL != skin->platformData.vertexIndexMap)
        {
            RwFree( skin->platformData.vertexIndexMap );
            skin->platformData.vertexIndexMap = NULL;
        }
    }

    RWRETURNVOID();
}

/*
 * Frees the bone index maps of a skin's platform data.
 *
 * Tolerates maps that were never allocated, and clears the pointers so a
 * second call is harmless.
 */
static void
XboxSkinPlatformDataDestroy( RpSkin *skin )
{
    RWFUNCTION(RWSTRING("XboxSkinPlatformDataDestroy"));

    if (NULL != skin->platformData.matrixIndexMap)
    {
        RwFree( skin->platformData.matrixIndexMap );
        skin->platformData.matrixIndexMap = NULL;
    }

    if (NULL != skin->platformData.vertexIndexMap)
    {
        RwFree( skin->platformData.vertexIndexMap );
        skin->platformData.vertexIndexMap = NULL;
    }

    RWRETURNVOID();
}

/*
 * Makes the atomic render through the skin pipeline.
 *
 * Returns whatever RpAtomicSetPipeline returns.
 */
RpAtomic *
_rpSkinPipelinesAttach(RpAtomic *atomic)
{
    RxPipeline  *skinPipe;
    RpAtomic    *attached;

    RWFUNCTION(RWSTRING("_rpSkinPipelinesAttach"));

    skinPipe = XboxSkinPipeline;
    RWASSERT(NULL != skinPipe);

    /*
     * Any pipeline already set on the atomic is simply replaced
     * (original note: if it had matfx, we'll fix it later, otherwise, tough!)
     */
    attached = RpAtomicSetPipeline(atomic, skinPipe);

    RWRETURN(attached);
}

/****************************************************************************
 _rpSkinInitialize

 Builds the platform-specific bone remapping for a skinned geometry, so
 that only the bones its vertices actually reference need uploading.

 After a successful call, in skin->platformData:
   maxWeightsUsed    - highest weight slot (1..4) with a weight > 0 on any
                       vertex, rounded up by GetMinWeightsSupportedGE()
   numBonesUsed      - number of distinct bones referenced
   vertexIndexMap[b] - compacted index of original bone b (0 if b is unused)
   matrixIndexMap[n] - original bone index of compacted bone n
                       (-1 for n >= numBonesUsed)

 Inputs :   RpGeometry *        A pointer to a skinned geometry.

 Outputs:   RpGeometry *        geometry on success; NULL if the geometry
                                has no skin or the index maps could not be
                                allocated
 */

RpGeometry *
_rpSkinInitialize(RpGeometry *geometry)
{
    RpSkin             *skin;
    RwInt32             i;
    RwInt32             numVertices;
    RwInt32             newBoneIndex;
    RwInt32             maxWeights;
    RwBool              usedBones[256];

    RWFUNCTION(RWSTRING("_rpSkinInitialize"));

    /* build a platform specific mapping for bone indices, this allows us
       to only use the subset of bone matrices which are actually used */
    skin = RpSkinGeometryGetSkin(geometry);

    if (skin == NULL)
    {
        RWRETURN((RpGeometry *)NULL);
    }

    XboxSkinPlatformDataCreate( skin );

    /* the maps are written unconditionally below, so bail out if either
       allocation failed */
    if ((NULL == skin->platformData.matrixIndexMap) ||
        (NULL == skin->platformData.vertexIndexMap))
    {
        RWRETURN((RpGeometry *)NULL);
    }

    /* assume none are used */
    for (i=0; i<256; i++)
    {
        usedBones[i] = FALSE;
    }

    /* check each vertex to see which bones it references & flag them;
       each vertex packs four 8-bit bone indices into one word, slot k in
       bits 8k..8k+7, paired with weight wk */
    maxWeights = 0;
    numVertices = RpGeometryGetNumVertices(geometry);

    for (i=0; i<numVertices; i++)
    {
        const RwUInt32  packed = skin->vertexMaps.matrixIndices[i];
        RwReal          weights[4];
        RwInt32         slot;

        weights[0] = skin->vertexMaps.matrixWeights[i].w0;
        weights[1] = skin->vertexMaps.matrixWeights[i].w1;
        weights[2] = skin->vertexMaps.matrixWeights[i].w2;
        weights[3] = skin->vertexMaps.matrixWeights[i].w3;

        for (slot=0; slot<4; slot++)
        {
            if (weights[slot] > 0.0f)
            {
                usedBones[(packed >> (8 * slot)) & 0xFF] = TRUE;

                if (maxWeights < slot + 1)
                {
                    maxWeights = slot + 1;
                }
            }
        }
    }

    RWASSERT( maxWeights > 0 );

    /*
    We need to fudge this number up to the minimum # of weights we can handle with any vertex shader,
    otherwise the stream size will be mismatched against the shader, and we'll end up skinning garbage
    which doesn't tend to look good.
    */
    skin->platformData.maxWeightsUsed = GetMinWeightsSupportedGE( maxWeights );

    /* unused compacted slots stay at -1 */
    for (i=0; i<256; i++)
    {
        skin->platformData.matrixIndexMap[i] = -1;
    }

    /* collapse indices of bones used into a contiguous block:
       vertexIndexMap is original -> compacted (unused bones map to 0),
       matrixIndexMap is compacted -> original */
    newBoneIndex = 0;

    for (i=0; i<256; i++)
    {
        if (usedBones[i])
        {
            skin->platformData.vertexIndexMap[i] = newBoneIndex;
            skin->platformData.matrixIndexMap[newBoneIndex] = i;
            newBoneIndex++;
        }
        else
        {
            skin->platformData.vertexIndexMap[i] = 0;
        }
    }

    skin->platformData.numBonesUsed = newBoneIndex;

    RWRETURN(geometry);
}

/*
 * Releases the platform-specific skin data of a geometry, if it has a skin.
 * Always returns the geometry passed in.
 */
RpGeometry *
_rpSkinDeinitialize(RpGeometry *geometry)
{
    RpSkin *skin;

    RWFUNCTION(RWSTRING("_rpSkinDeinitalise"));
    RWASSERT(NULL != geometry);

    skin = RpSkinGeometryGetSkin(geometry);

    /* geometries without a skin have nothing to release */
    if (skin)
    {
        XboxSkinPlatformDataDestroy(skin);
    }

    RWRETURN(geometry);
}
