/*
 * Refinement plugin
 */

/****************************************************************************
 *                                                                          *
 *  Module  :   nodeSSERefine.c                                             *
 *                                                                          *
 *  Purpose :   Node of Surface Refinement plugin (rpRefine.c)              *
 *                                                                          *
 ****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "rpplugin.h"
#include "time.h"
#include <rpdbgerr.h>
#include <rwcore.h>
#include "rprefine.h"
#include "refinevars.h"

/* Check for SSE */

#if ( ((defined(__ICL)) && (400 <= __ICL)) && defined(RWSIMD) )

#include <rtintel.h>
#include <rpworld.h>

#include "bbtpsse.h"
#include "refinesse.h"

#include "nodeRefine.h"
#include "nodeSSERefine.h"

static const char   rcsid[] __RWUNUSED__ =
    "@@(#)$Id: nodeSSERefine.c,v 1.62 2001/02/02 16:04:20 johns Exp $";

/****************************************************************************
 Local defines
 */

#define NUMCLUSTERSOFINTEREST 6
#define NUMOUTPUTS            2

#define MESSAGE(_string)                                        \
    RwDebugSendMessage(rwDEBUGMESSAGE, "RefineSSECSL", _string)

#define REFINEOUT         0
#define REFINEPASSTHROUGH 1

/*****************************************************************************
                           Node core funcs
 ****************************************************************************/

#if (defined(RWDEBUG))

/*
 * Debug builds only: format and send a message saying that the extra
 * per-vertex cluster number (_i + 1) of kind _what (a string literal, pasted
 * onto the front of the format string) does not have the same element count
 * as the vertices it is meant to run parallel to.
 */
#define DEBUGREFINEPROCESSPACKETMISMATCH(_what, _i)                           \
do                                                                            \
{                                                                             \
    static char                text[256];                                     \
                                                                              \
    sprintf(text,                                                             \
            _what                                                             \
            " cluster %d does not coincide with supposedly parallel vertices",\
            ((_i) + 1));                                                      \
    MESSAGE(text);                                                            \
} while (0)

/* Report a mismatched extra UV cluster. */
#define DEBUGREFINEPROCESSPACKETUV(_i)                                        \
    DEBUGREFINEPROCESSPACKETMISMATCH("UVs", _i)

/* Report a mismatched extra RGBA cluster. */
#define DEBUGREFINEPROCESSPACKETRGB(_i)                                       \
    DEBUGREFINEPROCESSPACKETMISMATCH("RGBAs", _i)

#endif /* (defined(RWDEBUG)) */

/* When no reporting macro has been defined, both collapse to nothing. */
#if (!defined(DEBUGREFINEPROCESSPACKETUV))
#define DEBUGREFINEPROCESSPACKETUV(_i) /* No op */
#endif /* (!defined(DEBUGREFINEPROCESSPACKETUV)) */

#if (!defined(DEBUGREFINEPROCESSPACKETRGB))
#define DEBUGREFINEPROCESSPACKETRGB(_i) /* No op */
#endif /* (!defined(DEBUGREFINEPROCESSPACKETRGB)) */

/*****************************************************************************
                           Primary packet process loop
 ****************************************************************************/

/*
 * _refineProcessPacketSSE
 *
 * Refines the triangle list held in one packet.
 *
 * Every source triangle becomes (depth * depth) triangles and gains
 * ((depth + 1) * (depth + 2) / 2) - 3 new vertices. The camera-space
 * vertices, device vertices, indices and camera normals clusters (packet
 * slots 0, 1, 2 and 5) are resized to hold the refined mesh, as is every
 * extra UV / RGBA cluster that actually carries data. The mesh state
 * (slot 3) is updated with the new counts, and the object-space vertices
 * (slot 4) are destroyed because they no longer match the refined mesh.
 *
 * self          - node instance, used as the owner for the cluster locks
 * packet        - packet whose clusters are refined in place
 * heap          - heap used for scratch tables and by the refinement itself
 * depth         - subdivision depth
 * numExtraUVs   - number of extra UV cluster slots following the fixed ones
 * numExtraRGBAs - number of extra RGBA cluster slots following the UV ones
 *
 * Returns the result of _rtrefineGenerateRefinementSSE, or FALSE when a
 * scratch table could not be allocated. The caller in this file destroys
 * the packet when FALSE is returned.
 */
static              RwBool
_refineProcessPacketSSE(RxPipelineNodeInstance * self,
                        RxPacket * packet, RxHeap * heap,
                        RwUInt32 depth, RwUInt32 numExtraUVs,
                        RwUInt32 numExtraRGBAs)
{
    RxMeshStateVector  *stateData;
    RxCluster          *indices, *state, *devVerts, *camVerts,
        *objVerts, *camNorms, **extraUVs, **extraRGBAs;
    RwUInt32            i, numTris, newVerts, numVerts, numPresentUVs,
        numPresentRGBAs;
    refineSSERefineData refineData;
    refineCameraData    camData;
    RwBool              result;

    RWFUNCTION(RWSTRING("_refineProcessPacketSSE"));

    result = TRUE;
    objVertKilled = FALSE;

    /* Slot numbers follow the cluster table of the node definition. */
    camVerts = RxClusterLockWrite(packet, 0, self);
    devVerts = RxClusterLockWrite(packet, 1, self);
    indices = RxClusterLockWrite(packet, 2, self);
    state = RxClusterLockWrite(packet, 3, self);
    objVerts = RxClusterLockRead(packet, 4);
    camNorms = RxClusterLockWrite(packet, 5, self);

    RWASSERT((devVerts != NULL) && (devVerts->numUsed > 0));
    RWASSERT((camVerts != NULL) && (camVerts->numUsed > 0));
    RWASSERT((indices != NULL) && (indices->numUsed > 0));
    RWASSERT((state != NULL) && (state->numUsed > 0));
    RWASSERT((objVerts != NULL) && (objVerts->numUsed > 0));
    RWASSERT((camNorms != NULL) && (camNorms->numUsed > 0));

    stateData = RxClusterGetCursorData(state, RxMeshStateVector);

    /* Calculate the new number of verts and tris. */
    numTris = (depth * depth) * stateData->NumElements;

    /* New vertices per triangle: the full grid minus the 3 original corners */
    newVerts = (((depth + 1) * (depth + 2)) >> 1) - 3;
    numVerts =
        stateData->NumVertices + (stateData->NumElements * newVerts);

    /* Scratch tables of cluster pointers. A failed allocation is reported
     * to the caller instead of being dereferenced in non-debug builds. */
    extraUVs = NULL;
    if (numExtraUVs != 0)
    {
        extraUVs = RxHeapAlloc(heap, numExtraUVs * sizeof(RxCluster *));
        RWASSERT(NULL != extraUVs);
        if (NULL == extraUVs)
        {
            RWRETURN(FALSE);
        }
    }

    extraRGBAs = NULL;
    if (numExtraRGBAs != 0)
    {
        extraRGBAs =
            RxHeapAlloc(heap, numExtraRGBAs * sizeof(RxCluster *));
        RWASSERT(NULL != extraRGBAs);
        if (NULL == extraRGBAs)
        {
            if (extraUVs != NULL)
            {
                RxHeapFree(heap, extraUVs);
            }
            RWRETURN(FALSE);
        }
    }

    /* Grab any extra UV clusters we need to interpolate. The table is
     * compacted: only clusters that carry data keep a slot in it. */
    numPresentUVs = 0;
    for (i = 0; i < numExtraUVs; i++)
    {
        extraUVs[numPresentUVs] =
            RxClusterLockWrite(packet, NUMCLUSTERSOFINTEREST + i, self);
        RWASSERT(NULL != extraUVs[numPresentUVs]);
        /* Make sure the cluster's present
         * - if not, some node above us decided not
         * to produce a set of UVs this time, which is fine by us
         * - less work to do! */
        if (extraUVs[numPresentUVs]->numUsed > 0)
        {
            /* Check the UVs coincide with the current vertices */
            RWASSERT(extraUVs[numPresentUVs]->numUsed
                     == stateData->NumVertices);
#ifdef RWDEBUG
            if (extraUVs[numPresentUVs]->numUsed !=
                stateData->NumVertices)
            {
                DEBUGREFINEPROCESSPACKETUV(i);
            }
#endif /* RWDEBUG */
            extraUVs[numPresentUVs] =
                RxClusterResizeData(extraUVs[numPresentUVs], numVerts);
            RWASSERT(NULL != extraUVs[numPresentUVs]);
            numPresentUVs++;
        }
    }

    /* Same again for the extra RGBA clusters, which follow the UV slots. */
    numPresentRGBAs = 0;
    for (i = 0; i < numExtraRGBAs; i++)
    {
        extraRGBAs[numPresentRGBAs] =
            RxClusterLockWrite(packet,
                               NUMCLUSTERSOFINTEREST + numExtraUVs + i,
                               self);
        RWASSERT(NULL != extraRGBAs[numPresentRGBAs]);
        if (extraRGBAs[numPresentRGBAs]->numUsed > 0)
        {
            /* Check the RGBAs coincide with the current vertices */
            RWASSERT(extraRGBAs[numPresentRGBAs]->numUsed
                     == stateData->NumVertices);
#ifdef RWDEBUG
            if (extraRGBAs[numPresentRGBAs]->numUsed !=
                stateData->NumVertices)
            {
                DEBUGREFINEPROCESSPACKETRGB(i);
            }
#endif /* RWDEBUG */
            extraRGBAs[numPresentRGBAs] =
                RxClusterResizeData(extraRGBAs[numPresentRGBAs],
                                    numVerts);
            RWASSERT(NULL != extraRGBAs[numPresentRGBAs]);
            numPresentRGBAs++;
        }
    }

    /* Make room for the refined mesh in the fixed clusters. */
    indices = RxClusterResizeData(indices, numTris * 3);
    RWASSERT(NULL != indices);
    devVerts = RxClusterResizeData(devVerts, numVerts);
    RWASSERT(NULL != devVerts);
    camVerts = RxClusterResizeData(camVerts, numVerts);
    RWASSERT(NULL != camVerts);
    camNorms = RxClusterResizeData(camNorms, numVerts);
    RWASSERT(NULL != camNorms);

    /* Describe the job for the refinement routines. */
    refineData.refineData.oldNumTris = stateData->NumElements;
    refineData.refineData.newNumTris = numTris;

    refineData.refineData.oldNumVerts = stateData->NumVertices;
    refineData.refineData.newNumVerts = numVerts;

    refineData.refineData.depth = depth;

    refineData.refineData.numPresentUVs = numPresentUVs;
    refineData.refineData.numPresentRGBAs = numPresentRGBAs;

    /* strides */
    refineData.idxsStride = indices->stride;
    refineData.camNormsStride = camNorms->stride;
    refineData.camVertsStride = camVerts->stride;
    refineData.devVertsStride = devVerts->stride;

    refineData.extraUVStride = sizeof(RxUV);
    refineData.extraRGBAStride = sizeof(RwRGBAReal);

    /* triangle's new vert strides. The new verts for each triangle are
     * stored sequentially. This is needed for generating the new indices.
     */
    refineData.triCamNormsStride = refineData.camNormsStride * newVerts;
    refineData.triCamVertsStride = refineData.camVertsStride * newVerts;
    refineData.triDevVertsStride = refineData.devVertsStride * newVerts;

    refineData.refineData.idxs = indices;
    refineData.refineData.objVerts = objVerts;
    refineData.refineData.camVerts = camVerts;
    refineData.refineData.camNorms = camNorms;
    refineData.refineData.devVerts = devVerts;

    refineData.refineData.extraUVs = extraUVs;
    refineData.refineData.extraRGBAs = extraRGBAs;

    /* Generate the refinement */
    _rt_rtrefineSetupCameraSSE(&refineData, &camData);
    result =
        _rtrefineGenerateRefinementSSE(&refineData, &camData, heap);

    /* getting there, finally generate new indices.
     * NOTE(review): this and the state update below run even when the
     * refinement reported failure; confirm that is intended. */
    _rtrefineGenerateNewVertIndex(&refineData.refineData);

    /* update with new vertices */
    stateData->NumVertices = numVerts;
    stateData->NumElements = numTris;

    indices->numUsed = stateData->NumElements * 3;
    devVerts->numUsed = stateData->NumVertices;
    camVerts->numUsed = stateData->NumVertices;
    camNorms->numUsed = stateData->NumVertices;

    for (i = 0; i < numPresentUVs; i++)
    {
        extraUVs[i]->numUsed = stateData->NumVertices;
    }
    for (i = 0; i < numPresentRGBAs; i++)
    {
        extraRGBAs[i]->numUsed = stateData->NumVertices;
    }

    /* objverts now invalid. kill the cluster */
    RxClusterDestroyData(objVerts);

    objVerts->flags = rxCLFLAGS_NULL;

    /* Release the scratch tables (the clusters themselves stay in the
     * packet). */
    if (extraUVs != NULL)
    {
        RxHeapFree(heap, extraUVs);
    }
    if (extraRGBAs != NULL)
    {
        RxHeapFree(heap, extraRGBAs);
    }

    RWRETURN(result);
}

/*****************************************************************************
 _RefineNodePipelineNodeInitFnSSE

 Pipeline-node init callback. Writes the default private data into the node:
 refinement ON, no extra UV clusters, no extra RGBA clusters.

 Returns TRUE once the defaults are stored, FALSE if no node was supplied.
*/

static              RwBool
_RefineNodePipelineNodeInitFnSSE(RxPipelineNode * self)
{
    RpNodeRefineData    defaults;
    RpNodeRefineData   *nodeData;

    RWFUNCTION(RWSTRING("_RefineNodePipelineNodeInitFnSSE"));

    /* Nothing to initialise without a node */
    if (!self)
    {
        RWRETURN(FALSE);
    }

    defaults.refineOn = TRUE;
    defaults.numExtraUVs = 0;
    defaults.numExtraRGBAs = 0;

    /* Copy the whole structure into the node's private data area */
    nodeData = (RpNodeRefineData *) self->privateData;
    *nodeData = defaults;

    RWRETURN(TRUE);
}

/*
 * _refineNodeSSE
 *
 * Node body: fetches the packet for this node and decides where it goes.
 *
 *  - If the node's private data has refinement toggled off, the packet is
 *    forwarded untouched on the pass-through output.
 *  - Otherwise the source atomic's refine extension is asked for a depth.
 *    A depth greater than 1 refines the packet and sends it to the
 *    refined output; anything else leaves it on the pass-through output.
 *
 * self   - node instance; its privateData is read as RpNodeRefineData
 * params - node parameters, used to obtain the heap
 *
 * Returns TRUE when the packet was dispatched, FALSE when refinement failed
 * (in which case the packet has been destroyed).
 */
static              RwBool
_refineNodeSSE(RxPipelineNodeInstance * self,
               const RxPipelineNodeParam * params)
{
    RwInt32             output;
    RpNodeRefineData   *refineData;
    RxPacket           *packet;
    RxHeap             *heap;
    RxCluster          *state;
    RpAtomic           *atom;
    RxMeshStateVector  *stateData;
    RpRefineAtomicExtension *extension;
    RwObject           *sourceObject;
    RwInt32             depth, numExtraUVs, numExtraRGBAs;

    RWFUNCTION(RWSTRING("_refineNodeSSE"));

    packet = RxPacketFetch(self);
    RWASSERT(NULL != packet);

    RWASSERT(NULL != params);
    heap = RxPipelineNodeParamGetHeap(params);
    RWASSERT(NULL != heap);

    /* Cheap early out if this node's toggled off. The data is unmodified,
     * so it belongs on the pass-through output: the refined output is
     * declared with object-space vertices invalid. */
    refineData = (RpNodeRefineData *) self->privateData;
    RWASSERT(NULL != refineData);
    if (refineData->refineOn == FALSE)
    {
        RxPacketDispatch(packet, REFINEPASSTHROUGH, self);
        RWRETURN(TRUE);
    }

    rpRefineGlobals.sseFlag = rpRefineGlobals.enabledFlag;

    /* Slot 3 is the mesh state cluster */
    state = RxClusterLockRead(packet, 3);
    stateData = RxClusterGetCursorData(state, RxMeshStateVector);

    /* We can only handle tri list at present. */
    RWASSERT(stateData->PrimType == rwPRIMTYPETRILIST);
    sourceObject = (RwObject *) stateData->DataObject;
    RWASSERT(NULL != sourceObject);
    /* Quick hack. Typecast the stateData and check the flag to see if it is
     * an RpAtomic before refining.
     */
    RWASSERT(sourceObject->type == (RwUInt8) rpATOMIC);
    atom = (RpAtomic *) sourceObject;

    /* Default output is the pass-through one (index 1) */
    output = REFINEPASSTHROUGH;

    extension = RPREFINEOFFSET(atom, rpRefineGlobals.atmExtOffset);

    if (extension != NULL)
    {
        /* The atomic's own callback picks the subdivision depth */
        depth = (*extension->selectDepth) (atom);

        numExtraUVs = refineData->numExtraUVs;
        numExtraRGBAs = refineData->numExtraRGBAs;

        if (depth > 1)
        {
            /* do the business */
            if (_refineProcessPacketSSE
                (self, packet, heap, depth, numExtraUVs,
                 numExtraRGBAs) == FALSE)
            {
                MESSAGE("Error during SSE refinement.");

                RxPacketDestroy(packet, self);
                RWRETURN(FALSE);
            }

            /* We have refinement! So use the refined output (index 0) */
            output = REFINEOUT;
        }
    }

    /* Output the packet to the chosen output of this Node */
    RxPacketDispatch(packet, output, self);

    RWRETURN(TRUE);
}

#endif /* ( ((defined(__ICL)) && (400 <= __ICL)) && defined(RWSIMD) ) */

/**
 * \ingroup rprefine
 * \ref RxNodeDefinitionGetSSERefineCSL
 * returns a pointer to a node implementing refinement in custom pipelines.
 *
 * This node makes use of Intel's SSE instruction set.
 *
 * The include file rtintel.h and the library file rtintel.lib are also
 * required.
 *
 * This node refines triangles in the triangles cluster according
 * to the specified criteria
 *
 * \verbatim
   The node has two outputs.
   The first is sent data which has been refined in camera space, when
   refinement is active.
   The second is sent unmodified data, when refinement is inactive.
  
   The input requirements of this node:
  
   RxClCamSpace3DVertices - required
   RxClScrSpace2DVertices - required
   RxClIndices - required
   RxClMeshState - required
   RxClObjSpace3DVertices - required
   RxClCamNorms - required
  
   The characteristics of the first of this node's outputs,
  
   RxClCamSpace3DVertices - valid
   RxClScrSpace2DVertices - valid
   RxClIndices - valid
   RxClMeshState - valid
   RxClObjSpace3DVertices - invalid
   RxClCamNorms - valid
  
   The characteristics of the second of this node's outputs:
  
   RxClCamSpace3DVertices - no change
   RxClScrSpace2DVertices - no change
   RxClIndices - no change
   RxClMeshState - no change
   RxClObjSpace3DVertices - no change
   RxClCamNorms - no change
   \endverbatim
 *
 * \return pointer to node for refinement in custom pipelines on success,
 * NULL otherwise
 */

RxNodeDefinition   *
RxNodeDefinitionGetSSERefineCSL(void)
{
    RwBool              haveSSE = FALSE;
    RwBool              haveWNI = FALSE;
    RxNodeDefinition   *nodeDef = (RxNodeDefinition *)NULL;

    RWAPIFUNCTION(RWSTRING("RxNodeDefinitionGetSSERefineCSL"));

#if ( ((defined(__ICL)) && (400 <= __ICL)) && defined(RWSIMD) )

    /* Ask the CPU what it supports; without this build configuration both
     * flags stay FALSE and no node definition is handed out. */
    haveSSE = RtIntelHaveSSE();

#ifdef RW_WNI

    haveWNI = RtIntelHaveWNI();

#endif /* RW_WNI */

    if (haveSSE)
    {
        /* Clusters of interest; their order fixes the packet slot numbers
         * used by the node body. */
        static RxClusterRef clusterRefs[NUMCLUSTERSOFINTEREST] = {
            {&RxClCamSpace3DVertices, rxCLALLOWABSENT, rxCLRESERVED},
            {&RxClScrSpace2DVertices, rxCLALLOWABSENT, rxCLRESERVED},
            {&RxClIndices, rxCLALLOWABSENT, rxCLRESERVED},
            {&RxClMeshState, rxCLALLOWABSENT, rxCLRESERVED},
            {&RxClObjSpace3DVertices, rxCLALLOWABSENT, rxCLRESERVED},
            {&RxClCamNorms, rxCLALLOWABSENT, rxCLRESERVED}
        };

        /* Input requirements, parallel to clusterRefs: all required */
        static RxClusterValidityReq inputReqs[NUMCLUSTERSOFINTEREST] = {
            rxCLREQ_REQUIRED,
            rxCLREQ_REQUIRED,
            rxCLREQ_REQUIRED,
            rxCLREQ_REQUIRED,
            rxCLREQ_REQUIRED,
            rxCLREQ_REQUIRED
        };

        /* Pass-through output, parallel to clusterRefs: everything valid */
        static RxClusterValid passThroughValid[NUMCLUSTERSOFINTEREST] = {
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_VALID
        };

        /* Refined output, parallel to clusterRefs */
        static RxClusterValid refineOutValid[NUMCLUSTERSOFINTEREST] = {
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_VALID,
            rxCLVALID_INVALID,  /* ObjVerts invalidated! Not enough of 'em any more */
            rxCLVALID_VALID
        };

        static RwChar       passThroughName[] = RWSTRING("PassThrough");
        static RwChar       refineOutName[] = RWSTRING("RefineOut");

        /* Output specs; the order must agree with REFINEOUT (0) and
         * REFINEPASSTHROUGH (1). */
        static RxOutputSpec outputSpecs[NUMOUTPUTS] = {
            {
             refineOutName,      /* Name */
             refineOutValid,     /* OutputClusters */
             rxCLVALID_NOCHANGE  /* AllOtherClusters */
             },
            {
             passThroughName,    /* Name */
             passThroughValid,   /* OutputClusters */
             rxCLVALID_NOCHANGE  /* AllOtherClusters */
             },
        };

        static RwChar       nodeName[] = RWSTRING("SSERefine.csl");

        static RxNodeDefinition nodeSSERefineCSL = {
            nodeName,           /* Name */
            {                   /* nodemethods */
             _refineNodeSSE,    /* +-- nodebody */
             NULL,              /* +-- nodeinit */
             NULL,              /* +-- nodeterm */
             _RefineNodePipelineNodeInitFnSSE, /* +-- pipelinenodeinit */
             NULL,              /* +-- pipelinenodeterm */
             NULL,
             NULL},
            {                   /* Io */
             NUMCLUSTERSOFINTEREST, /* +-- NumClustersOfInterest */
             clusterRefs,       /* +-- ClustersOfInterest */
             inputReqs,         /* +-- InputRequirements */
             NUMOUTPUTS,        /* +-- NumOutputs */
             outputSpecs        /* +-- Outputs */
             },
            sizeof(RpNodeRefineData), /* private data size for pipeline nodes made from this */
            FALSE,              /* node definition not editable (it's a global) */
            0                   /* how many pipeline nodes have been made from this definition? */
        };

        nodeDef = &nodeSSERefineCSL;
    }

#endif /* ( ((defined(__ICL)) && (400 <= __ICL)) && defined(RWSIMD) ) */

    /* Record what is available in the plugin globals, or raise an error
     * when SSE cannot be used. */
    if (haveSSE)
    {
        rpRefineGlobals.enabledFlag |= RPREFINE_SSE;
    }
    else
    {
        RWERROR((E_RW_NOTSSEENABLEDCPU));
    }

    if (haveWNI)
    {
        rpRefineGlobals.enabledFlag |= RPREFINE_WNI;
    }

    RWRETURN(nodeDef);
}
