/************************************************************************
 * File: dmaalloc.c                                                     *
 *                                                                      *
 * DMA circular list allocator                                          *
 ************************************************************************/

/* Debug instrumentation switches.  The code below tests COUNTBLOCKS and
 * LOGALLOCS with #ifdef; the trailing 'x' on the names defined here means
 * those macros are NOT defined, so the instrumentation is compiled out.
 * Remove the 'x' from a name to enable the corresponding instrumentation.
 */
#define COUNTBLOCKSx
#define LOGALLOCSx

#include <string.h>
#include <eekernel.h>

#include "batypes.h"
#include "balibtyp.h"
#include "badebug.h"
#include "basky.h"
#include "barwtyp.h"
#include "badma.h"

#include "devprofile.h"

#include "dmaalloc.h"

static const char rcsid[] __RWUNUSED__ = "@@(#)$Id: dmaalloc.c,v 1.20 2001/07/24 11:38:51 rabin Exp $";


/* Max size of block the circular allocator will allocate.
 * Starts out at 64K and is overwritten by circularAllocOpen() with the
 * block size passed to the most recent open call.
 */
RwUInt32 sweCircAllocBlockSize = (64 * 1024);


#ifdef LOGALLOCS

/* Allocation log (LOGALLOCS builds only).
 * Entries [0, allocs) of the two parallel tables hold the pointer and the
 * size of each logged allocation.  The tables are unordered: unlogAlloc()
 * fills the hole left by a removed entry with the last entry.
 */
RwInt32 allocs = 0;
void * allocations[1000];
RwUInt32 allocSize[1000];

/*
 * logAlloc
 *
 * Record an allocation (pointer and size) at the end of the allocation log.
 *
 * memory - address handed out by the allocator
 * size   - size of the allocation in bytes
 *
 * If the log tables are already full the allocation is simply not recorded
 * (previously this wrote past the end of the fixed-size tables).  An
 * unrecorded allocation is harmless to unlogAlloc(), which ignores pointers
 * it cannot find.
 */
static void
logAlloc(void *memory, RwUInt32 size)
{
    RWFUNCTION(RWSTRING("logAlloc"));

    PFENTRY(PFlogAlloc);
    /* Capacity is derived from the table itself so the two stay in step */
    if (allocs < (RwInt32)(sizeof(allocations) / sizeof(allocations[0])))
    {
        allocations[allocs] = memory;
        allocSize[allocs] = size;
        allocs++;
    }
    PFEXIT(PFlogAlloc);

    RWRETURNVOID();
}

/*
 * unlogAlloc
 *
 * Remove an allocation from the allocation log.
 *
 * memory - address previously passed to logAlloc()
 *
 * The first matching entry is removed by moving the last entry of the log
 * into its slot, so the log does not preserve order.  Pointers that are not
 * in the log are ignored.
 */
static void
unlogAlloc(void *memory)
{
    RwInt32 i;
    RWFUNCTION(RWSTRING("unlogAlloc"));
    PFENTRY(PFunlogAlloc);

    for (i = 0; i < allocs; i++)
    {
        if (allocations[i] == memory)
        {
            allocs--;
            allocations[i] = allocations[allocs];
            allocSize[i] = allocSize[allocs];
            /* Leave the loop rather than returning here, so that the
             * PFENTRY above is always matched by the PFEXIT below.
             */
            break;
        }
    }
    PFEXIT(PFunlogAlloc);

    RWRETURNVOID();
}

#endif /* LOGALLOCS */

/*
 * createNewBlock
 *
 * Allocate one block for the given allocator: a circularAllocatorBlock
 * descriptor plus allocator->blockSize bytes of storage aligned to a
 * 64 byte boundary.
 *
 * allocator - allocator the block is created for (supplies blockSize and
 *             the showNewBlocks reporting flag)
 *
 * Returns the new block with waterLevel and numBlocksAlloced zeroed, or
 * NULL if either allocation failed.  The block's 'next' link is NOT set
 * here; the caller must link the block into the list.
 */
static circularAllocatorBlock *
createNewBlock(circularAllocator *allocator)
{
    circularAllocatorBlock *block;
    RwUInt32 alignedAddr;
#ifdef COUNTBLOCKS
    static RwInt32 numBlocks = 0;
#endif /* COUNTBLOCKS */

    RWFUNCTION(RWSTRING("createNewBlock"));
    PFENTRY(PFcreateNewBlock);
    RWASSERT(allocator);

    /* The descriptor comes first */
    PFCALL(PFcreateNewBlock);
    block = (circularAllocatorBlock *)
        RwSky2Malloc(sizeof(circularAllocatorBlock));
    PFRET(PFcreateNewBlock);

    /* Reported whether or not the allocation above succeeded */
    if (allocator->showNewBlocks)
    {
         printf("~~~~~~ New DMA alloc block in allocator %x~~~~~~\n",
                (unsigned int)allocator);
    }

    if (!block)
    {
        PFEXIT(PFcreateNewBlock);
        RWRETURN((circularAllocatorBlock *)NULL);
    }

    /* Over-allocate by 48 bytes: memory arrives on a 16 byte (quadword)
     * boundary, so at most 48 bytes are lost reaching the next 64 byte
     * boundary.
     */
    PFCALL(PFcreateNewBlock);
    block->realMemory = RwSky2Malloc(allocator->blockSize+48);
    PFRET(PFcreateNewBlock);

    if (!block->realMemory)
    {
        /* No storage - give the descriptor back */
        PFCALL(PFcreateNewBlock);
        RwSky2Free(block);
        PFRET(PFcreateNewBlock);

        PFEXIT(PFcreateNewBlock);
        RWRETURN((circularAllocatorBlock *)NULL);
    }

    /* Round the storage address up to a cache line and start empty */
    alignedAddr = ((RwUInt32)block->realMemory + 63) & ~63;
    block->memory = (void *)alignedAddr;
    block->waterLevel = 0;
    block->numBlocksAlloced = 0;

    /* Flush the storage out of cache for DMA access */
    PFCALL(PFcreateNewBlock);
    SyncDCache(block->memory, SCESYNCDCACHEROUNDUP((RwUInt8 *)block->memory + allocator->blockSize));
    PFRET(PFcreateNewBlock);

#ifdef COUNTBLOCKS
    numBlocks++;
#ifdef LOGALLOCS
    printf("%s(%d):%s - ** %d blks, %d allocs\n",
           __FILE__, __LINE__, __FUNCTION__,
           numBlocks, allocs);
#else /* LOGALLOCS */
    printf("%s(%d):%s - ** %d blks\n",
           __FILE__, __LINE__, __FUNCTION__,
           numBlocks);
#endif /* LOGALLOCS */
#endif /* COUNTBLOCKS */

    PFEXIT(PFcreateNewBlock);
    RWRETURN(block);
}

/*
 * circularAllocatorExtend
 *
 * Grow the allocator's circular list by one freshly created block, linked
 * in directly after the current active allocation block, and make the new
 * block the active allocation block.
 *
 * allocator - allocator to extend
 *
 * Returns TRUE on success, FALSE if the new block could not be created (in
 * which case the list is left untouched).
 */
static RwBool
circularAllocatorExtend(circularAllocator *allocator)
{
    circularAllocatorBlock *fresh;
    circularAllocatorBlock *current;

    RWFUNCTION(RWSTRING("circularAllocatorExtend"));
    PFENTRY(PFcircularAllocatorExtend);
    RWASSERT(allocator);

    PFCALL(PFcircularAllocatorExtend);
    fresh = createNewBlock(allocator);
    PFRET(PFcircularAllocatorExtend);

    if (!fresh)
    {
        PFEXIT(PFcircularAllocatorExtend);
        RWRETURN(FALSE);
    }

    /* Splice in after the active block, then switch over to the new one */
    current = allocator->activeAllocBlock;
    fresh->next = current->next;
    current->next = fresh;
    allocator->activeAllocBlock = fresh;

    PFEXIT(PFcircularAllocatorExtend);
    RWRETURN(TRUE);
}

/*
 * circularMallocInternal
 *
 * Carve 'size' bytes (rounded up to a multiple of ALIGNMENT) out of the
 * allocator's active block, moving on to the next block in the circular
 * list, or extending the list, when the active block cannot hold it.
 *
 * allocator - allocator to take memory from
 * size      - number of bytes wanted
 * flags     - allocation type; when it differs from the type of the
 *             previous allocation the new memory starts on a 64 byte
 *             cache line boundary
 *
 * Returns the memory, or NULL when
 *   - the rounded size exceeds allocator->blockSize, or
 *   - a new block is needed but allocator->limit is set, or
 *   - a new block is needed and could not be created.
 *
 * Note that the water level alignment and the lastBlockFlags update are
 * applied before the NULL returns of the last two cases.
 */
static void *
circularMallocInternal(circularAllocator *allocator, RwUInt32 size,
                       RwUInt32 flags)
{
    circularAllocatorBlock *block;
    void *chunk;
    RwUInt32 mask = ALIGNMENT-1;

    RWFUNCTION(RWSTRING("circularMallocInternal"));
    PFENTRY(PFcircularMalloc);
    RWASSERT(allocator);

    block = allocator->activeAllocBlock;

    /* Rounding the size is enough to keep every allocation aligned:
     * 1) mallocs are aligned to 16 bytes.
     * 2) sub block allocations are contiguous
     */
    size = (size + mask) & ~mask;

    /* The quick way applies when the request fits in the active block and
     * is of the same type as the previous one; anything else takes the
     * detour below before memory is handed out.
     */
    if (((block->waterLevel + size) > (allocator->blockSize)) ||
        (allocator->lastBlockFlags != flags))
    {
        if (size > allocator->blockSize)
        {
            /* Can never be satisfied by any block */
            PFEXIT(PFcircularMalloc);
            RWRETURN(NULL);
        }

        /* If the type changes, align on a cache line (the overall block is
         * cache line aligned).  At the start of a block (waterLevel == 0)
         * this has no effect, so the initial value of lastBlockFlags is
         * not terribly important.
         */
        mask = (flags != allocator->lastBlockFlags) ? 63 : 0;
        block->waterLevel = ((block->waterLevel + mask) & ~mask);
        allocator->lastBlockFlags = flags;

        if ((block->waterLevel + size) > (allocator->blockSize))
        {
            /* Doesn't fit here: use the next block if it is empty,
             * otherwise extend the list to generate an empty block.
             */
            if (!block->next->numBlocksAlloced)
            {
                /* Skip to the next (empty) block */
                allocator->activeAllocBlock = block->next;
            }
            else
            {
                /* Time to extend */
                PFCALL(PFcircularMalloc);
                if ((allocator->limit) || (!circularAllocatorExtend(allocator)))
                {
                    /* Failed to allocate */
                    PFRET(PFcircularMalloc);
                    PFEXIT(PFcircularMalloc);
                    RWRETURN(NULL);
                }
                PFRET(PFcircularMalloc);
            }

            /* Either way the active block has changed */
            block = allocator->activeAllocBlock;

            /* No need to flush the cache - we did that when we allocated,
             * and we never touch this memory with cached access.
             */
        }
    }

    /* Now we can just give out some memory - we know it will fit */
    chunk = (void *)((RwUInt8 *)block->memory + block->waterLevel);
    block->waterLevel += size;
    block->numBlocksAlloced++;

    /* Remember the allocation so circularRealloc can grow it in place */
    allocator->lastBlockAllocated = chunk;
    allocator->lastBlockSize = size;

#ifdef LOGALLOCS
    PFCALL(PFcircularMalloc);
    logAlloc(chunk, size);
    PFRET(PFcircularMalloc);
#endif /* LOGALLOCS */

    PFEXIT(PFcircularMalloc);
    RWRETURN(chunk);
}

/*
 * circularRealloc
 *
 * Resize an allocation made from the circular allocator.
 *
 * allocator - allocator that owns 'memory'
 * memory    - allocation to resize (must not be NULL)
 * size      - new size in bytes (rounded up to a multiple of ALIGNMENT)
 *
 * If 'memory' is the most recent allocation and the new size still fits in
 * the active block, it is resized in place and 'memory' is returned.
 * Otherwise a new allocation is made, the contents are copied across, the
 * old allocation is freed and the new memory is flushed from the data
 * cache.
 *
 * Returns the (possibly moved) allocation, or NULL if the rounded size
 * exceeds allocator->blockSize or the new allocation failed.  On NULL the
 * original allocation is left untouched.
 *
 * The allocator only remembers the size of the most recent allocation.
 * When that is the one being moved, exactly its old contents are copied.
 * For any other allocation the old size is unknown, so up to 'size' bytes
 * are copied, but never beyond the end of the block that contains
 * 'memory'.  (Previously 'size' bytes were always copied, which could read
 * past the old allocation and past the end of the block's storage.)
 */
void *
circularRealloc(circularAllocator *allocator, void *memory, RwUInt32 size)
{
    RWFUNCTION(RWSTRING("circularRealloc"));
    PFENTRY(PFcircularRealloc);
    RWASSERT(allocator);
    RWASSERT(memory);

    size = (size + (ALIGNMENT-1)) & ~(ALIGNMENT-1);

    if (size <= allocator->blockSize)
    {
        void *newMemory;
        circularAllocatorBlock *activeBlock = allocator->activeAllocBlock;
        circularAllocatorBlock *scanBlock;
        RwUInt32 copySize = size;

        if (allocator->lastBlockAllocated == memory)
        {
            /* Unsigned arithmetic: when shrinking, extraSize wraps and the
             * additions below wrap back, giving the reduced values.
             */
            RwUInt32 extraSize = (size - allocator->lastBlockSize);
            /* It's possible then, first see if we can extend in place */
            /* allocator->activeAllocBlock points to the containing block */
            if ((activeBlock->waterLevel + extraSize) < allocator->blockSize)
            {
                /* No problem, make it so */
                activeBlock->waterLevel += extraSize;
                allocator->lastBlockSize += extraSize;
                PFEXIT(PFcircularRealloc);
                RWRETURN(memory);
            }

            /* We know how big the old allocation is; take note now, since
             * the allocation below overwrites lastBlockSize.
             */
            if (allocator->lastBlockSize < copySize)
            {
                copySize = allocator->lastBlockSize;
            }
        }

        /* Never read beyond the end of the block holding the old memory */
        scanBlock = allocator->activeFreeBlock;
        do
        {
            RwUInt8 *blockStart = (RwUInt8 *)scanBlock->memory;
            RwUInt8 *blockEnd = blockStart + allocator->blockSize;

            if ((blockStart <= (RwUInt8 *)memory) &&
                (blockEnd > (RwUInt8 *)memory))
            {
                RwUInt32 bytesToEnd =
                    (RwUInt32)(blockEnd - (RwUInt8 *)memory);

                if (bytesToEnd < copySize)
                {
                    copySize = bytesToEnd;
                }
                break;
            }
            scanBlock = scanBlock->next;
        }
        while (scanBlock != allocator->activeFreeBlock);

        /* It's not going to fit, or we are re-allocing out of order, so
         * allocate a new block, copy contents, and free the old block.
         * By taking ~lastBlockFlags, we eliminate cache line effects between
         * this and both the last and next block, since we don't know the true
         * block allocation type.
         */
        PFCALL(PFcircularRealloc);
        newMemory = circularMallocInternal(allocator, size,
                                           ~allocator->lastBlockFlags);
        PFRET(PFcircularRealloc);
        if (newMemory)
        {
            PFCALL(PFcircularRealloc);
            memcpy(newMemory, memory, copySize);
            circularFree(allocator, memory);

            /* Flush the cache in case it was a DMA packet */
            /* We leave the old packet in the cache
             * -- since the lines aren't dirty,
             * they should never be written to memory
             */
            SyncDCache(newMemory, SCESYNCDCACHEROUNDUP((RwUInt8 *)newMemory + size));
            PFRET(PFcircularRealloc);

            PFEXIT(PFcircularRealloc);
            RWRETURN(newMemory);
        }

        /* Allocation failure */
    }

    PFEXIT(PFcircularRealloc);
    RWRETURN(NULL);
}

/*
 * circularFree
 *
 * Release one allocation back to the circular allocator.
 *
 * allocator - allocator that owns 'memory'
 * memory    - address previously returned by the allocator
 *
 * Individual allocations are not tracked: the block whose storage contains
 * 'memory' has its allocation count decremented, and once that count
 * reaches zero the whole block is reset for reuse.  An address that lies
 * in none of the allocator's blocks is silently ignored.
 */
void
circularFree(circularAllocator *allocator, void *memory)
{
    circularAllocatorBlock *owner;
    RwUInt8 *target = (RwUInt8 *)memory;
    RwBool located = FALSE;

    RWFUNCTION(RWSTRING("circularFree"));
    PFENTRY(PFcircularFree);
    RWASSERT(allocator);
    RWASSERT(memory);

    /* Walk once round the list, starting at activeFreeBlock, looking for
     * the block whose storage contains the address.
     */
    owner = allocator->activeFreeBlock;
    for (;;)
    {
        RwUInt8 *base = (RwUInt8 *)owner->memory;

        if ((base <= target) && ((base + allocator->blockSize) > target))
        {
            located = TRUE;
            break;
        }

        owner = owner->next;
        if (owner == allocator->activeFreeBlock)
        {
            /* Been all the way round without a match */
            break;
        }
    }

    if (located)
    {
#ifdef LOGALLOCS
        PFCALL(PFcircularFree);
        unlogAlloc(memory);
        PFRET(PFcircularFree);
#endif /* LOGALLOCS */

        /* One allocation fewer; an empty block starts again from zero */
        if (--owner->numBlocksAlloced == 0)
        {
            owner->waterLevel = 0;
        }
    }

    /* Step activeFreeBlock past an empty block (helps future searches) */
    if (allocator->activeFreeBlock->numBlocksAlloced == 0)
    {
        allocator->activeFreeBlock = allocator->activeFreeBlock->next;
    }

    PFEXIT(PFcircularFree);
    RWRETURNVOID();
}

/*
 * circularAllocOpen
 *
 * Create a circular allocator whose blocks each hold 'blockSize' bytes.
 * The allocator starts with a single block linked to itself, with
 * reporting of new blocks and the allocation limit both switched off.
 *
 * blockSize - size in bytes of each block; also stored in the global
 *             sweCircAllocBlockSize
 *
 * Returns the new allocator, or NULL if the allocator or its first block
 * could not be allocated.  (Previously a failure to create the first block
 * dereferenced a NULL pointer.)  Release the allocator with
 * circularAllocClose().
 */
circularAllocator *
circularAllocOpen(RwUInt32 blockSize)
{
    circularAllocator *allocator;
    circularAllocatorBlock *newBlock;

    RWFUNCTION(RWSTRING("circularAllocOpen"));
    PFENTRY(PFcircularAllocOpen);

    PFCALL(PFcircularAllocOpen);
    allocator = (circularAllocator *)RwSky2Malloc(sizeof(circularAllocator));
    PFRET(PFcircularAllocOpen);
    if (!allocator)
    {
        PFEXIT(PFcircularAllocOpen);
        RWRETURN((circularAllocator *)NULL);
    }

    /* Fill in all the info for maintaining it */
    allocator->blockSize = blockSize;
    sweCircAllocBlockSize = blockSize;

    allocator->showNewBlocks = 0;
    allocator->limit = 0;

    /* Allocate the first block */
    PFCALL(PFcircularAllocOpen);
    newBlock = createNewBlock(allocator);
    PFRET(PFcircularAllocOpen);
    if (!newBlock)
    {
        /* Without a block the allocator is useless - don't leak it */
        PFCALL(PFcircularAllocOpen);
        RwSky2Free(allocator);
        PFRET(PFcircularAllocOpen);

        PFEXIT(PFcircularAllocOpen);
        RWRETURN((circularAllocator *)NULL);
    }

    /* Its the only block, so the circular list is the block itself */
    newBlock->next = newBlock;
    allocator->firstBlock = newBlock;
    allocator->activeAllocBlock = newBlock;
    allocator->activeFreeBlock = newBlock;

    allocator->lastBlockAllocated = NULL;
    allocator->lastBlockSize = 0;

    allocator->lastBlockFlags = 0;

    PFEXIT(PFcircularAllocOpen);
    RWRETURN(allocator);
}

/*
 * circularAllocClose
 *
 * Destroy a circular allocator: free the storage and descriptor of every
 * block in its list, then the allocator itself.  No check is made for
 * allocations that are still outstanding.
 *
 * allocator - allocator to destroy; must not be used afterwards
 *
 * When allocator->showNewBlocks is set, the total size of the blocks that
 * were freed is printed.
 *
 * Always returns TRUE.
 */
RwBool
circularAllocClose(circularAllocator *allocator)
{
    circularAllocatorBlock *head;
    int totalBytes = 0;
    RWFUNCTION(RWSTRING("circularAllocClose"));
    PFENTRY(PFcircularAllocClose);
    RWASSERT(allocator);

    head = allocator->firstBlock;
    if (head)
    {
        /* Unlink and free the block after the head until the head is the
         * only one left...
         */
        while (head->next != head)
        {
            circularAllocatorBlock *victim = head->next;

            head->next = victim->next;
            PFCALL(PFcircularAllocClose);
            RwSky2Free(victim->realMemory);
            RwSky2Free(victim);
            PFRET(PFcircularAllocClose);

            totalBytes += allocator->blockSize;
        }

        /* ...then free the head itself */
        PFCALL(PFcircularAllocClose);
        RwSky2Free(head->realMemory);
        RwSky2Free(head);
        PFRET(PFcircularAllocClose);
        allocator->firstBlock = (circularAllocatorBlock *)NULL;

        totalBytes += allocator->blockSize;
    }

    if (allocator->showNewBlocks)
    {
        printf("Circular Allocator %x used %d\n", (int)allocator, totalBytes);
    }

    PFCALL(PFcircularAllocClose);
    RwSky2Free(allocator);
    PFRET(PFcircularAllocClose);

    PFEXIT(PFcircularAllocClose);
    RWRETURN(TRUE);
}

/*
 * circularAllocPreAlloc
 *
 * Grow the allocator up front so that it owns enough blocks to cover
 * 'size' bytes.  This is done by allocating (size / blockSize) + 1
 * block-sized chunks, which forces the block list to be extended as
 * needed, and then freeing them all again.
 *
 * allocator - allocator to pre-populate
 * size      - number of bytes of capacity wanted
 *
 * Returns TRUE on success; allocator->showNewBlocks is then switched on,
 * so any block created later is reported (and circularLimitAlloc() will
 * accept the allocator).  Returns FALSE if an allocation failed; the chunks
 * obtained so far are freed and showNewBlocks is left off.
 *
 * While they are held, the chunks are chained together through a pointer
 * written at the start of each one.  Those writes go through the data
 * cache, so FlushCache(0) is called before returning on BOTH paths.
 * NOTE(review): the failure path previously returned without flushing;
 * this assumes FlushCache(0) writes the dirty lines back so they cannot be
 * evicted over DMA data later - confirm against the EE kernel reference.
 */
RwBool
circularAllocPreAlloc(circularAllocator *allocator, int size)
{
    char **ptr = (char **)NULL;
    char **ptr1;
    RwBool success = TRUE;
    int numWholeBlocks;
    int i;

    RWFUNCTION(RWSTRING("circularAllocPreAlloc"));
    RWASSERT(allocator);

    /* Don't report the blocks we are about to create on purpose */
    allocator->showNewBlocks = 0;

    /* Starting at -1 gives one chunk more than the whole-block count */
    numWholeBlocks = size/(int)allocator->blockSize;
    for (i = -1; i < numWholeBlocks; i++)
    {
        ptr1 = (char **)circularMalloc(allocator,
                                       allocator->blockSize,
                                       calDCACHE_ALLOC);
        if (!ptr1)
        {
            printf("circularAllocPreAlloc failed\n");
            success = FALSE;
            break;
        }

        /* Push the chunk onto the chain of held chunks */
        *ptr1 = (char*)ptr;
        ptr = ptr1;
    }

    /* Now free them (all of them, or those obtained before the failure) */
    while (ptr != NULL)
    {
        ptr1 = (char**)*ptr;
        circularFree(allocator, ptr);
        ptr = ptr1;
    }

    if (success)
    {
        /* Turn on watch variable */
        allocator->showNewBlocks = 1;
    }

    /* The chain pointers above were written with cached access */
    FlushCache(0);

    RWRETURN(success);
}

/*
 * circularLimitAlloc
 *
 * Switch the allocator's block limit on or off.  While the limit is on,
 * circularMallocInternal() refuses to extend the block list.
 *
 * allocator - allocator to configure
 * limit     - TRUE to forbid the creation of further blocks, FALSE to
 *             allow it
 *
 * The change is only accepted once allocator->showNewBlocks is set, which
 * circularAllocPreAlloc() does on success; otherwise a message is printed
 * and the limit is left as it was.
 *
 * Returns the value of allocator->showNewBlocks, i.e. non-zero when the
 * limit was applied.
 */
RwBool
circularLimitAlloc(circularAllocator *allocator, RwBool limit)
{
    RwBool preAllocDone;
    RWFUNCTION(RWSTRING("circularLimitAlloc"));

    RWASSERT(allocator);

    preAllocDone = allocator->showNewBlocks;

    if (!preAllocDone)
    {
        /* Not pre-allocated yet: complain and change nothing */
        printf("circularAllocPreAlloc hasn't been called for %x yet\n",
               (int)allocator);
        RWRETURN(preAllocDone);
    }

    /* Stored as an int so it can eventually become a bit field */
    if (limit)
    {
        allocator->limit = 1;
    }
    else
    {
        allocator->limit = 0;
    }

    RWRETURN(preAllocDone);
}

/*
 * circularMalloc
 *
 * Public allocation entry point of the circular allocator.
 *
 * allocator - allocator to take memory from
 * size      - number of bytes wanted
 * flags     - allocation type, passed through to circularMallocInternal()
 *
 * Returns the memory, or NULL if the allocation failed while
 * allocator->limit is off.  While the limit is on this function does not
 * return NULL: after each failed attempt it calls _sweFlush() and
 * _sweGarbageCollectChain() and tries again, for as long as it takes.
 */
void *
circularMalloc(circularAllocator *allocator, RwUInt32 size, RwUInt32 flags)
{
    void *memory;

    RWFUNCTION(RWSTRING("circularMalloc"));

    for (;;)
    {
        memory = circularMallocInternal(allocator, size, flags);
        if (memory || !(allocator->limit))
        {
            /* Got it, or we are not required to wait for space */
            break;
        }

        /* Limited allocator is full: spin until we get some space */
        _sweFlush();
        _sweGarbageCollectChain();
    }

    RWRETURN(memory);
}

