/**
 * \file hellocal.cpp
 * \brief CAL program along the same lines as Hello World on the CPU
 */
///////////////////////////////////////////////////////////////////////////////
//! Header files
///////////////////////////////////////////////////////////////////////////////
#include "cal.h"
#include "calcl.h"
#include <cstdio>
#include <cstring>
#include <string>

////////////////////////////////////////////////////////////////////////////
//! Device Program to be executed on the GPU
////////////////////////////////////////////////////////////////////////////
/**
 * \var programIL
 * \brief IL kernel source that scales every sampled element by a constant
 *
 * For each position supplied in v0, the kernel samples input resource 0,
 * multiplies the sampled value by the first entry of constant buffer cb0
 * and writes the product to output o0.
 */
std::string programIL(
    // Shader type / IL version header
    "il_ps_2_0\n"
    // Position input; only the x and y components are used
    "dcl_input_interp(linear) v0.xy\n"
    // Single output written by the kernel
    "dcl_output_generic o0\n"
    // Constant buffer holding one entry (the scale factor)
    "dcl_cb cb0[1]\n"
    // Input resource 0: 2D, addressed with unnormalized coordinates,
    // all four components declared as float
    "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
    // r0 = element of resource 0 at the position held in v0
    "sample_resource(0)_sampler(0) r0, v0.xyxx\n"
    // o0 = r0 * cb0[0]
    "mul o0, r0, cb0[0]\n"
    "ret_dyn\n"
    "end\n");


////////////////////////////////////////////////////////////////////////////
//! Main function
////////////////////////////////////////////////////////////////////////////
/**
 * \fn int main(int argc, char** argv)
 * \param argc The counter for the number of arguments passed in
 * \param argv The array of arguments that are passed in
 * \return 1 on failure, 0 on success
 * \brief Sets up CAL and runs the simple kernels.
 *
 * For more information, please refer to the Programming Guide.
 * \note the programming guide should be located in the CALROOT/doc directory
 */
int main(int argc, char** argv)
{
    // Initializing CAL
    calInit();

    //-------------------------------------------------------------------------
    // Querying and opening device
    //-------------------------------------------------------------------------
    // Finding number of devices
    CALuint numDevices = 0;
    calDeviceGetCount(&numDevices);

    // Opening device
    CALdevice device = 0;
    calDeviceOpen(&device, 0);

    // Querying device attribs
    CALdeviceattribs attribs;
    attribs.struct_size = sizeof(CALdeviceattribs);
    calDeviceGetAttribs(&attribs, 0);

    // Creating context w.r.t. to opened device
    CALcontext ctx = 0;
    calCtxCreate(&ctx, device);

    //-------------------------------------------------------------------------
    // Compiling Device Program
    //-------------------------------------------------------------------------
    CALobject obj = NULL;
    CALimage image = NULL;

    CALlanguage lang = CAL_LANGUAGE_IL;
    std::string program = programIL;
    std::string kernelType = "IL";

    if (calclCompile(&obj, lang, program.c_str(), attribs.target) != CAL_RESULT_OK)
    {
        fprintf(stdout, "Program compilation failed. Exiting.\n");
        return 1;
    }

    if (calclLink(&image, &obj, 1) != CAL_RESULT_OK)
    {
        fprintf(stdout, "Program linking failed. Exiting.\n");
        return 1;
    }

    //-------------------------------------------------------------------------
    // Allocating and initializing resources
    //-------------------------------------------------------------------------
    // Input and output resources
    CALresource inputRes = 0;
    CALresource outputRes = 0;

    calResAllocLocal2D(&inputRes, device, 256, 256, CAL_FORMAT_FLOAT_1, 0);
    calResAllocLocal2D(&outputRes, device, 256, 256, CAL_FORMAT_FLOAT_1, 0);




    // Constant resource
    CALresource constRes = 0;
    calResAllocRemote1D(&constRes, &device, 1, 1, CAL_FORMAT_FLOAT_4, 0);

    // Filling values in input buffer
    float* fdata = NULL;
    CALuint pitch = 0;
    CALmem inputMem = 0;

    // Mapping resource to CPU
    // Returns 'fdata' as a CPU accessible pointer to resource 'inputRes'
    calCtxGetMem(&inputMem, ctx, inputRes);
    calResMap((CALvoid**)&fdata, &pitch, inputRes, 0);
    for (int i = 0; i < 256; ++i)
    {
        float* tmp = &fdata[i * pitch];
        for (int j = 0; j < 256; ++j)
        {
            tmp[j] = (float)(i * pitch + j);
        }
    }
    calResUnmap(inputRes);

    // Filling values in constant
    float* constPtr = NULL;
    CALuint constPitch = 0;
    CALmem constMem = 0;

    // Mapping resource to CPU
    calCtxGetMem(&constMem, ctx, constRes);
    calResMap((CALvoid**)&constPtr, &constPitch, constRes, 0);
    constPtr[0] = 0.5f,     constPtr[1] = 0.0f;
    constPtr[2] = 0.0f;     constPtr[3] = 0.0f;
    calResUnmap(constRes);

    // Mapping output resource to CPU and initializing values
    void* data = NULL;

    // Getting memory handle from resources
    CALmem outputMem = 0;

    calCtxGetMem(&outputMem, ctx, outputRes);
    calResMap(&data, &pitch, outputRes, 0);
    memset(data, 0, pitch * 256 * sizeof(float));
    calResUnmap(outputRes);

    //-------------------------------------------------------------------------
    // Loading module and setting domain
    //-------------------------------------------------------------------------

    // Creating module using compiled image
    CALmodule module = 0;
    calModuleLoad(&module, ctx, image);

    // Defining symbols in module
    CALfunc func = 0;
    CALname inName = 0, outName = 0, constName = 0;

    // Defining entry point for the module
    calModuleGetEntry(&func, ctx, module, "main");
    calModuleGetName(&inName, ctx, module, "i0");
    calModuleGetName(&outName, ctx, module, "o0");
    calModuleGetName(&constName, ctx, module, "cb0");

    // Setting input and output buffers
    // used in the kernel
    calCtxSetMem(ctx, inName, inputMem);
    calCtxSetMem(ctx, outName, outputMem);
    calCtxSetMem(ctx, constName, constMem);

    // Setting domain
    CALdomain domain = {0, 0, 256, 256};

    //-------------------------------------------------------------------------
    // Executing program and waiting for program to terminate
    //-------------------------------------------------------------------------

    // Event to check completion of the program
    CALevent e = 0;
    calCtxRunProgram(&e, ctx, func, &domain);

    // Checking whether the execution of the program is complete or not
    while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);

    // Reading output from output resources
    calResMap((CALvoid**)&fdata, &pitch, outputRes, 0);
    for (int i = 0; i < 8; ++i)
    {
        float* tmp = &fdata[i * pitch];
        for(int j = 0; j < 8; ++j)
        {
            printf("%f ", tmp[j]);
        }
        printf("\n");
    }
    calResUnmap(outputRes);

    //-------------------------------------------------------------------------
    // Cleaning up
    //-------------------------------------------------------------------------

    // Unloading the module
    calModuleUnload(ctx, module);

    // Freeing compiled program binary
    calclFreeImage(image);
    calclFreeObject(obj);

    // Releasing resource from context
    calCtxReleaseMem(ctx, inputMem);
    calCtxReleaseMem(ctx, constMem);
    calCtxReleaseMem(ctx, outputMem);

    // Deallocating resources
    calResFree(outputRes);
    calResFree(constRes);
    calResFree(inputRes);

    // Destroying context
    calCtxDestroy(ctx);

    // Closing device
    calDeviceClose(device);

    // Shutting down CAL
    calShutdown();

    bool waitForUser = true;
    if( waitForUser )
    {
        printf("\nPress enter to exit...\n");
        getchar();
    }

    return 0;
}

