#include "cal.h"
#include "calcl.h"
#include <cstdio>
#include <string>

// IL source of the kernel that main() compiles: it samples the two input
// resources at the interpolated position and writes their sum to o0.
// The text is handed verbatim to the CAL compiler, so every line must keep
// its trailing "\n".
std::string programIL(
    // Shader type / version header.
    "il_ps_2_0\n"
    // Interpolated input position (x, y) and the single generic output.
    "dcl_input_interp(linear) v0.xy\n"
    "dcl_output_generic o0\n"
    // Two 2D, unnormalized-coordinate resources with float components.
    "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
    "dcl_resource_id(1)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
    // Fetch one element from each resource at v0.xy ...
    "sample_resource(0)_sampler(0) r0, v0.xyxx\n"
    "sample_resource(1)_sampler(1) r1, v0.xyxx\n"
    // ... and emit their sum.
    "add o0, r0, r1\n"
    "ret_dyn\n"
    "end\n");

int main(int argc, char** argv)
{
  calInit();

  CALuint numDevices = 0;
  calDeviceGetCount(&numDevices);

  CALdevice device = 0;
  calDeviceOpen(&device, 0);

  CALdeviceattribs attribs;
  attribs.struct_size = sizeof(CALdeviceattribs);
  calDeviceGetAttribs(&attribs, 0);

  CALcontext ctx = 0;
  calCtxCreate(&ctx, device);

  CALobject obj = NULL;
  CALimage image = NULL;

  CALlanguage lang = CAL_LANGUAGE_IL;
  std::string program = programIL;
  std::string kernelType = "IL";

  if (calclCompile(&obj, lang, program.c_str(), attribs.target) != CAL_RESULT_OK) {
    fprintf(stdout, "Program compilation failed. Exiting.\n");
    return 1;
  }

  if (calclLink(&image, &obj, 1) != CAL_RESULT_OK) {
    fprintf(stdout, "Program linking failed. Exiting.\n");
    return 1;
  }

  CALresource inputRes_a = 0;
  CALresource inputRes_b = 0;
  CALresource outputRes = 0;

  calResAllocLocal2D(&inputRes_a, device, 256, 256, CAL_FORMAT_FLOAT_1, 0);
  calResAllocLocal2D(&inputRes_b, device, 256, 256, CAL_FORMAT_FLOAT_1, 0);
  calResAllocLocal2D(&outputRes,  device, 256, 256, CAL_FORMAT_FLOAT_1, 0);

  float* fdata = NULL;
  CALuint pitch = 0;
  CALmem inputMem_a = 0;
  CALmem inputMem_b = 0;
  CALmem outputMem = 0;

  calResMap((CALvoid**)&fdata, &pitch, inputRes_a, 0);
  for (int i = 0; i < 256; ++i) {
    float* tmp = &fdata[i * pitch];
    for (int j = 0; j < 256; ++j) {
      tmp[j] = 3.0;
    }
  }
  calResUnmap(inputRes_a);

  calResMap((CALvoid**)&fdata, &pitch, inputRes_b, 0);
  for (int i = 0; i < 256; ++i) {
    float* tmp = &fdata[i * pitch];
    for (int j = 0; j < 256; ++j) {
      tmp[j] = 2.0;
    }
  }
  calResUnmap(inputRes_b);

  calCtxGetMem(&inputMem_a, ctx, inputRes_a);
  calCtxGetMem(&inputMem_b, ctx, inputRes_b);
  calCtxGetMem(&outputMem, ctx, outputRes);

  CALmodule module = 0;
  calModuleLoad(&module, ctx, image);

  CALfunc func = 0;
  CALname inName_a = 0, inName_b = 0, outName = 0;

  calModuleGetEntry(&func, ctx, module, "main");
  calModuleGetName(&inName_a, ctx, module, "i0");
  calModuleGetName(&inName_b, ctx, module, "i1");
  calModuleGetName(&outName,  ctx, module, "o0");

  calCtxSetMem(ctx, inName_a, inputMem_a);
  calCtxSetMem(ctx, inName_b, inputMem_b);
  calCtxSetMem(ctx, outName,  outputMem);

  CALdomain domain = {0, 0, 256, 256};

  CALevent e = 0;
  calCtxRunProgram(&e, ctx, func, &domain);

  while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);

  calResMap((CALvoid**)&fdata, &pitch, outputRes, 0);
  for (int i = 0; i < 8; ++i) {
    float* tmp = &fdata[i * pitch];
    for(int j = 0; j < 8; ++j) {
      printf("%f ", tmp[j]);
    }
    printf("\n");
  }
  calResUnmap(outputRes);

  calModuleUnload(ctx, module);
  calclFreeImage(image);
  calclFreeObject(obj);
  calCtxReleaseMem(ctx, inputMem_a);
  calCtxReleaseMem(ctx, inputMem_b);
  calCtxReleaseMem(ctx, outputMem);
  calResFree(outputRes);
  calResFree(inputRes_b);
  calResFree(inputRes_a);
  calCtxDestroy(ctx);
  calDeviceClose(device);
  calShutdown();

  return 0;
}
