samples/atiSamples/Template/Template.rkt
#lang racket
(require "../../../c.rkt"
         "../atiUtils/utils.rkt"
         ffi/unsafe
         ffi/cvector
         ffi/unsafe/cvector)

;; Mutable module-level state shared by the stages of this sample.
;; Every binding starts as a placeholder and is assigned with set! by the
;; initialize* procedures further down.

;; Host side: element count, the scalar passed to the kernel, and the two
;; host arrays.
(define-values (width multiplier) (values 0 0))
(define-values (input output) (values #f #f))

;; OpenCL side: the values returned by init-cl, followed by the buffer
;; objects and the kernel created from them.
(define-values (devices context commandQueue program) (values #f #f #f #f))
(define-values (inputBuffer outputBuffer kernel) (values #f #f #f))

;; Prints a blank line, then "arrayName:" on its own line, then the leading
;; elements of arrayData read as _cl_uint values (each followed by a space),
;; and finally a newline.
;;   arrayName - label shown in the heading
;;   arrayData - pointer the elements are read from with ptr-ref
;;   length    - number of elements available; at most 256 are printed
(define (print1DArray arrayName arrayData length)
  (define shown
    (cond [(> length 256) 256]
          [else length]))
  (printf "~n~a:~n" arrayName)
  (let loop ([idx 0])
    (when (< idx shown)
      (printf "~a " (ptr-ref arrayData _cl_uint idx))
      (loop (add1 idx))))
  (display "\n"))

;; Sets the problem size and multiplier, allocates the two host arrays, fills
;; the input array with 0, 1, 2, ... and prints it.
(define (initializeHost)
  (set! width 256)
  (set! multiplier 2)
  ;; Both arrays hold `width` _cl_uint elements. They are allocated in 'raw
  ;; mode, i.e. outside the garbage collector, so they must be released with
  ;; free (see cleanupHost).
  (let ([byte-count (* (ctype-sizeof _cl_uint) width)])
    (set! input (malloc byte-count 'raw))
    (set! output (malloc byte-count 'raw)))
  ;; input[k] = k for every k below width; output is not written here.
  (do ([k 0 (add1 k)])
      ((>= k width))
    (ptr-set! input _cl_uint k k))
  (print1DArray "Input" input width))

;; Creates the OpenCL objects used by the sample: the devices, context,
;; command queue and program returned by init-cl for a CPU device, one buffer
;; per host array, and the "templateKernel" kernel object.
;; Reads width, input and output, so initializeHost must have run first.
(define (initializeCL)
  (define buffer-flags '(CL_MEM_READ_WRITE CL_MEM_USE_HOST_PTR))
  ;; Creates a buffer of `width` _cl_uint elements on top of host-ptr.
  ;; `context` is looked up when the helper is called, i.e. after the
  ;; assignment below.
  (define (wrap-host-array host-ptr)
    (clCreateBuffer context buffer-flags (* width (ctype-sizeof _cl_uint)) host-ptr))
  (let-values ([(devs ctx queue prog)
                (init-cl "Template_Kernels.cl" #:deviceType 'CL_DEVICE_TYPE_CPU)])
    (set! devices devs)
    (set! context ctx)
    (set! commandQueue queue)
    (set! program prog))
  (set! inputBuffer (wrap-host-array input))
  (set! outputBuffer (wrap-host-array output))
  (set! kernel (clCreateKernel program #"templateKernel")))

;; Binds the kernel arguments, runs the kernel over `width` work-items in a
;; single dimension, and reads the output buffer back into the host array.
;; Requires initializeHost and initializeCL to have run.
;;
;; The device-limit queries that used to sit at the top of this procedure
;; (max work-group size, max dimensions, max work-item sizes) were never
;; consulted, so they have been dropped. The launch uses a local size of 1.
(define (runCLKernels)
  ;; Blocks until the command behind evt has completed, then releases the
  ;; event so it does not leak.
  (define (wait-and-release! evt)
    (clWaitForEvents (vector evt))
    (clReleaseEvent evt))
  (define globalThreads (vector width))
  (define localThreads (vector 1))
  ;; Neither command depends on an earlier event.
  (define no-wait-list (make-vector 0))
  ;; Kernel argument slots: 0 = output buffer, 1 = input buffer, 2 = multiplier.
  (clSetKernelArg:_cl_mem kernel 0 outputBuffer)
  (clSetKernelArg:_cl_mem kernel 1 inputBuffer)
  (clSetKernelArg:_cl_uint kernel 2 multiplier)
  (wait-and-release!
   (clEnqueueNDRangeKernel commandQueue kernel 1 globalThreads localThreads no-wait-list))
  ;; The read is enqueued with 'CL_TRUE (blocking); the wait that follows is
  ;; kept so that both commands are handled the same way.
  (wait-and-release!
   (clEnqueueReadBuffer commandQueue outputBuffer 'CL_TRUE 0
                        (* width (ctype-sizeof _cl_uint)) output no-wait-list)))

;; Releases every OpenCL object created by initializeCL: the kernel, the
;; program, both buffers, the command queue and finally the context.
;; The module-level variables are not reset afterwards, so they must not be
;; used again once this has run.
(define (cleanupCL)
  (clReleaseKernel kernel)
  (clReleaseProgram program)
  (clReleaseMemObject inputBuffer)
  (clReleaseMemObject outputBuffer)
  (clReleaseCommandQueue commandQueue)
  ;; The context is released last, after everything created from it.
  (clReleaseContext context))

;; Frees the two host arrays that initializeHost allocated with
;; (malloc ... 'raw), input first and then output.
(define (cleanupHost)
  (for-each free (list input output)))

;; Checks the result against a host-side reference and prints "Passed!" when
;; output[k] equals input[k] * multiplier for every k below width, otherwise
;; "Failed!".
(define (verify)
  ;; #t when element k of output matches the expected product.
  (define (element-ok? k)
    (= (* (ptr-ref input _cl_uint k) multiplier)
       (ptr-ref output _cl_uint k)))
  (define all-ok?
    (for/and ([k (in-range width)])
      (element-ok? k)))
  (printf "~a~n" (if all-ok? "Passed!" "Failed!")))

;; Script body: runs the whole sample when the module is instantiated.
;; The order matters because each step uses state set up by the previous ones.
(initializeHost)                     ; set sizes, allocate and fill host arrays
(initializeCL)                       ; create OpenCL objects over the host arrays
(runCLKernels)                       ; run the kernel and read the result back
(print1DArray "Output" output width)
(verify)                             ; compare output with input * multiplier
(cleanupCL)                          ; release OpenCL objects first ...
(cleanupHost)                        ; ... then free the host arrays