cf4ocl (C Framework for OpenCL)  v2.1.0
Object-oriented framework for developing and benchmarking OpenCL projects in C/C++
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ccl_kernel_wrapper.c
Go to the documentation of this file.
1 /*
2  * This file is part of cf4ocl (C Framework for OpenCL).
3  *
4  * cf4ocl is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License as
6  * published by the Free Software Foundation, either version 3 of the
7  * License, or (at your option) any later version.
8  *
9  * cf4ocl is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with cf4ocl. If not, see
16  * <http://www.gnu.org/licenses/>.
17  * */
18 
30 #include "ccl_kernel_wrapper.h"
31 #include "ccl_program_wrapper.h"
32 #include "_ccl_abstract_wrapper.h"
33 #include "_ccl_defs.h"
34 
40 struct ccl_kernel {
41 
46  CCLWrapper base;
47 
52  GHashTable* args;
53 
54 };
55 
65 static void ccl_kernel_release_fields(CCLKernel* krnl) {
66 
67  /* Make sure krnl wrapper object is not NULL. */
68  g_return_if_fail(krnl != NULL);
69 
70  /* Free kernel arguments. */
71  if (krnl->args != NULL)
72  g_hash_table_destroy(krnl->args);
73 
74 }
75 
98 CCL_EXPORT
99 CCLKernel* ccl_kernel_new_wrap(cl_kernel kernel) {
100 
101  return (CCLKernel*) ccl_wrapper_new(
102  CCL_KERNEL, (void*) kernel, sizeof(CCLKernel));
103 
104 }
105 
117 CCL_EXPORT
119  CCLProgram* prg, const char* kernel_name, CCLErr** err) {
120 
121  /* Make sure err is NULL or it is not set. */
122  g_return_val_if_fail((err) == NULL || *(err) == NULL, NULL);
123 
124  /* Make sure prg is not NULL. */
125  g_return_val_if_fail(prg != NULL, NULL);
126 
127  /* Make sure kernel_name is not NULL. */
128  g_return_val_if_fail(kernel_name != NULL, NULL);
129 
130  /* Kernel wrapper object. */
131  CCLKernel* krnl = NULL;
132 
133  /* OpenCL return status. */
134  cl_int ocl_status;
135 
136  /* The OpenCL kernel object. */
137  cl_kernel kernel = NULL;
138 
139  /* Create kernel. */
140  kernel = clCreateKernel(ccl_program_unwrap(prg),
141  kernel_name, &ocl_status);
143  CL_SUCCESS != ocl_status, ocl_status, error_handler,
144  "%s: unable to create kernel (OpenCL error %d: %s).",
145  CCL_STRD, ocl_status, ccl_err(ocl_status));
146 
147  /* Create kernel wrapper. */
148  krnl = ccl_kernel_new_wrap(kernel);
149 
150  /* If we got here, everything is OK. */
151  g_assert(err == NULL || *err == NULL);
152  goto finish;
153 
154 error_handler:
155 
156  /* If we got here there was an error, verify that it is so. */
157  g_assert(err == NULL || *err != NULL);
158 
159  krnl = NULL;
160 
161 finish:
162 
163  /* Return kernel wrapper. */
164  return krnl;
165 
166 }
167 
176 CCL_EXPORT
178 
179  ccl_wrapper_unref((CCLWrapper*) krnl, sizeof(CCLKernel),
180  (ccl_wrapper_release_fields) ccl_kernel_release_fields,
181  (ccl_wrapper_release_cl_object) clReleaseKernel, NULL);
182 
183 }
184 
204 CCL_EXPORT
205 void ccl_kernel_set_arg(CCLKernel* krnl, cl_uint arg_index, void* arg) {
206 
207  /* Make sure krnl is not NULL. */
208  g_return_if_fail(krnl != NULL);
209 
210  /* Initialize table of kernel arguments if necessary. */
211  if (krnl->args == NULL) {
212  krnl->args = g_hash_table_new_full(g_direct_hash,
213  g_direct_equal, NULL, (GDestroyNotify) ccl_arg_destroy);
214  }
215 
216  /* Keep argument in table. */
217  g_hash_table_replace(krnl->args, GUINT_TO_POINTER(arg_index),
218  (gpointer) arg);
219 
220 }
221 
249 CCL_EXPORT
250 void ccl_kernel_set_args(CCLKernel* krnl, ...) {
251 
252  /* Make sure krnl is not NULL. */
253  g_return_if_fail(krnl != NULL);
254 
255  /* The va_list, which represents the variable argument list. */
256  va_list args_va;
257  /* Array of arguments, created from the va_list. */
258  void** args_array = NULL;
259  /* Number of arguments. */
260  guint num_args = 0;
261  /* Aux. arg. when cycling through the va_list. */
262  void* aux_arg;
263 
264  /* Initialize the va_list. */
265  va_start(args_va, krnl);
266 
267  /* Get first argument. */
268  aux_arg = va_arg(args_va, void*);
269 
270  /* Check if any arguments are given, and if so, populate array
271  * of arguments. */
272  if (aux_arg != NULL) {
273 
274  /* 1. Determine number of arguments. */
275 
276  while (aux_arg != NULL) {
277  num_args++;
278  aux_arg = va_arg(args_va, void*);
279  }
280  va_end(args_va);
281 
282  /* 2. Populate array of arguments. */
283 
284  args_array = g_slice_alloc((num_args + 1) * sizeof(void*));
285  va_start(args_va, krnl);
286 
287  for (guint i = 0; i < num_args; ++i) {
288  aux_arg = va_arg(args_va, void*);
289  args_array[i] = aux_arg;
290  }
291  va_end(args_va);
292  args_array[num_args] = NULL;
293 
294  }
295 
296  /* If any arguments are given... */
297  if (num_args > 0) {
298 
299  /* Call the array version of this function.*/
300  ccl_kernel_set_args_v(krnl, args_array);
301 
302  /* Free the array of arguments. */
303  g_slice_free1((num_args + 1) * sizeof(void*), args_array);
304 
305  }
306 
307 }
308 
333 CCL_EXPORT
334 void ccl_kernel_set_args_v(CCLKernel* krnl, void** args) {
335 
336  /* Make sure krnl is not NULL. */
337  g_return_if_fail(krnl != NULL);
338  /* Make sure args is not NULL. */
339  g_return_if_fail(args != NULL);
340 
341  /* Cycle through the arguments. */
342  for (guint i = 0; args[i] != NULL; ++i) {
343 
344  /* Get next argument. */
345  CCLArg* arg = args[i];
346 
347  /* Ignore "skip" arguments. */
348  if (arg == ccl_arg_skip) continue;
349 
350  /* Set the i^th kernel argument. */
351  ccl_kernel_set_arg(krnl, i, arg);
352 
353  }
354 
355 }
356 
392 CCL_EXPORT
394  cl_uint work_dim, const size_t* global_work_offset,
395  const size_t* global_work_size, const size_t* local_work_size,
396  CCLEventWaitList* evt_wait_lst, CCLErr** err) {
397 
398  /* Make sure krnl is not NULL. */
399  g_return_val_if_fail(krnl != NULL, NULL);
400  /* Make sure cq is not NULL. */
401  g_return_val_if_fail(cq != NULL, NULL);
402  /* Make sure err is NULL or it is not set. */
403  g_return_val_if_fail(err == NULL || *err == NULL, NULL);
404 
405  /* OpenCL status flag. */
406  cl_int ocl_status;
407 
408  /* OpenCL event. */
409  cl_event event;
410  /* Event wrapper. */
411  CCLEvent* evt;
412 
413  /* Iterator for table of kernel arguments. */
414  GHashTableIter iter;
415  gpointer arg_index_ptr, arg_ptr;
416 
417  /* Set pending kernel arguments. */
418  if (krnl->args != NULL) {
419  g_hash_table_iter_init(&iter, krnl->args);
420  while (g_hash_table_iter_next(&iter, &arg_index_ptr, &arg_ptr)) {
421  cl_uint arg_index = GPOINTER_TO_UINT(arg_index_ptr);
422  CCLArg* arg = (CCLArg*) arg_ptr;
423  ocl_status = clSetKernelArg(ccl_kernel_unwrap(krnl), arg_index,
424  ccl_arg_size(arg), ccl_arg_value(arg));
426  CL_SUCCESS != ocl_status, ocl_status, error_handler,
427  "%s: unable to set kernel arg %d (OpenCL error %d: %s).",
428  CCL_STRD, arg_index, ocl_status, ccl_err(ocl_status));
429  g_hash_table_iter_remove(&iter);
430  }
431  }
432 
433  /* Run kernel. */
434  ocl_status = clEnqueueNDRangeKernel(ccl_queue_unwrap(cq),
435  ccl_kernel_unwrap(krnl), work_dim, global_work_offset,
436  global_work_size, local_work_size,
437  ccl_event_wait_list_get_num_events(evt_wait_lst),
438  ccl_event_wait_list_get_clevents(evt_wait_lst), &event);
440  CL_SUCCESS != ocl_status, ocl_status, error_handler,
441  "%s: unable to enqueue kernel (OpenCL error %d: %s).",
442  CCL_STRD, ocl_status, ccl_err(ocl_status));
443 
444  /* Wrap event and associate it with the respective command queue.
445  * The event object will be released automatically when the command
446  * queue is released. */
447  evt = ccl_queue_produce_event(cq, event);
448 
449  /* Clear event wait list. */
450  ccl_event_wait_list_clear(evt_wait_lst);
451 
452  /* If we got here, everything is OK. */
453  g_assert(err == NULL || *err == NULL);
454  goto finish;
455 
456 error_handler:
457 
458  /* If we got here there was an error, verify that it is so. */
459  g_assert(err == NULL || *err != NULL);
460 
461  /* An error occurred, return NULL to signal it. */
462  evt = NULL;
463 
464 finish:
465 
466  /* Return evt. */
467  return evt;
468 
469 }
470 
515 CCL_EXPORT
517  cl_uint work_dim, const size_t* global_work_offset,
518  const size_t* global_work_size, const size_t* local_work_size,
519  CCLEventWaitList* evt_wait_lst, CCLErr** err, ...) {
520 
521  /* Make sure krnl is not NULL. */
522  g_return_val_if_fail(krnl != NULL, NULL);
523  /* Make sure cq is not NULL. */
524  g_return_val_if_fail(cq != NULL, NULL);
525  /* Make sure err is NULL or it is not set. */
526  g_return_val_if_fail(err == NULL || *err == NULL, NULL);
527 
528  /* Event wrapper. */
529  CCLEvent* evt;
530  /* The va_list, which represents the variable argument list. */
531  va_list args_va;
532  /* Array of arguments, to be created from the va_list. */
533  void** args_array = NULL;
534  /* Number of arguments. */
535  guint num_args = 0;
536  /* Aux. arg. when cycling through the va_list. */
537  void* aux_arg;
538 
539  /* Initialize the va_list. */
540  va_start(args_va, err);
541 
542  /* Get first argument. */
543  aux_arg = va_arg(args_va, void*);
544 
545  /* Check if any arguments are given, and if so, populate array
546  * of arguments. */
547  if (aux_arg != NULL) {
548 
549  /* 1. Determine number of arguments. */
550 
551  while (aux_arg != NULL) {
552  num_args++;
553  aux_arg = va_arg(args_va, void*);
554  }
555  va_end(args_va);
556 
557  /* 2. Populate array of arguments. */
558 
559  args_array = g_slice_alloc((num_args + 1) * sizeof(void*));
560  va_start(args_va, err);
561 
562  for (guint i = 0; i < num_args; ++i) {
563  aux_arg = va_arg(args_va, void*);
564  args_array[i] = aux_arg;
565  }
566  va_end(args_va);
567  args_array[num_args] = NULL;
568 
569  }
570 
571  /* Set kernel arguments and run it. */
572  evt = ccl_kernel_set_args_and_enqueue_ndrange_v(krnl, cq, work_dim,
573  global_work_offset, global_work_size, local_work_size,
574  evt_wait_lst, args_array, err);
575 
576  /* If any arguments are given... */
577  if (num_args > 0) {
578 
579  /* Free the array of arguments. */
580  g_slice_free1((num_args + 1) * sizeof(void*), args_array);
581 
582  }
583 
584  /* Return event wrapper. */
585  return evt;
586 
587 }
588 
634 CCL_EXPORT
636  CCLQueue* cq, cl_uint work_dim, const size_t* global_work_offset,
637  const size_t* global_work_size, const size_t* local_work_size,
638  CCLEventWaitList* evt_wait_lst, void** args, CCLErr** err) {
639 
640  /* Make sure krnl is not NULL. */
641  g_return_val_if_fail(krnl != NULL, NULL);
642  /* Make sure cq is not NULL. */
643  g_return_val_if_fail(cq != NULL, NULL);
644  /* Make sure err is NULL or it is not set. */
645  g_return_val_if_fail(err == NULL || *err == NULL, NULL);
646 
647  CCLErr* err_internal = NULL;
648 
649  CCLEvent* evt = NULL;
650 
651  /* Set kernel arguments. */
652  ccl_kernel_set_args_v(krnl, args);
653 
654  /* Enqueue kernel. */
655  evt = ccl_kernel_enqueue_ndrange(krnl, cq, work_dim, global_work_offset,
656  global_work_size, local_work_size, evt_wait_lst, &err_internal);
657  ccl_if_err_propagate_goto(err, err_internal, error_handler);
658 
659  /* If we got here, everything is OK. */
660  g_assert(err == NULL || *err == NULL);
661  goto finish;
662 
663 error_handler:
664 
665  /* If we got here there was an error, verify that it is so. */
666  g_assert(err == NULL || *err != NULL);
667 
668 finish:
669 
670  /* Return event wrapper. */
671  return evt;
672 
673 }
674 
705 CCL_EXPORT
707  void (CL_CALLBACK * user_func)(void*), void* args, size_t cb_args,
708  cl_uint num_mos, CCLMemObj* const* mo_list,
709  const void** args_mem_loc, CCLEventWaitList* evt_wait_lst,
710  CCLErr** err) {
711 
712  /* Make sure cq is not NULL. */
713  g_return_val_if_fail(cq != NULL, NULL);
714  /* Make sure user_func is not NULL. */
715  g_return_val_if_fail(user_func != NULL, NULL);
716  /* Make sure that num_mos == 0 AND mo_list != NULL, OR, that
717  * num_mos > 0 AND mo_list != NULL */
718  g_return_val_if_fail(((num_mos == 0) && (mo_list == NULL))
719  || ((num_mos > 0) && (mo_list != NULL)), NULL);
720  /* Make sure err is NULL or it is not set. */
721  g_return_val_if_fail(err == NULL || *err == NULL, NULL);
722 
723  /* OpenCL status flag. */
724  cl_int ocl_status;
725  /* OpenCL event. */
726  cl_event event = NULL;
727  /* Event wrapper. */
728  CCLEvent* evt = NULL;
729  /* List of cl_mem objects. */
730  cl_mem* mem_list = NULL;
731 
732  /* Unwrap memory objects. */
733  if (num_mos > 0) {
734  mem_list = g_slice_alloc(sizeof(cl_mem) * num_mos);
735  for (cl_uint i = 0; i < num_mos; ++i) {
736  mem_list[i] = mo_list[i] != NULL
737  ? ccl_memobj_unwrap(mo_list[i])
738  : NULL;
739  }
740  }
741 
742  /* Enqueue kernel. */
743  ocl_status = clEnqueueNativeKernel(ccl_queue_unwrap(cq), user_func,
744  args, cb_args, num_mos, (const cl_mem*) mem_list, args_mem_loc,
745  ccl_event_wait_list_get_num_events(evt_wait_lst),
746  ccl_event_wait_list_get_clevents(evt_wait_lst), &event);
748  CL_SUCCESS != ocl_status, ocl_status, error_handler,
749  "%s: unable to enqueue native kernel (OpenCL error %d: %s).",
750  CCL_STRD, ocl_status, ccl_err(ocl_status));
751 
752  /* Wrap event and associate it with the respective command queue.
753  * The event object will be released automatically when the command
754  * queue is released. */
755  evt = ccl_queue_produce_event(cq, event);
756 
757  /* Clear event wait list. */
758  ccl_event_wait_list_clear(evt_wait_lst);
759 
760  /* If we got here, everything is OK. */
761  g_assert(err == NULL || *err == NULL);
762  goto finish;
763 
764 error_handler:
765 
766  /* If we got here there was an error, verify that it is so. */
767  g_assert(err == NULL || *err != NULL);
768 
769 finish:
770 
771  /* Release temporary cl_mem list. */
772  if (num_mos > 0)
773  g_slice_free1(sizeof(cl_mem) * num_mos, mem_list);
774 
775  /* Return event wrapper. */
776  return evt;
777 
778 }
779 
798 CCL_EXPORT
800 
801  /* Make sure krnl is not NULL. */
802  g_return_val_if_fail(krnl != NULL, 0);
803  /* Make sure err is NULL or it is not set. */
804  g_return_val_if_fail(err == NULL || *err == NULL, 0);
805 
806  cl_context context;
807  CCLContext* ctx;
808  CCLErr* err_internal = NULL;
809  cl_uint ocl_ver;
810 
811  /* Get cl_context object for this kernel. */
812  context = ccl_kernel_get_info_scalar(
813  krnl, CL_KERNEL_CONTEXT, cl_context, &err_internal);
814  ccl_if_err_propagate_goto(err, err_internal, error_handler);
815 
816  /* Get context wrapper. */
817  ctx = ccl_context_new_wrap(context);
818 
819  /* Get OpenCL version. */
820  ocl_ver = ccl_context_get_opencl_version(ctx, &err_internal);
821  ccl_if_err_propagate_goto(err, err_internal, error_handler);
822 
823  /* Unref. the context wrapper. */
824  ccl_context_unref(ctx);
825 
826  /* If we got here, everything is OK. */
827  g_assert(err == NULL || *err == NULL);
828  goto finish;
829 
830 error_handler:
831 
832  /* If we got here there was an error, verify that it is so. */
833  g_assert(err == NULL || *err != NULL);
834  ocl_ver = 0;
835 
836 finish:
837 
838  /* Return event wrapper. */
839  return ocl_ver;
840 
841 }
842 
854 #define ccl_if_err_not_info_unavailable_propagate_goto( \
855  err, err_internal, error_handler) \
856  if (((err_internal) != NULL) && ((err_internal)->domain == CCL_ERROR) && \
857  ((err_internal)->code == CCL_ERROR_INFO_UNAVAILABLE_OCL)) { \
858  g_warning("In %s: %s", CCL_STRD, (err_internal)->message); \
859  g_clear_error(&(err_internal)); \
860  } else { \
861  ccl_if_err_propagate_goto(err, err_internal, error_handler); \
862  }
863 
902 CCL_EXPORT
904  cl_uint dims, const size_t* real_worksize, size_t* gws, size_t* lws,
905  CCLErr** err) {
906 
907  /* Make sure dev is not NULL. */
908  g_return_val_if_fail(dev != NULL, CL_FALSE);
909  /* Make sure dims not zero. */
910  g_return_val_if_fail(dims > 0, CL_FALSE);
911  /* Make sure real_worksize is not NULL. */
912  g_return_val_if_fail(real_worksize != NULL, CL_FALSE);
913  /* Make sure lws is not NULL. */
914  g_return_val_if_fail(lws != NULL, CL_FALSE);
915  /* Make sure err is NULL or it is not set. */
916  g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
917 
918  /* The preferred workgroup size. */
919  size_t wg_size_mult = 0;
920  size_t wg_size_max = 0;
921  size_t wg_size = 1, wg_size_aux;
922  size_t* max_wi_sizes;
923  cl_uint dev_dims;
924  cl_bool ret_status;
925  size_t real_ws = 1;
926 
927  /* Error handling object. */
928  CCLErr* err_internal = NULL;
929 
930  /* Check if device supports the requested dims. */
931  dev_dims = ccl_device_get_info_scalar(
932  dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint, &err_internal);
933  ccl_if_err_propagate_goto(err, err_internal, error_handler);
934  ccl_if_err_create_goto(*err, CCL_ERROR, dims > dev_dims,
935  CCL_ERROR_UNSUPPORTED_OCL, error_handler,
936  "%s: device only supports a maximum of %d dimension(s), "
937  "but %d were requested.",
938  CCL_STRD, dev_dims, dims);
939 
940  /* Get max. work item sizes for device. */
941  max_wi_sizes = ccl_device_get_info_array(
942  dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, size_t*, &err_internal);
943  ccl_if_err_propagate_goto(err, err_internal, error_handler);
944 
945  /* For each dimension, if the user specified a maximum local work
946  * size, the effective maximum local work size will be the minimum
947  * between the user value and the device value. */
948  for (cl_uint i = 0; i < dims; ++i) {
949  if (lws[i] != 0)
950  max_wi_sizes[i] = MIN(max_wi_sizes[i], lws[i]);
951  }
952 
953  /* If kernel is not NULL, query it about workgroup size preferences
954  * and capabilities. */
955  if (krnl != NULL) {
956 
957  /* Determine maximum workgroup size. */
958  wg_size_max = ccl_kernel_get_workgroup_info_scalar(krnl, dev,
959  CL_KERNEL_WORK_GROUP_SIZE, size_t, &err_internal);
960  ccl_if_err_not_info_unavailable_propagate_goto(
961  err, err_internal, error_handler);
962 
963 #ifdef CL_VERSION_1_1
964 
965  /* Determine preferred workgroup size multiple (OpenCL >= 1.1). */
966 
967  /* Get OpenCL version of the underlying platform. */
968  cl_uint ocl_ver = ccl_kernel_get_opencl_version(krnl, &err_internal);
969  ccl_if_err_propagate_goto(err, err_internal, error_handler);
970 
971  /* If OpenCL version of the underlying platform is >= 1.1 ... */
972  if (ocl_ver >= 110) {
973 
974  /* ...use CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE... */
976  krnl, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
977  size_t, &err_internal);
978  ccl_if_err_not_info_unavailable_propagate_goto(
979  err, err_internal, error_handler);
980 
981  } else {
982 
983  /* ...otherwise just use CL_KERNEL_WORK_GROUP_SIZE. */
984  wg_size_mult = wg_size_max;
985 
986  }
987 
988 #else
989 
990  wg_size_mult = wg_size_max;
991 
992 #endif
993 
994  }
995 
996  /* If it was not possible to obtain wg_size_mult and wg_size_max, either
997  * because kernel is NULL or the information was unavailable, use values
998  * obtained from device. */
999  if ((wg_size_max == 0) && (wg_size_mult == 0)) {
1000  wg_size_max = ccl_device_get_info_scalar(
1001  dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t, &err_internal);
1002  ccl_if_err_propagate_goto(err, err_internal, error_handler);
1003  wg_size_mult = wg_size_max;
1004  }
1005 
1006  /* Try to find an appropriate local worksize. */
1007  for (cl_uint i = 0; i < dims; ++i) {
1008 
1009  /* Each lws component is at most the preferred workgroup
1010  * multiple or the maximum size of that component in device. */
1011  lws[i] = MIN(wg_size_mult, max_wi_sizes[i]);
1012 
1013  /* Update total workgroup size. */
1014  wg_size *= lws[i];
1015 
1016  /* Update total real worksize. */
1017  real_ws *= real_worksize[i];
1018 
1019  }
1020 
1021  /* Don't let each component of the local worksize to be
1022  * higher than the respective component of the real
1023  * worksize. */
1024  for (cl_uint i = 0; i < dims; ++i) {
1025  while (lws[i] > real_worksize[i]) {
1026  lws[i] /= 2;
1027  wg_size /= 2;
1028  }
1029  }
1030 
1031  /* The total workgroup size can't be higher than the maximum
1032  * supported by the device. */
1033  while (wg_size > wg_size_max) {
1034  wg_size_aux = wg_size;
1035  for (int i = dims - 1; i >= 0; --i) {
1036  if (lws[i] > 1) {
1037  /* Local work size can't be smaller than 1. */
1038  lws[i] /= 2;
1039  wg_size /= 2;
1040  }
1041  if (wg_size <= wg_size_max) break;
1042  }
1043  /* Avoid infinite loops and throw error if wg_size didn't
1044  * change. */
1045  ccl_if_err_create_goto(*err, CCL_ERROR, wg_size == wg_size_aux,
1046  CCL_ERROR_OTHER, error_handler,
1047  "%s: Unable to determine a work size within the device limit (%d).",
1048  CCL_STRD, (int) wg_size_max);
1049  }
1050 
1051  /* If output variable gws is not NULL... */
1052  if (gws != NULL) {
1053  /* ...find a global worksize which is a multiple of the local
1054  * worksize and is big enough to handle the real worksize. */
1055  for (cl_uint i = 0; i < dims; ++i) {
1056  gws[i] = ((real_worksize[i] / lws[i])
1057  + (((real_worksize[i] % lws[i]) > 0) ? 1 : 0))
1058  * lws[i];
1059  }
1060  } else {
1061  /* ...otherwise check if found local worksizes are divisors of
1062  * the respective real_worksize. If so keep them, otherwise find
1063  * local worksizes which respect the maximum sizes allowed by
1064  * the kernel and the device, and is a dimension-wise divisor of
1065  * the real_worksize. */
1066  cl_bool lws_are_divisors = CL_TRUE;
1067  for (cl_uint i = 0; i < dims; ++i) {
1068  /* Check if lws[i] is divisor of real_worksize[i]. */
1069  if (real_worksize[i] % lws[i] != 0) {
1070  /* Ops... lws[i] is not divisor of real_worksize[i], so
1071  * we'll have to try and find new lws ahead. */
1072  lws_are_divisors = CL_FALSE;
1073  break;
1074  }
1075  }
1076  /* Is lws divisor of real_worksize, dimension-wise? */
1077  if (!lws_are_divisors) {
1078  /* No, so we'll have to find new lws. */
1079  wg_size = 1;
1080  for (cl_uint i = 0; i < dims; ++i) {
1081 
1082  /* For each dimension, try to use the previously
1083  * found lws[i]. */
1084  if ((real_worksize[i] % lws[i] != 0)
1085  || (lws[i] * wg_size > wg_size_max))
1086  {
1087  /* Previoulsy found lws[i] not usable, find
1088  * new one. Must be a divisor of real_worksize[i]
1089  * and respect the kernel and device maximum lws.*/
1090  cl_uint best_lws_i = 1;
1091  for (cl_uint j = 2; j <= real_worksize[i] / 2; ++j) {
1092  /* If current value is higher than the kernel
1093  * and device limits, stop searching and use
1094  * the best one so far. */
1095  if ((wg_size * j > wg_size_max)
1096  || (j > max_wi_sizes[i])) break;
1097  /* Otherwise check if current value is divisor
1098  * of lws[i]. If so, keep it as the best so
1099  * far. */
1100  if (real_worksize[i] % j == 0)
1101  best_lws_i = j;
1102  }
1103  /* Keep the best divisor for current dimension. */
1104  lws[i] = best_lws_i;
1105  }
1106  /* Update absolute workgroup size (all dimensions). */
1107  wg_size *= lws[i];
1108  }
1109  }
1110  }
1111 
1112  /* If we got here, everything is OK. */
1113  g_assert(err == NULL || *err == NULL);
1114  ret_status = CL_TRUE;
1115  goto finish;
1116 
1117 error_handler:
1118 
1119  /* If we got here there was an error, verify that it is so. */
1120  g_assert(err == NULL || *err != NULL);
1121  ret_status = CL_FALSE;
1122 
1123 finish:
1124 
1125  /* Return status. */
1126  return ret_status;
1127 
1128 }
1129 
1130 #ifdef CL_VERSION_1_2
1131 
1151 cl_int ccl_kernel_get_arg_info_adapter(cl_kernel kernel, void* ptr_arg_indx,
1152  cl_kernel_arg_info param_name, size_t param_value_size, void *param_value,
1153  size_t* param_value_size_ret) {
1154 
1155  return clGetKernelArgInfo(kernel, GPOINTER_TO_UINT(ptr_arg_indx),
1156  param_name, param_value_size, param_value, param_value_size_ret);
1157 }
1158 
1159 #endif
1160 
1177 CCL_EXPORT
1179  cl_kernel_arg_info param_name, CCLErr** err) {
1180 
1181  /* Make sure krnl is not NULL. */
1182  g_return_val_if_fail(krnl != NULL, NULL);
1183 
1184  /* Helper wrapper. */
1185  CCLWrapper fake_wrapper;
1186 
1187  /* Kernel information to return. */
1188  CCLWrapperInfo* info;
1189 
1190  /* Error handling object. */
1191  CCLErr* err_internal = NULL;
1192 
1193  /* OpenCL version of the underlying platform. */
1194  double ocl_ver;
1195 
1196 #ifndef CL_VERSION_1_2
1197 
1198  CCL_UNUSED(idx);
1199  CCL_UNUSED(param_name);
1200  CCL_UNUSED(fake_wrapper);
1201  CCL_UNUSED(err_internal);
1202  CCL_UNUSED(ocl_ver);
1203 
1204  /* If cf4ocl was not compiled with support for OpenCL >= 1.2, always throw
1205  * error. */
1206  ccl_if_err_create_goto(*err, CCL_ERROR, TRUE,
1207  CCL_ERROR_UNSUPPORTED_OCL, error_handler,
1208  "%s: Obtaining kernel argument information requires cf4ocl to be "
1209  "deployed with support for OpenCL version 1.2 or newer.",
1210  CCL_STRD);
1211 
1212 #else
1213 
1214  /* Check that context platform is >= OpenCL 1.2 */
1215  ocl_ver = ccl_kernel_get_opencl_version(krnl, &err_internal);
1216  ccl_if_err_propagate_goto(err, err_internal, error_handler);
1217 
1218  /* If OpenCL version is not >= 1.2, throw error. */
1219  ccl_if_err_create_goto(*err, CCL_ERROR, ocl_ver < 120,
1220  CCL_ERROR_UNSUPPORTED_OCL, error_handler,
1221  "%s: information about kernel arguments requires OpenCL" \
1222  " version 1.2 or newer.", CCL_STRD);
1223 
1224  /* Wrap argument index in a fake cl_object. */
1225  fake_wrapper.cl_object = GUINT_TO_POINTER(idx);
1226 
1227  /* Get kernel argument info. */
1228  info = ccl_wrapper_get_info(
1229  (CCLWrapper*) krnl, &fake_wrapper, param_name, 0,
1230  CCL_INFO_KERNEL_ARG, CL_FALSE, &err_internal);
1231  ccl_if_err_propagate_goto(err, err_internal, error_handler);
1232 
1233 #endif
1234 
1235  /* If we got here, everything is OK. */
1236  g_assert(err == NULL || *err == NULL);
1237  goto finish;
1238 
1239 error_handler:
1240 
1241  /* If we got here there was an error, verify that it is so. */
1242  g_assert(err == NULL || *err != NULL);
1243 
1244  /* An error occurred, return NULL to signal it. */
1245  info = NULL;
1246 
1247 finish:
1248 
1249  /* Return argument info. */
1250  return info;
1251 
1252 }
1253 
void ccl_kernel_destroy(CCLKernel *krnl)
Decrements the reference count of the kernel wrapper object.
CCLEvent * ccl_kernel_set_args_and_enqueue_ndrange_v(CCLKernel *krnl, CCLQueue *cq, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, CCLEventWaitList *evt_wait_lst, void **args, CCLErr **err)
Set kernel arguments and enqueue it for execution on a device.
#define CCL_OCL_ERROR
Resolves to error category identifying string, in this case an error in the OpenCL library...
Definition: ccl_common.h:324
#define ccl_kernel_get_workgroup_info_scalar(krnl, dev, param_name, param_type, err)
Macro which returns a scalar kernel workgroup information value.
Definition of a wrapper class and its methods for OpenCL kernel objects.
Any other errors.
Definition: ccl_common.h:315
#define ccl_if_err_create_goto(err, quark, error_condition, error_code, label, msg,...)
If error is detected (error_code != no_error_code), create an error object (CCLErr) and go to the spe...
Definition: _ccl_defs.h:91
const CCLArg * ccl_arg_skip
Use this constant to skip kernel arguments in the ccl_kernel_set_args(), ccl_kernel_set_args_v(), ccl_kernel_set_args_and_enqueue_ndrange() and ccl_kernel_set_args_and_enqueue_ndrange_v() functions.
GPtrArray * CCLEventWaitList
A list of event objects on which enqueued commands can wait.
Useful definitions used internally by cf4ocl.
#define ccl_memobj_unwrap(mo)
Get the OpenCL cl_mem object.
CCLWrapperInfo * ccl_kernel_get_arg_info(CCLKernel *krnl, cl_uint idx, cl_kernel_arg_info param_name, CCLErr **err)
Get a CCLWrapperInfo kernel argument information object.
The context wrapper class.
#define ccl_if_err_propagate_goto(err_dest, err_src, label)
Same as ccl_if_err_goto(), but rethrows error in a source CCLErr object to a new destination CCLErr o...
Definition: _ccl_defs.h:120
cl_uint ccl_kernel_get_opencl_version(CCLKernel *krnl, CCLErr **err)
Get the OpenCL version of the platform associated with this kernel.
Base class for memory object wrappers, i.e., CCLBuffer and CCLImage.
Command queue wrapper class.
const char * ccl_err(int code)
Convert OpenCL error code to a readable string.
Definition: ccl_errors.c:118
void ccl_event_wait_list_clear(CCLEventWaitList *evt_wait_lst)
Clears an event wait list.
Kernel object.
Definition: ccl_common.h:104
CCLContext * ccl_context_new_wrap(cl_context context)
Get the context wrapper for the given OpenCL context.
cl_uint ccl_context_get_opencl_version(CCLContext *ctx, CCLErr **err)
Get the OpenCL version of the platform associated with this context.
void ccl_kernel_set_args(CCLKernel *krnl,...)
Set all kernel arguments.
cl_bool ccl_kernel_suggest_worksizes(CCLKernel *krnl, CCLDevice *dev, cl_uint dims, const size_t *real_worksize, size_t *gws, size_t *lws, CCLErr **err)
Suggest appropriate local (and optionally global) work sizes for the given real work size...
#define CCL_ERROR
Resolves to error category identifying string, in this case an error in cf4ocl.
Definition: ccl_common.h:320
CCLWrapperInfo * ccl_wrapper_get_info(CCLWrapper *wrapper1, CCLWrapper *wrapper2, cl_uint param_name, size_t min_size, CCLInfo info_type, cl_bool use_cache, CCLErr **err)
Get information about any wrapped OpenCL object.
Definition of a wrapper class and its methods for OpenCL program objects.
Event wrapper class.
Base class for all OpenCL wrappers.
#define ccl_kernel_get_info_scalar(krnl, param_name, param_type, err)
Macro which returns a scalar kernel information value.
#define ccl_context_unref(ctx)
Alias to ccl_context_destroy().
#define CCL_UNUSED(x)
Macro to avoid warning in unused variables.
Definition: ccl_common.h:86
#define ccl_kernel_unwrap(krnl)
Get the OpenCL kernel object.
#define ccl_device_get_info_array(dev, param_name, param_type, err)
Macro which returns an array device information value.
CCLEvent * ccl_kernel_enqueue_native(CCLQueue *cq, void(*user_func)(void *), void *args, size_t cb_args, cl_uint num_mos, CCLMemObj *const *mo_list, const void **args_mem_loc, CCLEventWaitList *evt_wait_lst, CCLErr **err)
Enqueues a command to execute a native C/C++ function not compiled using the OpenCL compiler...
#define ccl_device_get_info_scalar(dev, param_name, param_type, err)
Macro which returns a scalar device information value.
CCLKernel * ccl_kernel_new_wrap(cl_kernel kernel)
Get the kernel wrapper for the given OpenCL kernel.
Program wrapper class.
CCLEvent * ccl_kernel_enqueue_ndrange(CCLKernel *krnl, CCLQueue *cq, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, CCLEventWaitList *evt_wait_lst, CCLErr **err)
Enqueues a kernel for execution on a device.
Kernel wrapper class.
Class which represents information about a wrapped OpenCL object.
void ccl_kernel_set_arg(CCLKernel *krnl, cl_uint arg_index, void *arg)
Set one kernel argument.
#define ccl_program_unwrap(prg)
Get the OpenCL program object.
GError CCLErr
Error handling class.
Definition: ccl_common.h:291
Request information about kernel arguments.
Definition: ccl_common.h:139
void ccl_kernel_set_args_v(CCLKernel *krnl, void **args)
Set all kernel arguments.
#define ccl_queue_unwrap(cq)
Get the OpenCL command queue object.
Device wrapper class.
The operation is not supported by the version of the selected OpenCL platform.
Definition: ccl_common.h:311
CCLKernel * ccl_kernel_new(CCLProgram *prg, const char *kernel_name, CCLErr **err)
Create a new kernel wrapper object.
CCLEvent * ccl_kernel_set_args_and_enqueue_ndrange(CCLKernel *krnl, CCLQueue *cq, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, CCLEventWaitList *evt_wait_lst, CCLErr **err,...)
Set kernel arguments and enqueue it for execution on a device.