cf4ocl (C Framework for OpenCL)  v2.1.0
Object-oriented framework for developing and benchmarking OpenCL projects in C/C++
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ccl_profiler.c
Go to the documentation of this file.
1 /*
2  * This file is part of cf4ocl (C Framework for OpenCL).
3  *
4  * cf4ocl is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License as
6  * published by the Free Software Foundation, either version 3 of the
7  * License, or (at your option) any later version.
8  *
9  * cf4ocl is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with cf4ocl. If not, see
16  * <http://www.gnu.org/licenses/>.
17  * */
18 
29 #include "ccl_profiler.h"
30 #include "_ccl_defs.h"
31 
/**
 * Three-way comparison of two scalar values, honoring a sort order.
 *
 * Expands to an expression yielding 1, -1 or 0. When `ord` equals
 * CCL_PROF_SORT_ASC the result is positive if `x > y`; for any other
 * `ord` the sign is inverted. The arguments `x` and `y` can be
 * evaluated more than once, so they should be free of side effects.
 */
41 #define CCL_PROF_CMP_INT(x, y, ord) (((ord) == CCL_PROF_SORT_ASC) \
42  ? (((x) > (y)) ? 1 : (((x) < (y)) ? -1 : 0)) \
43  : (((x) < (y)) ? 1 : (((x) > (y)) ? -1 : 0)))
44 
/**
 * Three-way comparison of two strings, honoring a sort order.
 *
 * Delegates to g_strcmp0(). For any `ord` other than CCL_PROF_SORT_ASC
 * the operands are swapped, which inverts the sign of the result.
 */
54 #define CCL_PROF_CMP_STR(s1, s2, ord) (((ord) == CCL_PROF_SORT_ASC) \
55  ? g_strcmp0(s1, s2) : g_strcmp0(s2, s1))
56 
/**
 * Expands to a brace-enclosed initializer holding two values decoded
 * from the `int` that `userdata` points to: first the low four bits
 * (mask 0x0F), then bits 4-7 (mask 0xF0, left unshifted). In this file
 * it initializes a CCLProfSort, so the two values end up in its `order`
 * and `criteria` members respectively.
 *
 * NOTE(review): `userdata` is not parenthesized in the expansion, so
 * only simple pointer expressions should be passed.
 */
66 #define ccl_prof_get_sort(userdata) \
67  {0x0F & *((int*) userdata), 0xF0 & *((int*) userdata)}
68 
/**
 * Sort settings used by the comparison callbacks in this file. An
 * instance is built from the integer received as callback user data
 * through the ccl_prof_get_sort() macro.
 */
74 typedef struct ccl_prof_sort_data {
75 
     /* Sort order; taken from the low four bits of the user data integer. */
77  CCLProfSortOrder order;
78 
     /* Sort criteria; taken from bits 4-7 of the user data integer. Each
      * comparator casts it to its own criteria enumeration. */
80  int criteria;
81 
82 } CCLProfSort;
83 
/**
 * Profile object: holds the queues to be profiled, the data gathered
 * from their events and the statistics derived from that data.
 */
91 struct ccl_prof {
92 
     /* TRUE once ccl_prof_calc() has completed successfully; functions
      * that read results require it to be TRUE, functions that add
      * input require it to be FALSE. */
98  gboolean calc;
99 
     /* Table mapping event name (string key) to a numeric event name ID
      * stored directly in the value pointer. */
105  GHashTable* event_names;
106 
     /* Reverse of event_names: numeric event name ID -> event name. */
111  GHashTable* event_name_ids;
112 
     /* Table mapping queue name to queue wrapper; the table releases the
      * wrappers with ccl_queue_destroy() when entries are removed. */
117  GHashTable* queues;
118 
     /* Number of events for which profiling data was obtained; the
      * running value also serves as the ID given to each event. */
123  cl_uint num_events;
124 
     /* List of CCLProfInst objects (event start and end instants). */
129  GList* instants;
130 
     /* List of CCLProfInfo objects (one per profiled event). */
135  GList* infos;
136 
     /* List of CCLProfAgg objects (one per event name). */
142  GList* aggs;
143 
     /* List of CCLProfOverlap objects. */
148  GList* overlaps;
149 
     /* Current position of the iteration over aggs. */
154  GList* agg_iter;
155 
     /* Current position of the iteration over infos. */
160  GList* info_iter;
161 
     /* Presumably the current position of an iteration over instants;
      * not used in the visible part of this file -- TODO confirm. */
166  GList* inst_iter;
167 
     /* Presumably the current position of an iteration over overlaps;
      * not used in the visible part of this file -- TODO confirm. */
172  GList* overlap_iter;
173 
     /* Sum of (end - start) over all event instant pairs. */
178  cl_ulong total_events_time;
179 
     /* total_events_time minus the accumulated overlap time. */
185  cl_ulong total_events_eff_time;
186 
     /* Smallest start instant seen so far; starts at CL_ULONG_MAX. */
191  cl_ulong t_start;
192 
     /* Summary string, released with g_free() on destruction; it is not
      * written in the visible part of this file. */
197  gchar* summary;
198 
     /* Timer created with g_timer_new() and stopped by ccl_prof_stop(). */
203  GTimer* timer;
204 
205 };
206 
/* File-scope export settings with their initial values: tab as field
 * separator, line feed as newline, empty queue and event name
 * delimiters, and zero_start enabled. The code consuming these options
 * is not in the visible part of this file -- presumably the export
 * routines; TODO confirm. */
207 /* Default export options. */
208 static CCLProfExportOptions export_options = {
209 
210  .separator = "\t",
211  .newline = "\n",
212  .queue_delim = "",
213  .evname_delim = "",
214  .zero_start = CL_TRUE
215 
216 };
217 
232 static CCLProfInst* ccl_prof_inst_new(const char* event_name,
233  const char* queue_name, cl_uint id, cl_ulong instant,
234  CCLProfInstType type) {
235 
236  /* Allocate memory for event instant data structure. */
237  CCLProfInst* inst = g_slice_new(CCLProfInst);
238 
239  /* Initialize structure fields. */
240  inst->event_name = event_name;
241  inst->queue_name = queue_name;
242  inst->id = id;
243  inst->instant = instant;
244  inst->type = type;
245 
246  /* Return event instant data structure. */
247  return inst;
248 }
249 
258 static void ccl_prof_inst_destroy(CCLProfInst* instant) {
259 
260  g_return_if_fail(instant != NULL);
261 
262  g_slice_free(CCLProfInst, instant);
263 }
264 
/**
 * Comparison callback for CCLProfInst objects; in this file it is
 * passed to g_list_sort_with_data().
 *
 * `userdata` must point to an int, which is decoded into sort order and
 * criteria by ccl_prof_get_sort(). One group of statements compares the
 * `instant` fields; the other compares the `id` fields and, when the
 * IDs are equal, decides using the type of the first instant only: a
 * START instant yields -1 when `sort.order` is zero and 1 otherwise, an
 * END instant the opposite.
 *
 * NOTE(review): this listing has no `case` labels in front of the two
 * comparison groups (the embedded numbering jumps 287->289 and
 * 291->293), so as written every statement before `default:` is
 * unreachable. The labels must be restored from the original source.
 */
278 static gint ccl_prof_inst_comp(
279  gconstpointer a, gconstpointer b, gpointer userdata) {
280 
281  /* Cast input parameters to event instant data structures. */
282  CCLProfInst* ev_inst1 = (CCLProfInst*) a;
283  CCLProfInst* ev_inst2 = (CCLProfInst*) b;
284  CCLProfSort sort = ccl_prof_get_sort(userdata);
285  /* Perform comparison. */
286  switch ((CCLProfInstSort) sort.criteria) {
287  gint result;
289  /* Sort by instant */
290  return CCL_PROF_CMP_INT(ev_inst1->instant, ev_inst2->instant,
291  sort.order);
293  /* Sort by ID */
294  result = CCL_PROF_CMP_INT(ev_inst1->id, ev_inst2->id,
295  sort.order);
296  if (result != 0) return result;
     /* Same ID: order the two instants by the type of the first one. */
297  if (ev_inst1->type == CCL_PROF_INST_TYPE_START)
298  return sort.order ? 1 : -1;
299  if (ev_inst1->type == CCL_PROF_INST_TYPE_END)
300  return sort.order ? -1 : 1;
301  /* We shouldn't get here. */
302  default:
303  g_return_val_if_reached(0);
304  }
305 }
306 
316 static CCLProfAgg* ccl_prof_agg_new(const char* event_name) {
317  CCLProfAgg* agg = g_slice_new(CCLProfAgg);
318  agg->event_name = event_name;
319  return agg;
320 }
321 
330 static void ccl_prof_agg_destroy(CCLProfAgg* agg) {
331  g_return_if_fail(agg != NULL);
332  g_slice_free(CCLProfAgg, agg);
333 }
334 
/**
 * Comparison callback for CCLProfAgg objects; in this file it is passed
 * to g_list_sort_with_data().
 *
 * `userdata` must point to an int, which is decoded into sort order and
 * criteria by ccl_prof_get_sort(). One statement compares the event
 * names, the other the absolute times.
 *
 * NOTE(review): this listing has no `case` labels in front of the two
 * `return` statements (the embedded numbering jumps 359->361 and
 * 364->366), so as written only `default:` is reachable. The labels
 * must be restored from the original source.
 */
348 static gint ccl_prof_agg_comp(
349  gconstpointer a, gconstpointer b, gpointer userdata) {
350 
351  /* Cast input parameters to aggregate event data structures. */
352  CCLProfAgg* ev_agg1 = (CCLProfAgg*) a;
353  CCLProfAgg* ev_agg2 = (CCLProfAgg*) b;
354  CCLProfSort sort = ccl_prof_get_sort(userdata);
355 
356  /* Perform comparison. */
357  switch ((CCLProfAggSort) sort.criteria) {
358 
359  /* Sort by event name. */
361  return CCL_PROF_CMP_STR(ev_agg1->event_name,
362  ev_agg2->event_name, sort.order);
363 
364  /* Sort by absolute time. */
366  return CCL_PROF_CMP_INT(ev_agg1->absolute_time,
367  ev_agg2->absolute_time, sort.order);
368 
369  /* We shouldn't get here. */
370  default:
371  g_return_val_if_reached(0);
372  }
373 
374 }
375 
397 static CCLProfInfo* ccl_prof_info_new(const char* event_name,
398  cl_command_type command_type, const char* queue_name,
399  cl_ulong t_queued, cl_ulong t_submit, cl_ulong t_start,
400  cl_ulong t_end) {
401 
402  CCLProfInfo* info = g_slice_new(CCLProfInfo);
403 
404  info->event_name = event_name;
405  info->command_type = command_type;
406  info->queue_name = queue_name;
407  info->t_queued = t_queued;
408  info->t_submit = t_submit;
409  info->t_start = t_start;
410  info->t_end = t_end;
411 
412  return info;
413 }
414 
423 static void ccl_prof_info_destroy(CCLProfInfo* info) {
424  g_return_if_fail(info != NULL);
425  g_slice_free(CCLProfInfo, info);
426 }
427 
/**
 * Comparison callback for CCLProfInfo objects; in this file it is
 * passed to g_list_sort_with_data().
 *
 * `userdata` must point to an int, which is decoded into sort order and
 * criteria by ccl_prof_get_sort(). The statements compare, in turn, the
 * event name, queue name, queued, submit, start and end instants.
 *
 * NOTE(review): this listing has no `case` labels in front of the six
 * `return` statements (the embedded numbering skips 452, 457, 462, 467,
 * 472 and 477), so as written only `default:` is reachable. The labels
 * must be restored from the original source.
 */
441 static gint ccl_prof_info_comp(
442  gconstpointer a, gconstpointer b, gpointer userdata) {
443 
444  /* Cast input parameters to event profiling info data structures. */
445  CCLProfInfo* ev1 = (CCLProfInfo*) a;
446  CCLProfInfo* ev2 = (CCLProfInfo*) b;
447  CCLProfSort sort = ccl_prof_get_sort(userdata);
448  /* Perform comparison. */
449  switch ((CCLProfInfoSort) sort.criteria) {
450 
451  /* Sort event profiling info instances by event name. */
453  return CCL_PROF_CMP_STR(ev1->event_name, ev2->event_name,
454  sort.order);
455 
456  /* Sort event profiling info instances by queue name. */
458  return CCL_PROF_CMP_STR(ev1->queue_name, ev2->queue_name,
459  sort.order);
460 
461  /* Sort event profiling info instances by queued time. */
463  return CCL_PROF_CMP_INT(ev1->t_queued, ev2->t_queued,
464  sort.order);
465 
466  /* Sort event profiling info instances by submit time. */
468  return CCL_PROF_CMP_INT(ev1->t_submit, ev2->t_submit,
469  sort.order);
470 
471  /* Sort event profiling info instances by start time. */
473  return CCL_PROF_CMP_INT(ev1->t_start, ev2->t_start,
474  sort.order);
475 
476  /* Sort event profiling info instances by end time. */
478  return CCL_PROF_CMP_INT(ev1->t_end, ev2->t_end, sort.order);
479 
480  /* We shouldn't get here. */
481  default:
482  g_return_val_if_reached(0);
483  }
484 
485 }
486 
498 static CCLProfOverlap* ccl_prof_overlap_new(const char* event1_name,
499  const char* event2_name, cl_ulong duration) {
500 
501  CCLProfOverlap* ovlp = g_slice_new(CCLProfOverlap);
502 
503  ovlp->event1_name = event1_name;
504  ovlp->event2_name = event2_name;
505  ovlp->duration = duration;
506 
507  return ovlp;
508 
509 }
510 
519 static void ccl_prof_overlap_destroy(CCLProfOverlap* ovlp) {
520 
521  g_return_if_fail(ovlp != NULL);
522  g_slice_free(CCLProfOverlap, ovlp);
523 
524 }
525 
/**
 * Comparison callback for CCLProfOverlap objects, with the signature
 * expected by g_list_sort_with_data().
 *
 * `userdata` must point to an int, which is decoded into sort order and
 * criteria by ccl_prof_get_sort(). One group of statements compares the
 * first event names and, if they are equal, the second event names; the
 * other statement compares the overlap durations.
 *
 * NOTE(review): this listing has no `case` labels in front of the two
 * comparison groups (the embedded numbering jumps 550->552 and
 * 560->562), so as written only `default:` is reachable. The labels
 * must be restored from the original source.
 */
538 static gint ccl_prof_overlap_comp(
539  gconstpointer a, gconstpointer b, gpointer userdata) {
540 
541  gint result;
542 
543  /* Cast input parameters to event overlap data structures. */
544  CCLProfOverlap* ovlp1 = (CCLProfOverlap*) a;
545  CCLProfOverlap* ovlp2 = (CCLProfOverlap*) b;
546  CCLProfSort sort = ccl_prof_get_sort(userdata);
547  /* Perform comparison. */
548  switch ((CCLProfOverlapSort) sort.criteria) {
549 
550  /* Sort overlap instances by event name. */
552  result = CCL_PROF_CMP_STR(ovlp1->event1_name,
553  ovlp2->event1_name, sort.order);
554  if (result != 0)
555  return result;
556  else
557  return CCL_PROF_CMP_STR(ovlp1->event2_name,
558  ovlp2->event2_name, sort.order);
559 
560  /* Sort overlap instances by overlap duration. */
562  return CCL_PROF_CMP_INT(ovlp1->duration, ovlp2->duration,
563  sort.order);
564 
565  /* We shouldn't get here. */
566  default:
567  g_return_val_if_reached(0);
568  }
569 
570 }
571 
/**
 * Add the profiling data of one event to the profile object.
 *
 * The function queries the event's queued, submit, start and end
 * instants and its command type. If any query fails the error is
 * propagated through `err` and the profile object is left unmodified,
 * because all queries happen before the first write to `prof`.
 *
 * On success it:
 *  - increments `prof->num_events` and uses the new value as event ID;
 *  - registers the event name in `prof->event_names` if not yet there,
 *    using the table size at that moment as the event name ID;
 *  - if the end instant is greater than the start instant, prepends a
 *    START and an END instant to `prof->instants` and lowers
 *    `prof->t_start` if needed; otherwise only logs a message;
 *  - always prepends a CCLProfInfo record to `prof->infos`.
 *
 * @param[in] prof Profile object (must not be NULL).
 * @param[in] cq_name Name of the queue the event belongs to (must not
 * be NULL); stored by pointer in the created records.
 * @param[in] evt Event wrapper (must not be NULL).
 * @param[out] err Error reporting object, may be NULL.
 *
 * NOTE(review): the line that starts the end-instant query (embedded
 * number 632) is missing from this listing, so `instant_end` is never
 * assigned in the code as shown. It must be restored from the original
 * source.
 */
584 static void ccl_prof_add_event(CCLProf* prof, const char* cq_name,
585  CCLEvent* evt, CCLErr** err) {
586 
587  /* Make sure err is NULL or it is not set. */
588  g_return_if_fail(err == NULL || *err == NULL);
589  /* Make sure profile object is not NULL. */
590  g_return_if_fail(prof != NULL);
591  /* Make sure command queue name is not NULL. */
592  g_return_if_fail(cq_name != NULL);
593  /* Make sure event wrapper is not NULL. */
594  g_return_if_fail(evt != NULL);
595 
596  /* Event name ID. Holds an integer packed with GUINT_TO_POINTER; it is
      * never dereferenced. */
597  cl_uint* event_name_id;
598  /* Specific event ID. */
599  cl_uint event_id;
600  /* Event instants. */
601  cl_ulong instant_queued, instant_submit, instant_start, instant_end;
602  /* Type of command which produced the event. */
603  cl_command_type command_type;
604  /* Event instant objects. */
605  CCLProfInst* evinst_start;
606  CCLProfInst* evinst_end;
607  /* Internal error handling object. */
608  CCLErr* err_internal = NULL;
609 
610  /* Event name. */
611  const char* event_name;
612 
613  /* Get event name. */
614  event_name = ccl_event_get_final_name(evt);
615 
616  /* Get event queued instant. */
617  instant_queued = ccl_event_get_profiling_info_scalar(
618  evt, CL_PROFILING_COMMAND_QUEUED, cl_ulong, &err_internal);
619  ccl_if_err_propagate_goto(err, err_internal, error_handler);
620 
621  /* Get event submit instant. */
622  instant_submit = ccl_event_get_profiling_info_scalar(
623  evt, CL_PROFILING_COMMAND_SUBMIT, cl_ulong, &err_internal);
624  ccl_if_err_propagate_goto(err, err_internal, error_handler);
625 
626  /* Get event start instant. */
627  instant_start = ccl_event_get_profiling_info_scalar(
628  evt, CL_PROFILING_COMMAND_START, cl_ulong, &err_internal);
629  ccl_if_err_propagate_goto(err, err_internal, error_handler);
630 
631  /* Get event end instant. */
     /* NOTE(review): the opening line of this call is missing here. */
633  evt, CL_PROFILING_COMMAND_END, cl_ulong, &err_internal);
634  ccl_if_err_propagate_goto(err, err_internal, error_handler);
635 
636  /* Get command type. */
637  command_type = ccl_event_get_info_scalar(
638  evt, CL_EVENT_COMMAND_TYPE, cl_command_type, &err_internal);
639  ccl_if_err_propagate_goto(err, err_internal, error_handler);
640 
641  /* If we get here, update number of profilable events, and get an ID
642  * for the given event. */
643  event_id = ++prof->num_events;
644 
645  /* Check if event name is already registered in the table of event
646  * names... */
647  if (!g_hash_table_contains(prof->event_names, event_name)) {
648  /* ...if not, register it. */
649  event_name_id =
650  GUINT_TO_POINTER(g_hash_table_size(prof->event_names));
651  g_hash_table_insert(
652  prof->event_names,
653  (gpointer) event_name,
654  (gpointer) event_name_id);
655  }
656 
657  /* If end instant occurs after start instant... */
658  if (instant_end > instant_start) {
659 
660  /* Add event start instant to list of event instants. */
661  evinst_start = ccl_prof_inst_new(event_name, cq_name, event_id,
662  instant_start, CCL_PROF_INST_TYPE_START);
663  prof->instants = g_list_prepend(
664  prof->instants, (gpointer) evinst_start);
665 
666  /* Add event end instant to list of event instants. */
667  evinst_end = ccl_prof_inst_new(event_name, cq_name, event_id,
668  instant_end, CCL_PROF_INST_TYPE_END);
669  prof->instants = g_list_prepend(
670  prof->instants, (gpointer) evinst_end);
671 
672  /* Check if start instant is the oldest instant. If so, keep it. */
673  if (instant_start < prof->t_start)
674  prof->t_start = instant_start;
675 
676  } else {
677 
678  g_info("Event '%s' did not use device time. As such its "\
679  "start and end instants will not be added to the list of "\
680  "event instants.", event_name);
681 
682  }
683 
684  /* Add event information to list of event information. */
685  prof->infos = g_list_prepend(prof->infos,
686  (gpointer) ccl_prof_info_new(event_name, command_type, cq_name,
687  instant_queued, instant_submit, instant_start, instant_end));
688 
689  /* If we got here, everything is OK. */
690  g_assert(err == NULL || *err == NULL);
691  goto finish;
692 
693 error_handler:
694  /* If we got here there was an error, verify that it is so. */
695  g_assert(err == NULL || *err != NULL);
696 
697 finish:
698 
699  /* Return. */
700  return;
701 }
702 
/**
 * Gather profiling data from every event of every queue registered in
 * the profile object.
 *
 * For each entry of `prof->queues` the function reads the queue
 * properties and raises an error if CL_QUEUE_PROFILING_ENABLE is not
 * set. It then iterates the queue's events, handing each one to
 * ccl_prof_add_event(). An event whose profiling information is
 * reported as unavailable (CL_PROFILING_INFO_NOT_AVAILABLE in the
 * CCL_OCL_ERROR domain, or CCL_ERROR_INFO_UNAVAILABLE_OCL in the
 * CCL_ERROR domain) is skipped after clearing the error; any other
 * error stops processing and is propagated through `err`. After a
 * queue's events are processed, ccl_queue_gc() is called on it.
 *
 * @param[in] prof Profile object (must not be NULL).
 * @param[out] err Error reporting object, may be NULL.
 *
 * NOTE(review): two lines are missing from this listing: the opening
 * line of the "profiling enabled" check (embedded number 739) and the
 * final argument line of the g_info() call (embedded number 765). Both
 * must be restored from the original source.
 */
714 static void ccl_prof_process_queues(CCLProf* prof, CCLErr** err) {
715 
716  /* Make sure err is NULL or it is not set. */
717  g_return_if_fail(err == NULL || *err == NULL);
718  /* Make sure profile object is not NULL. */
719  g_return_if_fail(prof != NULL);
720 
721  /* Hash table iterator. */
722  GHashTableIter iter;
723  /* Command queue name and wrapper. */
724  gpointer cq_name;
725  gpointer cq;
726  /* Queue properties. */
727  cl_command_queue_properties qprop;
728  /* Internal error reporting object. */
729  CCLErr* err_internal = NULL;
730 
731  /* Iterate over the command queues. */
732  g_hash_table_iter_init(&iter, prof->queues);
733  while (g_hash_table_iter_next(&iter, &cq_name, &cq)) {
734 
735  /* Check that queue has profiling enabled. */
736  qprop = ccl_queue_get_info_scalar(cq, CL_QUEUE_PROPERTIES,
737  cl_command_queue_properties, &err_internal);
738  ccl_if_err_propagate_goto(err, err_internal, error_handler);
     /* NOTE(review): the opening line of the next statement is missing. */
740  (qprop & CL_QUEUE_PROFILING_ENABLE) == 0, CCL_ERROR_OTHER,
741  error_handler,
742  "%s: the '%s' queue does not have profiling enabled.",
743  G_STRLOC, (char*) cq_name);
744 
745  /* Iterate over the events in current command queue. */
746  CCLEvent* evt;
747  ccl_queue_iter_event_init((CCLQueue*) cq);
748  while ((evt = ccl_queue_iter_event_next((CCLQueue*) cq))) {
749 
750  /* Add event for profiling. */
751  ccl_prof_add_event(
752  prof, (const char*) cq_name, evt, &err_internal);
753  if ((err_internal != NULL) &&
754  (((err_internal->domain == CCL_OCL_ERROR) &&
755  (err_internal->code == CL_PROFILING_INFO_NOT_AVAILABLE))
756  ||
757  ((err_internal->domain == CCL_ERROR) &&
758  (err_internal->code == CCL_ERROR_INFO_UNAVAILABLE_OCL)))) {
759 
760  /* Some types of events in certain platforms don't
761  * provide profiling info. Don't stop profiling,
762  * ignore this specific event, but log a message
763  * saying so. */
764  g_info("The '%s' event does not have profiling info",
766  g_clear_error(&err_internal);
767  continue;
768  }
769  ccl_if_err_propagate_goto(err, err_internal, error_handler);
770 
771  }
772 
773  /* Release queue events. */
774  ccl_queue_gc((CCLQueue*) cq);
775  }
776 
777  /* If we got here, everything is OK. */
778  g_assert(err == NULL || *err == NULL);
779  goto finish;
780 
781 error_handler:
782  /* If we got here there was an error, verify that it is so. */
783  g_assert(err == NULL || *err != NULL);
784 
785 finish:
786 
787  /* Return. */
788  return;
789 
790 }
791 
/**
 * Compute the aggregate statistics (absolute and relative time per
 * event name) and store them as a list in `prof->aggs`.
 *
 * A temporary table with one zeroed CCLProfAgg per known event name is
 * created. The list of instants is sorted, then walked two elements at
 * a time: the first element of each pair is taken as the start instant
 * and the second as the end instant, and their difference is added to
 * the aggregate of the event name and to `prof->total_events_time`.
 * Finally each aggregate's relative time is computed as its absolute
 * time divided by the total. The aggregates outlive the temporary
 * table because the table has no value destroy function.
 *
 * @param[in] prof Profile object (must not be NULL).
 *
 * NOTE(review): the line assigning `sort_type` (embedded number 835) is
 * missing from this listing, so the variable is passed to the sort
 * uninitialized in the code as shown; restore it from the original
 * source.
 * NOTE(review): the pairing loop dereferences `->next` without a NULL
 * check, so it relies on the list holding an even number of instants
 * with each start immediately followed by its end.
 * NOTE(review): if `prof->total_events_time` is zero the relative time
 * is computed as a floating point division by zero -- confirm this is
 * acceptable to callers.
 */
800 static void ccl_prof_calc_agg(CCLProf* prof) {
801 
802  /* Make sure profile object is not NULL. */
803  g_return_if_fail(prof != NULL);
804 
805  /* Hash table iterator. */
806  GHashTableIter iter;
807  /* Aux. hash table for aggregate statistics. */
808  GHashTable* agg_table;
809  /* A pointer for a event name. */
810  gpointer event_name;
811  /* Aggregate event info. */
812  CCLProfAgg* evagg = NULL;
813  /* Type of sorting to perform on event list. */
814  int sort_type;
815  /* Aux. pointer for event data structure kept in a GList. */
816  GList* curr_evinst_container = NULL;
817  /* A pointer to a CCLProfAgg (agg. event info) variable. */
818  gpointer value_agg;
819  /* Auxiliary aggregate event info variable.*/
820  CCLProfAgg* curr_agg = NULL;
821 
822  /* Create table of aggregate statistics. */
823  agg_table = g_hash_table_new(g_str_hash, g_str_equal);
824 
825  /* Initialize table, and set aggregate values to zero. */
826  g_hash_table_iter_init(&iter, prof->event_names);
827  while (g_hash_table_iter_next(&iter, &event_name, NULL)) {
828  evagg = ccl_prof_agg_new(event_name);
829  evagg->absolute_time = 0;
830  g_hash_table_insert(
831  agg_table, event_name, (gpointer) evagg);
832  }
833 
834  /* Sort event instants by eid, and then by START, END order. */
     /* NOTE(review): the assignment to sort_type is missing here. */
836  prof->instants = g_list_sort_with_data(
837  prof->instants, ccl_prof_inst_comp,
838  (gpointer) &sort_type);
839 
840  /* Iterate through all event instants and determine total times. */
841  curr_evinst_container = prof->instants;
842  while (curr_evinst_container) {
843 
844  /* Loop aux. variables. */
845  CCLProfInst* curr_evinst = NULL;
846  cl_ulong start_inst, end_inst;
847 
848  /* Get START event instant. */
849  curr_evinst = (CCLProfInst*) curr_evinst_container->data;
850  start_inst = curr_evinst->instant;
851 
852  /* Get END event instant */
853  curr_evinst_container = curr_evinst_container->next;
854  curr_evinst = (CCLProfInst*) curr_evinst_container->data;
855  end_inst = curr_evinst->instant;
856 
857  /* Add new interval to respective aggregate value. */
858  curr_agg = (CCLProfAgg*) g_hash_table_lookup(
859  agg_table, curr_evinst->event_name);
860  curr_agg->absolute_time += end_inst - start_inst;
861  prof->total_events_time += end_inst - start_inst;
862 
863  /* Get next START event instant. */
864  curr_evinst_container = curr_evinst_container->next;
865  }
866 
867  /* Determine relative times. */
868  g_hash_table_iter_init(&iter, agg_table);
869  while (g_hash_table_iter_next(&iter, &event_name, &value_agg)) {
870  curr_agg = (CCLProfAgg*) value_agg;
871  curr_agg->relative_time =
872  ((double) curr_agg->absolute_time)
873  /
874  ((double) prof->total_events_time);
875  }
876 
877  /* Keep a list of aggregates. */
878  prof->aggs = g_hash_table_get_values(agg_table);
879 
880  /* Release aux. hash table. */
881  g_hash_table_destroy(agg_table);
882 
883 }
884 
/**
 * Determine how long events overlapped each other, store one
 * CCLProfOverlap per overlapping pair of event names in
 * `prof->overlaps`, and set `prof->total_events_eff_time` to the total
 * events time minus the accumulated overlap.
 *
 * The sorted list of instants is swept once while a table of currently
 * occurring events (event ID -> event name ID) is maintained:
 *  - on a START instant, for every occurring event the start instant of
 *    the overlap is recorded in a two-level table keyed by the smaller
 *    and then the larger of the two event IDs; the event is then added
 *    to the occurring events;
 *  - on an END instant, the event is removed from the occurring events
 *    and, for every event still occurring, the overlap (end instant
 *    minus recorded overlap start) is added to the overlap matrix cell
 *    indexed by the (smaller, larger) event name IDs and to the total.
 * The matrix is finally converted into the list of overlaps, using
 * `prof->event_name_ids` to translate IDs back into names.
 *
 * The tables store pointers into the CCLProfInst objects (`id` as key,
 * `instant` as value), so these objects must stay alive during the
 * sweep.
 *
 * @param[in] prof Profile object (must not be NULL).
 *
 * NOTE(review): the line assigning `sort_type` (embedded number 928) is
 * missing from this listing, so the variable is passed to the sort
 * uninitialized in the code as shown; restore it from the original
 * source.
 */
893 static void ccl_prof_calc_overlaps(CCLProf* prof) {
894 
895  /* Make sure profile object is not NULL. */
896  g_return_if_fail(prof != NULL);
897 
898  /* Total overlap time. */
899  cl_ulong total_overlap = 0;
900  /* Overlap matrix. */
901  cl_ulong* overlap_matrix = NULL;
902  /* Number of event names. */
903  cl_uint num_event_names;
904  /* Helper table to account for all overlapping events. */
905  GHashTable* overlaps = NULL;
906  /* Occurring events table. */
907  GHashTable* occurring_events = NULL;
908  /* Type of sorting to perform. */
909  CCLProfInstSort sort_type;
910  /* Container for current event instants. */
911  GList* curr_evinst_container;
912 
913  /* Determine number of event names. */
914  num_event_names = g_hash_table_size(prof->event_names);
915 
916  /* Initialize overlap matrix. */
917  overlap_matrix = g_slice_alloc0(
918  sizeof(cl_ulong) * num_event_names * num_event_names);
919 
920  /* Initialize helper table to account for all overlapping events. */
     /* Values are inner hash tables, destroyed together with this table. */
921  overlaps = g_hash_table_new_full(g_direct_hash, g_direct_equal,
922  NULL, (GDestroyNotify) g_hash_table_destroy);
923 
924  /* Setup occurring events table (key: eventID, value: eventNameID) */
925  occurring_events = g_hash_table_new(g_int_hash, g_int_equal);
926 
927  /* Sort all event instants. */
     /* NOTE(review): the assignment to sort_type is missing here. */
929  prof->instants = g_list_sort_with_data(prof->instants,
930  ccl_prof_inst_comp, (gpointer) &sort_type);
931 
932  /* Iterate through all event instants */
933  curr_evinst_container = prof->instants;
934  while (curr_evinst_container) {
935 
936  /* ** Loop aux. variables. ** */
937 
938  /* Current event instant. */
939  CCLProfInst* curr_evinst = NULL;
940  /* Inner hash table (is value for overlap hash table). */
941  GHashTable* inner_table = NULL;
942  /* Hash table iterator. */
943  GHashTableIter iter;
944  /* Hashtable key, event name ID for current event, event
945  * name ID for occurring event. */
946  gpointer key_eid, ueid_curr_ev, ueid_occu_ev;
947  /* Keys for start and end event instants. */
948  cl_uint eid_key1, eid_key2;
949  /* Event overlap in nanoseconds. */
950  cl_ulong eff_overlap;
951 
952  /* Get current event instant. */
953  curr_evinst = (CCLProfInst*) curr_evinst_container->data;
954 
955  /* Check if event time is START or END time */
956  if (curr_evinst->type == CCL_PROF_INST_TYPE_START) {
957  /* Event START instant. */
958 
959  /* 1 - Check for overlaps with occurring events */
960 
961  g_hash_table_iter_init(&iter, occurring_events);
962  while (g_hash_table_iter_next (&iter, &key_eid, NULL)) {
963 
964  /* The first hash table key will be the smaller event id. */
965  eid_key1 = curr_evinst->id <= *((cl_uint*) key_eid)
966  ? curr_evinst->id
967  : *((cl_uint*) key_eid);
968  /* The second hash table key will be the larger event id. */
969  eid_key2 = curr_evinst->id > *((cl_uint*) key_eid)
970  ? curr_evinst->id
971  : *((cl_uint*) key_eid);
972  /* Check if the first key (smaller id) is already in the
973  * hash table... */
974  if (!g_hash_table_lookup_extended(overlaps,
975  GUINT_TO_POINTER(eid_key1), NULL,
976  (gpointer) &inner_table)) {
977  /* ...if not in table, add it to table, creating a new
978  * inner table as value. Inner table will be initialized
979  * with second key (larger id) as key and event start
980  * instant as value. */
981  inner_table = g_hash_table_new(
982  g_direct_hash, g_direct_equal);
983  g_hash_table_insert(
984  overlaps, GUINT_TO_POINTER(eid_key1), inner_table);
985  }
986  /* Add second key (larger id) to inner table, setting the
987  * start instant as the value. */
988  g_hash_table_insert(
989  inner_table,
990  GUINT_TO_POINTER(eid_key2),
991  &(curr_evinst->instant));
992  }
993 
994  /* 2 - Add event to occurring events. */
995  g_hash_table_insert(
996  occurring_events,
997  &(curr_evinst->id), /* eid */
998  g_hash_table_lookup(prof->event_names,
999  curr_evinst->event_name) /* ueid */
1000  );
1001 
1002  } else {
1003  /* Event END instant. */
1004 
1005  /* 1 - Remove event from occurring events */
1006  g_hash_table_remove(occurring_events, &(curr_evinst->id));
1007 
1008  /* 2 - Check for overlap termination with current events */
1009  g_hash_table_iter_init(&iter, occurring_events);
1010  while (g_hash_table_iter_next(&iter, &key_eid, &ueid_occu_ev)) {
1011  /* The first hash table key will be the smaller event id. */
1012  eid_key1 = curr_evinst->id <= *((cl_uint*) key_eid)
1013  ? curr_evinst->id
1014  : *((cl_uint*) key_eid);
1015  /* The second hash table key will be the larger event id. */
1016  eid_key2 = curr_evinst->id > *((cl_uint*) key_eid)
1017  ? curr_evinst->id
1018  : *((cl_uint*) key_eid);
1019  /* Get effective overlap in nanoseconds. */
1020  inner_table = g_hash_table_lookup(
1021  overlaps, GUINT_TO_POINTER(eid_key1));
1022  eff_overlap =
1023  curr_evinst->instant
1024  -
1025  *((cl_ulong*) g_hash_table_lookup(
1026  inner_table, GUINT_TO_POINTER(eid_key2)));
1027  /* Add overlap to overlap matrix. */
1028  ueid_curr_ev = g_hash_table_lookup(
1029  prof->event_names, curr_evinst->event_name);
1030  cl_uint ueid_min =
1031  GPOINTER_TO_UINT(ueid_curr_ev)
1032  <= GPOINTER_TO_UINT(ueid_occu_ev)
1033  ? GPOINTER_TO_UINT(ueid_curr_ev)
1034  : GPOINTER_TO_UINT(ueid_occu_ev);
1035  cl_uint ueid_max =
1036  GPOINTER_TO_UINT(ueid_curr_ev)
1037  > GPOINTER_TO_UINT(ueid_occu_ev)
1038  ? GPOINTER_TO_UINT(ueid_curr_ev)
1039  : GPOINTER_TO_UINT(ueid_occu_ev);
1040  overlap_matrix[ueid_min * num_event_names + ueid_max] +=
1041  eff_overlap;
1042  total_overlap += eff_overlap;
1043  }
1044  }
1045 
1046  /* Get next event instant. */
1047  curr_evinst_container = curr_evinst_container->next;
1048  }
1049 
1050  /* Populate list of overlaps. */
1051  for (cl_uint i = 0; i < num_event_names; i++) {
1052  for (cl_uint j = 0; j < num_event_names; j++) {
1053  if (overlap_matrix[i * num_event_names + j] > 0) {
1054  /* There is an overlap here, create overlap object... */
1055  CCLProfOverlap* ovlp = ccl_prof_overlap_new(
1056  (const char*) g_hash_table_lookup(
1057  prof->event_name_ids, GUINT_TO_POINTER(i)),
1058  (const char*) g_hash_table_lookup(
1059  prof->event_name_ids, GUINT_TO_POINTER(j)),
1060  overlap_matrix[i * num_event_names + j]);
1061  /* ...and add it to list of overlaps. */
1062  prof->overlaps = g_list_prepend(
1063  prof->overlaps, (gpointer) ovlp);
1064  }
1065  }
1066  }
1067 
1068  /* Determine and save effective events time. */
1069  prof->total_events_eff_time = prof->total_events_time - total_overlap;
1070 
1071  /* Free the overlaps matrix. */
1072  g_slice_free1(sizeof(cl_ulong) * num_event_names * num_event_names,
1073  overlap_matrix);
1074 
1075  /* Free overlaps hash table. */
1076  g_hash_table_destroy(overlaps);
1077 
1078  /* Free occurring_events hash table. */
1079  g_hash_table_destroy(occurring_events);
1080 
1081 }
1082 
/**
 * Create a new, empty profile object.
 *
 * The object is allocated zero-filled with g_slice_new0(), so all
 * tables, lists and counters start as NULL/0; only `t_start` is set
 * explicitly, to CL_ULONG_MAX, so that any real start instant compares
 * lower.
 *
 * @return The newly allocated profile object.
 *
 * NOTE(review): the line holding the function signature (embedded
 * number 1096) is missing from this listing; restore it from the
 * original source.
 */
1095 CCL_EXPORT
1097 
1098  /* Allocate memory for new profile data structure. */
1099  CCLProf* prof = g_slice_new0(CCLProf);
1100 
1101  /* Set absolute start time to maximum possible. */
1102  prof->t_start = CL_ULONG_MAX;
1103 
1104  /* Return new profile data structure */
1105  return prof;
1106 
1107 }
1108 
/**
 * Destroy a profile object and everything it owns.
 *
 * Each member is released only if it is not NULL: the three hash
 * tables, the four lists together with their elements, the summary
 * string and the timer. Destroying the queues table also invokes its
 * value destroy function on the stored queue wrappers. The profile
 * structure itself is released last.
 *
 * @param[in] prof Profile object to destroy (must not be NULL).
 *
 * NOTE(review): the line holding the function signature (embedded
 * number 1117) is missing from this listing; restore it from the
 * original source.
 */
1116 CCL_EXPORT
1118 
1119  /* Profile to destroy cannot be NULL. */
1120  g_return_if_fail(prof != NULL);
1121 
1122  /* Destroy table of event names. */
1123  if (prof->event_names != NULL)
1124  g_hash_table_destroy(prof->event_names);
1125 
1126  /* Destroy table of event IDs. */
1127  if (prof->event_name_ids != NULL)
1128  g_hash_table_destroy(prof->event_name_ids);
1129 
1130  /* Destroy table of command queue wrappers. */
1131  if (prof->queues != NULL)
1132  g_hash_table_destroy(prof->queues);
1133 
1134  /* Destroy list of all event instants. */
1135  if (prof->instants != NULL)
1136  g_list_free_full(
1137  prof->instants, (GDestroyNotify) ccl_prof_inst_destroy);
1138 
1139  /* Destroy list of event profiling information. */
1140  if (prof->infos != NULL)
1141  g_list_free_full(
1142  prof->infos, (GDestroyNotify) ccl_prof_info_destroy);
1143 
1144  /* Destroy list of aggregate statistics. */
1145  if (prof->aggs != NULL)
1146  g_list_free_full(
1147  prof->aggs, (GDestroyNotify) ccl_prof_agg_destroy);
1148 
1149  /* Destroy list of event overlaps. */
1150  if (prof->overlaps != NULL)
1151  g_list_free_full(
1152  prof->overlaps, (GDestroyNotify) ccl_prof_overlap_destroy);
1153 
1154  /* Free the summary string. */
1155  if (prof->summary != NULL)
1156  g_free(prof->summary);
1157 
1158  /* Destroy timer. */
1159  if (prof->timer != NULL)
1160  g_timer_destroy(prof->timer);
1161 
1162  /* Destroy profile data structure. */
1163  g_slice_free(CCLProf, prof);
1164 
1165 }
1166 
/**
 * Start the profile object's timer by creating a new GTimer and storing
 * it in `prof->timer`.
 *
 * @param[in] prof Profile object (must not be NULL).
 *
 * NOTE(review): the line holding the function signature (embedded
 * number 1177) is missing from this listing; restore it from the
 * original source.
 * NOTE(review): a previously stored timer is overwritten without being
 * destroyed, so calling this twice on the same object looks like it
 * leaks the first timer -- confirm whether that usage is supported.
 */
1176 CCL_EXPORT
1178 
1179  /* Make sure profile is not NULL. */
1180  g_return_if_fail(prof != NULL);
1181 
1182  /* Start timer. */
1183  prof->timer = g_timer_new();
1184 }
1185 
1194 CCL_EXPORT
1195 void ccl_prof_stop(CCLProf* prof) {
1196 
1197  /* Make sure profile is not NULL. */
1198  g_return_if_fail(prof != NULL);
1199 
1200  /* Stop timer. */
1201  g_timer_stop(prof->timer);
1202 }
1203 
/**
 * Return the elapsed time reported by the profile object's timer, as
 * given by g_timer_elapsed(). The timer is only queried, it is not
 * stopped here.
 *
 * @param[in] prof Profile object (must not be NULL).
 * @return The value returned by g_timer_elapsed(), or 0.0 if `prof` is
 * NULL.
 *
 * NOTE(review): the line holding the function signature (embedded
 * number 1215) is missing from this listing; restore it from the
 * original source.
 */
1214 CCL_EXPORT
1216 
1217  /* Make sure profile is not NULL. */
1218  g_return_val_if_fail(prof != NULL, 0.0);
1219 
1220  /* Query the timer for the elapsed time. */
1221  return g_timer_elapsed(prof->timer, NULL);
1222 }
1223 
/**
 * Register a command queue wrapper in the profile object under the
 * given name.
 *
 * The queues table is created on first use. The name is stored by
 * pointer (no copy, no key destroy function), so the string must
 * outlive the table entry. The queue's reference count is incremented,
 * and the table releases stored queues with ccl_queue_destroy(). If an
 * entry is found for the name a warning is logged and the entry is
 * replaced. Queues can only be added while `prof->calc` is FALSE.
 *
 * @param[in] prof Profile object (must not be NULL).
 * @param[in] cq_name Name to register the queue under.
 * @param[in] cq Command queue wrapper (must not be NULL).
 *
 * NOTE(review): the opening line of the function signature (embedded
 * number 1234) is missing from this listing; restore it from the
 * original source.
 * NOTE(review): the table hashes keys with g_str_hash but compares them
 * with g_direct_equal, i.e. by pointer. Two distinct strings with the
 * same contents are therefore treated as different names, and the
 * "already contains" warning only triggers for the very same pointer.
 * g_str_equal looks like the intended comparison -- confirm.
 * NOTE(review): the two halves of the warning text are concatenated
 * without a space after the period.
 * NOTE(review): `cq_name` is not checked for NULL before being hashed.
 */
1233 CCL_EXPORT
1235  CCLProf* prof, const char* cq_name, CCLQueue* cq) {
1236 
1237  /* Make sure profile is not NULL. */
1238  g_return_if_fail(prof != NULL);
1239  /* Make sure cq is not NULL. */
1240  g_return_if_fail(cq != NULL);
1241  /* Must be added before calculations. */
1242  g_return_if_fail(prof->calc == FALSE);
1243 
1244  /* Check if table needs to be created first. */
1245  if (prof->queues == NULL) {
1246  prof->queues = g_hash_table_new_full(
1247  g_str_hash, g_direct_equal, NULL,
1248  (GDestroyNotify) ccl_queue_destroy);
1249  }
1250  /* Warn if table already contains a queue with the specified
1251  * name. */
1252  if (g_hash_table_contains(prof->queues, cq_name))
1253  g_warning("Profile object already contains a queue named '%s'." \
1254  "The existing queue will be replaced.", cq_name);
1255 
1256  /* Add queue to queue table. */
1257  g_hash_table_replace(prof->queues, (gpointer) cq_name, cq);
1258 
1259  /* Increment queue ref. count. */
1260  ccl_queue_ref(cq);
1261 
1262 }
1263 
1279 CCL_EXPORT
1280 cl_bool ccl_prof_calc(CCLProf* prof, CCLErr** err) {
1281 
1282  /* Make sure prof is not NULL. */
1283  g_return_val_if_fail(prof != NULL, CL_FALSE);
1284  /* Make sure err is NULL or it is not set. */
1285  g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
1286  /* Calculations can only be performed once. */
1287  g_return_val_if_fail(prof->calc == FALSE, CL_FALSE);
1288  /* There must be some queues to process. */
1289  g_return_val_if_fail(prof->queues != NULL, CL_FALSE);
1290 
1291  /* Internal error handling object. */
1292  CCLErr* err_internal = NULL;
1293 
1294  /* Function return status flag. */
1295  cl_bool status;
1296 
1297  /* Hash table iterator. */
1298  GHashTableIter iter;
1299 
1300  /* Auxiliary pointers for determining the table of event_ids. */
1301  gpointer p_evt_name, p_id;
1302 
1303  /* Create table of event names. */
1304  prof->event_names = g_hash_table_new(g_str_hash, g_str_equal);
1305 
1306  /* Process queues and respective events. */
1307  ccl_prof_process_queues(prof, &err_internal);
1308  ccl_if_err_propagate_goto(err, err_internal, error_handler);
1309 
1310  /* Obtain the event_ids table (by reversing the event_names table) */
1311  prof->event_name_ids = g_hash_table_new(g_direct_hash, g_direct_equal);
1312  /* Populate table. */
1313  g_hash_table_iter_init(&iter, prof->event_names);
1314  while (g_hash_table_iter_next(&iter, &p_evt_name, &p_id)) {
1315  g_hash_table_insert(prof->event_name_ids, p_id, p_evt_name);
1316  }
1317 
1318  /* Calculate aggregate statistics. */
1319  ccl_prof_calc_agg(prof);
1320 
1321  /* Determine event overlaps. */
1322  ccl_prof_calc_overlaps(prof);
1323 
1324  /* If we got here, everything is OK. */
1325  g_assert(err == NULL || *err == NULL);
1326  status = CL_TRUE;
1327  prof->calc = TRUE;
1328  goto finish;
1329 
1330 error_handler:
1331  /* If we got here there was an error, verify that it is so. */
1332  g_assert(err == NULL || *err != NULL);
1333  status = CL_FALSE;
1334 
1335 finish:
1336 
1337  /* Return status. */
1338  return status;
1339 
1340 }
1341 
/**
 * Look up the aggregate statistic of the event with the given name.
 *
 * The list `prof->aggs` is searched linearly, comparing names with
 * g_strcmp0(); the first match is returned. The returned object belongs
 * to the profile object. This can only be called after calculations
 * were made (`prof->calc` is TRUE).
 *
 * @param[in] prof Profile object (must not be NULL).
 * @param[in] event_name Name of the event (must not be NULL).
 * @return The matching aggregate statistic, or NULL if none matches or
 * a precondition fails.
 *
 * NOTE(review): the opening line of the function signature (embedded
 * number 1352) is missing from this listing; restore it from the
 * original source.
 */
1351 CCL_EXPORT
1353  CCLProf* prof, const char* event_name) {
1354 
1355  /* Make sure prof is not NULL. */
1356  g_return_val_if_fail(prof != NULL, NULL);
1357  /* Make sure event name is not NULL. */
1358  g_return_val_if_fail(event_name != NULL, NULL);
1359  /* This function can only be called after calculations are made. */
1360  g_return_val_if_fail(prof->calc == TRUE, NULL);
1361 
1362  /* Find the aggregate statistic for the given event. */
1363  CCLProfAgg* agg = NULL;
1364  GList* agg_container = prof->aggs;
1365  while (agg_container != NULL) {
1366  const char* curr_event_name =
1367  ((CCLProfAgg*) agg_container->data)->event_name;
1368  if (g_strcmp0(event_name, curr_event_name) == 0) {
1369  agg = (CCLProfAgg*) agg_container->data;
1370  break;
1371  }
1372  agg_container = agg_container->next;
1373  }
1374 
1375  /* Return result. */
1376  return agg;
1377 }
1378 
1389 CCL_EXPORT
1390 void ccl_prof_iter_agg_init(CCLProf* prof, int sort) {
1391 
1392  /* Make sure prof is not NULL. */
1393  g_return_if_fail(prof != NULL);
1394  /* This function can only be called after calculations are made. */
1395  g_return_if_fail(prof->calc == TRUE);
1396 
1397  /* Sort list of aggregate statistics as requested by client. */
1398  prof->aggs = g_list_sort_with_data(
1399  prof->aggs, ccl_prof_agg_comp, &sort);
1400 
1401  /* Set the iterator as the first element in list. */
1402  prof->agg_iter = prof->aggs;
1403 
1404 }
1405 
1414 CCL_EXPORT
1416 
1417  /* Make sure prof is not NULL. */
1418  g_return_val_if_fail(prof != NULL, NULL);
1419  /* This function can only be called after calculations are made. */
1420  g_return_val_if_fail(prof->calc == TRUE, NULL);
1421 
1422  /* The aggregate statistic to return. */
1423  CCLProfAgg* agg;
1424 
1425  /* Check if there are any more left. */
1426  if (prof->agg_iter != NULL) {
1427  /* Yes, send current one, pass to the next. */
1428  agg = (CCLProfAgg*) prof->agg_iter->data;
1429  prof->agg_iter = prof->agg_iter->next;
1430  } else {
1431  /* Nothing left. */
1432  agg = NULL;
1433  }
1434 
1435  /* Return the aggregate statistic. */
1436  return (const CCLProfAgg*) agg;
1437 }
1438 
1448 CCL_EXPORT
1449 void ccl_prof_iter_info_init(CCLProf* prof, int sort) {
1450 
1451  /* Make sure prof is not NULL. */
1452  g_return_if_fail(prof != NULL);
1453  /* This function can only be called after calculations are made. */
1454  g_return_if_fail(prof->calc == TRUE);
1455 
1456  /* Sort list of event prof. infos as requested by client. */
1457  prof->infos = g_list_sort_with_data(
1458  prof->infos, ccl_prof_info_comp, &sort);
1459 
1460  /* Set the iterator as the first element in list. */
1461  prof->info_iter = prof->infos;
1462 }
1463 
1472 CCL_EXPORT
1474 
1475  /* Make sure prof is not NULL. */
1476  g_return_val_if_fail(prof != NULL, NULL);
1477  /* This function can only be called after calculations are made. */
1478  g_return_val_if_fail(prof->calc == TRUE, NULL);
1479 
1480  /* The event profiling info instance to return. */
1481  CCLProfInfo* info;
1482 
1483  /* Check if there are any more left. */
1484  if (prof->info_iter != NULL) {
1485  /* Yes, send current one, pass to the next. */
1486  info = (CCLProfInfo*) prof->info_iter->data;
1487  prof->info_iter = prof->info_iter->next;
1488  } else {
1489  /* Nothing left. */
1490  info = NULL;
1491  }
1492 
1493  /* Return the profiling info instance. */
1494  return (const CCLProfInfo*) info;
1495 }
1496 
1506 CCL_EXPORT
1507 void ccl_prof_iter_inst_init(CCLProf* prof, int sort) {
1508 
1509  /* Make sure prof is not NULL. */
1510  g_return_if_fail(prof != NULL);
1511  /* This function can only be called after calculations are made. */
1512  g_return_if_fail(prof->calc == TRUE);
1513 
1514  /* Sort list of event instants as requested by client. */
1515  prof->instants = g_list_sort_with_data(
1516  prof->instants, ccl_prof_inst_comp, &sort);
1517 
1518  /* Set the iterator as the first element in list. */
1519  prof->inst_iter = prof->instants;
1520 
1521 }
1522 
1531 CCL_EXPORT
1533 
1534  /* Make sure prof is not NULL. */
1535  g_return_val_if_fail(prof != NULL, NULL);
1536  /* This function can only be called after calculations are made. */
1537  g_return_val_if_fail(prof->calc == TRUE, NULL);
1538 
1539  /* The event profiling info instance to return. */
1540  CCLProfInst* inst;
1541 
1542  /* Check if there are any more left. */
1543  if (prof->inst_iter != NULL) {
1544  /* Yes, send current one, pass to the next. */
1545  inst = (CCLProfInst*) prof->inst_iter->data;
1546  prof->inst_iter = prof->inst_iter->next;
1547  } else {
1548  /* Nothing left. */
1549  inst = NULL;
1550  }
1551 
1552  /* Return the profiling info instance. */
1553  return (const CCLProfInst*) inst;
1554 }
1555 
1566 CCL_EXPORT
1567 void ccl_prof_iter_overlap_init(CCLProf* prof, int sort) {
1568 
1569  /* Make sure prof is not NULL. */
1570  g_return_if_fail(prof != NULL);
1571  /* This function can only be called after calculations are made. */
1572  g_return_if_fail(prof->calc == TRUE);
1573 
1574  /* Sort list of overlaps as requested by client. */
1575  prof->overlaps = g_list_sort_with_data(
1576  prof->overlaps, ccl_prof_overlap_comp, &sort);
1577 
1578  /* Set the iterator as the first element in list. */
1579  prof->overlap_iter = prof->overlaps;
1580 }
1581 
1590 CCL_EXPORT
1592 
1593  /* Make sure prof is not NULL. */
1594  g_return_val_if_fail(prof != NULL, NULL);
1595  /* This function can only be called after calculations are made. */
1596  g_return_val_if_fail(prof->calc == TRUE, NULL);
1597 
1598  /* The overlap instance to return. */
1599  CCLProfOverlap* ovlp;
1600 
1601  /* Check if there are any more left. */
1602  if (prof->overlap_iter != NULL) {
1603  /* Yes, send current one, pass to the next. */
1604  ovlp = (CCLProfOverlap*) prof->overlap_iter->data;
1605  prof->overlap_iter = prof->overlap_iter->next;
1606  } else {
1607  /* Nothing left. */
1608  ovlp = NULL;
1609  }
1610 
1611  /* Return the overlap instance. */
1612  return (const CCLProfOverlap*) ovlp;
1613 }
1614 
1623 CCL_EXPORT
1625 
1626  /* Make sure prof is not NULL. */
1627  g_return_val_if_fail(prof != NULL, 0);
1628  /* This function can only be called after calculations are made. */
1629  g_return_val_if_fail(prof->calc == TRUE, 0);
1630 
1631  /* Return requested data. */
1632  return prof->total_events_time;
1633 
1634 }
1635 
1649 CCL_EXPORT
1651 
1652  /* Make sure prof is not NULL. */
1653  g_return_val_if_fail(prof != NULL, 0);
1654  /* This function can only be called after calculations are made. */
1655  g_return_val_if_fail(prof->calc == TRUE, 0);
1656 
1657  /* Return requested data. */
1658  return prof->total_events_eff_time;
1659 }
1660 
1674 CCL_EXPORT
1676 
1677  /* Make sure prof is not NULL. */
1678  g_return_if_fail(prof != NULL);
1679  /* This function can only be called after calculations are made. */
1680  g_return_if_fail(prof->calc == TRUE);
1681 
1682  /* Summary to print. */
1683  const char* summary;
1684 
1685  /* Get the summary. */
1686  summary = ccl_prof_get_summary(prof,
1689 
1690  /* Print summary. */
1691  g_printf("%s", summary);
1692 
1693 }
1694 
1710 CCL_EXPORT
1712  CCLProf* prof, int agg_sort, int ovlp_sort) {
1713 
1714  /* Make sure prof is not NULL. */
1715  g_return_val_if_fail(prof != NULL, NULL);
1716  /* This function can only be called after calculations are made. */
1717  g_return_val_if_fail(prof->calc == TRUE, NULL);
1718 
1719  /* Current aggregate statistic to print. */
1720  const CCLProfAgg* agg = NULL;
1721  /* Current overlap to print. */
1722  const CCLProfOverlap* ovlp = NULL;
1723  /* The summary string. */
1724  GString* str_obj = g_string_new("\n");
1725 
1726  /* Show aggregate event times */
1727  g_string_append_printf(str_obj,
1728  " Aggregate times by event :\n");
1729  g_string_append_printf(str_obj,
1730  " ------------------------------------------------------------------\n");
1731  g_string_append_printf(str_obj,
1732  " | Event name | Rel. time (%%) | Abs. time (s) |\n");
1733  g_string_append_printf(str_obj,
1734  " ------------------------------------------------------------------\n");
1735  ccl_prof_iter_agg_init(prof, agg_sort);
1736  while ((agg = ccl_prof_iter_agg_next(prof)) != NULL) {
1737  g_string_append_printf(str_obj,
1738  " | %-30.30s | %13.4f | %13.4e |\n",
1739  agg->event_name,
1740  agg->relative_time * 100.0,
1741  agg->absolute_time * 1e-9);
1742  }
1743  g_string_append_printf(str_obj,
1744  " ------------------------------------------------------------------\n");
1745 
1746  /* Show total events time */
1747  if (prof->total_events_time > 0) {
1748  g_string_append_printf(str_obj,
1749  " | Total | %13.4e |\n",
1750  prof->total_events_time * 1e-9);
1751  g_string_append_printf(str_obj,
1752  " ---------------------------------\n");
1753  }
1754 
1755  /* *** Show overlaps *** */
1756 
1757  if (g_list_length(prof->overlaps) > 0) {
1758  /* Title the several overlaps. */
1759  g_string_append_printf(str_obj,
1760  " Event overlaps :\n");
1761  g_string_append_printf(str_obj,
1762  " ------------------------------------------------------------------\n");
1763  g_string_append_printf(str_obj,
1764  " | Event 1 | Event2 | Overlap (s) |\n");
1765  g_string_append_printf(str_obj,
1766  " ------------------------------------------------------------------\n");
1767  /* Show overlaps table. */
1768  ccl_prof_iter_overlap_init(prof, ovlp_sort);
1769  while ((ovlp = ccl_prof_iter_overlap_next(prof)) != NULL) {
1770  g_string_append_printf(str_obj, " | %-22.22s | %-22.22s | %12.4e |\n",
1771  ovlp->event1_name, ovlp->event2_name, ovlp->duration * 1e-9);
1772  }
1773  g_string_append_printf(str_obj,
1774  " ------------------------------------------------------------------\n");
1775  /* Show total events effective time (discount overlaps) */
1776  g_string_append_printf(str_obj,
1777  " | Total | %12.4e |\n",
1778  (prof->total_events_time - prof->total_events_eff_time) * 1e-9);
1779  g_string_append_printf(str_obj,
1780  " -----------------------------------------\n");
1781  g_string_append_printf(str_obj,
1782  " Tot. of all events (eff.) : %es\n",
1783  prof->total_events_eff_time * 1e-9);
1784  } else {
1785  g_string_append_printf(str_obj,
1786  " Event overlaps : None\n");
1787  }
1788 
1789  /* Show total ellapsed time */
1790  if (prof->timer) {
1791  double t_ellapsed = g_timer_elapsed(prof->timer, NULL);
1792  g_string_append_printf(str_obj,
1793  " Total ellapsed time : %es\n", t_ellapsed);
1794  g_string_append_printf(str_obj,
1795  " Time spent in device : %.2f%%\n",
1796  prof->total_events_eff_time * 1e-9 * 100 / t_ellapsed);
1797  g_string_append_printf(str_obj,
1798  " Time spent in host : %.2f%%\n",
1799  100 - prof->total_events_eff_time * 1e-9 * 100 / t_ellapsed);
1800  }
1801  g_string_append_printf(str_obj, "\n");
1802 
1803  /* If a summary already exists, free it before keeping a new one. */
1804  if (prof->summary != NULL)
1805  g_free(prof->summary);
1806 
1807  /* Free String object and keep underlying string in prof struct. */
1808  prof->summary = g_string_free(str_obj, FALSE);
1809 
1810  /* Return summary string. */
1811  return (const char*) prof->summary;
1812 
1813 }
1814 
1844 CCL_EXPORT
1845 cl_bool ccl_prof_export_info(CCLProf* prof, FILE* stream, CCLErr** err) {
1846 
1847  /* Make sure prof is not NULL. */
1848  g_return_val_if_fail(prof != NULL, CL_FALSE);
1849  /* Make sure stream is not NULL. */
1850  g_return_val_if_fail(stream != NULL, CL_FALSE);
1851  /* Make sure err is NULL or it is not set. */
1852  g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
1853  /* This function can only be called after calculations are made. */
1854  g_return_val_if_fail(prof->calc == TRUE, CL_FALSE);
1855 
1856  /* Stream write status. */
1857  int write_status;
1858  /* Return status. */
1859  cl_bool ret_status;
1860  /* Current event information. */
1861  const CCLProfInfo* curr_ev;
1862  /* Start time. */
1863  cl_ulong t_start = 0;
1864 
1865  /* Sort event information by START order, ascending. */
1868 
1869  /* If zero start is set, use the start time of the first event
1870  * as zero time. */
1871  if (export_options.zero_start)
1872  t_start = prof->t_start;
1873 
1874  /* Iterate through all event information and export it to stream. */
1875  while ((curr_ev = ccl_prof_iter_info_next(prof)) != NULL) {
1876 
1877  /* Write to stream. */
1878  write_status = fprintf(stream, "%s%s%s%s%lu%s%lu%s%s%s%s%s",
1879  export_options.queue_delim,
1880  curr_ev->queue_name,
1881  export_options.queue_delim,
1882  export_options.separator,
1883  (unsigned long) (curr_ev->t_start - t_start),
1884  export_options.separator,
1885  (unsigned long) (curr_ev->t_end - t_start),
1886  export_options.separator,
1887  export_options.evname_delim,
1888  curr_ev->event_name,
1889  export_options.evname_delim,
1890  export_options.newline);
1891 
1892  ccl_if_err_create_goto(*err, CCL_ERROR, write_status < 0,
1893  CCL_ERROR_STREAM_WRITE, error_handler,
1894  "Error while exporting profiling information" \
1895  "(writing to stream).");
1896 
1897  }
1898 
1899  /* If we got here, everything is OK. */
1900  g_assert(err == NULL || *err == NULL);
1901  ret_status = CL_TRUE;
1902  goto finish;
1903 
1904 error_handler:
1905  /* If we got here there was an error, verify that it is so. */
1906  g_assert(err == NULL || *err != NULL);
1907  ret_status = CL_FALSE;
1908 
1909 finish:
1910 
1911  /* Return status. */
1912  return ret_status;
1913 
1914 }
1915 
1930 CCL_EXPORT
1932  CCLProf* prof, const char* filename, CCLErr** err) {
1933 
1934  /* Make sure prof is not NULL. */
1935  g_return_val_if_fail(prof != NULL, CL_FALSE);
1936  /* Make sure filename is not NULL. */
1937  g_return_val_if_fail(filename != NULL, CL_FALSE);
1938  /* Make sure err is NULL or it is not set. */
1939  g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
1940  /* This function can only be called after calculations are made. */
1941  g_return_val_if_fail(prof->calc == TRUE, CL_FALSE);
1942 
1943  /* Aux. var. */
1944  cl_bool status;
1945 
1946  /* Internal CCLErr object. */
1947  CCLErr* err_internal = NULL;
1948 
1949  /* Open file. */
1950  FILE* fp = fopen(filename, "w");
1951  ccl_if_err_create_goto(*err, CCL_ERROR, fp == NULL,
1952  CCL_ERROR_OPENFILE, error_handler,
1953  "Unable to open file '%s' for exporting.", filename);
1954 
1955  /* Export data. */
1956  status = ccl_prof_export_info(prof, fp, &err_internal);
1957  ccl_if_err_propagate_goto(err, err_internal, error_handler);
1958 
1959  /* If we got here, everything is OK. */
1960  g_assert(err == NULL || *err == NULL);
1961  status = CL_TRUE;
1962  goto finish;
1963 
1964 error_handler:
1965  /* If we got here there was an error, verify that it is so. */
1966  g_assert(err == NULL || *err != NULL);
1967  status = CL_FALSE;
1968 
1969 finish:
1970 
1971  /* Close file. */
1972  if (fp) fclose(fp);
1973 
1974  /* Return file contents in string form. */
1975  return status;
1976 
1977 }
1978 
1986 CCL_EXPORT
1988  export_options = export_opts;
1989 }
1990 
1998 CCL_EXPORT
2000  return export_options;
2001 }
2002 
Export options.
Definition: ccl_profiler.h:449
const char * ccl_event_get_final_name(CCLEvent *evt)
Get the final event name for profiling purposes.
Sort event profiling info instances by end time.
Definition: ccl_profiler.h:339
Sort descending.
Definition: ccl_profiler.h:219
cl_bool ccl_prof_export_info_file(CCLProf *prof, const char *filename, CCLErr **err)
Helper function which exports profiling info to a given file, automatically opening and closing the f...
const char * queue_name
Name of command queue associated with event.
Definition: ccl_profiler.h:372
cl_ulong t_submit
Device time counter in nanoseconds when the command identified by event that has been enqueued is sub...
Definition: ccl_profiler.h:300
Sort event profiling info instances by queued time.
Definition: ccl_profiler.h:330
void ccl_prof_iter_overlap_init(CCLProf *prof, int sort)
Initialize an iterator for overlap instances.
double ccl_prof_time_elapsed(CCLProf *prof)
If profiling has started but not stopped, returns the time since the profiling started.
Start event instant.
Definition: ccl_profiler.h:350
#define CCL_OCL_ERROR
Resolves to error category identifying string, in this case an error in the OpenCL library...
Definition: ccl_common.h:324
cl_ulong duration
Overlap duration in nanoseconds.
Definition: ccl_profiler.h:429
CCLProfAggSort
Sort criteria for aggregate event info instances.
Definition: ccl_profiler.h:254
cl_ulong ccl_prof_get_duration(CCLProf *prof)
Get duration of all events in nanoseconds.
Representation of an overlap of events.
Definition: ccl_profiler.h:411
Sort overlaps by event name.
Definition: ccl_profiler.h:439
#define ccl_queue_get_info_scalar(cq, param_name, param_type, err)
Macro which returns a scalar command queue information value.
Any other errors.
Definition: ccl_common.h:315
#define ccl_if_err_create_goto(err, quark, error_condition, error_code, label, msg,...)
If error is detected (error_code != no_error_code), create an error object (CCLErr) and go to the spe...
Definition: _ccl_defs.h:91
const char * event_name
Name of event which the instant refers to.
Definition: ccl_profiler.h:366
cl_bool ccl_prof_calc(CCLProf *prof, CCLErr **err)
Determine aggregate statistics for the given profile object.
Useful definitions used internally by cf4ocl.
#define ccl_event_get_profiling_info_scalar(evt, param_name, param_type, err)
Macro which returns a scalar event profiling information value.
void ccl_prof_iter_agg_init(CCLProf *prof, int sort)
Initialize an iterator for profiled aggregate event instances.
cl_bool zero_start
Start at instant 0 (TRUE, default), or start at oldest instant returned by OpenCL (FALSE)...
Definition: ccl_profiler.h:479
Sort ascending (default).
Definition: ccl_profiler.h:217
#define ccl_if_err_propagate_goto(err_dest, err_src, label)
Same as ccl_if_err_goto(), but rethrows error in a source CCLErr object to a new destination CCLErr o...
Definition: _ccl_defs.h:120
Sort overlaps by overlap duration.
Definition: ccl_profiler.h:442
Command queue wrapper class.
End event instant.
Definition: ccl_profiler.h:353
CCLProf * ccl_prof_new()
Create a new profile object.
Sort event profiling info instances by event name.
Definition: ccl_profiler.h:324
#define ccl_queue_ref(cq)
Increase the reference count of the command queue object.
const char * evname_delim
Event name delimiter, defaults to empty string.
Definition: ccl_profiler.h:473
cl_ulong t_start
Device time in nanoseconds when the command identified by event starts execution on the device...
Definition: ccl_profiler.h:307
cl_ulong ccl_prof_get_eff_duration(CCLProf *prof)
Get effective duration of all events in nanoseconds, i.e.
Sort event profiling info instances by submit time.
Definition: ccl_profiler.h:333
void ccl_prof_stop(CCLProf *prof)
Stops the global profiler timer.
Definition of classes and methods for profiling OpenCL events.
Sort event instants by event id.
Definition: ccl_profiler.h:404
void ccl_queue_gc(CCLQueue *cq)
Release all events associated with the command queue.
const CCLProfInfo * ccl_prof_iter_info_next(CCLProf *prof)
Return the next event profiling info instance.
void ccl_prof_start(CCLProf *prof)
Starts the global profiler timer.
Unable to open file.
Definition: ccl_common.h:300
const char * ccl_prof_get_summary(CCLProf *prof, int agg_sort, int ovlp_sort)
Get a summary with the profiling info.
const CCLProfAgg * ccl_prof_get_agg(CCLProf *prof, const char *event_name)
Return aggregate statistics for events with the given name.
const CCLProfOverlap * ccl_prof_iter_overlap_next(CCLProf *prof)
Return the next overlap instance.
#define CCL_ERROR
Resolves to error category identifying string, in this case an error in cf4ocl.
Definition: ccl_common.h:320
Sort aggregate event data instances by name.
Definition: ccl_profiler.h:257
cl_command_type command_type
Type of command which produced the event.
Definition: ccl_profiler.h:279
cl_uint id
Event instant ID.
Definition: ccl_profiler.h:378
Event wrapper class.
#define ccl_event_get_info_scalar(evt, param_name, param_type, err)
Macro which returns a scalar event information value.
Sort event profiling info instances by start time.
Definition: ccl_profiler.h:336
CCLProfOverlapSort
Sort criteria for overlaps (CCLProfOverlap).
Definition: ccl_profiler.h:436
Sort aggregate event data instances by time.
Definition: ccl_profiler.h:260
Aggregate event info.
Definition: ccl_profiler.h:226
void ccl_prof_destroy(CCLProf *prof)
Destroy a profile object.
cl_bool ccl_prof_export_info(CCLProf *prof, FILE *stream, CCLErr **err)
Export event profiling information to a given stream.
void ccl_prof_iter_inst_init(CCLProf *prof, int sort)
Initialize an iterator for event instant instances.
const char * queue_delim
Queue name delimiter, defaults to empty string.
Definition: ccl_profiler.h:467
CCLProfExportOptions ccl_prof_get_export_opts()
Get current export options.
void ccl_prof_set_export_opts(CCLProfExportOptions export_opts)
Set export options using a CCLProfExportOptions struct.
cl_ulong t_queued
Device time in nanoseconds when the command identified by event is enqueued in a command-queue by the...
Definition: ccl_profiler.h:292
Object information is unavailable.
Definition: ccl_common.h:313
const CCLProfInst * ccl_prof_iter_inst_next(CCLProf *prof)
Return the next event instant instance.
void ccl_prof_add_queue(CCLProf *prof, const char *cq_name, CCLQueue *cq)
Add a command queue wrapper for profiling.
const char * event2_name
Name of second overlapping event.
Definition: ccl_profiler.h:423
cl_ulong instant
Event instant in nanoseconds from current device time counter.
Definition: ccl_profiler.h:384
Sort event profiling info instances by queue name.
Definition: ccl_profiler.h:327
void ccl_prof_iter_info_init(CCLProf *prof, int sort)
Initialize an iterator for event profiling info instances.
GError CCLErr
Error handling class.
Definition: ccl_common.h:291
Event profiling info.
Definition: ccl_profiler.h:267
Error writing to a stream.
Definition: ccl_common.h:306
const char * event1_name
Name of first overlapping event.
Definition: ccl_profiler.h:417
Event instant.
Definition: ccl_profiler.h:360
const char * separator
Field separator, defaults to tab (\t).
Definition: ccl_profiler.h:455
CCLProfInfoSort
Sort criteria for event profiling info instances.
Definition: ccl_profiler.h:321
void ccl_prof_print_summary(CCLProf *prof)
Print a summary of the profiling info.
void ccl_queue_destroy(CCLQueue *cq)
Decrements the reference count of the command queue wrapper object.
CCLProfSortOrder
Sort order for the profile module iterators.
Definition: ccl_profiler.h:215
double relative_time
Relative time of events with name equal to CCLProfAgg::event_name.
Definition: ccl_profiler.h:246
Profile class, contains profiling information of OpenCL queues and events.
Definition: ccl_profiler.c:91
CCLProfInstSort
Sort criteria for event instants (CCLProfInst).
Definition: ccl_profiler.h:398
cl_ulong t_end
Device time in nanoseconds when the command identified by event has finished execution on the device...
Definition: ccl_profiler.h:314
CCLProfInstType type
Type of event instant (CCL_PROF_INST_TYPE_START or CCL_PROF_INST_TYPE_END).
Definition: ccl_profiler.h:391
const char * newline
Newline character, defaults to Unix newline (\n).
Definition: ccl_profiler.h:461
cl_ulong absolute_time
Total (absolute) time of events with name equal to CCLProfAgg::event_name.
Definition: ccl_profiler.h:239
CCLProfInstType
Type of event instant (CCLProfInst).
Definition: ccl_profiler.h:347
const CCLProfAgg * ccl_prof_iter_agg_next(CCLProf *prof)
Return the next aggregate statistic instance.
Sort event instants by instant.
Definition: ccl_profiler.h:401