cf4ocl (C Framework for OpenCL)  v2.1.0
Object-oriented framework for developing and benchmarking OpenCL projects in C/C++
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Profiler

The profiler module provides classes and methods for profiling wrapped OpenCL events and queues. More...

Data Structures

struct  ccl_prof_agg
 Aggregate event info. More...
 
struct  ccl_prof_export_options
 Export options. More...
 
struct  ccl_prof_info
 Event profiling info. More...
 
struct  ccl_prof_inst
 Event instant. More...
 
struct  ccl_prof_overlap
 Representation of an overlap of events. More...
 

Typedefs

typedef struct ccl_prof CCLProf
 Profile class, contains profiling information of OpenCL queues and events. More...
 
typedef struct ccl_prof CCLProf
 Profile class, contains profiling information of OpenCL queues and events. More...
 
typedef struct ccl_prof_agg CCLProfAgg
 Aggregate event info.
 
typedef struct ccl_prof_agg CCLProfAgg
 Aggregate event info.
 
typedef struct
ccl_prof_export_options 
CCLProfExportOptions
 Export options.
 
typedef struct
ccl_prof_export_options 
CCLProfExportOptions
 Export options.
 
typedef struct ccl_prof_info CCLProfInfo
 Event profiling info.
 
typedef struct ccl_prof_info CCLProfInfo
 Event profiling info.
 
typedef struct ccl_prof_inst CCLProfInst
 Event instant.
 
typedef struct ccl_prof_inst CCLProfInst
 Event instant.
 
typedef struct ccl_prof_overlap CCLProfOverlap
 Representation of an overlap of events.
 
typedef struct ccl_prof_overlap CCLProfOverlap
 Representation of an overlap of events.
 

Enumerations

enum  CCLProfAggSort { CCL_PROF_AGG_SORT_NAME = 0x00, CCL_PROF_AGG_SORT_TIME = 0x10, CCL_PROF_AGG_SORT_NAME = 0x00, CCL_PROF_AGG_SORT_TIME = 0x10 }
 Sort criteria for aggregate event info instances. More...
 
enum  CCLProfAggSort { CCL_PROF_AGG_SORT_NAME = 0x00, CCL_PROF_AGG_SORT_TIME = 0x10, CCL_PROF_AGG_SORT_NAME = 0x00, CCL_PROF_AGG_SORT_TIME = 0x10 }
 Sort criteria for aggregate event info instances. More...
 
enum  CCLProfInfoSort {
  CCL_PROF_INFO_SORT_NAME_EVENT = 0x20, CCL_PROF_INFO_SORT_NAME_QUEUE = 0x30, CCL_PROF_INFO_SORT_T_QUEUED = 0x40, CCL_PROF_INFO_SORT_T_SUBMIT = 0x50,
  CCL_PROF_INFO_SORT_T_START = 0x60, CCL_PROF_INFO_SORT_T_END = 0x70, CCL_PROF_INFO_SORT_NAME_EVENT = 0x20, CCL_PROF_INFO_SORT_NAME_QUEUE = 0x30,
  CCL_PROF_INFO_SORT_T_QUEUED = 0x40, CCL_PROF_INFO_SORT_T_SUBMIT = 0x50, CCL_PROF_INFO_SORT_T_START = 0x60, CCL_PROF_INFO_SORT_T_END = 0x70
}
 Sort criteria for event profiling info instances. More...
 
enum  CCLProfInfoSort {
  CCL_PROF_INFO_SORT_NAME_EVENT = 0x20, CCL_PROF_INFO_SORT_NAME_QUEUE = 0x30, CCL_PROF_INFO_SORT_T_QUEUED = 0x40, CCL_PROF_INFO_SORT_T_SUBMIT = 0x50,
  CCL_PROF_INFO_SORT_T_START = 0x60, CCL_PROF_INFO_SORT_T_END = 0x70, CCL_PROF_INFO_SORT_NAME_EVENT = 0x20, CCL_PROF_INFO_SORT_NAME_QUEUE = 0x30,
  CCL_PROF_INFO_SORT_T_QUEUED = 0x40, CCL_PROF_INFO_SORT_T_SUBMIT = 0x50, CCL_PROF_INFO_SORT_T_START = 0x60, CCL_PROF_INFO_SORT_T_END = 0x70
}
 Sort criteria for event profiling info instances. More...
 
enum  CCLProfInstSort { CCL_PROF_INST_SORT_INSTANT = 0x80, CCL_PROF_INST_SORT_ID = 0x90, CCL_PROF_INST_SORT_INSTANT = 0x80, CCL_PROF_INST_SORT_ID = 0x90 }
 Sort criteria for event instants (CCLProfInst). More...
 
enum  CCLProfInstSort { CCL_PROF_INST_SORT_INSTANT = 0x80, CCL_PROF_INST_SORT_ID = 0x90, CCL_PROF_INST_SORT_INSTANT = 0x80, CCL_PROF_INST_SORT_ID = 0x90 }
 Sort criteria for event instants (CCLProfInst). More...
 
enum  CCLProfInstType { CCL_PROF_INST_TYPE_START, CCL_PROF_INST_TYPE_END, CCL_PROF_INST_TYPE_START, CCL_PROF_INST_TYPE_END }
 Type of event instant (CCLProfInst). More...
 
enum  CCLProfInstType { CCL_PROF_INST_TYPE_START, CCL_PROF_INST_TYPE_END, CCL_PROF_INST_TYPE_START, CCL_PROF_INST_TYPE_END }
 Type of event instant (CCLProfInst). More...
 
enum  CCLProfOverlapSort { CCL_PROF_OVERLAP_SORT_NAME = 0xa0, CCL_PROF_OVERLAP_SORT_DURATION = 0xb0, CCL_PROF_OVERLAP_SORT_NAME = 0xa0, CCL_PROF_OVERLAP_SORT_DURATION = 0xb0 }
 Sort criteria for overlaps (CCLProfOverlap). More...
 
enum  CCLProfOverlapSort { CCL_PROF_OVERLAP_SORT_NAME = 0xa0, CCL_PROF_OVERLAP_SORT_DURATION = 0xb0, CCL_PROF_OVERLAP_SORT_NAME = 0xa0, CCL_PROF_OVERLAP_SORT_DURATION = 0xb0 }
 Sort criteria for overlaps (CCLProfOverlap). More...
 
enum  CCLProfSortOrder { CCL_PROF_SORT_ASC = 0x0, CCL_PROF_SORT_DESC = 0x1, CCL_PROF_SORT_ASC = 0x0, CCL_PROF_SORT_DESC = 0x1 }
 Sort order for the profile module iterators. More...
 
enum  CCLProfSortOrder { CCL_PROF_SORT_ASC = 0x0, CCL_PROF_SORT_DESC = 0x1, CCL_PROF_SORT_ASC = 0x0, CCL_PROF_SORT_DESC = 0x1 }
 Sort order for the profile module iterators. More...
 

Functions

void ccl_prof_add_queue (CCLProf *prof, const char *cq_name, CCLQueue *cq)
 Add a command queue wrapper for profiling. More...
 
cl_bool ccl_prof_calc (CCLProf *prof, CCLErr **err)
 Determine aggregate statistics for the given profile object. More...
 
void ccl_prof_destroy (CCLProf *prof)
 Destroy a profile object. More...
 
cl_bool ccl_prof_export_info (CCLProf *prof, FILE *stream, CCLErr **err)
 Export event profiling information to a given stream. More...
 
cl_bool ccl_prof_export_info_file (CCLProf *prof, const char *filename, CCLErr **err)
 Helper function which exports profiling info to a given file, automatically opening and closing the file. More...
 
const CCLProfAgg * ccl_prof_get_agg (CCLProf *prof, const char *event_name)
 Return aggregate statistics for events with the given name. More...
 
cl_ulong ccl_prof_get_duration (CCLProf *prof)
 Get duration of all events in nanoseconds. More...
 
cl_ulong ccl_prof_get_eff_duration (CCLProf *prof)
 Get effective duration of all events in nanoseconds, i.e. the duration of all events minus event overlaps. More...
 
CCLProfExportOptions ccl_prof_get_export_opts ()
 Get current export options. More...
 
const char * ccl_prof_get_summary (CCLProf *prof, int agg_sort, int ovlp_sort)
 Get a summary with the profiling info. More...
 
void ccl_prof_iter_agg_init (CCLProf *prof, int sort)
 Initialize an iterator for profiled aggregate event instances. More...
 
const CCLProfAgg * ccl_prof_iter_agg_next (CCLProf *prof)
 Return the next aggregate statistic instance. More...
 
void ccl_prof_iter_info_init (CCLProf *prof, int sort)
 Initialize an iterator for event profiling info instances. More...
 
const CCLProfInfo * ccl_prof_iter_info_next (CCLProf *prof)
 Return the next event profiling info instance. More...
 
void ccl_prof_iter_inst_init (CCLProf *prof, int sort)
 Initialize an iterator for event instant instances. More...
 
const CCLProfInst * ccl_prof_iter_inst_next (CCLProf *prof)
 Return the next event instant instance. More...
 
void ccl_prof_iter_overlap_init (CCLProf *prof, int sort)
 Initialize an iterator for overlap instances. More...
 
const CCLProfOverlap * ccl_prof_iter_overlap_next (CCLProf *prof)
 Return the next overlap instance. More...
 
CCLProf * ccl_prof_new ()
 Create a new profile object. More...
 
void ccl_prof_print_summary (CCLProf *prof)
 Print a summary of the profiling info. More...
 
void ccl_prof_set_export_opts (CCLProfExportOptions export_opts)
 Set export options using a CCLProfExportOptions struct. More...
 
void ccl_prof_start (CCLProf *prof)
 Starts the global profiler timer. More...
 
void ccl_prof_stop (CCLProf *prof)
 Stops the global profiler timer. More...
 
double ccl_prof_time_elapsed (CCLProf *prof)
 If profiling has started but not stopped, returns the time since the profiling started. More...
 

Detailed Description

The profiler module provides classes and methods for profiling wrapped OpenCL events and queues.

Warning
The functions in this module are not thread-safe.

The profiling module offers two methods for obtaining information about the performed computations:

  1. Detailed profiling of OpenCL events using the ccl_prof_add_queue() function.
  2. Simple (and optional) timing of the performed computations using the ccl_prof_start() and ccl_prof_stop() functions. If these functions are used, the measured time will be taken into account by the ccl_prof_*_summary() functions.

In order to use the first method, the CL_QUEUE_PROFILING_ENABLE property should be specified when creating command queue wrappers with ccl_queue_new() or ccl_queue_new_full().

After all the computations and memory transfers take place, the utilized queue wrappers are passed to the profiler using the ccl_prof_add_queue() function. The ccl_prof_calc() function can then be called to perform the required analysis.

At this stage, different types of profiling information become available, and can be iterated over:

  1. Aggregate event information: absolute and relative durations of all events with the same name, represented by the CCLProfAgg* class. If an event name is not set during the course of the computation, the aggregation is performed by event type, i.e., by events which represent the same command. A sequence of CCLProfAgg* objects can be iterated over using the ccl_prof_iter_agg_init() and ccl_prof_iter_agg_next() functions. A specific aggregate event can be obtained by name using the ccl_prof_get_agg() function.
  2. Non-aggregate event information: event-specific information, represented by the CCLProfInfo* class, such as event name (or type, if no name is given), the queue the event is associated with, and submit, queue, start and end instants. A sequence of CCLProfInfo* objects can be iterated over using the ccl_prof_iter_info_init() and ccl_prof_iter_info_next() functions.
  3. Event instants: specific start and end event instants, represented by the CCLProfInst* class. A sequence of CCLProfInst* objects can be iterated over using the ccl_prof_iter_inst_init() and ccl_prof_iter_inst_next() functions.
  4. Event overlaps: information about event overlaps, represented by the CCLProfOverlap* class. Event overlaps can only occur when more than one queue is used on the same device. A sequence of CCLProfOverlap* objects can be iterated over using the ccl_prof_iter_overlap_init() and ccl_prof_iter_overlap_next() functions.

While this information can be subject to different types of examination by client code, the profiler module also offers some functionality which allows for a more immediate interpretation of results:

  1. A summary of the profiling analysis can be obtained or printed with the ccl_prof_get_summary() or ccl_prof_print_summary() functions, respectively.
  2. An exported list of CCLProfInfo* data, namely queue name, start instant, end instant and event name, sorted by start instant, can be opened by the plot events script to plot a Gantt-like chart of the performed computation. Such a list can be exported with the ccl_prof_export_info() or ccl_prof_export_info_file() functions, using the default export options.

Example: Conway's game of life using double-buffered images (complete example)

/* Wrappers for OpenCL objects. */
CCLQueue* queue_exec;
CCLQueue* queue_comm;
CCLProf* prof;
/* Error handling object (must be NULL). */
CCLErr* err = NULL;

/* Create command queues. */
queue_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err);
queue_comm = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err);

/* Start profiling. */
prof = ccl_prof_new();
ccl_prof_start(prof);
/* Write initial state. */
ccl_image_enqueue_write(img1, queue_comm, CL_TRUE,
origin, region, 0, 0, input_image, NULL, &err);

/* Run CA_ITERS iterations of the CA. */
for (cl_uint i = 0; i < CA_ITERS; ++i) {
/* Read result of last iteration. On first run it is the initial
* state. */
evt_comm = ccl_image_enqueue_read(img1, queue_comm, CL_FALSE,
origin, region, 0, 0, output_images[i], NULL, &err);

/* Execute iteration. */
evt_exec = ccl_kernel_set_args_and_enqueue_ndrange(
krnl, queue_exec, 2, NULL, gws, lws, NULL, &err,
img1, img2, NULL);

}
/* Read result of last iteration. */
ccl_image_enqueue_read(img1, queue_comm, CL_TRUE,
origin, region, 0, 0, output_images[CA_ITERS], &ewl, &err);

/* Stop profiling timer and add queues for analysis. */
ccl_prof_stop(prof);
ccl_prof_add_queue(prof, "Comms", queue_comm);
ccl_prof_add_queue(prof, "Exec", queue_exec);

/* Process profiling info. */
ccl_prof_calc(prof, &err);

/* Print profiling info. */
ccl_prof_print_summary(prof);

/* Save profiling info. */
ccl_prof_export_info_file(prof, "prof.tsv", &err);

/* Release wrappers. */
ccl_queue_destroy(queue_comm);
ccl_queue_destroy(queue_exec);

/* Destroy profiler. */
ccl_prof_destroy(prof);

The output of ccl_prof_print_summary() will be something like:

   Aggregate times by event  :
     ------------------------------------------------------------------
     | Event name                     | Rel. time (%) | Abs. time (s) |
     ------------------------------------------------------------------
     | NDRANGE_KERNEL                 |       97.2742 |    3.7468e-02 |
     | READ_IMAGE                     |        2.6747 |    1.0303e-03 |
     | WRITE_IMAGE                    |        0.0511 |    1.9690e-05 |
     ------------------------------------------------------------------
                                      |         Total |    3.8518e-02 |
                                      ---------------------------------
   Event overlaps            :
     ------------------------------------------------------------------
     | Event 1                | Event2                 | Overlap (s)  |
     ------------------------------------------------------------------
     | READ_IMAGE             | NDRANGE_KERNEL         |   1.3618e-04 |
     ------------------------------------------------------------------
                              |                  Total |   1.3618e-04 |
                              -----------------------------------------
   Tot. of all events (eff.) : 3.838198e-02s
   Total ellapsed time       : 4.295200e-02s
   Time spent in device      : 89.36%
   Time spent in host        : 10.64%

Instead of the default command-based event names such as NDRANGE_KERNEL, specific names can be set with the ccl_event_set_name() function. This makes it possible to: (a) separate the aggregation of events of the same type (e.g., differentiate between the execution of two different kernels); and, (b) aggregate events of different types (e.g., aggregate reads and writes into a single "comms" event).

The ccl_plot_events script can be used to plot a Gantt-like chart of the events which took place in the queues. Running the following command...

$ python ccl_plot_events.py prof.tsv

...will produce the following image:

gantt_ca.png

Data Structure Documentation

struct ccl_prof_agg

Aggregate event info.

Definition at line 226 of file ccl_profiler.h.

Collaboration diagram for ccl_prof_agg:
Collaboration graph
Data Fields
cl_ulong absolute_time Total (absolute) time of events with name equal to CCLProfAgg::event_name.
const char * event_name Name of event which the instant refers to.
double relative_time Relative time of events with name equal to CCLProfAgg::event_name.
struct ccl_prof_export_options

Export options.

Definition at line 449 of file ccl_profiler.h.

Collaboration diagram for ccl_prof_export_options:
Collaboration graph
Data Fields
const char * evname_delim Event name delimiter, defaults to empty string.
const char * newline Newline character, defaults to Unix newline (\n).
const char * queue_delim Queue name delimiter, defaults to empty string.
const char * separator Field separator, defaults to tab (\t).
cl_bool zero_start Start at instant 0 (TRUE, default), or start at oldest instant returned by OpenCL (FALSE).
struct ccl_prof_info

Event profiling info.

Definition at line 267 of file ccl_profiler.h.

Collaboration diagram for ccl_prof_info:
Collaboration graph
Data Fields
cl_command_type command_type Type of command which produced the event.
const char * event_name Name of event.
const char * queue_name Name of command queue which generated this event.
cl_ulong t_end Device time in nanoseconds when the command identified by event has finished execution on the device.
cl_ulong t_queued Device time in nanoseconds when the command identified by event is enqueued in a command-queue by the host.
cl_ulong t_start Device time in nanoseconds when the command identified by event starts execution on the device.
cl_ulong t_submit Device time counter in nanoseconds when the command identified by event that has been enqueued is submitted by the host to the device associated with the command-queue.
struct ccl_prof_inst

Event instant.

Definition at line 360 of file ccl_profiler.h.

Collaboration diagram for ccl_prof_inst:
Collaboration graph
Data Fields
const char * event_name Name of event which the instant refers to.
cl_uint id Event instant ID.
cl_ulong instant Event instant in nanoseconds from current device time counter.
const char * queue_name Name of command queue associated with event.
CCLProfInstType type Type of event instant (CCL_PROF_INST_TYPE_START or CCL_PROF_INST_TYPE_END).
struct ccl_prof_overlap

Representation of an overlap of events.

Definition at line 411 of file ccl_profiler.h.

Collaboration diagram for ccl_prof_overlap:
Collaboration graph
Data Fields
cl_ulong duration Overlap duration in nanoseconds.
const char * event1_name Name of first overlapping event.
const char * event2_name Name of second overlapping event.

Typedef Documentation

typedef struct ccl_prof CCLProf

Profile class, contains profiling information of OpenCL queues and events.

Warning
Instances of this class are not thread-safe.

Definition at line 210 of file ccl_profiler.h.

typedef struct ccl_prof CCLProf

Profile class, contains profiling information of OpenCL queues and events.

Warning
Instances of this class are not thread-safe.

Definition at line 210 of file ccl_profiler.h.

Enumeration Type Documentation

Sort criteria for aggregate event info instances.

Enumerator
CCL_PROF_AGG_SORT_NAME 

Sort aggregate event data instances by name.

CCL_PROF_AGG_SORT_TIME 

Sort aggregate event data instances by time.

CCL_PROF_AGG_SORT_NAME 

Sort aggregate event data instances by name.

CCL_PROF_AGG_SORT_TIME 

Sort aggregate event data instances by time.

Definition at line 254 of file ccl_profiler.h.

Sort criteria for aggregate event info instances.

Enumerator
CCL_PROF_AGG_SORT_NAME 

Sort aggregate event data instances by name.

CCL_PROF_AGG_SORT_TIME 

Sort aggregate event data instances by time.

CCL_PROF_AGG_SORT_NAME 

Sort aggregate event data instances by name.

CCL_PROF_AGG_SORT_TIME 

Sort aggregate event data instances by time.

Definition at line 254 of file ccl_profiler.h.

Sort criteria for event profiling info instances.

Enumerator
CCL_PROF_INFO_SORT_NAME_EVENT 

Sort event profiling info instances by event name.

CCL_PROF_INFO_SORT_NAME_QUEUE 

Sort event profiling info instances by queue name.

CCL_PROF_INFO_SORT_T_QUEUED 

Sort event profiling info instances by queued time.

CCL_PROF_INFO_SORT_T_SUBMIT 

Sort event profiling info instances by submit time.

CCL_PROF_INFO_SORT_T_START 

Sort event profiling info instances by start time.

CCL_PROF_INFO_SORT_T_END 

Sort event profiling info instances by end time.

CCL_PROF_INFO_SORT_NAME_EVENT 

Sort event profiling info instances by event name.

CCL_PROF_INFO_SORT_NAME_QUEUE 

Sort event profiling info instances by queue name.

CCL_PROF_INFO_SORT_T_QUEUED 

Sort event profiling info instances by queued time.

CCL_PROF_INFO_SORT_T_SUBMIT 

Sort event profiling info instances by submit time.

CCL_PROF_INFO_SORT_T_START 

Sort event profiling info instances by start time.

CCL_PROF_INFO_SORT_T_END 

Sort event profiling info instances by end time.

Definition at line 321 of file ccl_profiler.h.

Sort criteria for event profiling info instances.

Enumerator
CCL_PROF_INFO_SORT_NAME_EVENT 

Sort event profiling info instances by event name.

CCL_PROF_INFO_SORT_NAME_QUEUE 

Sort event profiling info instances by queue name.

CCL_PROF_INFO_SORT_T_QUEUED 

Sort event profiling info instances by queued time.

CCL_PROF_INFO_SORT_T_SUBMIT 

Sort event profiling info instances by submit time.

CCL_PROF_INFO_SORT_T_START 

Sort event profiling info instances by start time.

CCL_PROF_INFO_SORT_T_END 

Sort event profiling info instances by end time.

CCL_PROF_INFO_SORT_NAME_EVENT 

Sort event profiling info instances by event name.

CCL_PROF_INFO_SORT_NAME_QUEUE 

Sort event profiling info instances by queue name.

CCL_PROF_INFO_SORT_T_QUEUED 

Sort event profiling info instances by queued time.

CCL_PROF_INFO_SORT_T_SUBMIT 

Sort event profiling info instances by submit time.

CCL_PROF_INFO_SORT_T_START 

Sort event profiling info instances by start time.

CCL_PROF_INFO_SORT_T_END 

Sort event profiling info instances by end time.

Definition at line 321 of file ccl_profiler.h.

Sort criteria for event instants (CCLProfInst).

Enumerator
CCL_PROF_INST_SORT_INSTANT 

Sort event instants by instant.

CCL_PROF_INST_SORT_ID 

Sort event instants by event id.

CCL_PROF_INST_SORT_INSTANT 

Sort event instants by instant.

CCL_PROF_INST_SORT_ID 

Sort event instants by event id.

Definition at line 398 of file ccl_profiler.h.

Sort criteria for event instants (CCLProfInst).

Enumerator
CCL_PROF_INST_SORT_INSTANT 

Sort event instants by instant.

CCL_PROF_INST_SORT_ID 

Sort event instants by event id.

CCL_PROF_INST_SORT_INSTANT 

Sort event instants by instant.

CCL_PROF_INST_SORT_ID 

Sort event instants by event id.

Definition at line 398 of file ccl_profiler.h.

Type of event instant (CCLProfInst).

Enumerator
CCL_PROF_INST_TYPE_START 

Start event instant.

CCL_PROF_INST_TYPE_END 

End event instant.

CCL_PROF_INST_TYPE_START 

Start event instant.

CCL_PROF_INST_TYPE_END 

End event instant.

Definition at line 347 of file ccl_profiler.h.

Type of event instant (CCLProfInst).

Enumerator
CCL_PROF_INST_TYPE_START 

Start event instant.

CCL_PROF_INST_TYPE_END 

End event instant.

CCL_PROF_INST_TYPE_START 

Start event instant.

CCL_PROF_INST_TYPE_END 

End event instant.

Definition at line 347 of file ccl_profiler.h.

Sort criteria for overlaps (CCLProfOverlap).

Enumerator
CCL_PROF_OVERLAP_SORT_NAME 

Sort overlaps by event name.

CCL_PROF_OVERLAP_SORT_DURATION 

Sort overlaps by overlap duration.

CCL_PROF_OVERLAP_SORT_NAME 

Sort overlaps by event name.

CCL_PROF_OVERLAP_SORT_DURATION 

Sort overlaps by overlap duration.

Definition at line 436 of file ccl_profiler.h.

Sort criteria for overlaps (CCLProfOverlap).

Enumerator
CCL_PROF_OVERLAP_SORT_NAME 

Sort overlaps by event name.

CCL_PROF_OVERLAP_SORT_DURATION 

Sort overlaps by overlap duration.

CCL_PROF_OVERLAP_SORT_NAME 

Sort overlaps by event name.

CCL_PROF_OVERLAP_SORT_DURATION 

Sort overlaps by overlap duration.

Definition at line 436 of file ccl_profiler.h.

Sort order for the profile module iterators.

Enumerator
CCL_PROF_SORT_ASC 

Sort ascending (default).

CCL_PROF_SORT_DESC 

Sort descending.

CCL_PROF_SORT_ASC 

Sort ascending (default).

CCL_PROF_SORT_DESC 

Sort descending.

Definition at line 215 of file ccl_profiler.h.

Sort order for the profile module iterators.

Enumerator
CCL_PROF_SORT_ASC 

Sort ascending (default).

CCL_PROF_SORT_DESC 

Sort descending.

CCL_PROF_SORT_ASC 

Sort ascending (default).

CCL_PROF_SORT_DESC 

Sort descending.

Definition at line 215 of file ccl_profiler.h.

Function Documentation

void ccl_prof_add_queue ( CCLProf *  prof,
const char *  cq_name,
CCLQueue *  cq 
)

Add a command queue wrapper for profiling.

Parameters
[in] prof - A profile object.
[in] cq_name - Command queue name.
[in] cq - Command queue wrapper object.
Examples:
ca.c, and canon.c.

Definition at line 1234 of file ccl_profiler.c.

cl_bool ccl_prof_calc ( CCLProf *  prof,
CCLErr **  err 
)

Determine aggregate statistics for the given profile object.

The command queues to be profiled will have their events garbage collected with ccl_queue_gc(). As such, they can be reused and re-added for profiling to a new profile object.

Parameters
[in] prof - A profile object.
[out] err - Return location for a CCLErr object, or NULL if error reporting is to be ignored.
Returns
CL_TRUE if function terminates successfully, or CL_FALSE otherwise.
Examples:
ca.c, and canon.c.

Definition at line 1280 of file ccl_profiler.c.

void ccl_prof_destroy ( CCLProf *  prof)

Destroy a profile object.

Parameters
[in] prof - Profile object to destroy.
Examples:
ca.c, and canon.c.

Definition at line 1117 of file ccl_profiler.c.

cl_bool ccl_prof_export_info ( CCLProf *  prof,
FILE *  stream,
CCLErr **  err 
)

Export event profiling information to a given stream.

Each line of the exported data will have the following format, ordered by event start time:

queue start-time end-time event-name

For example:

q0    100    120    load_data1
q1    100    132    load_data2
q0    121    159    process_data1
q1    133    145    process_data2
q0    146    157    read_result

Several export parameters can be configured with the ccl_prof_get_export_opts() and ccl_prof_set_export_opts() functions, by manipulating a CCLProfExportOptions struct.

Parameters
[in] prof - Profile object.
[out] stream - Stream to export info to.
[out] err - Return location for a CCLErr object, or NULL if error reporting is to be ignored.
Returns
CL_TRUE if function terminates successfully, CL_FALSE otherwise.

Definition at line 1845 of file ccl_profiler.c.

cl_bool ccl_prof_export_info_file ( CCLProf *  prof,
const char *  filename,
CCLErr **  err 
)

Helper function which exports profiling info to a given file, automatically opening and closing the file.

See ccl_prof_export_info() for more information.

Parameters
[in] prof - Profile object.
[in] filename - Name of file where information will be saved to.
[out] err - Return location for a CCLErr object, or NULL if error reporting is to be ignored.
Returns
CL_TRUE if function terminates successfully, CL_FALSE otherwise.
Examples:
ca.c, and canon.c.

Definition at line 1931 of file ccl_profiler.c.

const CCLProfAgg * ccl_prof_get_agg ( CCLProf *  prof,
const char *  event_name 
)

Return aggregate statistics for events with the given name.

Parameters
[in] prof - Profile object.
[in] event_name - Event name.
Returns
Aggregate statistics for events with the given name.

Definition at line 1352 of file ccl_profiler.c.

cl_ulong ccl_prof_get_duration ( CCLProf *  prof)

Get duration of all events in nanoseconds.

Parameters
[in] prof - Profile object.
Returns
The duration of all events in nanoseconds.

Definition at line 1624 of file ccl_profiler.c.

cl_ulong ccl_prof_get_eff_duration ( CCLProf *  prof)

Get effective duration of all events in nanoseconds, i.e. the duration of all events minus event overlaps.

If no overlaps occur, this function will return the same value as ccl_prof_get_duration().

Parameters
[in] prof - Profile object.
Returns
The effective duration of all events in nanoseconds, i.e. the duration of all events minus event overlaps.

Definition at line 1650 of file ccl_profiler.c.

CCLProfExportOptions ccl_prof_get_export_opts ( )

Get current export options.

Returns
Current export options.

Definition at line 1999 of file ccl_profiler.c.

const char * ccl_prof_get_summary ( CCLProf *  prof,
int  agg_sort,
int  ovlp_sort 
)

Get a summary with the profiling info.

More specifically, this function returns a string containing a table of aggregate event statistics and a table of event overlaps. The order of the returned information can be specified in the function arguments.

Parameters
[in] prof - Profile object.
[in] agg_sort - Sorting performed on aggregate statistics (bitfield of CCLProfAggSort ORed with CCLProfSortOrder).
[in] ovlp_sort - Sorting performed on event overlaps (bitfield of CCLProfOverlapSort ORed with CCLProfSortOrder).
Returns
A string containing the summary.

Definition at line 1711 of file ccl_profiler.c.

void ccl_prof_iter_agg_init ( CCLProf *  prof,
int  sort 
)

Initialize an iterator for profiled aggregate event instances.

Parameters
[in] prof - Profile object.
[in] sort - Bitfield of CCLProfAggSort OR CCLProfSortOrder, for example CCL_PROF_AGG_SORT_NAME | CCL_PROF_SORT_DESC.

Definition at line 1390 of file ccl_profiler.c.

const CCLProfAgg * ccl_prof_iter_agg_next ( CCLProf *  prof)

Return the next aggregate statistic instance.

Parameters
[in] prof - Profile object.
Returns
The next aggregate statistic instance.

Definition at line 1415 of file ccl_profiler.c.

void ccl_prof_iter_info_init ( CCLProf *  prof,
int  sort 
)

Initialize an iterator for event profiling info instances.

Parameters
[in] prof - Profile object.
[in] sort - Bitfield of CCLProfInfoSort OR CCLProfSortOrder, for example CCL_PROF_INFO_SORT_T_START | CCL_PROF_SORT_ASC.

Definition at line 1449 of file ccl_profiler.c.

const CCLProfInfo * ccl_prof_iter_info_next ( CCLProf *  prof)

Return the next event profiling info instance.

Parameters
[in] prof - Profile object.
Returns
The next event profiling info instance.

Definition at line 1473 of file ccl_profiler.c.

void ccl_prof_iter_inst_init ( CCLProf *  prof,
int  sort 
)

Initialize an iterator for event instant instances.

Parameters
[in] prof - Profile object.
[in] sort - Bitfield of CCLProfInstSort OR CCLProfSortOrder, for example CCL_PROF_INST_SORT_INSTANT | CCL_PROF_SORT_ASC.

Definition at line 1507 of file ccl_profiler.c.

const CCLProfInst * ccl_prof_iter_inst_next ( CCLProf *  prof)

Return the next event instant instance.

Parameters
[in] prof - Profile object.
Returns
The next event instant instance.

Definition at line 1532 of file ccl_profiler.c.

void ccl_prof_iter_overlap_init ( CCLProf *  prof,
int  sort 
)

Initialize an iterator for overlap instances.

Parameters
[in] prof - Profile object.
[in] sort - Bitfield of CCLProfOverlapSort OR CCLProfSortOrder, for example CCL_PROF_OVERLAP_SORT_DURATION | CCL_PROF_SORT_DESC.

Definition at line 1567 of file ccl_profiler.c.

const CCLProfOverlap * ccl_prof_iter_overlap_next ( CCLProf *  prof)

Return the next overlap instance.

Parameters
[in] prof - Profile object.
Returns
The next overlap instance.

Definition at line 1591 of file ccl_profiler.c.

CCLProf * ccl_prof_new ( )

Create a new profile object.

Returns
A new profile object.
Examples:
ca.c, and canon.c.

Definition at line 1096 of file ccl_profiler.c.

void ccl_prof_print_summary ( CCLProf *  prof)

Print a summary of the profiling info.

More specifically, this function prints a table of aggregate event statistics (sorted by absolute time), and a table of event overlaps (sorted by overlap duration).

For more control of where and how this summary is printed, use the ccl_prof_get_summary() function.

Parameters
[in] prof - Profile object.
Examples:
ca.c, and canon.c.

Definition at line 1675 of file ccl_profiler.c.

void ccl_prof_set_export_opts ( CCLProfExportOptions  export_opts)

Set export options using a CCLProfExportOptions struct.

Parameters
[in] export_opts - Export options to set.

Definition at line 1987 of file ccl_profiler.c.

void ccl_prof_start ( CCLProf *  prof)

Starts the global profiler timer.

Only required if client wishes to compare the effectively elapsed time with the OpenCL kernels time.

Parameters
[in] prof - A profile object.
Examples:
ca.c.

Definition at line 1177 of file ccl_profiler.c.

void ccl_prof_stop ( CCLProf *  prof)

Stops the global profiler timer.

Only required if ccl_prof_start() was called.

Parameters
[in] prof - A profile object.
Examples:
ca.c.

Definition at line 1195 of file ccl_profiler.c.

double ccl_prof_time_elapsed ( CCLProf *  prof)

If profiling has started but not stopped, returns the time since the profiling started.

If profiling has been stopped, returns the elapsed time between the time it started and the time it stopped.

Parameters
[in] prof - A profile object.
Returns
number of seconds elapsed, including any fractional part.

Definition at line 1215 of file ccl_profiler.c.