Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
flow_graph_opencl_node.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_flow_graph_opencl_node_H
18 #define __TBB_flow_graph_opencl_node_H
19 
20 #include "tbb/tbb_config.h"
21 #if __TBB_PREVIEW_OPENCL_NODE
22 
23 #include "flow_graph.h"
24 
25 #include <vector>
26 #include <string>
27 #include <algorithm>
28 #include <iostream>
29 #include <fstream>
30 #include <map>
31 #include <mutex>
32 
33 #ifdef __APPLE__
34 #include <OpenCL/opencl.h>
35 #else
36 #include <CL/cl.h>
37 #endif
38 
39 namespace tbb {
40 namespace flow {
41 
42 namespace interface10 {
43 
44 template <typename DeviceFilter>
46 
47 namespace opencl_info {
49 }
50 
51 template <typename Factory>
53 
54 inline void enforce_cl_retcode(cl_int err, std::string msg) {
55  if (err != CL_SUCCESS) {
56  std::cerr << msg << "; error code: " << err << std::endl;
57  throw msg;
58  }
59 }
60 
61 template <typename T>
62 T event_info(cl_event e, cl_event_info i) {
63  T res;
64  enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
65  return res;
66 }
67 
68 template <typename T>
69 T device_info(cl_device_id d, cl_device_info i) {
70  T res;
71  enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
72  return res;
73 }
74 
75 template <>
76 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
77  size_t required;
78  enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
79 
80  char *buff = (char*)alloca(required);
81  enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
82 
83  return buff;
84 }
85 
86 template <typename T>
87 T platform_info(cl_platform_id p, cl_platform_info i) {
88  T res;
89  enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
90  return res;
91 }
92 
93 template <>
94 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
95  size_t required;
96  enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
97 
98  char *buff = (char*)alloca(required);
99  enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
100 
101  return buff;
102 }
103 
104 
106 public:
107  typedef size_t device_id_type;
108  enum : device_id_type {
111  };
112 
114 
115  opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
116 
118 
119  std::string platform_profile() const {
120  return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
121  }
122  std::string platform_version() const {
123  return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
124  }
125  std::string platform_name() const {
126  return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
127  }
128  std::string platform_vendor() const {
129  return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
130  }
131  std::string platform_extensions() const {
132  return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
133  }
134 
135  template <typename T>
136  void info( cl_device_info i, T &t ) const {
137  t = device_info<T>( my_cl_device_id, i );
138  }
139  std::string version() const {
140  // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
141  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
142  }
143  int major_version() const {
144  int major;
145  std::sscanf( version().c_str(), "OpenCL %d", &major );
146  return major;
147  }
148  int minor_version() const {
149  int major, minor;
150  std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
151  return minor;
152  }
154 #if CL_VERSION_2_0
155  if ( major_version() >= 2 )
156  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
157  else
158 #endif /* CL_VERSION_2_0 */
159  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
160  }
162 #if CL_VERSION_2_0
163  if ( major_version() >= 2 )
164  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
165  else
166 #endif /* CL_VERSION_2_0 */
167  return false;
168  }
169  std::array<size_t, 3> max_work_item_sizes() const {
170  return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
171  }
172  size_t max_work_group_size() const {
173  return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
174  }
175  bool built_in_kernel_available( const std::string& k ) const {
176  const std::string semi = ";";
177  // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
178  return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
179  }
180  std::string built_in_kernels() const {
181  return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
182  }
183  std::string name() const {
184  return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
185  }
186  cl_bool available() const {
187  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
188  }
189  cl_bool compiler_available() const {
190  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
191  }
192  cl_bool linker_available() const {
193  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
194  }
195  bool extension_available( const std::string &ext ) const {
196  const std::string space = " ";
197  // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
198  return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
199  }
200  std::string extensions() const {
201  return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
202  }
203 
204  cl_device_type type() const {
205  return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
206  }
207 
208  std::string vendor() const {
209  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
210  }
211 
212  cl_uint address_bits() const {
213  return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
214  }
215 
216  cl_device_id device_id() const {
217  return my_cl_device_id;
218  }
219 
220  cl_command_queue command_queue() const {
221  return my_cl_command_queue;
222  }
223 
224  void set_command_queue( cl_command_queue cmd_queue ) {
225  my_cl_command_queue = cmd_queue;
226  }
227 
228  cl_platform_id platform_id() const {
229  return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
230  }
231 
232 private:
233 
235  cl_device_id my_cl_device_id;
236  cl_command_queue my_cl_command_queue;
237 
238  friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
239 
240  template <typename DeviceFilter>
241  friend class opencl_factory;
242  template <typename Factory>
243  friend class opencl_memory;
244  template <typename Factory>
245  friend class opencl_program;
246 
247 #if TBB_USE_ASSERT
248  template <typename T, typename Factory>
249  friend class opencl_buffer;
250 #endif
251 };
252 
254  typedef std::vector<opencl_device> container_type;
255 public:
256  typedef container_type::iterator iterator;
257  typedef container_type::const_iterator const_iterator;
258  typedef container_type::size_type size_type;
259 
261  opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
262 
263  void add( opencl_device d ) { my_container.push_back( d ); }
264  size_type size() const { return my_container.size(); }
265  bool empty() const { return my_container.empty(); }
266  iterator begin() { return my_container.begin(); }
267  iterator end() { return my_container.end(); }
268  const_iterator begin() const { return my_container.begin(); }
269  const_iterator end() const { return my_container.end(); }
270  const_iterator cbegin() const { return my_container.cbegin(); }
271  const_iterator cend() const { return my_container.cend(); }
272 
273 private:
275 };
276 
277 namespace internal {
278 
279 // Retrieve all OpenCL devices from machine
281  opencl_device_list opencl_devices;
282 
283  cl_uint num_platforms;
284  enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
285 
286  std::vector<cl_platform_id> platforms(num_platforms);
287  enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
288 
289  cl_uint num_devices;
290  std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
291  cl_uint num_all_devices = 0;
292  while (platforms_it != platforms.end()) {
293  cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
294  if (err == CL_DEVICE_NOT_FOUND) {
295  platforms_it = platforms.erase(platforms_it);
296  }
297  else {
298  enforce_cl_retcode(err, "clGetDeviceIDs failed");
299  num_all_devices += num_devices;
300  ++platforms_it;
301  }
302  }
303 
304  std::vector<cl_device_id> devices(num_all_devices);
305  std::vector<cl_device_id>::iterator devices_it = devices.begin();
306  for (auto p = platforms.begin(); p != platforms.end(); ++p) {
307  enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
308  devices_it += num_devices;
309  }
310 
311  for (auto d = devices.begin(); d != devices.end(); ++d) {
312  opencl_devices.add(opencl_device((*d)));
313  }
314 
315  return opencl_devices;
316 }
317 
318 } // namespace internal
319 
320 // TODO: consider this namespace as public API
321 namespace opencl_info {
322 
324  // Static storage for all available OpenCL devices on machine
325  static const opencl_device_list my_devices = internal::find_available_devices();
326  return my_devices;
327  }
328 
329 } // namespace opencl_info
330 
331 
333 public:
334  virtual void call() = 0;
335  virtual ~callback_base() {}
336 };
337 
338 template <typename Callback, typename T>
339 class callback : public callback_base {
340  Callback my_callback;
342 public:
343  callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
344 
346  my_callback( my_data );
347  }
348 };
349 
350 template <typename T, typename Factory = opencl_info::default_opencl_factory>
351 class opencl_async_msg : public async_msg<T> {
352 public:
353  typedef T value_type;
354 
355  opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
356  my_callback_flag_ptr->store<tbb::relaxed>(false);
357  }
358 
359  explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
360  my_callback_flag_ptr->store<tbb::relaxed>(false);
361  }
362 
363  opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
364  my_callback_flag_ptr->store<tbb::relaxed>(false);
365  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
366  }
367 
368  T& data( bool wait = true ) {
369  if ( my_is_event && wait ) {
370  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
371  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
372  my_is_event = false;
373  }
374  return my_data;
375  }
376 
377  const T& data( bool wait = true ) const {
378  if ( my_is_event && wait ) {
379  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
380  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
381  my_is_event = false;
382  }
383  return my_data;
384  }
385 
386  opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
387  my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
389  {
390  if ( my_is_event )
391  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
392  }
393 
394  opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
395  my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
397  {
398  dmsg.my_is_event = false;
399  }
400 
403 
404  // Release original event
405  if ( my_is_event )
406  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
407 
408  my_data = dmsg.my_data;
409  my_event = dmsg.my_event;
410  my_is_event = dmsg.my_is_event;
411 
412  // Retain copied event
413  if ( my_is_event )
414  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
415 
417  return *this;
418  }
419 
421  if ( my_is_event )
422  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
423  }
424 
425  cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
426  void set_event( cl_event e ) const {
427  if ( my_is_event ) {
428  cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
429  if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
430  enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
431  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
432  }
433  my_is_event = true;
434  my_event = e;
435  clRetainEvent( my_event );
436  }
437 
438  void clear_event() const {
439  if ( my_is_event ) {
440  enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
441  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
442  }
443  my_is_event = false;
444  }
445 
446  template <typename Callback>
447  void register_callback( Callback c ) const {
448  __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
449  enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
450  }
451 
452  operator T&() { return data(); }
453  operator const T&() const { return data(); }
454 
455 protected:
456  // Overridden in this derived class to inform that
457  // async calculation chain is over
458  void finalize() const __TBB_override {
460  if (! my_callback_flag_ptr->fetch_and_store(true)) {
461  opencl_async_msg a(*this);
462  if (my_is_event) {
463  register_callback([a](const T& t) mutable {
464  a.set(t);
465  });
466  }
467  else {
468  a.set(my_data);
469  }
470  }
471  clear_event();
472  }
473 
474 private:
475  static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
476  tbb::internal::suppress_unused_warning( event_command_exec_status );
477  __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
478  __TBB_ASSERT( data, NULL );
479  callback_base *c = static_cast<callback_base*>(data);
480  c->call();
481  delete c;
482  }
483 
485  mutable cl_event my_event;
486  mutable bool my_is_event = false;
487 
488  std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
489 };
490 
491 template <typename K, typename T, typename Factory>
494  const T &t = dmsg.data( false );
495  __TBB_STATIC_ASSERT( true, "" );
496  return key_from_message<K, T>( t );
497 }
498 
499 template <typename Factory>
501 public:
503  opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
504  my_curr_device_id = my_factory->devices().begin()->my_device_id;
505  }
506 
508  if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
509  enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
510  }
511 
512  cl_mem get_cl_mem() const {
513  return my_cl_mem;
514  }
515 
516  void* get_host_ptr() {
517  if ( !my_host_ptr ) {
519  d.data();
520  __TBB_ASSERT( d.data() == my_host_ptr, NULL );
521  }
522  return my_host_ptr;
523  }
524 
525  Factory *factory() const { return my_factory; }
526 
529  if (e) {
531  } else {
533  }
534 
535  // Concurrent receives are prohibited so we do not worry about synchronization.
537  map_memory(*my_factory->devices().begin(), d);
539  my_host_ptr = d.data(false);
540  }
541  // Release the sending event
543  enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
544  my_sending_event_present = false;
545  }
546  return d;
547  }
548 
550  opencl_device::device_id_type device_id = device.my_device_id;
551  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
552  {
554  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
555  __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
557  my_factory->enqueue_unmap_buffer(device, *this, d);
558  my_sending_event = *d.get_event();
560  enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
561  my_host_ptr = NULL;
563  }
564  }
566  }
567 
568  // !e means that buffer has come from the host
570 
571  __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
573  }
574 
576 protected:
577  cl_mem my_cl_mem;
579  void* my_host_ptr;
580  Factory *my_factory;
581 
585 };
586 
587 template <typename Factory>
588 class opencl_buffer_impl : public opencl_memory<Factory> {
589  size_t my_size;
590 public:
591  opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
592  cl_int err;
593  this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
594  enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
595  }
596 
597  // The constructor for subbuffers.
598  opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
599  cl_int err;
600  cl_buffer_region region = { index, size };
601  this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
602  enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
603  }
604 
605  size_t size() const {
606  return my_size;
607  }
608 
610  this->my_factory->enqueue_map_buffer( device, *this, dmsg );
611  }
612 
613 #if TBB_USE_ASSERT
614  template <typename, typename>
615  friend class opencl_buffer;
616 #endif
617 };
618 
623 };
624 
625 template <typename T, typename Factory = opencl_info::default_opencl_factory>
627 
628 template <typename T, typename Factory = opencl_info::default_opencl_factory>
630 public:
631  typedef cl_mem native_object_type;
633  typedef Factory opencl_factory_type;
634 
635  template<access_type a> using iterator = T*;
636 
637  template <access_type a>
638  iterator<a> access() const {
639  T* ptr = (T*)my_impl->get_host_ptr();
640  __TBB_ASSERT( ptr, NULL );
641  return iterator<a>( ptr );
642  }
643 
644  T* data() const { return &access<read_write>()[0]; }
645 
646  template <access_type a = read_write>
647  iterator<a> begin() const { return access<a>(); }
648 
649  template <access_type a = read_write>
650  iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
651 
652  size_t size() const { return my_impl->size()/sizeof(T); }
653 
654  T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
655 
657  opencl_buffer( size_t size );
658  opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
659 
660  cl_mem native_object() const {
661  return my_impl->get_cl_mem();
662  }
663 
664  const opencl_buffer& memory_object() const {
665  return *this;
666  }
667 
668  void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
669  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
670  opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
671  const cl_event *e = d.get_event();
672  if ( e ) dependency.set_event( *e );
673  else dependency.clear_event();
674  }
675  void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
676  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
677  opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
678  const cl_event *e = d.get_event();
679  if ( e ) dependency.set_event( *e );
680  else dependency.clear_event();
681  }
682 
683  opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
684 private:
685  // The constructor for subbuffers.
686  opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
687 
689 
690  std::shared_ptr<impl_type> my_impl;
691 
692  friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
693  return lhs.my_impl == rhs.my_impl;
694  }
695 
696  template <typename>
697  friend class opencl_factory;
698  template <typename, typename>
699  friend class opencl_subbuffer;
700 };
701 
702 template <typename T, typename Factory>
703 class opencl_subbuffer : public opencl_buffer<T, Factory> {
705 public:
707  opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
708  opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
709 };
710 
711 template <typename T, typename Factory>
713  return opencl_subbuffer<T, Factory>( *this, index, size );
714 }
715 
716 
717 #define is_typedef(type) \
718  template <typename T> \
719  struct is_##type { \
720  template <typename C> \
721  static std::true_type check( typename C::type* ); \
722  template <typename C> \
723  static std::false_type check( ... ); \
724  \
725  static const bool value = decltype(check<T>(0))::value; \
726  }
727 
728 is_typedef( native_object_type );
729 is_typedef( memory_object_type );
730 
731 template <typename T>
732 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
733  return t.native_object();
734 }
735 
736 template <typename T>
738  return t;
739 }
740 
741 // send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
742 template <typename T, typename Factory>
744  const T &t = dmsg.data( false );
745  typedef typename T::memory_object_type mem_obj_t;
746  mem_obj_t mem_obj = t.memory_object();
748  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
749  mem_obj.send( device, d );
750  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
751 }
752 
753 template <typename T>
755  typedef typename T::memory_object_type mem_obj_t;
756  mem_obj_t mem_obj = t.memory_object();
758  mem_obj.send( device, dmsg );
759 }
760 
761 template <typename T>
763 
764 // receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
765 template <typename T, typename Factory>
767  const T &t = dmsg.data( false );
768  typedef typename T::memory_object_type mem_obj_t;
769  mem_obj_t mem_obj = t.memory_object();
771  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
772  mem_obj.receive( d );
773  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
774 }
775 
776 template <typename T>
778 
780 public:
781  typedef size_t range_index_type;
782  typedef std::array<range_index_type, 3> nd_range_type;
783 
784  template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
785  typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
786  opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
787  auto g_it = global_work.begin();
788  auto l_it = local_work.begin();
789  my_global_work_size = { size_t(-1), size_t(-1), size_t(-1) };
790  // my_local_work_size is still uninitialized
791  for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
792  __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
793  my_global_work_size[s] = *g_it;
794  my_local_work_size[s] = *l_it;
795  }
796  }
797 
798  const nd_range_type& global_range() const { return my_global_work_size; }
799  const nd_range_type& local_range() const { return my_local_work_size; }
800 
801 private:
804 };
805 
806 template <typename DeviceFilter>
807 class opencl_factory {
808 public:
811 
813  public:
814  kernel( const kernel& k ) : my_factory( k.my_factory ) {
815  // Clone my_cl_kernel via opencl_program
816  size_t ret_size = 0;
817 
818  std::vector<char> kernel_name;
819  for ( size_t curr_size = 32;; curr_size <<= 1 ) {
820  kernel_name.resize( curr_size <<= 1 );
821  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
822  if ( ret_size < curr_size ) break;
823  }
824 
825  cl_program program;
826  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
827  __TBB_ASSERT( ret_size == sizeof(program), NULL );
828 
829  my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
830  }
831 
833  enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
834  }
835 
836  private:
838 
839  kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
840 
841  // Data
842  cl_kernel my_cl_kernel;
844 
845  template <typename DeviceFilter_>
846  friend class opencl_factory;
847 
848  template <typename Factory>
849  friend class opencl_program;
850  };
851 
852  typedef kernel kernel_type;
853 
854  // 'range_type' enables kernel_executor with range support
855  // it affects expectations for enqueue_kernel(.....) interface method
857 
860  if ( my_devices.size() ) {
861  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
862  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
863  }
864  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
865  }
866  }
867 
868  bool init( const opencl_device_list &device_list ) {
870  if ( !my_devices.size() ) {
871  my_devices = device_list;
872  return true;
873  }
874  return false;
875  }
876 
877 
878 private:
879  template <typename Factory>
881  cl_event const* e1 = dmsg.get_event();
882  cl_event e2;
883  cl_int err;
884  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
885  e1 == NULL ? 0 : 1, e1, &e2, &err );
886  enforce_cl_retcode( err, "Failed to map a buffer" );
887  dmsg.data( false ) = ptr;
888  dmsg.set_event( e2 );
889  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
890  }
891 
892 
893  template <typename Factory>
895  cl_event const* e1 = dmsg.get_event();
896  cl_event e2;
898  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
899  "Failed to unmap a buffer" );
900  dmsg.set_event( e2 );
901  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
902  }
903 
904  // --------- Kernel argument & event list helpers --------- //
905  template <size_t NUM_ARGS, typename T>
906  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
907  auto p = get_native_object(t);
908  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
909  }
910 
911  template <size_t NUM_ARGS, typename T, typename F>
912  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
913  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
914 
915  const cl_event * const e = msg.get_event();
916  if (e != NULL) {
917  events[num_events++] = *e;
918  }
919 
920  process_one_arg( kernel, events, num_events, place, msg.data(false) );
921  }
922 
923  template <size_t NUM_ARGS, typename T, typename ...Rest>
924  void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
925  process_one_arg( kernel, events, num_events, place, t );
926  process_arg_list( kernel, events, num_events, place, args... );
927  }
928 
929  template <size_t NUM_ARGS>
930  void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
931  // ------------------------------------------- //
932  template <typename T>
933  void update_one_arg( cl_event, T& ) {}
934 
935  template <typename T, typename F>
936  void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
937  msg.set_event( e );
938  }
939 
940  template <typename T, typename ...Rest>
941  void update_arg_list( cl_event e, T& t, Rest&... args ) {
942  update_one_arg( e, t );
943  update_arg_list( e, args... );
944  }
945 
946  void update_arg_list( cl_event ) {}
947  // ------------------------------------------- //
948 public:
949  template <typename ...Args>
950  void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
951  std::array<cl_event, sizeof...(Args)> events;
952  int num_events = 0;
953  int place = 0;
954  process_arg_list( kernel, events, num_events, place, args... );
955 
956  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
957 
958  update_arg_list(e, args...);
959 
960  // Release our own reference to cl_event
961  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
962  }
963 
964  // ------------------------------------------- //
965  template <typename T, typename ...Rest>
966  void send_data(opencl_device device, T& t, Rest&... args) {
967  send_if_memory_object( device, t );
968  send_data( device, args... );
969  }
970 
972  // ------------------------------------------- //
973 
974 private:
975  cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
976  const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
977  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
978  const typename range_type::nd_range_type& g_size = work_size.global_range();
979  const typename range_type::nd_range_type& l_size = work_size.local_range();
980  cl_uint s;
981  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
982  cl_event event;
984  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
985  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
986  "Failed to enqueue a kernel" );
987  return event;
988  }
989 
990  // ------------------------------------------- //
991  template <typename T>
992  bool get_event_from_one_arg( cl_event&, const T& ) {
993  return false;
994  }
995 
996  template <typename T, typename F>
997  bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
998  cl_event const *e_ptr = msg.get_event();
999 
1000  if ( e_ptr != NULL ) {
1001  e = *e_ptr;
1002  return true;
1003  }
1004 
1005  return false;
1006  }
1007 
1008  template <typename T, typename ...Rest>
1009  bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1010  if ( get_event_from_one_arg( e, t ) ) {
1011  return true;
1012  }
1013 
1014  return get_event_from_args( e, args... );
1015  }
1016 
1017  bool get_event_from_args( cl_event& ) {
1018  return false;
1019  }
1020  // ------------------------------------------- //
1021 
1023  virtual ~finalize_fn() {}
1024  virtual void operator() () {}
1025  };
1026 
1027  template<typename Fn>
1028  struct finalize_fn_leaf : public finalize_fn {
1029  Fn my_fn;
1032  };
1033 
1034  static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1035  tbb::internal::suppress_unused_warning(event_command_exec_status);
1036  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1037 
1038  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1039  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1040  (*fn_ptr)();
1041 
1042  // Function pointer was created by 'new' & this callback must be called once only
1043  delete fn_ptr;
1044  }
1045 public:
1046  template <typename FinalizeFn, typename ...Args>
1047  void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1048  cl_event e;
1049 
1050  if ( get_event_from_args( e, args... ) ) {
1051  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1052  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1053  }
1054 
1055  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1056  }
1057 
1059  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1060  return my_devices;
1061  }
1062 
1063 private:
1066  // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1067  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1068  return true;
1069  return d1 == d2;
1070  }
1071 private:
1072  opencl_factory( const opencl_factory& );
1074 
1075  cl_context context() {
1076  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1077  return my_cl_context;
1078  }
1079 
1080  void init_once() {
1081  {
1083  if (!my_devices.size())
1084  my_devices = DeviceFilter()( opencl_info::available_devices() );
1085  }
1086 
1087  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1088  cl_platform_id platform_id = my_devices.begin()->platform_id();
1089  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1090  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1091 
1092  std::vector<cl_device_id> cl_device_ids;
1093  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1094  cl_device_ids.push_back((*d).my_cl_device_id);
1095  }
1096 
1097  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1098  cl_int err;
1099  cl_context ctx = clCreateContext(context_properties,
1100  (cl_uint)cl_device_ids.size(),
1101  cl_device_ids.data(),
1102  NULL, NULL, &err);
1103  enforce_cl_retcode(err, "Failed to create context");
1104  my_cl_context = ctx;
1105 
1106  size_t device_counter = 0;
1107  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1108  (*d).my_device_id = device_counter++;
1109  cl_int err2;
1110  cl_command_queue cq;
1111 #if CL_VERSION_2_0
1112  if ((*d).major_version() >= 2) {
1113  if ((*d).out_of_order_exec_mode_on_host_present()) {
1114  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1115  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1116  } else {
1117  cl_queue_properties props[] = { 0 };
1118  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1119  }
1120  } else
1121 #endif
1122  {
1123  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1124  // Suppress "declared deprecated" warning for the next line.
1125 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1126 #pragma GCC diagnostic push
1127 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1128 #endif
1129 #if _MSC_VER || __INTEL_COMPILER
1130 #pragma warning( push )
1131 #if __INTEL_COMPILER
1132 #pragma warning (disable: 1478)
1133 #else
1134 #pragma warning (disable: 4996)
1135 #endif
1136 #endif
1137  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1138 #if _MSC_VER || __INTEL_COMPILER
1139 #pragma warning( pop )
1140 #endif
1141 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1142 #pragma GCC diagnostic pop
1143 #endif
1144  }
1145  enforce_cl_retcode(err2, "Failed to create command queue");
1146  (*d).my_cl_command_queue = cq;
1147  }
1148  }
1149 
1150  std::once_flag my_once_flag;
1152  cl_context my_cl_context;
1153 
1155 
1156  template <typename Factory>
1157  friend class opencl_program;
1158  template <typename Factory>
1159  friend class opencl_buffer_impl;
1160  template <typename Factory>
1161  friend class opencl_memory;
1162 }; // class opencl_factory
1163 
1164 // TODO: consider this namespace as public API
1165 namespace opencl_info {
1166 
1167 // Default types
1168 
1169 template <typename Factory>
1172  __TBB_ASSERT(!f.devices().empty(), "No available devices");
1173  return *(f.devices().begin());
1174  }
1175 };
1176 
1179  opencl_device_list dl;
1180  cl_platform_id platform_id = devices.begin()->platform_id();
1181  for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1182  if (it->platform_id() == platform_id) {
1183  dl.add(*it);
1184  }
1185  }
1186  return dl;
1187  }
1188 };
1189 
1190 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1191 public:
1193 
1195 
1196 private:
1197  default_opencl_factory() = default;
1198 };
1199 
1202  return default_factory;
1203 }
1204 
1205 } // namespace opencl_info
1206 
1207 template <typename T, typename Factory>
1208 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1209 
1210 
1212  SOURCE,
1213  PRECOMPILED,
1214  SPIR
1215 };
1216 
1217 template <typename Factory = opencl_info::default_opencl_factory>
1219 public:
1220  typedef typename Factory::kernel_type kernel_type;
1221 
1222  opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1223  opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1224  opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1225 
1226  opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1227  opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1228  opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1230 
1232  // Set my_do_once_flag to the called state.
1233  std::call_once( my_do_once_flag, [](){} );
1234  }
1235 
1236  kernel_type get_kernel( const std::string& k ) const {
1237  return kernel_type( get_cl_kernel(k), my_factory );
1238  }
1239 
1240 private:
1241  opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1242  // Set my_do_once_flag to the called state.
1243  std::call_once( my_do_once_flag, [](){} );
1244  }
1245 
1246  cl_kernel get_cl_kernel( const std::string& k ) const {
1247  std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1248  cl_int err;
1249  cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1250  enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1251  return kernel;
1252  }
1253 
1254  class file_reader {
1255  public:
1256  file_reader( const std::string& filepath ) {
1257  std::ifstream file_descriptor( filepath, std::ifstream::binary );
1258  if ( !file_descriptor.is_open() ) {
1259  std::string str = std::string( "Could not open file: " ) + filepath;
1260  std::cerr << str << std::endl;
1261  throw str;
1262  }
1263  file_descriptor.seekg( 0, file_descriptor.end );
1264  size_t length = size_t( file_descriptor.tellg() );
1265  file_descriptor.seekg( 0, file_descriptor.beg );
1266  my_content.resize( length );
1267  char* begin = &*my_content.begin();
1268  file_descriptor.read( begin, length );
1269  file_descriptor.close();
1270  }
1271  const char* content() { return &*my_content.cbegin(); }
1272  size_t length() { return my_content.length(); }
1273  private:
1274  std::string my_content;
1275  };
1276 
1278  public:
1279  typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1280  opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1281  cl_uint num_devices, cl_device_id* device_list,
1282  const char* options, cl_callback_type callback,
1283  void* user_data ) {
1284  cl_int err = clBuildProgram( program, num_devices, device_list, options,
1285  callback, user_data );
1286  if( err == CL_SUCCESS )
1287  return;
1288  std::string str = std::string( "Failed to build program: " ) + name;
1289  if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1290  const opencl_device_list &devices = f.devices();
1291  for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1292  std::cerr << "Build log for device: " << (*d).name() << std::endl;
1293  size_t log_size;
1294  cl_int query_err = clGetProgramBuildInfo(
1295  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1296  &log_size );
1297  enforce_cl_retcode( query_err, "Failed to get build log size" );
1298  if( log_size ) {
1299  std::vector<char> output;
1300  output.resize( log_size );
1301  query_err = clGetProgramBuildInfo(
1302  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1303  output.size(), output.data(), NULL );
1304  enforce_cl_retcode( query_err, "Failed to get build output" );
1305  std::cerr << output.data() << std::endl;
1306  } else {
1307  std::cerr << "No build log available" << std::endl;
1308  }
1309  }
1310  }
1311  enforce_cl_retcode( err, str );
1312  }
1313  };
1314 
1316  public:
1317  template<typename Filter>
1318  opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1319  Filter filter, const char* message ) {
1320  for ( cl_uint i = 0; i < num_devices; ++i )
1321  if ( filter(device_list[i]) ) {
1322  device_list[i--] = device_list[--num_devices];
1323  }
1324  if ( !num_devices )
1325  enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1326  }
1327  };
1328 
1329  void init( const std::string& ) const {
1330  cl_uint num_devices;
1331  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1332  "Failed to get OpenCL context info" );
1333  if ( !num_devices )
1334  enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1335  cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1336  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1337  "Failed to get OpenCL context info" );
1338  const char *options = NULL;
1339  switch ( my_type ) {
1341  file_reader fr( my_arg_str );
1342  const char *s[] = { fr.content() };
1343  const size_t l[] = { fr.length() };
1344  cl_int err;
1345  my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1346  enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1348  num_devices, device_list,
1349  []( const opencl_device& d ) -> bool {
1350  return !d.compiler_available() || !d.linker_available();
1351  }, "No one device supports building program from sources" );
1353  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1354  options, /*callback*/ NULL, /*user data*/NULL );
1355  break;
1356  }
1358  options = "-x spir";
1360  file_reader fr( my_arg_str );
1361  std::vector<const unsigned char*> s(
1362  num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1363  std::vector<size_t> l( num_devices, fr.length() );
1364  std::vector<cl_int> bin_statuses( num_devices, -1 );
1365  cl_int err;
1366  my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1367  device_list, l.data(), s.data(),
1368  bin_statuses.data(), &err );
1369  if( err != CL_SUCCESS ) {
1370  std::string statuses_str;
1371  for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1372  statuses_str += std::to_string((*st));
1373  }
1374 
1375  enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1376  std::string( ", binary_statuses = " ) + statuses_str );
1377  }
1379  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1380  options, /*callback*/ NULL, /*user data*/NULL );
1381  break;
1382  }
1383  default:
1384  __TBB_ASSERT( false, "Unsupported program type" );
1385  }
1386  }
1387 
1388  Factory& my_factory;
1390  std::string my_arg_str;
1391  mutable cl_program my_cl_program;
1392  mutable std::once_flag my_do_once_flag;
1393 
1394  template <typename DeviceFilter>
1395  friend class opencl_factory;
1396 
1397  template <typename DeviceFilter>
1398  friend class opencl_factory<DeviceFilter>::kernel;
1399 };
1400 
1401 template<typename... Args>
1403 
1404 template<typename JP, typename Factory, typename... Ports>
1405 class opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1406  typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1407 public:
1408  typedef typename base_type::kernel_type kernel_type;
1409 
1410  opencl_node( graph &g, const kernel_type& kernel )
1411  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1412  {
1413  tbb::internal::fgt_multiinput_multioutput_node( tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1414  }
1415 
1416  opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1417  : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1418  {
1419  tbb::internal::fgt_multiinput_multioutput_node( tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1420  }
1421 
1422  template <typename DeviceSelector>
1423  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1424  : base_type( g, kernel, d, f)
1425  {
1426  tbb::internal::fgt_multiinput_multioutput_node( tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1427  }
1428 };
1429 
1430 template<typename JP, typename... Ports>
1431 class opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1433 public:
1434  typedef typename base_type::kernel_type kernel_type;
1435 
1436  opencl_node( graph &g, const kernel_type& kernel )
1437  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1438  {}
1439 
1440  template <typename DeviceSelector>
1441  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1442  : base_type( g, kernel, d, opencl_info::default_factory() )
1443  {}
1444 };
1445 
1446 template<typename... Ports>
1447 class opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1449 public:
1450  typedef typename base_type::kernel_type kernel_type;
1451 
1452  opencl_node( graph &g, const kernel_type& kernel )
1453  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1454  {}
1455 
1456  template <typename DeviceSelector>
1457  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1458  : base_type( g, kernel, d, opencl_info::default_factory() )
1459  {}
1460 };
1461 
1462 } // namespace interface10
1463 
1464 using interface10::opencl_node;
1468 using interface10::opencl_buffer;
1469 using interface10::opencl_subbuffer;
1470 using interface10::opencl_device;
1471 using interface10::opencl_device_list;
1472 using interface10::opencl_program;
1474 using interface10::opencl_async_msg;
1475 using interface10::opencl_factory;
1476 using interface10::opencl_range;
1477 
1478 } // namespace flow
1479 } // namespace tbb
1480 #endif /* __TBB_PREVIEW_OPENCL_NODE */
1481 
1482 #endif // __TBB_flow_graph_opencl_node_H
opencl_program(const std::string &program_name)
opencl_async_msg & operator=(const opencl_async_msg &dmsg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
opencl_subbuffer< T, Factory > subbuffer(size_t index, size_t size) const
opencl_program(Factory &factory, const std::string &program_name)
kernel_type get_kernel(const std::string &k) const
opencl_async_msg< void *, Factory > receive(const cl_event *e)
opencl_program(Factory &factory, const char *program_name)
void info(cl_device_info i, T &t) const
friend bool operator==(opencl_device d1, opencl_device d2)
Represents acquisition of a mutex.
Definition: spin_mutex.h:50
void store(value_type value)
Definition: atomic.h:313
opencl_node< tuple< Ports... >, JP, opencl_info::default_opencl_factory > base_type
void move(tbb_thread &t1, tbb_thread &t2)
Definition: tbb_thread.h:305
std::enable_if< is_memory_object_type< T >::value >::type receive_if_memory_object(const opencl_async_msg< T, Factory > &dmsg)
bool get_event_from_one_arg(cl_event &e, const opencl_async_msg< T, F > &msg)
const opencl_device_list & available_devices()
opencl_device_list(std::initializer_list< opencl_device > il)
void const char const char int ITT_FORMAT __itt_group_sync s
opencl_range(G &&global_work=std::initializer_list< int >({ 0 }), L &&local_work=std::initializer_list< int >({ 0, 0, 0 }))
value_type load() const
Definition: atomic.h:302
opencl_buffer(Factory &f, cl_mem m, size_t index, size_t size)
default_opencl_factory & default_factory()
virtual void map_memory(opencl_device, opencl_async_msg< void *, Factory > &)=0
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
Primary template for atomic.
Definition: atomic.h:403
bool built_in_kernel_available(const std::string &k) const
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
Base class for types that should not be copied or assigned.
Definition: tbb_stddef.h:331
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d, Factory &f)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void enqueue_map_buffer(opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
#define __TBB_override
Definition: tbb_stddef.h:240
const nd_range_type & local_range() const
void send_kernel(opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
const opencl_buffer & memory_object() const
T event_info(cl_event e, cl_event_info i)
kernel(const cl_kernel &k, factory_type &f)
The graph class.
static void CL_CALLBACK register_callback_func(cl_event, cl_int event_command_exec_status, void *data)
static void fgt_multiinput_multioutput_node(string_index, void *, void *)
opencl_node< tuple< Ports... >, queueing, opencl_info::default_opencl_factory > base_type
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)
void finalize(opencl_device device, FinalizeFn fn, Args &... args)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
A lock that occupies a single byte.
Definition: spin_mutex.h:36
opencl_device_list operator()(const opencl_device_list &devices)
bool extension_available(const std::string &ext) const
Base class for types that should not be assigned.
Definition: tbb_stddef.h:320
opencl_buffer_impl< Factory > impl_type
opencl_program(opencl_program_type type, const std::string &program_name)
tbb::atomic< opencl_device::device_id_type > my_curr_device_id
void send(opencl_device device, opencl_async_msg< opencl_buffer, Factory > &dependency) const
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
T device_info(cl_device_id d, cl_device_info i)
Release.
Definition: atomic.h:45
opencl_program(Factory &factory, opencl_program_type type, const std::string &program_name)
bool get_event_from_one_arg(cl_event &, const T &)
void const char const char int ITT_FORMAT __itt_group_sync x void const char * name
cl_kernel get_cl_kernel(const std::string &k) const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
opencl_async_msg(const opencl_async_msg &dmsg)
void send_data(opencl_device device, T &t, Rest &... args)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
void map_memory(opencl_device device, opencl_async_msg< void *, Factory > &dmsg) __TBB_override
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:377
const nd_range_type & global_range() const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
No ordering.
Definition: atomic.h:47
void process_arg_list(const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
opencl_buffer_impl(cl_mem m, size_t index, size_t size, Factory &f)
opencl_factory & operator=(const opencl_factory &)
opencl_subbuffer(const opencl_buffer< T, Factory > &owner, size_t index, size_t size)
K key_from_message(const T &t)
Definition: flow_graph.h:691
std::shared_ptr< tbb::atomic< bool > > my_callback_flag_ptr
opencl_program_builder(Factory &f, const std::string &name, cl_program program, cl_uint num_devices, cl_device_id *device_list, const char *options, cl_callback_type callback, void *user_data)
friend bool operator==(const opencl_buffer< T, Factory > &lhs, const opencl_buffer< T, Factory > &rhs)
void update_arg_list(cl_event e, T &t, Rest &... args)
opencl_async_msg< void *, Factory > send(opencl_device device, const cl_event *e)
T platform_info(cl_platform_id p, cl_platform_info i)
std::array< size_t, 3 > max_work_item_sizes() const
bool is_same_context(opencl_device::device_id_type d1, opencl_device::device_id_type d2)
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
opencl_program(Factory &factory, cl_program program)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp begin
is_typedef(native_object_type)
void const char const char int ITT_FORMAT __itt_group_sync p
A stage in a pipeline.
Definition: pipeline.h:61
bool init(const opencl_device_list &device_list)
Acquire.
Definition: atomic.h:43
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
std::array< range_index_type, 3 > nd_range_type
The graph related classes and functions.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
opencl_device_filter(cl_uint &num_devices, cl_device_id *device_list, Filter filter, const char *message)
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
K key_from_message(const opencl_async_msg< T, Factory > &dmsg)
opencl_device(cl_device_id cl_d_id, device_id_type device_id)
void set_command_queue(cl_command_queue cmd_queue)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
opencl_async_msg(const T &data, cl_event event)
void receive(const opencl_async_msg< opencl_buffer, Factory > &dependency) const
void update_one_arg(cl_event e, opencl_async_msg< T, F > &msg)
#define __TBB_STATIC_ASSERT(condition, msg)
Definition: tbb_stddef.h:532
void enforce_cl_retcode(cl_int err, std::string msg)
void enqueue_unmap_buffer(opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.