Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
flow_graph_opencl_node.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #ifndef __TBB_flow_graph_opencl_node_H
22 #define __TBB_flow_graph_opencl_node_H
23 
24 #include "tbb/tbb_config.h"
25 #if __TBB_PREVIEW_OPENCL_NODE
26 
27 #include "flow_graph.h"
28 
29 #include <vector>
30 #include <string>
31 #include <algorithm>
32 #include <iostream>
33 #include <fstream>
34 #include <map>
35 #include <mutex>
36 
37 #ifdef __APPLE__
38 #include <OpenCL/opencl.h>
39 #else
40 #include <CL/cl.h>
41 #endif
42 
43 namespace tbb {
44 namespace flow {
45 
46 namespace interface10 {
47 
48 template <typename DeviceFilter>
50 
51 namespace opencl_info {
53 }
54 
55 template <typename Factory>
57 
58 inline void enforce_cl_retcode(cl_int err, std::string msg) {
59  if (err != CL_SUCCESS) {
60  std::cerr << msg << "; error code: " << err << std::endl;
61  throw msg;
62  }
63 }
64 
65 template <typename T>
66 T event_info(cl_event e, cl_event_info i) {
67  T res;
68  enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
69  return res;
70 }
71 
72 template <typename T>
73 T device_info(cl_device_id d, cl_device_info i) {
74  T res;
75  enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
76  return res;
77 }
78 
79 template <>
80 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
81  size_t required;
82  enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
83 
84  char *buff = (char*)alloca(required);
85  enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
86 
87  return buff;
88 }
89 
90 template <typename T>
91 T platform_info(cl_platform_id p, cl_platform_info i) {
92  T res;
93  enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
94  return res;
95 }
96 
97 template <>
98 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
99  size_t required;
100  enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
101 
102  char *buff = (char*)alloca(required);
103  enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
104 
105  return buff;
106 }
107 
108 
110 public:
111  typedef size_t device_id_type;
112  enum : device_id_type {
115  };
116 
118 
119  opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
120 
122 
123  std::string platform_profile() const {
124  return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
125  }
126  std::string platform_version() const {
127  return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
128  }
129  std::string platform_name() const {
130  return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
131  }
132  std::string platform_vendor() const {
133  return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
134  }
135  std::string platform_extensions() const {
136  return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
137  }
138 
139  template <typename T>
140  void info( cl_device_info i, T &t ) const {
141  t = device_info<T>( my_cl_device_id, i );
142  }
143  std::string version() const {
144  // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
145  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
146  }
147  int major_version() const {
148  int major;
149  std::sscanf( version().c_str(), "OpenCL %d", &major );
150  return major;
151  }
152  int minor_version() const {
153  int major, minor;
154  std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
155  return minor;
156  }
158 #if CL_VERSION_2_0
159  if ( major_version() >= 2 )
160  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
161  else
162 #endif /* CL_VERSION_2_0 */
163  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
164  }
166 #if CL_VERSION_2_0
167  if ( major_version() >= 2 )
168  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
169  else
170 #endif /* CL_VERSION_2_0 */
171  return false;
172  }
173  std::array<size_t, 3> max_work_item_sizes() const {
174  return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
175  }
176  size_t max_work_group_size() const {
177  return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
178  }
179  bool built_in_kernel_available( const std::string& k ) const {
180  const std::string semi = ";";
181  // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
182  return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
183  }
184  std::string built_in_kernels() const {
185  return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
186  }
187  std::string name() const {
188  return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
189  }
190  cl_bool available() const {
191  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
192  }
193  cl_bool compiler_available() const {
194  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
195  }
196  cl_bool linker_available() const {
197  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
198  }
199  bool extension_available( const std::string &ext ) const {
200  const std::string space = " ";
201  // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
202  return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
203  }
204  std::string extensions() const {
205  return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
206  }
207 
208  cl_device_type type() const {
209  return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
210  }
211 
212  std::string vendor() const {
213  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
214  }
215 
216  cl_uint address_bits() const {
217  return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
218  }
219 
220  cl_device_id device_id() const {
221  return my_cl_device_id;
222  }
223 
224  cl_command_queue command_queue() const {
225  return my_cl_command_queue;
226  }
227 
228  void set_command_queue( cl_command_queue cmd_queue ) {
229  my_cl_command_queue = cmd_queue;
230  }
231 
232  cl_platform_id platform_id() const {
233  return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
234  }
235 
236 private:
237 
239  cl_device_id my_cl_device_id;
240  cl_command_queue my_cl_command_queue;
241 
242  friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
243 
244  template <typename DeviceFilter>
245  friend class opencl_factory;
246  template <typename Factory>
247  friend class opencl_memory;
248  template <typename Factory>
249  friend class opencl_program;
250 
251 #if TBB_USE_ASSERT
252  template <typename T, typename Factory>
253  friend class opencl_buffer;
254 #endif
255 };
256 
258  typedef std::vector<opencl_device> container_type;
259 public:
260  typedef container_type::iterator iterator;
261  typedef container_type::const_iterator const_iterator;
262  typedef container_type::size_type size_type;
263 
265  opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
266 
267  void add( opencl_device d ) { my_container.push_back( d ); }
268  size_type size() const { return my_container.size(); }
269  bool empty() const { return my_container.empty(); }
270  iterator begin() { return my_container.begin(); }
271  iterator end() { return my_container.end(); }
272  const_iterator begin() const { return my_container.begin(); }
273  const_iterator end() const { return my_container.end(); }
274  const_iterator cbegin() const { return my_container.cbegin(); }
275  const_iterator cend() const { return my_container.cend(); }
276 
277 private:
279 };
280 
281 namespace internal {
282 
283 // Retrieve all OpenCL devices from machine
285  opencl_device_list opencl_devices;
286 
287  cl_uint num_platforms;
288  enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
289 
290  std::vector<cl_platform_id> platforms(num_platforms);
291  enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
292 
293  cl_uint num_devices;
294  std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
295  cl_uint num_all_devices = 0;
296  while (platforms_it != platforms.end()) {
297  cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
298  if (err == CL_DEVICE_NOT_FOUND) {
299  platforms_it = platforms.erase(platforms_it);
300  }
301  else {
302  enforce_cl_retcode(err, "clGetDeviceIDs failed");
303  num_all_devices += num_devices;
304  ++platforms_it;
305  }
306  }
307 
308  std::vector<cl_device_id> devices(num_all_devices);
309  std::vector<cl_device_id>::iterator devices_it = devices.begin();
310  for (auto p = platforms.begin(); p != platforms.end(); ++p) {
311  enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
312  devices_it += num_devices;
313  }
314 
315  for (auto d = devices.begin(); d != devices.end(); ++d) {
316  opencl_devices.add(opencl_device((*d)));
317  }
318 
319  return opencl_devices;
320 }
321 
322 } // namespace internal
323 
324 // TODO: consider this namespace as public API
325 namespace opencl_info {
326 
328  // Static storage for all available OpenCL devices on machine
329  static const opencl_device_list my_devices = internal::find_available_devices();
330  return my_devices;
331  }
332 
333 } // namespace opencl_info
334 
335 
337 public:
338  virtual void call() = 0;
339  virtual ~callback_base() {}
340 };
341 
342 template <typename Callback, typename T>
343 class callback : public callback_base {
344  Callback my_callback;
346 public:
347  callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
348 
350  my_callback( my_data );
351  }
352 };
353 
354 template <typename T, typename Factory = opencl_info::default_opencl_factory>
355 class opencl_async_msg : public async_msg<T> {
356 public:
357  typedef T value_type;
358 
359  opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
360  my_callback_flag_ptr->store<tbb::relaxed>(false);
361  }
362 
363  explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
364  my_callback_flag_ptr->store<tbb::relaxed>(false);
365  }
366 
367  opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
368  my_callback_flag_ptr->store<tbb::relaxed>(false);
369  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
370  }
371 
372  T& data( bool wait = true ) {
373  if ( my_is_event && wait ) {
374  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
375  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
376  my_is_event = false;
377  }
378  return my_data;
379  }
380 
381  const T& data( bool wait = true ) const {
382  if ( my_is_event && wait ) {
383  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
384  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
385  my_is_event = false;
386  }
387  return my_data;
388  }
389 
390  opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
391  my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
393  {
394  if ( my_is_event )
395  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
396  }
397 
398  opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
399  my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
401  {
402  dmsg.my_is_event = false;
403  }
404 
407 
408  // Release original event
409  if ( my_is_event )
410  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
411 
412  my_data = dmsg.my_data;
413  my_event = dmsg.my_event;
414  my_is_event = dmsg.my_is_event;
415 
416  // Retain copied event
417  if ( my_is_event )
418  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
419 
421  return *this;
422  }
423 
425  if ( my_is_event )
426  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
427  }
428 
429  cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
430  void set_event( cl_event e ) const {
431  if ( my_is_event ) {
432  cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
433  if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
434  enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
435  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
436  }
437  my_is_event = true;
438  my_event = e;
439  clRetainEvent( my_event );
440  }
441 
442  void clear_event() const {
443  if ( my_is_event ) {
444  enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
445  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
446  }
447  my_is_event = false;
448  }
449 
450  template <typename Callback>
451  void register_callback( Callback c ) const {
452  __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
453  enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
454  }
455 
456  operator T&() { return data(); }
457  operator const T&() const { return data(); }
458 
459 protected:
460  // Overridden in this derived class to inform that
461  // async calculation chain is over
462  void finalize() const __TBB_override {
464  if (! my_callback_flag_ptr->fetch_and_store(true)) {
465  opencl_async_msg a(*this);
466  if (my_is_event) {
467  register_callback([a](const T& t) mutable {
468  a.set(t);
469  });
470  }
471  else {
472  a.set(my_data);
473  }
474  }
475  clear_event();
476  }
477 
478 private:
479  static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
480  tbb::internal::suppress_unused_warning( event_command_exec_status );
481  __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
482  __TBB_ASSERT( data, NULL );
483  callback_base *c = static_cast<callback_base*>(data);
484  c->call();
485  delete c;
486  }
487 
489  mutable cl_event my_event;
490  mutable bool my_is_event = false;
491 
492  std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
493 };
494 
495 template <typename K, typename T, typename Factory>
498  const T &t = dmsg.data( false );
499  __TBB_STATIC_ASSERT( true, "" );
500  return key_from_message<K, T>( t );
501 }
502 
503 template <typename Factory>
505 public:
507  opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
508  my_curr_device_id = my_factory->devices().begin()->my_device_id;
509  }
510 
512  if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
513  enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
514  }
515 
516  cl_mem get_cl_mem() const {
517  return my_cl_mem;
518  }
519 
520  void* get_host_ptr() {
521  if ( !my_host_ptr ) {
523  d.data();
524  __TBB_ASSERT( d.data() == my_host_ptr, NULL );
525  }
526  return my_host_ptr;
527  }
528 
529  Factory *factory() const { return my_factory; }
530 
533  if (e) {
535  } else {
537  }
538 
539  // Concurrent receives are prohibited so we do not worry about synchronization.
541  map_memory(*my_factory->devices().begin(), d);
543  my_host_ptr = d.data(false);
544  }
545  // Release the sending event
547  enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
548  my_sending_event_present = false;
549  }
550  return d;
551  }
552 
554  opencl_device::device_id_type device_id = device.my_device_id;
555  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
556  {
558  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
559  __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
561  my_factory->enqueue_unmap_buffer(device, *this, d);
562  my_sending_event = *d.get_event();
564  enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
565  my_host_ptr = NULL;
567  }
568  }
570  }
571 
572  // !e means that buffer has come from the host
574 
575  __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
577  }
578 
580 protected:
581  cl_mem my_cl_mem;
583  void* my_host_ptr;
584  Factory *my_factory;
585 
589 };
590 
591 template <typename Factory>
592 class opencl_buffer_impl : public opencl_memory<Factory> {
593  size_t my_size;
594 public:
595  opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
596  cl_int err;
597  this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
598  enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
599  }
600 
601  // The constructor for subbuffers.
602  opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
603  cl_int err;
604  cl_buffer_region region = { index, size };
605  this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
606  enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
607  }
608 
609  size_t size() const {
610  return my_size;
611  }
612 
614  this->my_factory->enqueue_map_buffer( device, *this, dmsg );
615  }
616 
617 #if TBB_USE_ASSERT
618  template <typename, typename>
619  friend class opencl_buffer;
620 #endif
621 };
622 
627 };
628 
629 template <typename T, typename Factory = opencl_info::default_opencl_factory>
631 
632 template <typename T, typename Factory = opencl_info::default_opencl_factory>
634 public:
635  typedef cl_mem native_object_type;
637  typedef Factory opencl_factory_type;
638 
639  template<access_type a> using iterator = T*;
640 
641  template <access_type a>
642  iterator<a> access() const {
643  T* ptr = (T*)my_impl->get_host_ptr();
644  __TBB_ASSERT( ptr, NULL );
645  return iterator<a>( ptr );
646  }
647 
648  T* data() const { return &access<read_write>()[0]; }
649 
650  template <access_type a = read_write>
651  iterator<a> begin() const { return access<a>(); }
652 
653  template <access_type a = read_write>
654  iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
655 
656  size_t size() const { return my_impl->size()/sizeof(T); }
657 
658  T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
659 
661  opencl_buffer( size_t size );
662  opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
663 
664  cl_mem native_object() const {
665  return my_impl->get_cl_mem();
666  }
667 
668  const opencl_buffer& memory_object() const {
669  return *this;
670  }
671 
672  void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
673  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
674  opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
675  const cl_event *e = d.get_event();
676  if ( e ) dependency.set_event( *e );
677  else dependency.clear_event();
678  }
679  void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
680  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
681  opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
682  const cl_event *e = d.get_event();
683  if ( e ) dependency.set_event( *e );
684  else dependency.clear_event();
685  }
686 
687  opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
688 private:
689  // The constructor for subbuffers.
690  opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
691 
693 
694  std::shared_ptr<impl_type> my_impl;
695 
696  friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
697  return lhs.my_impl == rhs.my_impl;
698  }
699 
700  template <typename>
701  friend class opencl_factory;
702  template <typename, typename>
703  friend class opencl_subbuffer;
704 };
705 
706 template <typename T, typename Factory>
707 class opencl_subbuffer : public opencl_buffer<T, Factory> {
709 public:
711  opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
712  opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
713 };
714 
715 template <typename T, typename Factory>
717  return opencl_subbuffer<T, Factory>( *this, index, size );
718 }
719 
720 
721 #define is_typedef(type) \
722  template <typename T> \
723  struct is_##type { \
724  template <typename C> \
725  static std::true_type check( typename C::type* ); \
726  template <typename C> \
727  static std::false_type check( ... ); \
728  \
729  static const bool value = decltype(check<T>(0))::value; \
730  }
731 
732 is_typedef( native_object_type );
733 is_typedef( memory_object_type );
734 
735 template <typename T>
736 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
737  return t.native_object();
738 }
739 
740 template <typename T>
742  return t;
743 }
744 
745 // send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
746 template <typename T, typename Factory>
748  const T &t = dmsg.data( false );
749  typedef typename T::memory_object_type mem_obj_t;
750  mem_obj_t mem_obj = t.memory_object();
752  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
753  mem_obj.send( device, d );
754  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
755 }
756 
757 template <typename T>
759  typedef typename T::memory_object_type mem_obj_t;
760  mem_obj_t mem_obj = t.memory_object();
762  mem_obj.send( device, dmsg );
763 }
764 
765 template <typename T>
767 
768 // receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
769 template <typename T, typename Factory>
771  const T &t = dmsg.data( false );
772  typedef typename T::memory_object_type mem_obj_t;
773  mem_obj_t mem_obj = t.memory_object();
775  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
776  mem_obj.receive( d );
777  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
778 }
779 
780 template <typename T>
782 
784 public:
785  typedef size_t range_index_type;
786  typedef std::array<range_index_type, 3> nd_range_type;
787 
788  template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
789  typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
790  opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
791  auto g_it = global_work.begin();
792  auto l_it = local_work.begin();
793  my_global_work_size = { size_t(-1), size_t(-1), size_t(-1) };
794  // my_local_work_size is still uninitialized
795  for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
796  __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
797  my_global_work_size[s] = *g_it;
798  my_local_work_size[s] = *l_it;
799  }
800  }
801 
802  const nd_range_type& global_range() const { return my_global_work_size; }
803  const nd_range_type& local_range() const { return my_local_work_size; }
804 
805 private:
808 };
809 
810 template <typename DeviceFilter>
811 class opencl_factory {
812 public:
815 
817  public:
818  kernel( const kernel& k ) : my_factory( k.my_factory ) {
819  // Clone my_cl_kernel via opencl_program
820  size_t ret_size = 0;
821 
822  std::vector<char> kernel_name;
823  for ( size_t curr_size = 32;; curr_size <<= 1 ) {
824  kernel_name.resize( curr_size <<= 1 );
825  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
826  if ( ret_size < curr_size ) break;
827  }
828 
829  cl_program program;
830  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
831  __TBB_ASSERT( ret_size == sizeof(program), NULL );
832 
833  my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
834  }
835 
837  enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
838  }
839 
840  private:
842 
843  kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
844 
845  // Data
846  cl_kernel my_cl_kernel;
848 
849  template <typename DeviceFilter_>
850  friend class opencl_factory;
851 
852  template <typename Factory>
853  friend class opencl_program;
854  };
855 
856  typedef kernel kernel_type;
857 
858  // 'range_type' enables kernel_executor with range support
859  // it affects expectations for enqueue_kernel(.....) interface method
861 
864  if ( my_devices.size() ) {
865  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
866  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
867  }
868  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
869  }
870  }
871 
872  bool init( const opencl_device_list &device_list ) {
874  if ( !my_devices.size() ) {
875  my_devices = device_list;
876  return true;
877  }
878  return false;
879  }
880 
881 
882 private:
883  template <typename Factory>
885  cl_event const* e1 = dmsg.get_event();
886  cl_event e2;
887  cl_int err;
888  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
889  e1 == NULL ? 0 : 1, e1, &e2, &err );
890  enforce_cl_retcode( err, "Failed to map a buffer" );
891  dmsg.data( false ) = ptr;
892  dmsg.set_event( e2 );
893  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
894  }
895 
896 
897  template <typename Factory>
899  cl_event const* e1 = dmsg.get_event();
900  cl_event e2;
902  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
903  "Failed to unmap a buffer" );
904  dmsg.set_event( e2 );
905  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
906  }
907 
908  // --------- Kernel argument & event list helpers --------- //
909  template <size_t NUM_ARGS, typename T>
910  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
911  auto p = get_native_object(t);
912  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
913  }
914 
915  template <size_t NUM_ARGS, typename T, typename F>
916  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
917  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
918 
919  const cl_event * const e = msg.get_event();
920  if (e != NULL) {
921  events[num_events++] = *e;
922  }
923 
924  process_one_arg( kernel, events, num_events, place, msg.data(false) );
925  }
926 
927  template <size_t NUM_ARGS, typename T, typename ...Rest>
928  void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
929  process_one_arg( kernel, events, num_events, place, t );
930  process_arg_list( kernel, events, num_events, place, args... );
931  }
932 
933  template <size_t NUM_ARGS>
934  void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
935  // ------------------------------------------- //
936  template <typename T>
937  void update_one_arg( cl_event, T& ) {}
938 
939  template <typename T, typename F>
940  void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
941  msg.set_event( e );
942  }
943 
944  template <typename T, typename ...Rest>
945  void update_arg_list( cl_event e, T& t, Rest&... args ) {
946  update_one_arg( e, t );
947  update_arg_list( e, args... );
948  }
949 
950  void update_arg_list( cl_event ) {}
951  // ------------------------------------------- //
952 public:
953  template <typename ...Args>
954  void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
955  std::array<cl_event, sizeof...(Args)> events;
956  int num_events = 0;
957  int place = 0;
958  process_arg_list( kernel, events, num_events, place, args... );
959 
960  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
961 
962  update_arg_list(e, args...);
963 
964  // Release our own reference to cl_event
965  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
966  }
967 
968  // ------------------------------------------- //
969  template <typename T, typename ...Rest>
970  void send_data(opencl_device device, T& t, Rest&... args) {
971  send_if_memory_object( device, t );
972  send_data( device, args... );
973  }
974 
976  // ------------------------------------------- //
977 
978 private:
979  cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
980  const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
981  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
982  const typename range_type::nd_range_type& g_size = work_size.global_range();
983  const typename range_type::nd_range_type& l_size = work_size.local_range();
984  cl_uint s;
985  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
986  cl_event event;
988  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
989  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
990  "Failed to enqueue a kernel" );
991  return event;
992  }
993 
994  // ------------------------------------------- //
995  template <typename T>
996  bool get_event_from_one_arg( cl_event&, const T& ) {
997  return false;
998  }
999 
1000  template <typename T, typename F>
1001  bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1002  cl_event const *e_ptr = msg.get_event();
1003 
1004  if ( e_ptr != NULL ) {
1005  e = *e_ptr;
1006  return true;
1007  }
1008 
1009  return false;
1010  }
1011 
1012  template <typename T, typename ...Rest>
1013  bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1014  if ( get_event_from_one_arg( e, t ) ) {
1015  return true;
1016  }
1017 
1018  return get_event_from_args( e, args... );
1019  }
1020 
1021  bool get_event_from_args( cl_event& ) {
1022  return false;
1023  }
1024  // ------------------------------------------- //
1025 
1027  virtual ~finalize_fn() {}
1028  virtual void operator() () {}
1029  };
1030 
1031  template<typename Fn>
1032  struct finalize_fn_leaf : public finalize_fn {
1033  Fn my_fn;
1036  };
1037 
1038  static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1039  tbb::internal::suppress_unused_warning(event_command_exec_status);
1040  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1041 
1042  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1043  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1044  (*fn_ptr)();
1045 
1046  // Function pointer was created by 'new' & this callback must be called once only
1047  delete fn_ptr;
1048  }
1049 public:
1050  template <typename FinalizeFn, typename ...Args>
1051  void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1052  cl_event e;
1053 
1054  if ( get_event_from_args( e, args... ) ) {
1055  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1056  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1057  }
1058 
1059  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1060  }
1061 
1063  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1064  return my_devices;
1065  }
1066 
1067 private:
1070  // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1071  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1072  return true;
1073  return d1 == d2;
1074  }
1075 private:
1076  opencl_factory( const opencl_factory& );
1078 
1079  cl_context context() {
1080  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1081  return my_cl_context;
1082  }
1083 
1084  void init_once() {
1085  {
1087  if (!my_devices.size())
1088  my_devices = DeviceFilter()( opencl_info::available_devices() );
1089  }
1090 
1091  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1092  cl_platform_id platform_id = my_devices.begin()->platform_id();
1093  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1094  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1095 
1096  std::vector<cl_device_id> cl_device_ids;
1097  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1098  cl_device_ids.push_back((*d).my_cl_device_id);
1099  }
1100 
1101  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1102  cl_int err;
1103  cl_context ctx = clCreateContext(context_properties,
1104  (cl_uint)cl_device_ids.size(),
1105  cl_device_ids.data(),
1106  NULL, NULL, &err);
1107  enforce_cl_retcode(err, "Failed to create context");
1108  my_cl_context = ctx;
1109 
1110  size_t device_counter = 0;
1111  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1112  (*d).my_device_id = device_counter++;
1113  cl_int err2;
1114  cl_command_queue cq;
1115 #if CL_VERSION_2_0
1116  if ((*d).major_version() >= 2) {
1117  if ((*d).out_of_order_exec_mode_on_host_present()) {
1118  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1119  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1120  } else {
1121  cl_queue_properties props[] = { 0 };
1122  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1123  }
1124  } else
1125 #endif
1126  {
1127  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1128  // Suppress "declared deprecated" warning for the next line.
1129 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1130 #pragma GCC diagnostic push
1131 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1132 #endif
1133 #if _MSC_VER || __INTEL_COMPILER
1134 #pragma warning( push )
1135 #if __INTEL_COMPILER
1136 #pragma warning (disable: 1478)
1137 #else
1138 #pragma warning (disable: 4996)
1139 #endif
1140 #endif
1141  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1142 #if _MSC_VER || __INTEL_COMPILER
1143 #pragma warning( pop )
1144 #endif
1145 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1146 #pragma GCC diagnostic pop
1147 #endif
1148  }
1149  enforce_cl_retcode(err2, "Failed to create command queue");
1150  (*d).my_cl_command_queue = cq;
1151  }
1152  }
1153 
1154  std::once_flag my_once_flag;
1156  cl_context my_cl_context;
1157 
1159 
1160  template <typename Factory>
1161  friend class opencl_program;
1162  template <typename Factory>
1163  friend class opencl_buffer_impl;
1164  template <typename Factory>
1165  friend class opencl_memory;
1166 }; // class opencl_factory
1167 
1168 // TODO: consider this namespace as public API
1169 namespace opencl_info {
1170 
1171 // Default types
1172 
1173 template <typename Factory>
1176  __TBB_ASSERT(!f.devices().empty(), "No available devices");
1177  return *(f.devices().begin());
1178  }
1179 };
1180 
1183  opencl_device_list dl;
1184  cl_platform_id platform_id = devices.begin()->platform_id();
1185  for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1186  if (it->platform_id() == platform_id) {
1187  dl.add(*it);
1188  }
1189  }
1190  return dl;
1191  }
1192 };
1193 
1194 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1195 public:
1197 
1199 
1200 private:
1201  default_opencl_factory() = default;
1202 };
1203 
1206  return default_factory;
1207 }
1208 
1209 } // namespace opencl_info
1210 
1211 template <typename T, typename Factory>
1212 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1213 
1214 
1216  SOURCE,
1217  PRECOMPILED,
1218  SPIR
1219 };
1220 
1221 template <typename Factory = opencl_info::default_opencl_factory>
1223 public:
1224  typedef typename Factory::kernel_type kernel_type;
1225 
1226  opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1227  opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1228  opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1229 
1230  opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1231  opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1232  opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1234 
1236  // Set my_do_once_flag to the called state.
1237  std::call_once( my_do_once_flag, [](){} );
1238  }
1239 
1240  kernel_type get_kernel( const std::string& k ) const {
1241  return kernel_type( get_cl_kernel(k), my_factory );
1242  }
1243 
1244 private:
1245  opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1246  // Set my_do_once_flag to the called state.
1247  std::call_once( my_do_once_flag, [](){} );
1248  }
1249 
1250  cl_kernel get_cl_kernel( const std::string& k ) const {
1251  std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1252  cl_int err;
1253  cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1254  enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1255  return kernel;
1256  }
1257 
1258  class file_reader {
1259  public:
1260  file_reader( const std::string& filepath ) {
1261  std::ifstream file_descriptor( filepath, std::ifstream::binary );
1262  if ( !file_descriptor.is_open() ) {
1263  std::string str = std::string( "Could not open file: " ) + filepath;
1264  std::cerr << str << std::endl;
1265  throw str;
1266  }
1267  file_descriptor.seekg( 0, file_descriptor.end );
1268  size_t length = size_t( file_descriptor.tellg() );
1269  file_descriptor.seekg( 0, file_descriptor.beg );
1270  my_content.resize( length );
1271  char* begin = &*my_content.begin();
1272  file_descriptor.read( begin, length );
1273  file_descriptor.close();
1274  }
1275  const char* content() { return &*my_content.cbegin(); }
1276  size_t length() { return my_content.length(); }
1277  private:
1278  std::string my_content;
1279  };
1280 
1282  public:
1283  typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1284  opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1285  cl_uint num_devices, cl_device_id* device_list,
1286  const char* options, cl_callback_type callback,
1287  void* user_data ) {
1288  cl_int err = clBuildProgram( program, num_devices, device_list, options,
1289  callback, user_data );
1290  if( err == CL_SUCCESS )
1291  return;
1292  std::string str = std::string( "Failed to build program: " ) + name;
1293  if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1294  const opencl_device_list &devices = f.devices();
1295  for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1296  std::cerr << "Build log for device: " << (*d).name() << std::endl;
1297  size_t log_size;
1298  cl_int query_err = clGetProgramBuildInfo(
1299  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1300  &log_size );
1301  enforce_cl_retcode( query_err, "Failed to get build log size" );
1302  if( log_size ) {
1303  std::vector<char> output;
1304  output.resize( log_size );
1305  query_err = clGetProgramBuildInfo(
1306  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1307  output.size(), output.data(), NULL );
1308  enforce_cl_retcode( query_err, "Failed to get build output" );
1309  std::cerr << output.data() << std::endl;
1310  } else {
1311  std::cerr << "No build log available" << std::endl;
1312  }
1313  }
1314  }
1315  enforce_cl_retcode( err, str );
1316  }
1317  };
1318 
1320  public:
1321  template<typename Filter>
1322  opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1323  Filter filter, const char* message ) {
1324  for ( cl_uint i = 0; i < num_devices; ++i )
1325  if ( filter(device_list[i]) ) {
1326  device_list[i--] = device_list[--num_devices];
1327  }
1328  if ( !num_devices )
1329  enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1330  }
1331  };
1332 
1333  void init( const std::string& ) const {
1334  cl_uint num_devices;
1335  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1336  "Failed to get OpenCL context info" );
1337  if ( !num_devices )
1338  enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1339  cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1340  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1341  "Failed to get OpenCL context info" );
1342  const char *options = NULL;
1343  switch ( my_type ) {
1345  file_reader fr( my_arg_str );
1346  const char *s[] = { fr.content() };
1347  const size_t l[] = { fr.length() };
1348  cl_int err;
1349  my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1350  enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1352  num_devices, device_list,
1353  []( const opencl_device& d ) -> bool {
1354  return !d.compiler_available() || !d.linker_available();
1355  }, "No one device supports building program from sources" );
1357  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1358  options, /*callback*/ NULL, /*user data*/NULL );
1359  break;
1360  }
1362  options = "-x spir";
1364  file_reader fr( my_arg_str );
1365  std::vector<const unsigned char*> s(
1366  num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1367  std::vector<size_t> l( num_devices, fr.length() );
1368  std::vector<cl_int> bin_statuses( num_devices, -1 );
1369  cl_int err;
1370  my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1371  device_list, l.data(), s.data(),
1372  bin_statuses.data(), &err );
1373  if( err != CL_SUCCESS ) {
1374  std::string statuses_str;
1375  for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1376  statuses_str += std::to_string((*st));
1377  }
1378 
1379  enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1380  std::string( ", binary_statuses = " ) + statuses_str );
1381  }
1383  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1384  options, /*callback*/ NULL, /*user data*/NULL );
1385  break;
1386  }
1387  default:
1388  __TBB_ASSERT( false, "Unsupported program type" );
1389  }
1390  }
1391 
1392  Factory& my_factory;
1394  std::string my_arg_str;
1395  mutable cl_program my_cl_program;
1396  mutable std::once_flag my_do_once_flag;
1397 
1398  template <typename DeviceFilter>
1399  friend class opencl_factory;
1400 
1401  template <typename DeviceFilter>
1402  friend class opencl_factory<DeviceFilter>::kernel;
1403 };
1404 
1405 template<typename... Args>
1407 
1408 template<typename JP, typename Factory, typename... Ports>
1409 class opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1410  typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1411 public:
1412  typedef typename base_type::kernel_type kernel_type;
1413 
1414  opencl_node( graph &g, const kernel_type& kernel )
1415  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1416  {
1417  tbb::internal::fgt_multiinput_multioutput_node( tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1418  }
1419 
1420  opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1421  : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1422  {
1423  tbb::internal::fgt_multiinput_multioutput_node( tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1424  }
1425 
1426  template <typename DeviceSelector>
1427  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1428  : base_type( g, kernel, d, f)
1429  {
1430  tbb::internal::fgt_multiinput_multioutput_node( tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1431  }
1432 };
1433 
1434 template<typename JP, typename... Ports>
1435 class opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1437 public:
1438  typedef typename base_type::kernel_type kernel_type;
1439 
1440  opencl_node( graph &g, const kernel_type& kernel )
1441  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1442  {}
1443 
1444  template <typename DeviceSelector>
1445  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1446  : base_type( g, kernel, d, opencl_info::default_factory() )
1447  {}
1448 };
1449 
1450 template<typename... Ports>
1451 class opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1453 public:
1454  typedef typename base_type::kernel_type kernel_type;
1455 
1456  opencl_node( graph &g, const kernel_type& kernel )
1457  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1458  {}
1459 
1460  template <typename DeviceSelector>
1461  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1462  : base_type( g, kernel, d, opencl_info::default_factory() )
1463  {}
1464 };
1465 
1466 } // namespace interface10
1467 
1468 using interface10::opencl_node;
1472 using interface10::opencl_buffer;
1473 using interface10::opencl_subbuffer;
1474 using interface10::opencl_device;
1475 using interface10::opencl_device_list;
1476 using interface10::opencl_program;
1478 using interface10::opencl_async_msg;
1479 using interface10::opencl_factory;
1480 using interface10::opencl_range;
1481 
1482 } // namespace flow
1483 } // namespace tbb
1484 #endif /* __TBB_PREVIEW_OPENCL_NODE */
1485 
1486 #endif // __TBB_flow_graph_opencl_node_H
bool get_event_from_one_arg(cl_event &, const T &)
opencl_range(G &&global_work=std::initializer_list< int >({ 0 }), L &&local_work=std::initializer_list< int >({ 0, 0, 0 }))
opencl_device(cl_device_id cl_d_id, device_id_type device_id)
void map_memory(opencl_device device, opencl_async_msg< void *, Factory > &dmsg) __TBB_override
#define __TBB_override
Definition: tbb_stddef.h:244
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)
opencl_program(const std::string &program_name)
void receive(const opencl_async_msg< opencl_buffer, Factory > &dependency) const
bool extension_available(const std::string &ext) const
opencl_program(Factory &factory, const std::string &program_name)
No ordering.
Definition: atomic.h:51
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
#define __TBB_STATIC_ASSERT(condition, msg)
Definition: tbb_stddef.h:536
tbb::atomic< opencl_device::device_id_type > my_curr_device_id
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
friend bool operator==(opencl_device d1, opencl_device d2)
opencl_node< tuple< Ports... >, JP, opencl_info::default_opencl_factory > base_type
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
T device_info(cl_device_id d, cl_device_info i)
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp begin
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:381
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
opencl_device_list operator()(const opencl_device_list &devices)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
opencl_program(Factory &factory, cl_program program)
Base class for types that should not be copied or assigned.
Definition: tbb_stddef.h:335
opencl_async_msg< void *, Factory > receive(const cl_event *e)
K key_from_message(const T &t)
Definition: flow_graph.h:695
std::array< size_t, 3 > max_work_item_sizes() const
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
opencl_buffer_impl(cl_mem m, size_t index, size_t size, Factory &f)
opencl_async_msg(const T &data, cl_event event)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
void enqueue_map_buffer(opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
T platform_info(cl_platform_id p, cl_platform_info i)
bool init(const opencl_device_list &device_list)
const nd_range_type & local_range() const
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
opencl_program(Factory &factory, const char *program_name)
is_typedef(native_object_type)
Acquire.
Definition: atomic.h:47
void const char const char int ITT_FORMAT __itt_group_sync p
A lock that occupies a single byte.
Definition: spin_mutex.h:40
const opencl_buffer & memory_object() const
bool is_same_context(opencl_device::device_id_type d1, opencl_device::device_id_type d2)
Base class for types that should not be assigned.
Definition: tbb_stddef.h:324
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
default_opencl_factory & default_factory()
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
void info(cl_device_info i, T &t) const
opencl_subbuffer(const opencl_buffer< T, Factory > &owner, size_t index, size_t size)
kernel_type get_kernel(const std::string &k) const
void update_one_arg(cl_event e, opencl_async_msg< T, F > &msg)
void enqueue_unmap_buffer(opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
value_type load() const
Definition: atomic.h:306
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void send_data(opencl_device device, T &t, Rest &... args)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
void set_command_queue(cl_command_queue cmd_queue)
void process_arg_list(const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
The graph class.
static void fgt_multiinput_multioutput_node(string_index, void *, void *)
void enforce_cl_retcode(cl_int err, std::string msg)
bool built_in_kernel_available(const std::string &k) const
opencl_buffer_impl< Factory > impl_type
Represents acquisition of a mutex.
Definition: spin_mutex.h:54
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
opencl_program(Factory &factory, opencl_program_type type, const std::string &program_name)
bool get_event_from_one_arg(cl_event &e, const opencl_async_msg< T, F > &msg)
cl_kernel get_cl_kernel(const std::string &k) const
static void CL_CALLBACK register_callback_func(cl_event, cl_int event_command_exec_status, void *data)
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d, Factory &f)
K key_from_message(const opencl_async_msg< T, Factory > &dmsg)
void move(tbb_thread &t1, tbb_thread &t2)
Definition: tbb_thread.h:309
const opencl_device_list & available_devices()
virtual void map_memory(opencl_device, opencl_async_msg< void *, Factory > &)=0
Primary template for atomic.
Definition: atomic.h:407
void const char const char int ITT_FORMAT __itt_group_sync s
T event_info(cl_event e, cl_event_info i)
const nd_range_type & global_range() const
void update_arg_list(cl_event e, T &t, Rest &... args)
void send_kernel(opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
A stage in a pipeline.
Definition: pipeline.h:65
void send(opencl_device device, opencl_async_msg< opencl_buffer, Factory > &dependency) const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
opencl_subbuffer< T, Factory > subbuffer(size_t index, size_t size) const
void finalize(opencl_device device, FinalizeFn fn, Args &... args)
std::shared_ptr< tbb::atomic< bool > > my_callback_flag_ptr
opencl_async_msg< void *, Factory > send(opencl_device device, const cl_event *e)
The graph related classes and functions.
opencl_program(opencl_program_type type, const std::string &program_name)
opencl_async_msg(const opencl_async_msg &dmsg)
opencl_device_list(std::initializer_list< opencl_device > il)
opencl_async_msg & operator=(const opencl_async_msg &dmsg)
std::enable_if< is_memory_object_type< T >::value >::type receive_if_memory_object(const opencl_async_msg< T, Factory > &dmsg)
friend bool operator==(const opencl_buffer< T, Factory > &lhs, const opencl_buffer< T, Factory > &rhs)
opencl_factory & operator=(const opencl_factory &)
void store(value_type value)
Definition: atomic.h:317
void const char const char int ITT_FORMAT __itt_group_sync x void const char * name
opencl_buffer(Factory &f, cl_mem m, size_t index, size_t size)
opencl_program_builder(Factory &f, const std::string &name, cl_program program, cl_uint num_devices, cl_device_id *device_list, const char *options, cl_callback_type callback, void *user_data)
Release.
Definition: atomic.h:49
kernel(const cl_kernel &k, factory_type &f)
opencl_node< tuple< Ports... >, queueing, opencl_info::default_opencl_factory > base_type
std::array< range_index_type, 3 > nd_range_type
opencl_device_filter(cl_uint &num_devices, cl_device_id *device_list, Filter filter, const char *message)

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.