HalideBuffer.h
1/** \file
2 * Defines a Buffer type that wraps halide_buffer_t and adds
3 * functionality, and methods for more conveniently iterating over the
4 * samples in a halide_buffer_t outside of Halide code. */
5
6#ifndef HALIDE_RUNTIME_BUFFER_H
7#define HALIDE_RUNTIME_BUFFER_H
8
9#include <algorithm>
10#include <atomic>
11#include <cassert>
12#include <cstdint>
13#include <cstdlib>
14#include <cstring>
15#include <limits>
16#include <memory>
17#include <type_traits>
18#include <vector>
19
20#ifdef __APPLE__
21#include <AvailabilityVersions.h>
22#include <TargetConditionals.h>
23#endif
24
25#if defined(__has_feature)
26#if __has_feature(memory_sanitizer)
27#include <sanitizer/msan_interface.h>
28#endif
29#endif
30
31#include "HalideRuntime.h"
32
33#ifdef _MSC_VER
34#include <malloc.h>
35#define HALIDE_ALLOCA _alloca
36#else
37#define HALIDE_ALLOCA __builtin_alloca
38#endif
39
40// gcc 5.1 has a false positive warning on this code
41#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
42#pragma GCC diagnostic ignored "-Warray-bounds"
43#endif
44
45#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
46#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
47#endif
48
49#ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
50// Conservatively align buffer allocations to 128 bytes by default.
51// This is enough alignment for all the platforms currently in use.
52// Redefine this in your compiler settings if you desire more/less alignment.
53#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
54#endif
55
56static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT - 1)) == 0),
57 "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
58
59// Unfortunately, not all C++17 runtimes support aligned_alloc
60 // (it may depend on OS/SDK version); this is provided as an opt-out
61// if you are compiling on a platform that doesn't provide a (good)
62// implementation. (Note that we actually use the C11 `::aligned_alloc()`
63// rather than the C++17 `std::aligned_alloc()` because at least one platform
64// we found supports the former but not the latter.)
65#ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
66
67// clang-format off
68#ifdef _MSC_VER
69
70 // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
71 // has stated they probably never will, so, always default it off here.
72 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
73
74#elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
75
76 // Android doesn't provide aligned_alloc until API 28
77 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
78
79#elif defined(__APPLE__)
80
81 #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
82
83 // macOS doesn't provide aligned_alloc until 10.15
84 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
85
86 #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
87
88 // iOS doesn't provide aligned_alloc until 14.0
89 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
90
91 #else
92
93 // Assume it's ok on all other Apple targets
94 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
95
96 #endif
97
98#else
99
100 #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
101
102 // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
103 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
104
105 #else
106
107 // Not Windows, Android, or Apple: just assume it's ok
108 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
109
110 #endif
111
112#endif
113// clang-format on
114
115#endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
116
117namespace Halide {
118namespace Runtime {
119
120// Forward-declare our Buffer class
121template<typename T, int Dims, int InClassDimStorage>
122class Buffer;
123
124// A helper to check if a parameter pack is entirely implicitly
125// int-convertible to use with std::enable_if
126template<typename... Args>
127struct AllInts : std::false_type {};
128
129template<>
130struct AllInts<> : std::true_type {};
131
132template<typename T, typename... Args>
133struct AllInts<T, Args...> {
134 static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
135};
136
137// Floats and doubles are technically implicitly int-convertible, but
138// doing so produces a warning we treat as an error, so just disallow
139// it here.
140template<typename... Args>
141struct AllInts<float, Args...> : std::false_type {};
142
143template<typename... Args>
144struct AllInts<double, Args...> : std::false_type {};
145
146namespace Internal {
147// A helper to detect if there are any zeros in a container
148template<typename Container>
149bool any_zero(const Container &c) {
150 for (int i : c) {
151 if (i == 0) {
152 return true;
153 }
154 }
155 return false;
156}
157
158struct DefaultAllocatorFns {
159 static inline void *(*default_allocate_fn)(size_t) = nullptr;
160 static inline void (*default_deallocate_fn)(void *) = nullptr;
161};
162} // namespace Internal
163
164/** A struct acting as a header for allocations owned by the Buffer
165 * class itself. */
166struct AllocationHeader {
167 void (*deallocate_fn)(void *);
168 std::atomic<int> ref_count;
169
170 // Note that ref_count always starts at 1
171 explicit AllocationHeader(void (*deallocate_fn)(void *))
172 : deallocate_fn(deallocate_fn), ref_count(1) {
173 }
174};
175
176/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
177enum struct BufferDeviceOwnership : int {
178 Allocated, ///> halide_device_free will be called when device ref count goes to zero
179 WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
180 Unmanaged, ///> No free routine will be called when device ref count goes to zero
181 AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
182 Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
183};
184
185/** A similar struct for managing device allocations. */
186struct DeviceRefCount {
187 // This is only ever constructed when there's something to manage,
188 // so start at one.
189 std::atomic<int> count{1};
190 BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
191};
192
193constexpr int AnyDims = -1;
194
195/** A templated Buffer class that wraps halide_buffer_t and adds
196 * functionality. When using Halide from C++, this is the preferred
197 * way to create input and output buffers. The overhead of using this
198 * class relative to a naked halide_buffer_t is minimal - it uses another
199 * ~16 bytes on the stack, and does no dynamic allocations when using
200 * it to represent existing memory of a known maximum dimensionality.
201 *
202 * The template parameter T is the element type. For buffers where the
203 * element type is unknown, or may vary, use void or const void.
204 *
205 * The template parameter Dims is the number of dimensions. For buffers where
206 * the dimensionality is unknown at compile time, or may vary, use AnyDims.
207 *
208 * InClassDimStorage is the maximum number of dimensions that can be represented
209 * using space inside the class itself. Set it to the maximum dimensionality
210 * you expect this buffer to be. If the actual dimensionality exceeds
211 * this, heap storage is allocated to track the shape of the buffer.
212 * InClassDimStorage defaults to 4, which should cover nearly all usage.
213 *
214 * The class optionally allocates and owns memory for the image using
215 * a shared pointer allocated with the provided allocator. If they are
216 * null, malloc and free are used. Any device-side allocation is
217 * considered as owned if and only if the host-side allocation is
218 * owned. */
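// A minimal usage sketch (illustrative only; `my_filter` stands for any
// AOT-compiled Halide pipeline with a matching signature and is not part of
// this header):
//
//     Halide::Runtime::Buffer<float> input(640, 480);   // allocates and owns memory
//     input.fill(0.0f);
//     Halide::Runtime::Buffer<float> output(640, 480);
//     my_filter(input, output);   // both convert implicitly to halide_buffer_t *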
219template<typename T = void,
220 int Dims = AnyDims,
221 int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
222class Buffer {
223 /** The underlying halide_buffer_t */
224 halide_buffer_t buf = {};
225
226 /** Some in-class storage for shape of the dimensions. */
227 halide_dimension_t shape[InClassDimStorage];
228
229 /** The allocation owned by this Buffer. NULL if the Buffer does not
230 * own the memory. */
231 AllocationHeader *alloc = nullptr;
232
233 /** A reference count for the device allocation owned by this
234 * buffer. */
235 mutable DeviceRefCount *dev_ref_count = nullptr;
236
237 /** True if T is of type void or const void */
238 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
239
240 /** A type function that adds a const qualifier if T is a const type. */
241 template<typename T2>
242 using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
243
244 /** T unless T is (const) void, in which case (const)
245 * uint8_t. Useful for providing return types for operator() */
246 using not_void_T = typename std::conditional<T_is_void,
247 add_const_if_T_is_const<uint8_t>,
248 T>::type;
249
250 /** T with constness removed. Useful for return type of copy(). */
251 using not_const_T = typename std::remove_const<T>::type;
252
253 /** The type the elements are stored as. Equal to not_void_T
254 * unless T is a pointer, in which case uint64_t. Halide stores
255 * all pointer types as uint64s internally, even on 32-bit
256 * systems. */
257 using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
258
259public:
260 /** True if the Halide type is not void (or const void). */
261 static constexpr bool has_static_halide_type = !T_is_void;
262
263 /** Get the Halide type of T. Callers should not use the result if
264 * has_static_halide_type is false. */
265 static constexpr halide_type_t static_halide_type() {
266 return halide_type_of<typename std::remove_cv<T>::type>();
267 }
268
269 /** Does this Buffer own the host memory it refers to? */
270 bool owns_host_memory() const {
271 return alloc != nullptr;
272 }
273
274 static constexpr bool has_static_dimensions = (Dims != AnyDims);
275
276 /** Callers should not use the result if
277 * has_static_dimensions is false. */
278 static constexpr int static_dimensions() {
279 return Dims;
280 }
281
282 static_assert(!has_static_dimensions || static_dimensions() >= 0);
283
284private:
285 /** Increment the reference count of any owned allocation */
286 void incref() const {
287 if (owns_host_memory()) {
288 alloc->ref_count++;
289 }
290 if (buf.device) {
291 if (!dev_ref_count) {
292 // I seem to have a non-zero dev field but no
293 // reference count for it. I must have been given a
294 // device allocation by a Halide pipeline, and have
295 // never been copied from since. Take sole ownership
296 // of it.
297 dev_ref_count = new DeviceRefCount;
298 }
299 dev_ref_count->count++;
300 }
301 }
302
303 // Note that this is called "cropped" but can also encompass a slice/embed
304 // operation.
305 struct DevRefCountCropped : DeviceRefCount {
306 // We will only store Buffers that have a dynamic number of dimensions.
307 // Buffers that are cropped or sliced from need to first be converted to
308 // one with variable size. This is required because we cannot possibly
309 // know what the actual dimensionality is of the buffer this is a
310 // crop or slice from. Since cropping a sliced buffer is also possible,
311 // no optimizations can be made for cropped buffers either.
312 Buffer<T, AnyDims> cropped_from;
313 explicit DevRefCountCropped(const Buffer<T, AnyDims> &cropped_from)
314 : cropped_from(cropped_from) {
315 ownership = BufferDeviceOwnership::Cropped;
316 }
317 };
318
319 /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
320 void crop_from(const Buffer<T, AnyDims> &cropped_from) {
321 assert(dev_ref_count == nullptr);
322 dev_ref_count = new DevRefCountCropped(cropped_from);
323 }
324
325 /** Decrement the reference count of any owned allocation and free host
326 * and device memory if it hits zero. Sets alloc to nullptr. */
327 void decref(bool device_only = false) {
328 if (owns_host_memory() && !device_only) {
329 int new_count = --(alloc->ref_count);
330 if (new_count == 0) {
331 void (*fn)(void *) = alloc->deallocate_fn;
332 alloc->~AllocationHeader();
333 fn(alloc);
334 }
335 buf.host = nullptr;
336 alloc = nullptr;
337 set_host_dirty(false);
338 }
339 int new_count = 0;
340 if (dev_ref_count) {
341 new_count = --(dev_ref_count->count);
342 }
343 if (new_count == 0) {
344 if (buf.device) {
345 assert(!(alloc && device_dirty()) &&
346 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
347 "Call device_free explicitly if you want to drop dirty device-side data. "
348 "Call copy_to_host explicitly if you want the data copied to the host allocation "
349 "before the device allocation is freed.");
350 int result = halide_error_code_success;
351 if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
352 result = buf.device_interface->detach_native(nullptr, &buf);
353 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
354 result = buf.device_interface->device_and_host_free(nullptr, &buf);
355 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
356 result = buf.device_interface->device_release_crop(nullptr, &buf);
357 } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
358 result = buf.device_interface->device_free(nullptr, &buf);
359 }
360 // No reasonable way to return the error, but we can at least assert-fail in debug builds.
361 assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
362 (void)result;
363 }
364 if (dev_ref_count) {
365 if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
366 delete (DevRefCountCropped *)dev_ref_count;
367 } else {
368 delete dev_ref_count;
369 }
370 }
371 }
372 dev_ref_count = nullptr;
373 buf.device = 0;
374 buf.device_interface = nullptr;
375 }
376
377 void free_shape_storage() {
378 if (buf.dim != shape) {
379 delete[] buf.dim;
380 buf.dim = nullptr;
381 }
382 }
383
384 template<int DimsSpecified>
385 void make_static_shape_storage() {
386 static_assert(Dims == AnyDims || Dims == DimsSpecified,
387 "Number of arguments to Buffer() does not match static dimensionality");
388 buf.dimensions = DimsSpecified;
389 if constexpr (Dims == AnyDims) {
390 if constexpr (DimsSpecified <= InClassDimStorage) {
391 buf.dim = shape;
392 } else {
393 static_assert(DimsSpecified >= 1);
394 buf.dim = new halide_dimension_t[DimsSpecified];
395 }
396 } else {
397 static_assert(InClassDimStorage >= Dims);
398 buf.dim = shape;
399 }
400 }
401
402 void make_shape_storage(const int dimensions) {
403 if (Dims != AnyDims && Dims != dimensions) {
404 assert(false && "Number of arguments to Buffer() does not match static dimensionality");
405 }
406 // This should usually be inlined, so if dimensions is statically known,
407 // we can skip the call to new
408 buf.dimensions = dimensions;
409 buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
410 }
411
412 void copy_shape_from(const halide_buffer_t &other) {
413 // All callers of this ensure that buf.dimensions == other.dimensions.
414 make_shape_storage(other.dimensions);
415 std::copy(other.dim, other.dim + other.dimensions, buf.dim);
416 }
417
418 template<typename T2, int D2, int S2>
419 void move_shape_from(Buffer<T2, D2, S2> &&other) {
420 if (other.shape == other.buf.dim) {
421 copy_shape_from(other.buf);
422 } else {
423 buf.dim = other.buf.dim;
424 other.buf.dim = nullptr;
425 }
426 other.buf = halide_buffer_t();
427 }
428
429 /** Initialize the shape from a halide_buffer_t. */
430 void initialize_from_buffer(const halide_buffer_t &b,
431 BufferDeviceOwnership ownership) {
432 memcpy(&buf, &b, sizeof(halide_buffer_t));
433 copy_shape_from(b);
434 if (b.device) {
435 dev_ref_count = new DeviceRefCount;
436 dev_ref_count->ownership = ownership;
437 }
438 }
439
440 /** Initialize the shape from an array of ints */
441 void initialize_shape(const int *sizes) {
442 for (int i = 0; i < buf.dimensions; i++) {
443 buf.dim[i].min = 0;
444 buf.dim[i].extent = sizes[i];
445 if (i == 0) {
446 buf.dim[i].stride = 1;
447 } else {
448 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
449 }
450 }
451 }
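// A quick worked example of the dense layout the loop above produces
// (illustrative only): for sizes {640, 480, 3},
//
//     dim 0: min 0, extent 640, stride 1
//     dim 1: min 0, extent 480, stride 640      (1 * 640)
//     dim 2: min 0, extent   3, stride 307200   (640 * 480)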
452
453 /** Initialize the shape from a vector of extents */
454 void initialize_shape(const std::vector<int> &sizes) {
455 assert(buf.dimensions == (int)sizes.size());
456 initialize_shape(sizes.data());
457 }
458
459 /** Initialize the shape from the static shape of an array */
460 template<typename Array, size_t N>
461 void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
462 buf.dim[next].min = 0;
463 buf.dim[next].extent = (int)N;
464 if (next == 0) {
465 buf.dim[next].stride = 1;
466 } else {
467 initialize_shape_from_array_shape(next - 1, vals[0]);
468 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
469 }
470 }
471
472 /** Base case for the template recursion above. */
473 template<typename T2>
474 void initialize_shape_from_array_shape(int, const T2 &) {
475 }
476
477 /** Get the dimensionality of a multi-dimensional C array */
478 template<typename Array, size_t N>
479 static int dimensionality_of_array(Array (&vals)[N]) {
480 return dimensionality_of_array(vals[0]) + 1;
481 }
482
483 template<typename T2>
484 static int dimensionality_of_array(const T2 &) {
485 return 0;
486 }
487
488 /** Get the underlying halide_type_t of an array's element type. */
489 template<typename Array, size_t N>
490 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
491 return scalar_type_of_array(vals[0]);
492 }
493
494 template<typename T2>
495 static halide_type_t scalar_type_of_array(const T2 &) {
496 return halide_type_of<typename std::remove_cv<T2>::type>();
497 }
498
499 /** Crop a single dimension without handling device allocation. */
500 void crop_host(int d, int min, int extent) {
501 assert(dim(d).min() <= min);
502 assert(dim(d).max() >= min + extent - 1);
503 ptrdiff_t shift = min - dim(d).min();
504 if (buf.host != nullptr) {
505 buf.host += (shift * dim(d).stride()) * type().bytes();
506 }
507 buf.dim[d].min = min;
508 buf.dim[d].extent = extent;
509 }
510
511 /** Crop as many dimensions as are in rect, without handling device allocation. */
512 void crop_host(const std::vector<std::pair<int, int>> &rect) {
513 assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
514 int limit = (int)rect.size();
515 assert(limit <= dimensions());
516 for (int i = 0; i < limit; i++) {
517 crop_host(i, rect[i].first, rect[i].second);
518 }
519 }
520
521 void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
522 assert(buf.device_interface != nullptr);
523 if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
524 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
525 // is it possible to get to this point without incref having run at least once since
526 // the device field was set? (I.e. in the internal logic of crop. incref might have been
527 // called.)
528 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
529 result_host_cropped.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
530 } else {
531 result_host_cropped.crop_from(*this);
532 }
533 }
534 }
535
536 /** slice a single dimension without handling device allocation. */
537 void slice_host(int d, int pos) {
538 static_assert(Dims == AnyDims);
539 assert(dimensions() > 0);
540 assert(d >= 0 && d < dimensions());
541 assert(pos >= dim(d).min() && pos <= dim(d).max());
542 buf.dimensions--;
543 ptrdiff_t shift = pos - buf.dim[d].min;
544 if (buf.host != nullptr) {
545 buf.host += (shift * buf.dim[d].stride) * type().bytes();
546 }
547 for (int i = d; i < buf.dimensions; i++) {
548 buf.dim[i] = buf.dim[i + 1];
549 }
550 buf.dim[buf.dimensions] = {0, 0, 0};
551 }
552
553 void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
554 assert(buf.device_interface != nullptr);
555 if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
556 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
557 // is it possible to get to this point without incref having run at least once since
558 // the device field was set? (I.e. in the internal logic of slice. incref might have been
559 // called.)
560 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
561 // crop_from() is correct here, despite the fact that we are slicing.
562 result_host_sliced.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
563 } else {
564 // crop_from() is correct here, despite the fact that we are slicing.
565 result_host_sliced.crop_from(*this);
566 }
567 }
568 }
569
570public:
571 typedef T ElemType;
572
573 /** Read-only access to the shape */
574 class Dimension {
575 const halide_dimension_t &d;
576
577 public:
578 /** The lowest coordinate in this dimension */
579 HALIDE_ALWAYS_INLINE int min() const {
580 return d.min;
581 }
582
583 /** The number of elements in memory you have to step over to
584 * increment this coordinate by one. */
585 HALIDE_ALWAYS_INLINE int stride() const {
586 return d.stride;
587 }
588
589 /** The extent of the image along this dimension */
590 HALIDE_ALWAYS_INLINE int extent() const {
591 return d.extent;
592 }
593
594 /** The highest coordinate in this dimension */
595 HALIDE_ALWAYS_INLINE int max() const {
596 return min() + extent() - 1;
597 }
598
599 /** An iterator class, so that you can iterate over
600 * coordinates in a dimension using a range-based for loop. */
601 struct iterator {
602 int val;
603 int operator*() const {
604 return val;
605 }
606 bool operator!=(const iterator &other) const {
607 return val != other.val;
608 }
609 iterator &operator++() {
610 val++;
611 return *this;
612 }
613 };
614
615 /** An iterator that points to the min coordinate */
616 HALIDE_ALWAYS_INLINE iterator begin() const {
617 return {min()};
618 }
619
620 /** An iterator that points to one past the max coordinate */
621 HALIDE_ALWAYS_INLINE iterator end() const {
622 return {min() + extent()};
623 }
624
625 explicit Dimension(const halide_dimension_t &dim)
626 : d(dim) {
627 }
628 };
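// A sketch of iterating coordinates with the iterator above (illustrative
// only; `im` is a placeholder for any allocated Buffer):
//
//     for (int y : im.dim(1)) {
//         for (int x : im.dim(0)) {
//             im(x, y) = 0;
//         }
//     }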
629
630 /** Access the shape of the buffer */
631 HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
632 assert(i >= 0 && i < this->dimensions());
633 return Dimension(buf.dim[i]);
634 }
635
636 /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
637 // @{
638 int min(int i) const {
639 return dim(i).min();
640 }
641 int extent(int i) const {
642 return dim(i).extent();
643 }
644 int stride(int i) const {
645 return dim(i).stride();
646 }
647 // @}
648
649 /** The total number of elements this buffer represents. Equal to
650 * the product of the extents */
651 size_t number_of_elements() const {
652 return buf.number_of_elements();
653 }
654
655 /** Get the dimensionality of the buffer. */
656 int dimensions() const {
657 if constexpr (has_static_dimensions) {
658 return Dims;
659 } else {
660 return buf.dimensions;
661 }
662 }
663
664 /** Get the type of the elements. */
665 halide_type_t type() const {
666 return buf.type;
667 }
668
669 /** A pointer to the element with the lowest address. If all
670 * strides are positive, equal to the host pointer. */
671 T *begin() const {
672 assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
673 return (T *)buf.begin();
674 }
675
676 /** A pointer to one beyond the element with the highest address. */
677 T *end() const {
678 assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
679 return (T *)buf.end();
680 }
681
682 /** The total number of bytes spanned by the data in memory. */
683 size_t size_in_bytes() const {
684 return buf.size_in_bytes();
685 }
686
687 /** Reset the Buffer to be equivalent to a default-constructed Buffer
688 * of the same static type (if any); Buffer<void> will have its runtime
689 * type reset to uint8. */
690 void reset() {
691 *this = Buffer();
692 }
693
694 Buffer()
695 : shape() {
696 buf.type = static_halide_type();
697 // If Dims are statically known, must create storage that many.
698 // otherwise, make a zero-dimensional buffer.
699 constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
700 make_static_shape_storage<buf_dimensions>();
701 }
702
703 /** Make a Buffer from a halide_buffer_t */
704 explicit Buffer(const halide_buffer_t &buf,
705 BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
706 assert(T_is_void || buf.type == static_halide_type());
707 initialize_from_buffer(buf, ownership);
708 }
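// A sketch of wrapping a raw halide_buffer_t (illustrative only; `raw` is a
// placeholder for a halide_buffer_t obtained elsewhere). No ownership of the
// host allocation is taken:
//
//     Halide::Runtime::Buffer<void> untyped(raw);
//     Halide::Runtime::Buffer<float> typed(raw);   // asserts raw.type is float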
709
710 /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
711 template<typename T2, int D2, int S2>
712 friend class Buffer;
713
714private:
715 template<typename T2, int D2, int S2>
716 static void static_assert_can_convert_from() {
717 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
718 "Can't convert from a Buffer<const T> to a Buffer<T>");
719 static_assert(std::is_same<typename std::remove_const<T>::type,
720 typename std::remove_const<T2>::type>::value ||
721 T_is_void || Buffer<T2, D2, S2>::T_is_void,
722 "type mismatch constructing Buffer");
723 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
724 "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
725 }
726
727public:
728 static void set_default_allocate_fn(void *(*allocate_fn)(size_t)) {
729 Internal::DefaultAllocatorFns::default_allocate_fn = allocate_fn;
730 }
731 static void set_default_deallocate_fn(void (*deallocate_fn)(void *)) {
732 Internal::DefaultAllocatorFns::default_deallocate_fn = deallocate_fn;
733 }
734
735 /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
736 * If this can be determined at compile time, fail with a static assert; otherwise
737 * return a boolean based on runtime typing. */
738 template<typename T2, int D2, int S2>
739 static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
740 static_assert_can_convert_from<T2, D2, S2>();
741 if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
742 if (other.type() != static_halide_type()) {
743 return false;
744 }
745 }
746 if (Dims != AnyDims) {
747 if (other.dimensions() != Dims) {
748 return false;
749 }
750 }
751 return true;
752 }
753
754 /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
755 * cannot be constructed from some other Buffer type. */
756 template<typename T2, int D2, int S2>
757 static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
758 // Explicitly call static_assert_can_convert_from() here so
759 // that we always get compile-time checking, even if compiling with
760 // assertions disabled.
761 static_assert_can_convert_from<T2, D2, S2>();
762 assert(can_convert_from(other));
763 }
764
765 /** Copy constructor. Does not copy underlying data. */
766 Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
767 : buf(other.buf),
768 alloc(other.alloc) {
769 other.incref();
770 dev_ref_count = other.dev_ref_count;
771 copy_shape_from(other.buf);
772 }
773
774 /** Construct a Buffer from a Buffer of different dimensionality
775 * and type. Asserts that the type and dimensionality matches (at runtime,
776 * if one of the types is void). Note that this constructor is
777 * implicit. This, for example, lets you pass things like
778 * Buffer<T> or Buffer<const void> to functions expecting
779 * Buffer<const T>. */
780 template<typename T2, int D2, int S2>
781 Buffer(const Buffer<T2, D2, S2> &other)
782 : buf(other.buf),
783 alloc(other.alloc) {
784 assert_can_convert_from(other);
785 other.incref();
786 dev_ref_count = other.dev_ref_count;
787 copy_shape_from(other.buf);
788 }
789
790 /** Move constructor */
791 Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
792 : buf(other.buf),
793 alloc(other.alloc),
794 dev_ref_count(other.dev_ref_count) {
795 other.dev_ref_count = nullptr;
796 other.alloc = nullptr;
797 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
798 }
799
800 /** Move-construct a Buffer from a Buffer of different
801 * dimensionality and type. Asserts that the types match (at
802 * runtime if one of the types is void). */
803 template<typename T2, int D2, int S2>
804 Buffer(Buffer<T2, D2, S2> &&other)
805 : buf(other.buf),
806 alloc(other.alloc),
807 dev_ref_count(other.dev_ref_count) {
808 assert_can_convert_from(other);
809 other.dev_ref_count = nullptr;
810 other.alloc = nullptr;
811 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
812 }
813
814 /** Assign from another Buffer of possibly-different
815 * dimensionality and type. Asserts that the types match (at
816 * runtime if one of the types is void). */
817 template<typename T2, int D2, int S2>
818 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
819 if ((const void *)this == (const void *)&other) {
820 return *this;
821 }
822 assert_can_convert_from(other);
823 other.incref();
824 decref();
825 dev_ref_count = other.dev_ref_count;
826 alloc = other.alloc;
827 free_shape_storage();
828 buf = other.buf;
829 copy_shape_from(other.buf);
830 return *this;
831 }
832
833 /** Standard assignment operator */
834 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
835 // The cast to void* here is just to satisfy clang-tidy
836 if ((const void *)this == (const void *)&other) {
837 return *this;
838 }
839 other.incref();
840 decref();
841 dev_ref_count = other.dev_ref_count;
842 alloc = other.alloc;
843 free_shape_storage();
844 buf = other.buf;
845 copy_shape_from(other.buf);
846 return *this;
847 }
848
849 /** Move from another Buffer of possibly-different
850 * dimensionality and type. Asserts that the types match (at
851 * runtime if one of the types is void). */
852 template<typename T2, int D2, int S2>
853 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
854 assert_can_convert_from(other);
855 decref();
856 alloc = other.alloc;
857 other.alloc = nullptr;
858 dev_ref_count = other.dev_ref_count;
859 other.dev_ref_count = nullptr;
860 free_shape_storage();
861 buf = other.buf;
862 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
863 return *this;
864 }
865
866 /** Standard move-assignment operator */
867 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
868 decref();
869 alloc = other.alloc;
870 other.alloc = nullptr;
871 dev_ref_count = other.dev_ref_count;
872 other.dev_ref_count = nullptr;
873 free_shape_storage();
874 buf = other.buf;
875 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
876 return *this;
877 }
878
879 /** Check the product of the extents fits in memory. */
880 void check_overflow() {
881 size_t size = type().bytes();
882 for (int i = 0; i < dimensions(); i++) {
883 size *= dim(i).extent();
884 }
885 // We allow 2^31 or 2^63 bytes, so drop the top bit.
886 size = (size << 1) >> 1;
887 for (int i = 0; i < dimensions(); i++) {
888 size /= dim(i).extent();
889 }
890 assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
891 }
892
893 /** Allocate memory for this Buffer. Drops the reference to any
894 * owned memory. */
895 void allocate(void *(*allocate_fn)(size_t) = nullptr,
896 void (*deallocate_fn)(void *) = nullptr) {
897 // Drop any existing allocation
898 deallocate();
899
900 // Conservatively align images to (usually) 128 bytes. This is enough
901 // alignment for all the platforms we might use. Also ensure that the allocation
902 // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
903 constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
904
905 const auto align_up = [=](size_t value) -> size_t {
906 return (value + alignment - 1) & ~(alignment - 1);
907 };
908
909 size_t size = size_in_bytes();
910
911#if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
912 // Only use aligned_alloc() if no custom allocators are specified.
913 if (!allocate_fn && !deallocate_fn) {
914 // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
915 // on any supported platform, so we will just overallocate by 'alignment'
916 // so that the user storage also starts at an aligned point. This is a bit
917 // wasteful, but probably not a big deal.
918 static_assert(sizeof(AllocationHeader) <= alignment);
919 void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
920 assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
921 alloc = new (alloc_storage) AllocationHeader(free);
922 buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
923 return;
924 }
925 // else fall thru
926#endif
927 if (!allocate_fn) {
928 allocate_fn = Internal::DefaultAllocatorFns::default_allocate_fn;
929 if (!allocate_fn) {
930 allocate_fn = malloc;
931 }
932 }
933 if (!deallocate_fn) {
934 deallocate_fn = Internal::DefaultAllocatorFns::default_deallocate_fn;
935 if (!deallocate_fn) {
936 deallocate_fn = free;
937 }
938 }
939
940 static_assert(sizeof(AllocationHeader) <= alignment);
941
942 // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
943 // make sure this is OK for AllocationHeader, since it always goes at the start
944 static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
945
946 const size_t requested_size = align_up(size + alignment +
947 std::max(0, (int)sizeof(AllocationHeader) -
948 (int)sizeof(std::max_align_t)));
949 void *alloc_storage = allocate_fn(requested_size);
950 alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
951 uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
952 buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
953 }
954
955 /** Drop reference to any owned host or device memory, possibly
956 * freeing it, if this buffer held the last reference to
957 * it. Retains the shape of the buffer. Does nothing if this
958 * buffer did not allocate its own memory. */
959 void deallocate() {
960 decref();
961 }
962
963 /** Drop reference to any owned device memory, possibly freeing it
964 * if this buffer held the last reference to it. Asserts that
965 * device_dirty is false. */
966 void device_deallocate() {
967 decref(true);
968 }
969
970 /** Allocate a new image of the given size with a runtime
971 * type. Only used when you do know what size you want but you
972 * don't know statically what type the elements are. Pass zeroes
973 * to make a buffer suitable for bounds query calls. */
974 template<typename... Args,
975 typename = typename std::enable_if<AllInts<Args...>::value>::type>
976 Buffer(halide_type_t t, int first, Args... rest) {
977 if (!T_is_void) {
978 assert(static_halide_type() == t);
979 }
980 int extents[] = {first, (int)rest...};
981 buf.type = t;
982 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
983 make_static_shape_storage<buf_dimensions>();
984 initialize_shape(extents);
985 if (!Internal::any_zero(extents)) {
986 check_overflow();
987 allocate();
988 }
989 }
990
991 /** Allocate a new image of the given size. Pass zeroes to make a
992 * buffer suitable for bounds query calls. */
993 // @{
994
995 // The overload with one argument is 'explicit', so that
996 // (say) int is not implicitly convertible to Buffer<int>
997 explicit Buffer(int first) {
998 static_assert(!T_is_void,
999 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1000 int extents[] = {first};
1001 buf.type = static_halide_type();
1002 constexpr int buf_dimensions = 1;
1003 make_static_shape_storage<buf_dimensions>();
1004 initialize_shape(extents);
1005 if (first != 0) {
1006 check_overflow();
1007 allocate();
1008 }
1009 }
1010
1011 template<typename... Args,
1012 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1013 Buffer(int first, int second, Args... rest) {
1014 static_assert(!T_is_void,
1015 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1016 int extents[] = {first, second, (int)rest...};
1017 buf.type = static_halide_type();
1018 constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
1019 make_static_shape_storage<buf_dimensions>();
1020 initialize_shape(extents);
1021 if (!Internal::any_zero(extents)) {
1022 check_overflow();
1023 allocate();
1024 }
1025 }
1026 // @}
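// A minimal sketch of these constructors (illustrative only):
//
//     Buffer<float> im(640, 480);    // allocates and owns a 640x480 image
//     Buffer<float> query(0, 0);     // no allocation; usable for bounds queries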
1027
1028 /** Allocate a new image of unknown type using a vector of ints as the size. */
1029 Buffer(halide_type_t t, const std::vector<int> &sizes) {
1030 if (!T_is_void) {
1031 assert(static_halide_type() == t);
1032 }
1033 buf.type = t;
1034 // make_shape_storage() will do a runtime check that dimensionality matches.
1035 make_shape_storage((int)sizes.size());
1036 initialize_shape(sizes);
1037 if (!Internal::any_zero(sizes)) {
1038 check_overflow();
1039 allocate();
1040 }
1041 }
1042
1043 /** Allocate a new image of known type using a vector of ints as the size. */
1044 explicit Buffer(const std::vector<int> &sizes)
1045 : Buffer(static_halide_type(), sizes) {
1046 }
1047
1048private:
1049 // Create a copy of the sizes vector, ordered as specified by order.
1050 static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1051 assert(order.size() == sizes.size());
1052 std::vector<int> ordered_sizes(sizes.size());
1053 for (size_t i = 0; i < sizes.size(); ++i) {
1054 ordered_sizes[i] = sizes.at(order[i]);
1055 }
1056 return ordered_sizes;
1057 }
1058
1059public:
1060 /** Allocate a new image of unknown type using a vector of ints as the size and
1061 * a vector of indices indicating the storage order for each dimension. The
1062 * length of the sizes vector and the storage-order vector must match. For instance,
1063 * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1064 Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1065 : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1066 transpose(storage_order);
1067 }
1068
1069 Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1070 : Buffer(static_halide_type(), sizes, storage_order) {
1071 }
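// A sketch of the storage-order constructor (illustrative only): sizes are
// given in logical order {width, height, channels}, and {2, 0, 1} places the
// channel dimension innermost in memory (interleaved RGB):
//
//     Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
//     // rgb.dim(2).stride() == 1, rgb.dim(0).stride() == 3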
1072
1073 /** Make a Buffer that refers to a statically sized array. Does not
1074 * take ownership of the data, and does not set the host_dirty flag. */
1075 template<typename Array, size_t N>
1076 explicit Buffer(Array (&vals)[N]) {
1077 const int buf_dimensions = dimensionality_of_array(vals);
1078 buf.type = scalar_type_of_array(vals);
1079 buf.host = (uint8_t *)vals;
1080 make_shape_storage(buf_dimensions);
1081 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1082 }
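// A sketch of wrapping a statically sized C array (illustrative only); the
// resulting Buffer refers to `kernel` without owning it:
//
//     float kernel[3][3] = {{0, 1, 0}, {1, 4, 1}, {0, 1, 0}};
//     Buffer<float> k(kernel);   // two-dimensional, extents {3, 3}
//     float center = k(1, 1);    // == 4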
1083
1084 /** Initialize a Buffer of runtime type from a pointer and some
1085 * sizes. Assumes dense row-major packing and a min coordinate of
1086 * zero. Does not take ownership of the data and does not set the
1087 * host_dirty flag. */
1088 template<typename... Args,
1089 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1090 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1091 if (!T_is_void) {
1092 assert(static_halide_type() == t);
1093 }
1094 int extents[] = {first, (int)rest...};
1095 buf.type = t;
1096 buf.host = (uint8_t *)const_cast<void *>(data);
1097 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1098 make_static_shape_storage<buf_dimensions>();
1099 initialize_shape(extents);
1100 }
1101
1102 /** Initialize a Buffer from a pointer and some sizes. Assumes
1103 * dense row-major packing and a min coordinate of zero. Does not
1104 * take ownership of the data and does not set the host_dirty flag. */
1105 template<typename... Args,
1106 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1107 explicit Buffer(T *data, int first, Args &&...rest) {
1108 int extents[] = {first, (int)rest...};
1109 buf.type = static_halide_type();
1110 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1111 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1112 make_static_shape_storage<buf_dimensions>();
1113 initialize_shape(extents);
1114 }
1115
1116 /** Initialize a Buffer from a pointer and a vector of
1117 * sizes. Assumes dense row-major packing and a min coordinate of
1118 * zero. Does not take ownership of the data and does not set the
1119 * host_dirty flag. */
1120 explicit Buffer(T *data, const std::vector<int> &sizes) {
1121 buf.type = static_halide_type();
1122 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1123 make_shape_storage((int)sizes.size());
1124 initialize_shape(sizes);
1125 }
1126
1127 /** Initialize a Buffer of runtime type from a pointer and a
1128 * vector of sizes. Assumes dense row-major packing and a min
1129 * coordinate of zero. Does not take ownership of the data and
1130 * does not set the host_dirty flag. */
1131 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1132 if (!T_is_void) {
1133 assert(static_halide_type() == t);
1134 }
1135 buf.type = t;
1136 buf.host = (uint8_t *)const_cast<void *>(data);
1137 make_shape_storage((int)sizes.size());
1138 initialize_shape(sizes);
1139 }
1140
1141 /** Initialize an Buffer from a pointer to the min coordinate and
1142 * an array describing the shape. Does not take ownership of the
1143 * data, and does not set the host_dirty flag. */
1144 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1145 if (!T_is_void) {
1146 assert(static_halide_type() == t);
1147 }
1148 buf.type = t;
1149 buf.host = (uint8_t *)const_cast<void *>(data);
1150 make_shape_storage(d);
1151 for (int i = 0; i < d; i++) {
1152 buf.dim[i] = shape[i];
1153 }
1154 }
1155
1156 /** Initialize a Buffer from a pointer to the min coordinate and
1157 * a vector describing the shape. Does not take ownership of the
1158 * data, and does not set the host_dirty flag. */
1159 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1160 const std::vector<halide_dimension_t> &shape)
1161 : Buffer(t, data, (int)shape.size(), shape.data()) {
1162 }
1163
1164 /** Initialize a Buffer from a pointer to the min coordinate and
1165 * an array describing the shape. Does not take ownership of the
1166 * data and does not set the host_dirty flag. */
1167 explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1168 buf.type = static_halide_type();
1169 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1170 make_shape_storage(d);
1171 for (int i = 0; i < d; i++) {
1172 buf.dim[i] = shape[i];
1173 }
1174 }
1175
1176 /** Initialize a Buffer from a pointer to the min coordinate and
1177 * a vector describing the shape. Does not take ownership of the
1178 * data, and does not set the host_dirty flag. */
1179 explicit Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1180 : Buffer(data, (int)shape.size(), shape.data()) {
1181 }
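// A sketch of wrapping existing memory with an explicit shape (illustrative
// only; `data` is a placeholder for caller-owned storage of at least
// 640 * 480 floats):
//
//     std::vector<halide_dimension_t> shape = {{0, 640, 1}, {0, 480, 640}};
//     Buffer<float> im(data, shape);   // no copy; no ownership taken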
1182
1183 /** Destructor. Will release any underlying owned allocation if
1184 * this is the last reference to it. Will assert fail if there are
1185 * weak references to this Buffer outstanding. */
1186 ~Buffer() {
1187 decref();
1188 free_shape_storage();
1189 }
1190
1191 /** Get a pointer to the raw halide_buffer_t this wraps. */
1192 // @{
1193 halide_buffer_t *raw_buffer() {
1194 return &buf;
1195 }
1196
1197 const halide_buffer_t *raw_buffer() const {
1198 return &buf;
1199 }
1200 // @}
1201
1202 /** Provide a cast operator to halide_buffer_t *, so that
1203 * instances can be passed directly to Halide filters. */
1204 operator halide_buffer_t *() {
1205 return &buf;
1206 }
1207
1208 /** Return a typed reference to this Buffer. Useful for converting
1209 * a reference to a Buffer<void> to a reference to, for example, a
1210 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1211 * You can also optionally specify a new value for Dims; this is useful
1212 * mainly for removing the dimensionality constraint on a Buffer with
1213 * explicit dimensionality. Does a runtime assert if the source buffer type
1214 * is void or the new dimensionality is incompatible. */
1215 template<typename T2, int D2 = Dims>
1216 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1217 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1218 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1219 }
1220
1221 /** Return a const typed reference to this Buffer. Useful for converting
1222 * a reference to a Buffer<void> to a reference to, for example, a
1223 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1224 * You can also optionally specify a new value for Dims; this is useful
1225 * mainly for removing the dimensionality constraint on a Buffer with
1226 * explicit dimensionality. Does a runtime assert if the source buffer type
1227 * is void or the new dimensionality is incompatible. */
1228 template<typename T2, int D2 = Dims>
1229 HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1230 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1231 return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1232 }
1233
1234 /** Return an rval reference to this Buffer. Useful for converting
1235 * a reference to a Buffer<void> to a reference to, for example, a
1236 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1237 * You can also optionally specify a new value for Dims; this is useful
1238 * mainly for removing the dimensionality constraint on a Buffer with
1239 * explicit dimensionality. Does a runtime assert if the source buffer type
1240 * is void or the new dimensionality is incompatible. */
1241 template<typename T2, int D2 = Dims>
1242 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1243 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1244 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1245 }
1246
1247 /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1248 * to recapitulate the type argument. */
1249 // @{
1250 HALIDE_ALWAYS_INLINE
1251 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1252 // Note that we can skip the assert_can_convert_from(), since T -> const T
1253 // conversion is always legal.
1254 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1255 }
1256
1257 HALIDE_ALWAYS_INLINE
1258 const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1259 return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1260 }
1261
1262 HALIDE_ALWAYS_INLINE
1263 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1264 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1265 }
1266 // @}
1267
1268 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1269 * passing arguments */
1270 template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1271 operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() {
1272 return as_const();
1273 }
1274
1275 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1276 * passing arguments */
1277 template<typename TVoid,
1278 typename T2 = T,
1279 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1280 !std::is_void<T2>::value &&
1281 !std::is_const<T2>::value>::type>
1282 operator Buffer<TVoid, Dims, InClassDimStorage> &() {
1283 return as<TVoid, Dims>();
1284 }
1285
1286 /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1287 * passing arguments */
1288 template<typename TVoid,
1289 typename T2 = T,
1290 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1291 !std::is_void<T2>::value &&
1292 std::is_const<T2>::value>::type>
1293 operator Buffer<const TVoid, Dims, InClassDimStorage> &() {
1294 return as<const TVoid, Dims>();
1295 }
1296
1297 /** Conventional names for the first three dimensions. */
1298 // @{
1299 int width() const {
1300 return (dimensions() > 0) ? dim(0).extent() : 1;
1301 }
1302 int height() const {
1303 return (dimensions() > 1) ? dim(1).extent() : 1;
1304 }
1305 int channels() const {
1306 return (dimensions() > 2) ? dim(2).extent() : 1;
1307 }
1308 // @}
1309
1310 /** Conventional names for the min and max value of each dimension */
1311 // @{
1312 int left() const {
1313 return dim(0).min();
1314 }
1315
1316 int right() const {
1317 return dim(0).max();
1318 }
1319
1320 int top() const {
1321 return dim(1).min();
1322 }
1323
1324 int bottom() const {
1325 return dim(1).max();
1326 }
1327 // @}
1328
1329 /** Make a new image which is a deep copy of this image. Use crop
1330 * or slice followed by copy to make a copy of only a portion of
1331 * the image. The new image uses the same memory layout as the
1332 * original, with holes compacted away. Note that the returned
1333 * Buffer is always of a non-const type T (ie:
1334 *
1335 * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1336 *
1337 * which is always safe, since we are making a deep copy. (The caller
1338 * can easily cast it back to Buffer<const T> if desired, which is
1339 * always safe and free.)
1340 */
1341 Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1342 void (*deallocate_fn)(void *) = nullptr) const {
1343 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1344 dst.copy_from(*this);
1345 return dst;
1346 }
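// A minimal sketch of copy() (illustrative only; assumes a two-dimensional
// image with min (0, 0)): a deep copy of a Buffer<const T> yields a mutable
// Buffer<T> with its own storage:
//
//     void process(const Buffer<const uint8_t> &in) {
//         Buffer<uint8_t> scratch = in.copy();
//         scratch(0, 0) = 255;   // does not affect `in`
//     }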
1347
1348 /** Like copy(), but the copy is created in interleaved memory layout
1349 * (vs. keeping the same memory layout as the original). Requires that 'this'
1350 * has exactly 3 dimensions.
1351 */
1352 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1353 void (*deallocate_fn)(void *) = nullptr) const {
1354 static_assert(Dims == AnyDims || Dims == 3);
1355 assert(dimensions() == 3);
1356 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1357 dst.set_min(min(0), min(1), min(2));
1358 dst.allocate(allocate_fn, deallocate_fn);
1359 dst.copy_from(*this);
1360 return dst;
1361 }
1362
1363 /** Like copy(), but the copy is created in planar memory layout
1364 * (vs. keeping the same memory layout as the original).
1365 */
1366 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1367 void (*deallocate_fn)(void *) = nullptr) const {
1368 std::vector<int> mins, extents;
1369 const int dims = dimensions();
1370 mins.reserve(dims);
1371 extents.reserve(dims);
1372 for (int d = 0; d < dims; ++d) {
1373 mins.push_back(dim(d).min());
1374 extents.push_back(dim(d).extent());
1375 }
1376 Buffer<not_const_T, Dims, InClassDimStorage> dst(type(), nullptr, extents);
1377 dst.set_min(mins);
1378 dst.allocate(allocate_fn, deallocate_fn);
1379 dst.copy_from(*this);
1380 return dst;
1381 }
1382
1383 /** Make a copy of the Buffer which shares the underlying host and/or device
1384 * allocations as the existing Buffer. This is purely syntactic sugar for
1385 * cases where you have a const reference to a Buffer but need a temporary
1386 * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1387 * inline way to create a temporary. \code
1388 * void call_my_func(const Buffer<const uint8_t>& input) {
1389 * my_func(input.alias(), output);
1390 * }\endcode
1391 */
1392 Buffer<T, Dims, InClassDimStorage> alias() const {
1393 return *this;
1394 }
1395
1396 /** Fill a Buffer with the values at the same coordinates in
1397 * another Buffer. Restricts itself to coordinates contained
1398 * within the intersection of the two buffers. If the two Buffers
1399 * are not in the same coordinate system, you will need to
1400 * translate the argument Buffer first. E.g. if you're blitting a
1401 * sprite onto a framebuffer, you'll want to translate the sprite
1402 * to the correct location first like so: \code
1403 * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1404 */
1405 template<typename T2, int D2, int S2>
1406 void copy_from(Buffer<T2, D2, S2> src) {
1407 static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1408 assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1409 assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1410
1411 Buffer<T, Dims, InClassDimStorage> dst(*this);
1412
1413 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1414 assert(src.dimensions() == dst.dimensions());
1415
1416 // Trim the copy to the region in common
1417 const int d = dimensions();
1418 for (int i = 0; i < d; i++) {
1419 int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1420 int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1421 if (max_coord < min_coord) {
1422 // The buffers do not overlap.
1423 return;
1424 }
1425 dst.crop(i, min_coord, max_coord - min_coord + 1);
1426 src.crop(i, min_coord, max_coord - min_coord + 1);
1427 }
1428
1429 // If T is void, we need to do runtime dispatch to an
1430 // appropriately-typed lambda. We're copying, so we only care
1431 // about the element size. (If not, this should optimize away
1432 // into a static dispatch to the right-sized copy.)
1433 if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1434 using MemType = uint8_t;
1435 auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1436 auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1437 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1438 } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1439 using MemType = uint16_t;
1440 auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1441 auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1442 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1443 } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1444 using MemType = uint32_t;
1445 auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1446 auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1447 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1448 } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1449 using MemType = uint64_t;
1450 auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1451 auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1452 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1453 } else {
1454 assert(false && "type().bytes() must be 1, 2, 4, or 8");
1455 }
1456 set_host_dirty();
1457 }
1458
1459 /** Make an image that refers to a sub-range of this image along
1460 * the given dimension. Asserts that the crop region is within
1461 * the existing bounds: you cannot "crop outwards", even if you know there
1462 * is valid Buffer storage (e.g. because you already cropped inwards). */
1463 Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1464 // Make a fresh copy of the underlying buffer (but not a fresh
1465 // copy of the allocation, if there is one).
1466 Buffer<T, Dims, InClassDimStorage> im = *this;
1467
1468 // This guarantees the preexisting device ref is dropped if the
1469 // device_crop call fails and maintains the buffer in a consistent
1470 // state.
1471 im.device_deallocate();
1472
1473 im.crop_host(d, min, extent);
1474 if (buf.device_interface != nullptr) {
1475 complete_device_crop(im);
1476 }
1477 return im;
1478 }
1479
1480 /** Crop an image in-place along the given dimension. This does
1481 * not move any data around in memory - it just changes the min
1482 * and extent of the given dimension. */
1483 void crop(int d, int min, int extent) {
1484 // An optimization for non-device buffers. For the device case,
1485 // a temp buffer is required, so reuse the not-in-place version.
1486 // TODO(zalman|abadams): Are nop crops common enough to special
1487 // case the device part of the if to do nothing?
1488 if (buf.device_interface != nullptr) {
1489 *this = cropped(d, min, extent);
1490 } else {
1491 crop_host(d, min, extent);
1492 }
1493 }
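// A sketch of non-destructive vs. in-place cropping (illustrative only);
// both forms alias the original allocation rather than copying it:
//
//     Buffer<float> whole(640, 480);
//     Buffer<float> window = whole.cropped(0, 100, 64);   // x now spans [100, 163]
//     whole.crop(1, 200, 32);                             // y now spans [200, 231]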
1494
1495 /** Make an image that refers to a sub-rectangle of this image along
1496 * the first N dimensions. Asserts that the crop region is within
1497 * the existing bounds. The cropped image may drop any device handle
1498 * if the device_interface cannot accomplish the crop in-place. */
1499 Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1500 // Make a fresh copy of the underlying buffer (but not a fresh
1501 // copy of the allocation, if there is one).
1502 Buffer<T, Dims, InClassDimStorage> im = *this;
1503
1504 // This guarantees the preexisting device ref is dropped if the
1505 // device_crop call fails and maintains the buffer in a consistent
1506 // state.
1507 im.device_deallocate();
1508
1509 im.crop_host(rect);
1510 if (buf.device_interface != nullptr) {
1511 complete_device_crop(im);
1512 }
1513 return im;
1514 }
1515
1516 /** Crop an image in-place along the first N dimensions. This does
1517 * not move any data around in memory, nor does it free memory. It
1518 * just rewrites the min/extent of each dimension to refer to a
1519 * subregion of the same allocation. */
1520 void crop(const std::vector<std::pair<int, int>> &rect) {
1521 // An optimization for non-device buffers. For the device case,
1522 // a temp buffer is required, so reuse the not-in-place version.
1523 // TODO(zalman|abadams): Are nop crops common enough to special
1524 // case the device part of the if to do nothing?
1525 if (buf.device_interface != nullptr) {
1526 *this = cropped(rect);
1527 } else {
1528 crop_host(rect);
1529 }
1530 }
1531
1532 /** Make an image which refers to the same data using
1533 * translated coordinates in the given dimension. Positive values
1534 * move the image data to the right or down relative to the
1535 * coordinate system. Drops any device handle. */
1536 Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1537 Buffer<T, Dims, InClassDimStorage> im = *this;
1538 im.translate(d, dx);
1539 return im;
1540 }
1541
1542 /** Translate an image in-place along one dimension by changing
1543 * how it is indexed. Does not move any data around in memory. */
1544 void translate(int d, int delta) {
1545 assert(d >= 0 && d < this->dimensions());
1546 device_deallocate();
1547 buf.dim[d].min += delta;
1548 }
1549
1550 /** Make an image which refers to the same data translated along
1551 * the first N dimensions. */
1552 Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1553 Buffer<T, Dims, InClassDimStorage> im = *this;
1554 im.translate(delta);
1555 return im;
1556 }
1557
1558 /** Translate an image along the first N dimensions by changing
1559 * how it is indexed. Does not move any data around in memory. */
1560 void translate(const std::vector<int> &delta) {
1561 device_deallocate();
1562 assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1563 int limit = (int)delta.size();
1564 assert(limit <= dimensions());
1565 for (int i = 0; i < limit; i++) {
1566 translate(i, delta[i]);
1567 }
1568 }
1569
1570 /** Set the min coordinate of an image in the first N dimensions. */
1571 // @{
1572 void set_min(const std::vector<int> &mins) {
1573 assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1574 device_deallocate();
1575 for (size_t i = 0; i < mins.size(); i++) {
1576 buf.dim[i].min = mins[i];
1577 }
1578 }
1579
1580 template<typename... Args>
1581 void set_min(Args... args) {
1582 set_min(std::vector<int>{args...});
1583 }
1584 // @}
1585
1586 /** Test if a given coordinate is within the bounds of an image. */
1587 // @{
1588 bool contains(const std::vector<int> &coords) const {
1589 assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1590 for (size_t i = 0; i < coords.size(); i++) {
1591 if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1592 return false;
1593 }
1594 }
1595 return true;
1596 }
1597
1598 template<typename... Args>
1599 bool contains(Args... args) const {
1600 return contains(std::vector<int>{args...});
1601 }
1602 // @}
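// A sketch of guarding an access with contains() (illustrative only; `im`,
// `x`, `y`, and `sum` are placeholders):
//
//     if (im.contains(x, y)) {
//         sum += im(x, y);
//     }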
1603
1604 /** Make a buffer which refers to the same data in the same layout
1605 * using a swapped indexing order for the dimensions given. So
1606 * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1607 * strongly that A.address_of(i, j) == B.address_of(j, i). */
1608 Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1609 Buffer<T, Dims, InClassDimStorage> im = *this;
1610 im.transpose(d1, d2);
1611 return im;
1612 }
1613
1614 /** Transpose a buffer in-place by changing how it is indexed. For
1615 * example, transpose(0, 1) on a two-dimensional buffer means that
1616 * the value referred to by coordinates (i, j) is now reached at
1617 * the coordinates (j, i), and vice versa. This is done by
1618 * reordering the per-dimension metadata rather than by moving
1619 * data around in memory, so other views of the same memory will
1620 * not see the data as having been transposed. */
1621 void transpose(int d1, int d2) {
1622 assert(d1 >= 0 && d1 < this->dimensions());
1623 assert(d2 >= 0 && d2 < this->dimensions());
1624 std::swap(buf.dim[d1], buf.dim[d2]);
1625 }
1626
1627 /** A generalized transpose: instead of swapping two dimensions,
1628 * pass a vector that lists each dimension index exactly once, in
1629 * the desired order. This does not move any data around in memory
1630 * - it just permutes how it is indexed. */
1631 void transpose(const std::vector<int> &order) {
1632 assert((int)order.size() == dimensions());
1633 if (dimensions() < 2) {
1634 // My, that was easy
1635 return;
1636 }
1637
1638 std::vector<int> order_sorted = order;
1639 for (size_t i = 1; i < order_sorted.size(); i++) {
1640 for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1641 std::swap(order_sorted[j], order_sorted[j - 1]);
1642 transpose(j, j - 1);
1643 }
1644 }
1645 }
1646
1647 /** Make a buffer which refers to the same data in the same
1648 * layout using a different ordering of the dimensions. */
1649 Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1650 Buffer<T, Dims, InClassDimStorage> im = *this;
1651 im.transpose(order);
1652 return im;
1653 }
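/* Example (illustrative sketch): swapping the two dimensions of a matrix view;
 * no data is copied, only the per-dimension metadata:
 \code
 Halide::Runtime::Buffer<double> a(3, 4);
 auto at = a.transposed(0, 1);    // at(j, i) aliases a(i, j)
 assert(&at(2, 1) == &a(1, 2));
 \endcode
 */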
1654
1655 /** Make a lower-dimensional buffer that refers to one slice of
1656 * this buffer. */
1657 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1658 sliced(int d, int pos) const {
1659 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1660 assert(dimensions() > 0);
1661
1662 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)> im = *this;
1663
1664 // This guarantees the preexisting device ref is dropped if the
1665 // device_slice call fails and maintains the buffer in a consistent
1666 // state.
1667 im.device_deallocate();
1668
1669 im.slice_host(d, pos);
1670 if (buf.device_interface != nullptr) {
1671 complete_device_slice(im, d, pos);
1672 }
1673 return im;
1674 }
1675
1676 /** Make a lower-dimensional buffer that refers to one slice of this
1677 * buffer at the dimension's minimum. */
1678 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1679 sliced(int d) const {
1680 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1681 assert(dimensions() > 0);
1682
1683 return sliced(d, dim(d).min());
1684 }
1685
1686 /** Rewrite the buffer to refer to a single lower-dimensional
1687 * slice of itself along the given dimension at the given
1688 * coordinate. Does not move any data around or free the original
1689 * memory, so other views of the same data are unaffected. Can
1690 * only be called on a Buffer with dynamic dimensionality. */
1691 void slice(int d, int pos) {
1692 static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1693 assert(dimensions() > 0);
1694
1695 // An optimization for non-device buffers. For the device case,
1696 // a temp buffer is required, so reuse the not-in-place version.
1697 // TODO(zalman|abadams): Are nop slices common enough to special
1698 // case the device part of the if to do nothing?
1699 if (buf.device_interface != nullptr) {
1700 *this = sliced(d, pos);
1701 } else {
1702 slice_host(d, pos);
1703 }
1704 }
1705
1706 /** Slice a buffer in-place at the dimension's minimum. */
1707 void slice(int d) {
1708 slice(d, dim(d).min());
1709 }
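/* Example (illustrative sketch): viewing the green channel of a planar RGB image
 * as a 2-D buffer that shares the same memory:
 \code
 Halide::Runtime::Buffer<float> rgb(640, 480, 3);
 auto green = rgb.sliced(2, 1);   // green(x, y) aliases rgb(x, y, 1)
 green.fill(0.5f);                // writes through to rgb
 \endcode
 */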
1710
1711 /** Make a new buffer that views this buffer as a single slice in a
1712 * higher-dimensional space. The new dimension has extent one and
1713 * the given min. This operation is the opposite of slice. As an
1714 * example, the following condition is true:
1715 *
1716 \code
1717 im2 = im.embedded(1, 17);
1718 &im(x, y, c) == &im2(x, 17, y, c);
1719 \endcode
1720 */
1721 Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1722 embedded(int d, int pos = 0) const {
1723 Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)> im = *this;
1724 im.embed(d, pos);
1725 return im;
1726 }
1727
1728 /** Embed a buffer in-place, increasing the
1729 * dimensionality. */
1730 void embed(int d, int pos = 0) {
1731 static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1732 assert(d >= 0 && d <= dimensions());
1733 add_dimension();
1734 translate(dimensions() - 1, pos);
1735 for (int i = dimensions() - 1; i > d; i--) {
1736 transpose(i, i - 1);
1737 }
1738 }
1739
1740 /** Add a new dimension with a min of zero and an extent of
1741 * one. The stride is the extent of the outermost dimension times
1742 * its stride. The new dimension is the last dimension. This is a
1743 * special case of embed. */
1744 void add_dimension() {
1745 static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1746 const int dims = buf.dimensions;
1747 buf.dimensions++;
1748 if (buf.dim != shape) {
1749 // We're already on the heap. Reallocate.
1750 halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1751 for (int i = 0; i < dims; i++) {
1752 new_shape[i] = buf.dim[i];
1753 }
1754 delete[] buf.dim;
1755 buf.dim = new_shape;
1756 } else if (dims == InClassDimStorage) {
1757 // Transition from the in-class storage to the heap
1758 make_shape_storage(buf.dimensions);
1759 for (int i = 0; i < dims; i++) {
1760 buf.dim[i] = shape[i];
1761 }
1762 } else {
1763 // We still fit in the class
1764 }
1765 buf.dim[dims] = {0, 1, 0};
1766 if (dims == 0) {
1767 buf.dim[dims].stride = 1;
1768 } else {
1769 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1770 }
1771 }
1772
1773 /** Add a new dimension with a min of zero, an extent of one, and
1774 * the specified stride. The new dimension is the last
1775 * dimension. This is a special case of embed. */
1776 void add_dimension_with_stride(int s) {
1777 add_dimension();
1778 buf.dim[buf.dimensions - 1].stride = s;
1779 }
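/* Example (illustrative sketch): turning a 2-D luma plane into a 3-D (x, y, c)
 * buffer with a single channel, without touching the data:
 \code
 Halide::Runtime::Buffer<float> luma(640, 480);
 luma.add_dimension();
 assert(luma.dimensions() == 3 && luma.dim(2).extent() == 1);
 \endcode
 */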
1780
1781 /** Methods for managing any GPU allocation. */
1782 // @{
1783 // Set the host dirty flag. Called by every operator()
1784 // access. Must be inlined so it can be hoisted out of loops.
1785 HALIDE_ALWAYS_INLINE
1786 void set_host_dirty(bool v = true) {
1787 assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1788 buf.set_host_dirty(v);
1789 }
1790
1791 // Check if the device allocation is dirty. Called by
1792 // set_host_dirty, which is called by every accessor. Must be
1793 // inlined so it can be hoisted out of loops.
1794 HALIDE_ALWAYS_INLINE
1795 bool device_dirty() const {
1796 return buf.device_dirty();
1797 }
1798
1799 bool host_dirty() const {
1800 return buf.host_dirty();
1801 }
1802
1803 void set_device_dirty(bool v = true) {
1804 assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1805 buf.set_device_dirty(v);
1806 }
1807
1808 int copy_to_host(void *ctx = nullptr) {
1809 if (device_dirty()) {
1810 return buf.device_interface->copy_to_host(ctx, &buf);
1811 }
1812 return halide_error_code_success;
1813 }
1814
1815 int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1816 if (host_dirty()) {
1817 return device_interface->copy_to_device(ctx, &buf, device_interface);
1818 }
1819 return halide_error_code_success;
1820 }
1821
1822 int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1823 return device_interface->device_malloc(ctx, &buf, device_interface);
1824 }
1825
1826 int device_free(void *ctx = nullptr) {
1827 if (dev_ref_count) {
1828 assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1829 "Can't call device_free on an unmanaged or wrapped native device handle. "
1830 "Free the source allocation or call device_detach_native instead.");
1831 // Multiple people may be holding onto this dev field
1832 assert(dev_ref_count->count == 1 &&
1833 "Multiple Halide::Runtime::Buffer objects share this device "
1834 "allocation. Freeing it would create dangling references. "
1835 "Don't call device_free on Halide buffers that you have copied or "
1836 "passed by value.");
1837 }
1838 int ret = halide_error_code_success;
1839 if (buf.device_interface) {
1840 ret = buf.device_interface->device_free(ctx, &buf);
1841 }
1842 if (dev_ref_count) {
1843 delete dev_ref_count;
1844 dev_ref_count = nullptr;
1845 }
1846 return ret;
1847 }
1848
1849 int device_wrap_native(const struct halide_device_interface_t *device_interface,
1850 uint64_t handle, void *ctx = nullptr) {
1851 assert(device_interface);
1852 dev_ref_count = new DeviceRefCount;
1853 dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1854 return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1855 }
1856
1857 int device_detach_native(void *ctx = nullptr) {
1858 assert(dev_ref_count &&
1859 dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1860 "Only call device_detach_native on buffers wrapping a native "
1861 "device handle via device_wrap_native. This buffer was allocated "
1862 "using device_malloc, or is unmanaged. "
1863 "Call device_free or free the original allocation instead.");
1864 // Multiple people may be holding onto this dev field
1865 assert(dev_ref_count->count == 1 &&
1866 "Multiple Halide::Runtime::Buffer objects share this device "
1867 "allocation. Freeing it could create dangling references. "
1868 "Don't call device_detach_native on Halide buffers that you "
1869 "have copied or passed by value.");
1870 int ret = halide_error_code_success;
1871 if (buf.device_interface) {
1872 ret = buf.device_interface->detach_native(ctx, &buf);
1873 }
1874 delete dev_ref_count;
1875 dev_ref_count = nullptr;
1876 return ret;
1877 }
1878
1879 int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1880 return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1881 }
1882
1883 int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1884 if (dev_ref_count) {
1885 assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1886 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1887 "Free the source allocation or call device_detach_native instead.");
1888 // Multiple people may be holding onto this dev field
1889 assert(dev_ref_count->count == 1 &&
1890 "Multiple Halide::Runtime::Buffer objects share this device "
1891 "allocation. Freeing it would create dangling references. "
1892 "Don't call device_and_host_free on Halide buffers that you have copied or "
1893 "passed by value.");
1894 }
1895 int ret = halide_error_code_success;
1896 if (buf.device_interface) {
1897 ret = buf.device_interface->device_and_host_free(ctx, &buf);
1898 }
1899 if (dev_ref_count) {
1900 delete dev_ref_count;
1901 dev_ref_count = nullptr;
1902 }
1903 return ret;
1904 }
1905
1906 int device_sync(void *ctx = nullptr) {
1907 return buf.device_sync(ctx);
1908 }
1909
1910 bool has_device_allocation() const {
1911 return buf.device != 0;
1912 }
1913
1914 /** Return the method by which the device field is managed. */
1915 BufferDeviceOwnership device_ownership() const {
1916 if (dev_ref_count == nullptr) {
1917 return BufferDeviceOwnership::Allocated;
1918 }
1919 return dev_ref_count->ownership;
1920 }
1921 // @}
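/* Example (illustrative sketch): a typical host/device round trip. `gpu_filter`
 * stands in for some AOT-compiled, GPU-scheduled pipeline; it is not part of
 * this header:
 \code
 Halide::Runtime::Buffer<float> in(256, 256), out(256, 256);
 in.fill(1.0f);           // host writes mark the buffer host-dirty
 gpu_filter(in, out);     // pipeline leaves `out` dirty on the device
 out.copy_to_host();      // synchronize before reading on the host
 float v = out(0, 0);
 \endcode
 */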
1922
1923 /** If you use the (x, y, c) indexing convention, then Halide
1924 * Buffers are stored planar by default. This function constructs
1925 * an interleaved RGB or RGBA image that can still be indexed
1926 * using (x, y, c). Passing it to a generator requires that the
1927 * generator has been compiled with support for interleaved (also
1928 * known as packed or chunky) memory layouts. */
1929 static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1930 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1931 Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1932 // Note that this is equivalent to calling transpose({2, 0, 1}),
1933 // but slightly more efficient.
1934 im.transpose(0, 1);
1935 im.transpose(1, 2);
1936 return im;
1937 }
1938
1939 /** If you use the (x, y, c) indexing convention, then Halide
1940 * Buffers are stored planar by default. This function constructs
1941 * an interleaved RGB or RGBA image that can still be indexed
1942 * using (x, y, c). Passing it to a generator requires that the
1943 * generator has been compiled with support for interleaved (also
1944 * known as packed or chunky) memory layouts. */
1945 static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1946 return make_interleaved(static_halide_type(), width, height, channels);
1947 }
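/* Example (illustrative sketch): an interleaved RGB image keeps the channel
 * dimension innermost in memory but is still indexed as (x, y, c):
 \code
 auto im = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 assert(im.dim(0).stride() == 3);   // x has stride 3 (elements)
 assert(im.dim(2).stride() == 1);   // c is densest
 im(10, 20, 0) = 255;               // red sample at (10, 20)
 \endcode
 */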
1948
1949 /** Wrap an existing interleaved image. */
1950 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1951 make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1952 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1953 Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1954 im.transpose(0, 1);
1955 im.transpose(1, 2);
1956 return im;
1957 }
1958
1959 /** Wrap an existing interleaved image. */
1960 static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1961 return make_interleaved(static_halide_type(), data, width, height, channels);
1962 }
1963
1964 /** Make a zero-dimensional Buffer */
1965 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1966 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1967 Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> buf(t, 1);
1968 buf.slice(0, 0);
1969 return buf;
1970 }
1971
1972 /** Make a zero-dimensional Buffer */
1973 static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1974 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1975 Buffer<T, Dims, InClassDimStorage> buf(1);
1976 buf.slice(0, 0);
1977 return buf;
1978 }
1979
1980 /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1981 static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1982 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1983 Buffer<T, Dims, InClassDimStorage> buf(data, 1);
1984 buf.slice(0, 0);
1985 return buf;
1986 }
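/* Example (illustrative sketch): a zero-dimensional Buffer holds a single value:
 \code
 auto s = Halide::Runtime::Buffer<int32_t>::make_scalar();
 s() = 42;
 assert(s.dimensions() == 0 && s() == 42);
 \endcode
 */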
1987
1988 /** Make a buffer with the same shape and memory nesting order as
1989 * another buffer. It may have a different type. */
1990 template<typename T2, int D2, int S2>
1991 static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1992 void *(*allocate_fn)(size_t) = nullptr,
1993 void (*deallocate_fn)(void *) = nullptr) {
1994 static_assert(Dims == D2 || Dims == AnyDims);
1995 const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1996 return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1997 allocate_fn, deallocate_fn);
1998 }
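/* Example (illustrative sketch): allocating a float scratch buffer with the same
 * shape and memory nesting order as an existing interleaved uint8 image:
 \code
 auto rgb = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 auto scratch = Halide::Runtime::Buffer<float>::make_with_shape_of(rgb);
 assert(scratch.dim(2).stride() == 1);   // the layout is copied, not the element type
 \endcode
 */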
1999
2000private:
2001 static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
2002 int dimensions,
2003 halide_dimension_t *shape,
2004 void *(*allocate_fn)(size_t),
2005 void (*deallocate_fn)(void *)) {
2006 // Reorder the dimensions of src to have strides in increasing order
2007 std::vector<int> swaps;
2008 for (int i = dimensions - 1; i > 0; i--) {
2009 for (int j = i; j > 0; j--) {
2010 if (shape[j - 1].stride > shape[j].stride) {
2011 std::swap(shape[j - 1], shape[j]);
2012 swaps.push_back(j);
2013 }
2014 }
2015 }
2016
2017 // Rewrite the strides to be dense (this messes up src, which
2018 // is why we took it by value).
2019 for (int i = 0; i < dimensions; i++) {
2020 if (i == 0) {
2021 shape[i].stride = 1;
2022 } else {
2023 shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2024 }
2025 }
2026
2027 // Undo the dimension reordering
2028 while (!swaps.empty()) {
2029 int j = swaps.back();
2030 std::swap(shape[j - 1], shape[j]);
2031 swaps.pop_back();
2032 }
2033
2034 // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2035 // using this method with Buffer<void> for either src or dst.
2036 Buffer<> dst(dst_type, nullptr, dimensions, shape);
2037 dst.allocate(allocate_fn, deallocate_fn);
2038
2039 return dst;
2040 }
2041
2042 template<typename... Args>
2043 HALIDE_ALWAYS_INLINE
2044 ptrdiff_t
2045 offset_of(int d, int first, Args... rest) const {
2046#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2047 assert(first >= this->buf.dim[d].min);
2048 assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2049#endif
2050 return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2051 }
2052
2053 HALIDE_ALWAYS_INLINE
2054 ptrdiff_t offset_of(int d) const {
2055 return 0;
2056 }
2057
2058 template<typename... Args>
2059 HALIDE_ALWAYS_INLINE
2060 storage_T *
2061 address_of(Args... args) const {
2062 if (T_is_void) {
2063 return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2064 } else {
2065 return (storage_T *)(this->buf.host) + offset_of(0, args...);
2066 }
2067 }
2068
2069 HALIDE_ALWAYS_INLINE
2070 ptrdiff_t offset_of(const int *pos) const {
2071 ptrdiff_t offset = 0;
2072 for (int i = this->dimensions() - 1; i >= 0; i--) {
2073#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2074 assert(pos[i] >= this->buf.dim[i].min);
2075 assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2076#endif
2077 offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2078 }
2079 return offset;
2080 }
2081
2082 HALIDE_ALWAYS_INLINE
2083 storage_T *address_of(const int *pos) const {
2084 if (T_is_void) {
2085 return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2086 } else {
2087 return (storage_T *)this->buf.host + offset_of(pos);
2088 }
2089 }
2090
2091public:
2092 /** Get a pointer to the address of the min coordinate. */
2093 T *data() const {
2094 return (T *)(this->buf.host);
2095 }
2096
2097 /** Access elements. Use im(...) to get a reference to an element,
2098 * and use &im(...) to get the address of an element. If you pass
2099 * fewer arguments than the buffer has dimensions, the rest are
2100 * treated as their min coordinate. The non-const versions set the
2101 * host_dirty flag to true.
2102 */
2103 //@{
2104 template<typename... Args,
2105 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2106 HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2107 static_assert(!T_is_void,
2108 "Cannot use operator() on Buffer<void> types");
2109 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2110 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2111 assert(!device_dirty());
2112 return *((const not_void_T *)(address_of(first, rest...)));
2113 }
2114
2115 HALIDE_ALWAYS_INLINE
2116 const not_void_T &
2117 operator()() const {
2118 static_assert(!T_is_void,
2119 "Cannot use operator() on Buffer<void> types");
2120 constexpr int expected_dims = 0;
2121 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2122 assert(!device_dirty());
2123 return *((const not_void_T *)(data()));
2124 }
2125
2126 HALIDE_ALWAYS_INLINE
2127 const not_void_T &
2128 operator()(const int *pos) const {
2129 static_assert(!T_is_void,
2130 "Cannot use operator() on Buffer<void> types");
2131 assert(!device_dirty());
2132 return *((const not_void_T *)(address_of(pos)));
2133 }
2134
2135 template<typename... Args,
2136 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2137 HALIDE_ALWAYS_INLINE
2138 not_void_T &
2139 operator()(int first, Args... rest) {
2140 static_assert(!T_is_void,
2141 "Cannot use operator() on Buffer<void> types");
2142 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2143 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2144 set_host_dirty();
2145 return *((not_void_T *)(address_of(first, rest...)));
2146 }
2147
2148 HALIDE_ALWAYS_INLINE
2149 not_void_T &
2150 operator()() {
2151 static_assert(!T_is_void,
2152 "Cannot use operator() on Buffer<void> types");
2153 constexpr int expected_dims = 0;
2154 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2155 set_host_dirty();
2156 return *((not_void_T *)(data()));
2157 }
2158
2159 HALIDE_ALWAYS_INLINE
2160 not_void_T &
2161 operator()(const int *pos) {
2162 static_assert(!T_is_void,
2163 "Cannot use operator() on Buffer<void> types");
2164 set_host_dirty();
2165 return *((not_void_T *)(address_of(pos)));
2166 }
2167 // @}
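/* Example (illustrative sketch): reading and writing individual samples:
 \code
 Halide::Runtime::Buffer<float> im(4, 4);
 im(1, 2) = 3.5f;              // non-const access marks the host dirty
 const auto &cim = im;
 float v = cim(1, 2);          // const access does not
 int pos[2] = {1, 2};
 assert(cim(pos) == v);
 \endcode
 */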
2168
2169 /** Tests that all values in this buffer are equal to val. */
2170 bool all_equal(not_void_T val) const {
2171 bool all_equal = true;
2172 for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2173 return all_equal;
2174 }
2175
2176 Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2177 set_host_dirty();
2178 for_each_value([=](T &v) { v = val; });
2179 return *this;
2180 }
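/* Example (illustrative sketch): fill() and all_equal() are convenient in tests:
 \code
 Halide::Runtime::Buffer<uint16_t> im(8, 8);
 im.fill(7);
 assert(im.all_equal(7));
 \endcode
 */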
2181
2182private:
2183 /** Helper functions for for_each_value. */
2184 // @{
2185 template<int N>
2186 struct for_each_value_task_dim {
2187 std::ptrdiff_t extent;
2188 std::ptrdiff_t stride[N];
2189 };
2190
2191 // Given an array of strides, and a bunch of pointers to pointers
2192 // (all of different types), advance the pointers using the
2193 // strides.
2194 template<typename Ptr, typename... Ptrs>
2195 HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2196 ptr += *stride;
2197 advance_ptrs(stride + 1, ptrs...);
2198 }
2199
2200 HALIDE_ALWAYS_INLINE
2201 static void advance_ptrs(const std::ptrdiff_t *) {
2202 }
2203
2204 template<typename Fn, typename Ptr, typename... Ptrs>
2205 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2206 const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2207 if (d == 0) {
2208 if (innermost_strides_are_one) {
2209 Ptr end = ptr + t[0].extent;
2210 while (ptr != end) {
2211 f(*ptr++, (*ptrs++)...);
2212 }
2213 } else {
2214 for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2215 f(*ptr, (*ptrs)...);
2216 advance_ptrs(t[0].stride, ptr, ptrs...);
2217 }
2218 }
2219 } else {
2220 for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2221 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2222 advance_ptrs(t[d].stride, ptr, ptrs...);
2223 }
2224 }
2225 }
2226
2227 // Return pair is <new_dimensions, innermost_strides_are_one>
2228 template<int N>
2229 HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2230 const halide_buffer_t **buffers) {
2231 const int dimensions = buffers[0]->dimensions;
2232 assert(dimensions > 0);
2233
2234 // Check the buffers all have clean host allocations
2235 for (int i = 0; i < N; i++) {
2236 if (buffers[i]->device) {
2237 assert(buffers[i]->host &&
2238 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2239 assert(!buffers[i]->device_dirty() &&
2240 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2241 } else {
2242 assert(buffers[i]->host &&
2243 "Buffer passed to for_each_value has no host or device allocation");
2244 }
2245 }
2246
2247 // Extract the strides in all the dimensions
2248 for (int i = 0; i < dimensions; i++) {
2249 for (int j = 0; j < N; j++) {
2250 assert(buffers[j]->dimensions == dimensions);
2251 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2252 buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2253 const int s = buffers[j]->dim[i].stride;
2254 t[i].stride[j] = s;
2255 }
2256 t[i].extent = buffers[0]->dim[i].extent;
2257
2258 // Order the dimensions by stride, so that the traversal is cache-coherent.
2259 // Use the last dimension for this, because this is the source in copies.
2260 // It appears to be better to optimize read order than write order.
2261 for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2262 std::swap(t[j], t[j - 1]);
2263 }
2264 }
2265
2266 // flatten dimensions where possible to make a larger inner
2267 // loop for autovectorization.
2268 int d = dimensions;
2269 for (int i = 1; i < d; i++) {
2270 bool flat = true;
2271 for (int j = 0; j < N; j++) {
2272 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2273 }
2274 if (flat) {
2275 t[i - 1].extent *= t[i].extent;
2276 for (int j = i; j < d - 1; j++) {
2277 t[j] = t[j + 1];
2278 }
2279 i--;
2280 d--;
2281 }
2282 }
2283
2284 // Note that we assert() that dimensions > 0 above
2285 // (our one-and-only caller will only call us that way)
2286 // so the unchecked access to t[0] should be safe.
2287 bool innermost_strides_are_one = true;
2288 for (int i = 0; i < N; i++) {
2289 innermost_strides_are_one &= (t[0].stride[i] == 1);
2290 }
2291
2292 return {d, innermost_strides_are_one};
2293 }
2294
2295 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2296 void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2297 if (dimensions() > 0) {
2298 const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2299 for_each_value_task_dim<N> *t =
2300 (for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2301 // Move the preparatory code into a non-templated helper to
2302 // save code size.
2303 const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2304 auto [new_dims, innermost_strides_are_one] = for_each_value_prep(t, buffers);
2305 if (new_dims > 0) {
2306 for_each_value_helper(f, new_dims - 1,
2307 innermost_strides_are_one,
2308 t,
2309 data(), (other_buffers.data())...);
2310 return;
2311 }
2312 // else fall thru
2313 }
2314
2315 // zero-dimensional case
2316 f(*data(), (*other_buffers.data())...);
2317 }
2318 // @}
2319
2320public:
2321 /** Call a function on every value in the buffer, and the
2322 * corresponding values in some number of other buffers of the
2323 * same size. The function should take a reference, const
2324 * reference, or value of the correct type for each buffer. This
2325 * effectively lifts a function of scalars to an element-wise
2326 * function of buffers. This produces code that the compiler can
2327 * autovectorize. This is slightly cheaper than for_each_element,
2328 * because it does not need to track the coordinates.
2329 *
2330 * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2331 * 'this' or the other-buffers arguments) will allow mutation of the
2332 * buffer contents, while a Buffer<const T> will not. Attempting to specify
2333 * a mutable reference for the lambda argument of a Buffer<const T>
2334 * will result in a compilation error. */
2335 // @{
2336 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2337 HALIDE_ALWAYS_INLINE Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) {
2338 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2339 return *this;
2340 }
2341
2342 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2343 HALIDE_ALWAYS_INLINE
2344 const Buffer<T, Dims, InClassDimStorage> &
2345 for_each_value(Fn &&f, Args &&...other_buffers) const {
2346 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2347 return *this;
2348 }
2349 // @}
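/* Example (illustrative sketch): an element-wise a*x + y over two buffers of the
 * same shape. The first lambda argument corresponds to the buffer the method is
 * called on; subsequent arguments correspond to the extra buffers, in order:
 \code
 Halide::Runtime::Buffer<float> x(1024), y(1024);
 x.fill(1.0f);
 y.fill(2.0f);
 const float a = 3.0f;
 y.for_each_value([=](float &yv, float xv) { yv = a * xv + yv; }, x);
 \endcode
 */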
2350
2351private:
2352 // Helper functions for for_each_element
2353 struct for_each_element_task_dim {
2354 int min, max;
2355 };
2356
2357 /** If f is callable with this many args, call it. The first
2358 * argument is just to make the overloads distinct. Actual
2359 * overload selection is done using the enable_if. */
2360 template<typename Fn,
2361 typename... Args,
2362 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2363 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2364 f(args...);
2365 }
2366
2367 /** If the above overload is impossible, we add an outer loop over
2368 * an additional argument and try again. */
2369 template<typename Fn,
2370 typename... Args>
2371 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2372 for (int i = t[d].min; i <= t[d].max; i++) {
2373 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2374 }
2375 }
2376
2377 /** Determine the minimum number of arguments a callable can take
2378 * using the same trick. */
2379 template<typename Fn,
2380 typename... Args,
2381 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2382 HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2383 return (int)(sizeof...(Args));
2384 }
2385
2386 /** The recursive version is only enabled up to a recursion limit
2387 * of 256. This catches callables that aren't callable with any
2388 * number of ints. */
2389 template<typename Fn,
2390 typename... Args>
2391 HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2392 static_assert(sizeof...(args) <= 256,
2393 "Callable passed to for_each_element must accept either a const int *,"
2394 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2395 return num_args(0, std::forward<Fn>(f), 0, args...);
2396 }
2397
2398 /** A version where the callable takes a position array instead,
2399 * with compile-time recursion on the dimensionality. This
2400 * overload is preferred to the one below using the same int vs
2401 * double trick as above, but is impossible once d hits -1 using
2402 * std::enable_if. */
2403 template<int d,
2404 typename Fn,
2405 typename = typename std::enable_if<(d >= 0)>::type>
2406 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2407 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2408 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2409 }
2410 }
2411
2412 /** Base case for recursion above. */
2413 template<int d,
2414 typename Fn,
2415 typename = typename std::enable_if<(d < 0)>::type>
2416 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2417 f(pos);
2418 }
2419
2420 /** A run-time-recursive version (instead of
2421 * compile-time-recursive) that requires the callable to take a
2422 * pointer to a position array instead. Dispatches to the
2423 * compile-time-recursive version once the dimensionality gets
2424 * small. */
2425 template<typename Fn>
2426 static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2427 if (d == -1) {
2428 f(pos);
2429 } else if (d == 0) {
2430 // Once the dimensionality gets small enough, dispatch to
2431 // a compile-time-recursive version for better codegen of
2432 // the inner loops.
2433 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2434 } else if (d == 1) {
2435 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2436 } else if (d == 2) {
2437 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2438 } else if (d == 3) {
2439 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2440 } else {
2441 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2442 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2443 }
2444 }
2445 }
2446
2447 /** We now have two overloads for for_each_element. This one
2448 * triggers if the callable takes a const int *.
2449 */
2450 template<typename Fn,
2451 typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2452 static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2453 const int size = dims * sizeof(int);
2454 int *pos = (int *)HALIDE_ALLOCA(size);
2455 // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2456 // Add this memset to silence it.
2457 memset(pos, 0, size);
2458 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2459 }
2460
2461 /** This one triggers otherwise. It treats the callable as
2462 * something that takes some number of ints. */
2463 template<typename Fn>
2464 HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2465 int args = num_args(0, std::forward<Fn>(f));
2466 assert(dims >= args);
2467 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2468 }
2469
2470 template<typename Fn>
2471 void for_each_element_impl(Fn &&f) const {
2472 for_each_element_task_dim *t =
2473 (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2474 for (int i = 0; i < dimensions(); i++) {
2475 t[i].min = dim(i).min();
2476 t[i].max = dim(i).max();
2477 }
2478 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2479 }
2480
2481public:
2482 /** Call a function at each site in a buffer. This is likely to be
2483 * much slower than using Halide code to populate a buffer, but is
2484 * convenient for tests. If the function has more arguments than the
2485 * buffer has dimensions, the remaining arguments will be zero. If it
2486 * has fewer arguments than the buffer has dimensions then the last
2487 * few dimensions of the buffer are not iterated over. For example,
2488 * the following code exploits this to set a floating point RGB image
2489 * to red:
2490
2491 \code
2492 Buffer<float, 3> im(100, 100, 3);
2493 im.for_each_element([&](int x, int y) {
2494 im(x, y, 0) = 1.0f;
2495 im(x, y, 1) = 0.0f;
2496 im(x, y, 2) = 0.0f;
2497 });
2498 \endcode
2499
2500 * The compiled code is equivalent to writing a nested for loop,
2501 * and compilers are capable of optimizing it in the same way.
2502 *
2503 * If the callable can be called with an int * as the sole argument,
2504 * that version is called instead. Each location in the buffer is
2505 * passed to it in a coordinate array. This version is higher-overhead
2506 * than the variadic version, but is useful for writing generic code
2507 * that accepts buffers of arbitrary dimensionality. For example, the
2508 * following sets the value at all sites in an arbitrary-dimensional
2509 * buffer to their first coordinate:
2510
2511 \code
2512 im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2513 \endcode
2514
2515 * It is also possible to use for_each_element to iterate over entire
2516 * rows or columns by cropping the buffer to a single column or row
2517 * respectively and iterating over elements of the result. For example,
2518 * to set the diagonal of the image to 1 by iterating over the columns:
2519
2520 \code
2521 Buffer<float, 3> im(100, 100, 3);
2522 im.sliced(1, 0).for_each_element([&](int x, int c) {
2523 im(x, x, c) = 1.0f;
2524 });
2525 \endcode
2526
2527 * Or, assuming the memory layout is known to be dense per row, one can
2528 * memset each row of an image like so:
2529
2530 \code
2531 Buffer<float, 3> im(100, 100, 3);
2532 im.sliced(0, 0).for_each_element([&](int y, int c) {
2533 memset(&im(0, y, c), 0, sizeof(float) * im.width());
2534 });
2535 \endcode
2536
2537 */
2538 // @{
2539 template<typename Fn>
2540 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2541 for_each_element_impl(f);
2542 return *this;
2543 }
2544
2545 template<typename Fn>
2546 HALIDE_ALWAYS_INLINE
2547 Buffer<T, Dims, InClassDimStorage> &
2548 for_each_element(Fn &&f) {
2549 for_each_element_impl(f);
2550 return *this;
2551 }
2552 // @}
2553
2554private:
2555 template<typename Fn>
2556 struct FillHelper {
2557 Fn f;
2558 Buffer<T, Dims, InClassDimStorage> *buf;
2559
2560 template<typename... Args,
2561 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2562 void operator()(Args... args) {
2563 (*buf)(args...) = f(args...);
2564 }
2565
2566 FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2567 : f(std::forward<Fn>(f)), buf(buf) {
2568 }
2569 };
2570
2571public:
2572 /** Fill a buffer by evaluating a callable at every site. The
2573 * callable should look much like a callable passed to
2574 * for_each_element, but it should return the value that should be
2575 * stored to the coordinate corresponding to the arguments. */
2576 template<typename Fn,
2577 typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2578 Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2579 // We'll go via for_each_element. We need a variadic wrapper lambda.
2580 FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2581 return for_each_element(wrapper);
2582 }
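/* Example (illustrative sketch): filling a buffer from a per-coordinate function:
 \code
 Halide::Runtime::Buffer<float> ramp(100, 100);
 ramp.fill([](int x, int y) { return 0.01f * (x + y); });
 \endcode
 */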
2583
2584 /** Check whether an input buffer passed to an extern stage is a
2585 * bounds query. Compared to doing the host pointer check directly,
2586 * this both adds clarity to the code and will facilitate moving to
2587 * another representation for bounds query arguments. */
2588 bool is_bounds_query() const {
2589 return buf.is_bounds_query();
2590 }
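/* Example (illustrative sketch; `my_extern_stage` is hypothetical and a float
 * input is assumed): an extern stage answering a bounds query by requesting the
 * same region of its input as the output it was asked to produce:
 \code
 extern "C" int my_extern_stage(halide_buffer_t *in, halide_buffer_t *out) {
     if (Halide::Runtime::Buffer<float>(*in).is_bounds_query()) {
         for (int i = 0; i < in->dimensions; i++) {
             in->dim[i].min = out->dim[i].min;
             in->dim[i].extent = out->dim[i].extent;
         }
         return 0;
     }
     // ... otherwise compute `out` from `in` ...
     return 0;
 }
 \endcode
 */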
2591
2592 /** Convenient check to verify that all of the interesting bytes in the Buffer
2593 * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2594 * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2595 * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2596 * the entire Buffer storage.) */
2597 void msan_check_mem_is_initialized(bool entire = false) const {
2598#if defined(__has_feature)
2599#if __has_feature(memory_sanitizer)
2600 if (entire) {
2601 __msan_check_mem_is_initialized(data(), size_in_bytes());
2602 } else {
2603 for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2604 }
2605#endif
2606#endif
2607 }
2608};
2609
2610} // namespace Runtime
2611} // namespace Halide
2612
2613#undef HALIDE_ALLOCA
2614
2615 #endif // HALIDE_RUNTIME_BUFFER_H