// vulkan_resources.h
#ifndef HALIDE_RUNTIME_VULKAN_RESOURCES_H
#define HALIDE_RUNTIME_VULKAN_RESOURCES_H

#include "vulkan_internal.h"
#include "vulkan_memory.h"

// --------------------------------------------------------------------------

namespace Halide {
namespace Runtime {
namespace Internal {
namespace Vulkan {

// Defines the specialization constants used for dynamically overriding the dispatch size
struct VulkanWorkgroupSizeBinding {
    uint32_t constant_id[3] = {0};  // zero if unused
};

// Data used to override specialization constants for dynamic dispatching
struct VulkanDispatchData {
    uint32_t shared_mem_bytes = 0;
    uint32_t global_size[3] = {0};  // aka blocks
    uint32_t local_size[3] = {0};   // aka threads
    VulkanWorkgroupSizeBinding local_size_binding;
};

// Specialization constant binding information
struct VulkanSpecializationConstant {
    uint32_t constant_id = 0;
    uint32_t type_size = 0;
    const char *constant_name = nullptr;
};

// Shared memory allocation variable information
struct VulkanSharedMemoryAllocation {
    uint32_t constant_id = 0;  // specialization constant to override allocation array size (or zero if unused)
    uint32_t type_size = 0;
    uint32_t array_size = 0;
    const char *variable_name = nullptr;
};

// Entry point metadata for shader modules
struct VulkanShaderBinding {
    char *entry_point_name = nullptr;
    VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
    VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
    VkPipeline compute_pipeline = VK_NULL_HANDLE;
    uint32_t uniform_buffer_count = 0;
    uint32_t storage_buffer_count = 0;
    uint32_t specialization_constants_count = 0;
    uint32_t shared_memory_allocations_count = 0;
    uint32_t bindings_count = 0;
    VulkanSpecializationConstant *specialization_constants = nullptr;
    VulkanSharedMemoryAllocation *shared_memory_allocations = nullptr;
    VulkanDispatchData dispatch_data;
    MemoryRegion *args_region = nullptr;
};

// Compiled shader module and associated bindings
struct VulkanCompiledShaderModule {
    VkShaderModule shader_module = VK_NULL_HANDLE;
    VkDescriptorSetLayout *descriptor_set_layouts = nullptr;
    VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
    uint32_t shader_count = 0;
    VulkanShaderBinding *shader_bindings = nullptr;
};

// Compilation cache for compiled shader modules
struct VulkanCompilationCacheEntry {
    VulkanMemoryAllocator *allocator = nullptr;
    VulkanCompiledShaderModule **compiled_modules = nullptr;
    uint32_t module_count = 0;
};

WEAK Halide::Internal::GPUCompilationCache<VkDevice, VulkanCompilationCacheEntry *> compilation_cache;
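
// NOTE: compiled modules are cached per VkDevice in the compilation_cache above
// (via Halide's GPUCompilationCache), so repeated launches reuse the same
// compiled shader modules and bindings rather than recompiling the SPIR-V.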
// --------------------------------------------------------------------------

namespace {  // internalize

// --------------------------------------------------------------------------

int vk_create_command_pool(void *user_context, VulkanMemoryAllocator *allocator, uint32_t queue_index, VkCommandPool *command_pool) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_command_pool (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "queue_index: " << queue_index << ")\n";
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create command pool ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    VkCommandPoolCreateInfo command_pool_info =
        {
            VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,  // struct type
            nullptr,                               // pointer to struct extending this
            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,  // flags. Assume transient short-lived single-use command buffers
            queue_index                            // queue family index corresponding to the compute command queue
        };

    VkResult result = vkCreateCommandPool(allocator->current_device(), &command_pool_info, allocator->callbacks(), command_pool);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: Failed to create command pool!\n";
        return halide_error_code_generic_error;
    }
    return halide_error_code_success;
}
111
113#ifdef DEBUG_RUNTIME
115 << " vk_destroy_command_pool (user_context: " << user_context << ", "
116 << "allocator: " << (void *)allocator << ", "
117 << "command_pool: " << (void *)command_pool << ")\n";
118#endif
119 if (allocator == nullptr) {
120 error(user_context) << "Vulkan: Failed to destroy command pool ... invalid allocator pointer!\n";
122 }
126}
127
// --

int vk_create_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool, VkCommandBuffer *command_buffer) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_command_buffer (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "command_pool: " << (void *)command_pool << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create command buffer ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    VkCommandBufferAllocateInfo command_buffer_info =
        {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,  // struct type
            nullptr,                          // pointer to struct extending this
            command_pool,                     // command pool for allocation
            VK_COMMAND_BUFFER_LEVEL_PRIMARY,  // command buffer level
            1                                 // number to allocate
        };

    VkResult result = vkAllocateCommandBuffers(allocator->current_device(), &command_buffer_info, command_buffer);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: Failed to allocate command buffers!\n";
        return halide_error_code_generic_error;
    }
    return halide_error_code_success;
}

int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool, VkCommandBuffer command_buffer) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_command_buffer (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "command_pool: " << (void *)command_pool << ", "
        << "command_buffer: " << (void *)command_buffer << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to destroy command buffer ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    vkFreeCommandBuffers(allocator->current_device(), command_pool, 1, &command_buffer);
    return halide_error_code_success;
}
// Struct for handling destruction of a transient command buffer ... gets destroyed when object goes out of scope
struct ScopedVulkanCommandBufferAndPool {
    void *user_context = nullptr;
    VulkanMemoryAllocator *allocator = nullptr;
    VkCommandPool command_pool = VK_NULL_HANDLE;
    VkCommandBuffer command_buffer = VK_NULL_HANDLE;
    int error_code = halide_error_code_success;

    ScopedVulkanCommandBufferAndPool(void *uc, VulkanMemoryAllocator *vma, uint32_t queue_family_index)
        : user_context(uc), allocator(vma) {
        error_code = vk_create_command_pool(user_context, allocator, queue_family_index, &command_pool);
        if (error_code == halide_error_code_success) {
            error_code = vk_create_command_buffer(user_context, allocator, command_pool, &command_buffer);
        }
    }
    ~ScopedVulkanCommandBufferAndPool() {
        if ((allocator != nullptr) && (command_pool != VK_NULL_HANDLE)) {
            if (command_buffer != VK_NULL_HANDLE) {
                vk_destroy_command_buffer(user_context, allocator, command_pool, command_buffer);
            }
            vk_destroy_command_pool(user_context, allocator, command_pool);
        }
        user_context = nullptr;
        allocator = nullptr;
        command_pool = VK_NULL_HANDLE;
        command_buffer = VK_NULL_HANDLE;
    }
};
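
// Usage sketch (hypothetical call site, assuming a valid allocator and compute
// queue family index): the scoped wrapper creates a transient pool + buffer and
// releases both when it goes out of scope.
//
//   ScopedVulkanCommandBufferAndPool scoped(user_context, allocator, queue_family_index);
//   if (scoped.error_code != halide_error_code_success) { return scoped.error_code; }
//   // ... record into scoped.command_buffer, then submit ...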

int vk_fill_command_buffer_with_dispatch_call(void *user_context,
                                              VkDevice device,
                                              VkCommandBuffer command_buffer,
                                              VkPipeline compute_pipeline,
                                              VkPipelineLayout pipeline_layout,
                                              VkDescriptorSet descriptor_set,
                                              uint32_t descriptor_set_index,
                                              int blocksX, int blocksY, int blocksZ) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_fill_command_buffer_with_dispatch_call (user_context: " << user_context << ", "
        << "device: " << (void *)device << ", "
        << "command_buffer: " << (void *)command_buffer << ", "
        << "pipeline_layout: " << (void *)pipeline_layout << ", "
        << "descriptor_set: " << (void *)descriptor_set << ", "
        << "descriptor_set_index: " << descriptor_set_index << ", "
        << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n";
#endif

    VkCommandBufferBeginInfo command_buffer_begin_info = {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,  // struct type
        nullptr,                                      // pointer to struct extending this
        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,  // flags
        nullptr                                       // pointer to parent command buffer
    };

    VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
    if (result != VK_SUCCESS) {
        error(user_context) << "vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }

    vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
    vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
                            descriptor_set_index, 1, &descriptor_set, 0, nullptr);
    vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ);

    result = vkEndCommandBuffer(command_buffer);
    if (result != VK_SUCCESS) {
        error(user_context) << "vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }

    return halide_error_code_success;
}

int vk_submit_command_buffer(void *user_context, VkQueue queue, VkCommandBuffer command_buffer) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_submit_command_buffer (user_context: " << user_context << ", "
        << "queue: " << (void *)queue << ", "
        << "command_buffer: " << (void *)command_buffer << ")\n";
#endif

    VkSubmitInfo submit_info =
        {
            VK_STRUCTURE_TYPE_SUBMIT_INFO,  // struct type
            nullptr,                        // pointer to struct extending this
            0,                              // wait semaphore count
            nullptr,                        // semaphores
            nullptr,                        // pipeline stages where semaphore waits occur
            1,                              // how many command buffers to execute
            &command_buffer,                // the command buffers
            0,                              // number of semaphores to signal
            nullptr                         // the semaphores to signal
        };

    VkResult result = vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: vkQueueSubmit returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }
    return halide_error_code_success;
}

// --

bool vk_needs_scalar_uniform_buffer(void *user_context,
                                    size_t arg_sizes[],
                                    void *args[],
                                    int8_t arg_is_buffer[]) {
    int i = 0;
    while (arg_sizes[i] > 0) {
        if (!arg_is_buffer[i]) {
            return true;
        }
        i++;
    }
    return false;
}

uint32_t vk_count_bindings_for_descriptor_set(void *user_context,
                                              size_t arg_sizes[],
                                              void *args[],
                                              int8_t arg_is_buffer[]) {

    // first binding is for passing scalar parameters in a buffer (if necessary)
    uint32_t bindings_count = vk_needs_scalar_uniform_buffer(user_context, arg_sizes, args, arg_is_buffer);

    int i = 0;
    while (arg_sizes[i] > 0) {
        if (arg_is_buffer[i]) {
            bindings_count++;
        }
        i++;
    }
    return bindings_count;
}
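
// For example: with args = {scalar, buffer, buffer} the scalar arg requires one
// UNIFORM_BUFFER binding and each halide buffer adds a STORAGE_BUFFER binding,
// so bindings_count == 3.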

// --

int vk_create_descriptor_pool(void *user_context,
                              VulkanMemoryAllocator *allocator,
                              uint32_t uniform_buffer_count,
                              uint32_t storage_buffer_count,
                              VkDescriptorPool *descriptor_pool) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_descriptor_pool (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
        << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create descriptor pool ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    BlockStorage::Config pool_config;
    pool_config.entry_size = sizeof(VkDescriptorPoolSize);
    pool_config.minimum_capacity = (uniform_buffer_count ? 1 : 0) + (storage_buffer_count ? 1 : 0);
    BlockStorage pool_sizes(user_context, pool_config);

    // First binding is reserved for passing scalar parameters as a uniform buffer
    if (uniform_buffer_count > 0) {
        VkDescriptorPoolSize uniform_buffer_size = {
            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,  // descriptor type
            uniform_buffer_count                // all kernel args are packed into uniform buffers
        };
        pool_sizes.append(user_context, &uniform_buffer_size);
    }

    if (storage_buffer_count > 0) {
        VkDescriptorPoolSize storage_buffer_size = {
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,  // descriptor type
            storage_buffer_count                // all halide buffers are passed as storage buffers
        };
        pool_sizes.append(user_context, &storage_buffer_size);
    }

    VkDescriptorPoolCreateInfo descriptor_pool_info = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,   // struct type
        nullptr,                                         // pointer to struct extending this
        0,                                               // flags
        1,                                               // this pool will only be used for creating one descriptor set!
        (uint32_t)pool_sizes.size(),                     // pool size count
        (const VkDescriptorPoolSize *)pool_sizes.data()  // ptr to descriptor pool sizes
    };

    VkResult result = vkCreateDescriptorPool(allocator->current_device(), &descriptor_pool_info, allocator->callbacks(), descriptor_pool);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: Failed to create descriptor pool! vkCreateDescriptorPool returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }
    return halide_error_code_success;
}
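
// For example: a kernel with packed scalar args and three halide buffers yields
// two pool sizes ... {UNIFORM_BUFFER, 1} and {STORAGE_BUFFER, 3} ... sized for
// the single descriptor set allocated from this pool.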

int vk_destroy_descriptor_pool(void *user_context,
                               VulkanMemoryAllocator *allocator,
                               VkDescriptorPool descriptor_pool) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_descriptor_pool (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "descriptor_pool: " << (void *)descriptor_pool << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to destroy descriptor pool ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }
    vkDestroyDescriptorPool(allocator->current_device(), descriptor_pool, allocator->callbacks());
    return halide_error_code_success;
}

// --

int vk_create_descriptor_set_layout(void *user_context,
                                    VulkanMemoryAllocator *allocator,
                                    uint32_t uniform_buffer_count,
                                    uint32_t storage_buffer_count,
                                    VkDescriptorSetLayout *layout) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_descriptor_set_layout (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "uniform_buffer_count: " << uniform_buffer_count << ", "
        << "storage_buffer_count: " << storage_buffer_count << ", "
        << "layout: " << (void *)layout << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create descriptor set layout ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    BlockStorage::Config layout_config;
    layout_config.entry_size = sizeof(VkDescriptorSetLayoutBinding);
    layout_config.minimum_capacity = uniform_buffer_count + storage_buffer_count;
    BlockStorage layout_bindings(user_context, layout_config);

    // add all uniform buffers first
    for (uint32_t n = 0; n < uniform_buffer_count; ++n) {
        VkDescriptorSetLayoutBinding uniform_buffer_binding = {
            (uint32_t)layout_bindings.size(),   // binding index
            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,  // descriptor type
            1,                                  // descriptor count
            VK_SHADER_STAGE_COMPUTE_BIT,        // stage flags
            nullptr                             // immutable samplers
        };

#ifdef DEBUG_RUNTIME
        debug(user_context)
            << "  [" << (uint32_t)layout_bindings.size() << "] : UNIFORM_BUFFER\n";
#endif

        layout_bindings.append(user_context, &uniform_buffer_binding);
    }

    // Add all other storage buffers
    for (uint32_t n = 0; n < storage_buffer_count; ++n) {

        // halide buffers will be passed as STORAGE_BUFFERS
        VkDescriptorSetLayoutBinding storage_buffer_binding = {
            (uint32_t)layout_bindings.size(),   // binding index
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,  // descriptor type
            1,                                  // descriptor count
            VK_SHADER_STAGE_COMPUTE_BIT,        // stage flags
            nullptr                             // immutable samplers
        };
#ifdef DEBUG_RUNTIME
        debug(user_context)
            << "  [" << (uint32_t)layout_bindings.size() << "] : STORAGE_BUFFER\n";
#endif

        layout_bindings.append(user_context, &storage_buffer_binding);
    }

    // Create the LayoutInfo struct
    VkDescriptorSetLayoutCreateInfo layout_info = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,    // structure type
        nullptr,                                                // pointer to a struct extending this info
        0,                                                      // flags
        (uint32_t)layout_bindings.size(),                       // binding count
        (VkDescriptorSetLayoutBinding *)layout_bindings.data()  // pointer to layout bindings array
    };

    // Create the descriptor set layout
    VkResult result = vkCreateDescriptorSetLayout(allocator->current_device(), &layout_info, allocator->callbacks(), layout);
    if (result != VK_SUCCESS) {
        error(user_context) << "vkCreateDescriptorSetLayout returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }

    return halide_error_code_success;
}

int vk_destroy_descriptor_set_layout(void *user_context,
                                     VulkanMemoryAllocator *allocator,
                                     VkDescriptorSetLayout descriptor_set_layout) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_descriptor_set_layout (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "layout: " << (void *)descriptor_set_layout << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to destroy descriptor set layout ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }
    vkDestroyDescriptorSetLayout(allocator->current_device(), descriptor_set_layout, allocator->callbacks());
    return halide_error_code_success;
}

// --

int vk_create_descriptor_set(void *user_context,
                             VulkanMemoryAllocator *allocator,
                             VkDescriptorSetLayout descriptor_set_layout,
                             VkDescriptorPool descriptor_pool,
                             VkDescriptorSet *descriptor_set) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_descriptor_set (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "descriptor_set_layout: " << (void *)descriptor_set_layout << ", "
        << "descriptor_pool: " << (void *)descriptor_pool << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create descriptor set ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    VkDescriptorSetAllocateInfo descriptor_set_info =
        {
            VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,  // struct type
            nullptr,                                         // pointer to struct extending this
            descriptor_pool,                                 // pool from which to allocate sets
            1,                                               // number of descriptor sets
            &descriptor_set_layout                           // pointer to array of descriptor set layouts
        };

    VkResult result = vkAllocateDescriptorSets(allocator->current_device(), &descriptor_set_info, descriptor_set);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: vkAllocateDescriptorSets returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }

    return halide_error_code_success;
}

int vk_update_descriptor_set(void *user_context,
                             VulkanMemoryAllocator *allocator,
                             VkBuffer *scalar_args_buffer,
                             size_t uniform_buffer_count,
                             size_t storage_buffer_count,
                             size_t arg_sizes[],
                             void *args[],
                             int8_t arg_is_buffer[],
                             VkDescriptorSet descriptor_set) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_update_descriptor_set (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", "
        << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
        << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ", "
        << "descriptor_set: " << (void *)descriptor_set << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to update descriptor set ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    BlockStorage::Config dbi_config;
    dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
    dbi_config.entry_size = sizeof(VkDescriptorBufferInfo);
    BlockStorage descriptor_buffer_info(user_context, dbi_config);

    BlockStorage::Config wds_config;
    wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
    wds_config.entry_size = sizeof(VkWriteDescriptorSet);
    BlockStorage write_descriptor_set(user_context, wds_config);

    // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER
    VkDescriptorBufferInfo *scalar_args_entry = nullptr;
    if (scalar_args_buffer != nullptr) {
        VkDescriptorBufferInfo scalar_args_descriptor_buffer_info = {
            *scalar_args_buffer,  // the buffer
            0,                    // offset
            VK_WHOLE_SIZE         // range
        };
        descriptor_buffer_info.append(user_context, &scalar_args_descriptor_buffer_info);
        scalar_args_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back();

#ifdef DEBUG_RUNTIME
        debug(user_context) << " [" << (uint32_t)write_descriptor_set.size() << "] UNIFORM_BUFFER : "
                            << "buffer=" << (void *)scalar_args_buffer << " "
                            << "offset=" << (uint32_t)(0) << " "
                            << "size=VK_WHOLE_SIZE\n";
#endif
        VkWriteDescriptorSet uniform_buffer_write_descriptor_set = {
            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  // struct type
            nullptr,                                 // pointer to struct extending this
            descriptor_set,                          // descriptor set to update
            0,                                       // binding slot
            0,                                       // array elem
            1,                                       // num to update
            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,       // descriptor type
            nullptr,                                 // for images
            scalar_args_entry,                       // info for buffer
            nullptr                                  // for texel buffers
        };
        write_descriptor_set.append(user_context, &uniform_buffer_write_descriptor_set);
    }

    // Add all the other device buffers as STORAGE BUFFERs
    for (size_t i = 0; arg_sizes[i] > 0; i++) {
        if (arg_is_buffer[i]) {

            // get the allocated region for the buffer
            MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(((halide_buffer_t *)args[i])->device);
            MemoryRegion *owner = allocator->owner_of(user_context, device_region);

            // retrieve the buffer from the region
            VkBuffer *device_buffer = reinterpret_cast<VkBuffer *>(owner->handle);
            if (device_buffer == nullptr) {
                error(user_context) << "Vulkan: Failed to retrieve buffer for device memory!\n";
                return halide_error_code_internal_error;
            }

            VkDeviceSize range_offset = device_region->range.head_offset;
            VkDeviceSize range_size = device_region->size - device_region->range.head_offset - device_region->range.tail_offset;
            halide_abort_if_false(user_context, (device_region->size - device_region->range.head_offset - device_region->range.tail_offset) > 0);
            VkDescriptorBufferInfo device_buffer_descriptor_info = {
                *device_buffer,  // the buffer
                range_offset,    // range offset
                range_size       // range size
            };
            descriptor_buffer_info.append(user_context, &device_buffer_descriptor_info);
            VkDescriptorBufferInfo *device_buffer_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back();

#ifdef DEBUG_RUNTIME
            debug(user_context) << " [" << (uint32_t)write_descriptor_set.size() << "] STORAGE_BUFFER : "
                                << "region=" << (void *)device_region << " "
                                << "buffer=" << (void *)device_buffer << " "
                                << "offset=" << (uint32_t)(range_offset) << " "
                                << "size=" << (uint32_t)(range_size) << "\n";
#endif

            VkWriteDescriptorSet storage_buffer_write_descriptor_set = {
                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  // struct type
                nullptr,                                 // pointer to struct extending this
                descriptor_set,                          // descriptor set to update
                (uint32_t)write_descriptor_set.size(),   // binding slot
                0,                                       // array elem
                1,                                       // num to update
                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,       // descriptor type
                nullptr,                                 // for images
                device_buffer_entry,                     // info for buffer
                nullptr                                  // for texel buffers
            };
            write_descriptor_set.append(user_context, &storage_buffer_write_descriptor_set);
        }
    }

    // issue the update call to populate the descriptor set
    vkUpdateDescriptorSets(allocator->current_device(), (uint32_t)write_descriptor_set.size(), (const VkWriteDescriptorSet *)write_descriptor_set.data(), 0, nullptr);
    return halide_error_code_success;
}

// --

size_t vk_estimate_scalar_uniform_buffer_size(void *user_context,
                                              size_t arg_sizes[],
                                              void *args[],
                                              int8_t arg_is_buffer[]) {
    int i = 0;
    int scalar_uniform_buffer_size = 0;
    while (arg_sizes[i] > 0) {
        if (!arg_is_buffer[i]) {
            scalar_uniform_buffer_size += arg_sizes[i];
        }
        i++;
    }
    return scalar_uniform_buffer_size;
}

MemoryRegion *vk_create_scalar_uniform_buffer(void *user_context,
                                              VulkanMemoryAllocator *allocator,
                                              size_t scalar_buffer_size) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_scalar_uniform_buffer (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "scalar_buffer_size: " << (uint32_t)scalar_buffer_size << ")\n";
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create scalar uniform buffer ... invalid allocator pointer!\n";
        return nullptr;
    }

    MemoryRequest request = {0};
    request.size = scalar_buffer_size;
    request.properties.usage = MemoryUsage::UniformStorage;
    request.properties.caching = MemoryCaching::UncachedCoherent;
    request.properties.visibility = MemoryVisibility::HostToDevice;

    // allocate a new region
    MemoryRegion *region = allocator->reserve(user_context, request);
    if ((region == nullptr) || (region->handle == nullptr)) {
        error(user_context) << "Vulkan: Failed to create scalar uniform buffer ... unable to allocate device memory!\n";
        return nullptr;
    }

    // return the allocated region for the uniform buffer
    return region;
}

int vk_update_scalar_uniform_buffer(void *user_context,
                                    VulkanMemoryAllocator *allocator,
                                    MemoryRegion *region,
                                    size_t arg_sizes[],
                                    void *args[],
                                    int8_t arg_is_buffer[]) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_update_scalar_uniform_buffer (user_context: " << user_context << ", "
        << "region: " << (void *)region << ")\n";
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to update scalar uniform buffer ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    if ((region == nullptr) || (region->handle == nullptr)) {
        error(user_context) << "Vulkan: Failed to update scalar uniform buffer ... invalid memory region!\n";
        return halide_error_code_internal_error;
    }

    // map the region to a host ptr
    uint8_t *host_ptr = (uint8_t *)allocator->map(user_context, region);
    if (host_ptr == nullptr) {
        error(user_context) << "Vulkan: Failed to update scalar uniform buffer ... unable to map host pointer to device memory!\n";
        return halide_error_code_internal_error;
    }

    // copy to the (host-visible/coherent) scalar uniform buffer
    size_t arg_offset = 0;
    for (size_t i = 0; arg_sizes[i] > 0; i++) {
        if (!arg_is_buffer[i]) {
            memcpy(host_ptr + arg_offset, args[i], arg_sizes[i]);
            arg_offset += arg_sizes[i];
        }
    }

    // unmap the pointer to the buffer for the region
    allocator->unmap(user_context, region);
    return halide_error_code_success;
}
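
// Packing sketch (illustrative): for args = {int32 a, halide_buffer_t *b, float c},
// only the scalars land in the uniform buffer ... 'a' is copied at offset 0 and
// 'c' at offset 4 ... while 'b' is bound separately as a STORAGE_BUFFER descriptor.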

int vk_destroy_scalar_uniform_buffer(void *user_context, VulkanMemoryAllocator *allocator,
                                     MemoryRegion *scalar_args_region) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_scalar_uniform_buffer (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "scalar_args_region: " << (void *)scalar_args_region << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to destroy scalar uniform buffer ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    if (!scalar_args_region) {
        return halide_error_code_success;
    }

    int error_code = halide_error_code_success;
    if (halide_can_reuse_device_allocations(user_context)) {
        error_code = allocator->release(user_context, scalar_args_region);
    } else {
        error_code = allocator->reclaim(user_context, scalar_args_region);
    }
    return error_code;
}

// --

int vk_create_pipeline_layout(void *user_context,
                              VulkanMemoryAllocator *allocator,
                              uint32_t descriptor_set_count,
                              VkDescriptorSetLayout *descriptor_set_layouts,
                              VkPipelineLayout *pipeline_layout) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_pipeline_layout (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "descriptor_set_count: " << descriptor_set_count << ", "
        << "descriptor_set_layouts: " << (void *)descriptor_set_layouts << ", "
        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create pipeline layout ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    if (allocator->current_physical_device_limits().maxBoundDescriptorSets > 0) {
        uint64_t max_bound_descriptor_sets = allocator->current_physical_device_limits().maxBoundDescriptorSets;
        if (descriptor_set_count > max_bound_descriptor_sets) {
            error(user_context) << "Vulkan: Number of descriptor sets for pipeline layout exceeds the number that can be bound by device!\n"
                                << " requested: " << descriptor_set_count << ","
                                << " available: " << max_bound_descriptor_sets << "\n";
            return halide_error_code_incompatible_device_interface;
        }
    }

    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,  // structure type
        nullptr,                                        // pointer to a structure extending this
        0,                                              // flags
        descriptor_set_count,                           // number of descriptor sets
        descriptor_set_layouts,                         // pointer to the descriptor sets
        0,                                              // number of push constant ranges
        nullptr                                         // pointer to push constant range structs
    };

    VkResult result = vkCreatePipelineLayout(allocator->current_device(), &pipeline_layout_info, allocator->callbacks(), pipeline_layout);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: vkCreatePipelineLayout returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }
    return halide_error_code_success;
}

int vk_destroy_pipeline_layout(void *user_context,
                               VulkanMemoryAllocator *allocator,
                               VkPipelineLayout pipeline_layout) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_pipeline_layout (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to destroy pipeline layout ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    vkDestroyPipelineLayout(allocator->current_device(), pipeline_layout, allocator->callbacks());
    return halide_error_code_success;
}

// --

int vk_create_compute_pipeline(void *user_context,
                               VulkanMemoryAllocator *allocator,
                               const char *pipeline_name,
                               VkShaderModule shader_module,
                               VkPipelineLayout pipeline_layout,
                               VkSpecializationInfo *specialization_info,
                               VkPipeline *compute_pipeline) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_create_compute_pipeline (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "shader_module: " << (void *)shader_module << ", "
        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to create compute pipeline ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    VkComputePipelineCreateInfo compute_pipeline_info =
        {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,  // structure type
            nullptr,                                         // pointer to a structure extending this
            0,                                               // flags
            // VkPipelineShaderStageCreateInfo
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,  // structure type
                nullptr,                      // pointer to a structure extending this
                0,                            // flags
                VK_SHADER_STAGE_COMPUTE_BIT,  // compute stage shader
                shader_module,                // shader module
                pipeline_name,                // entry point name
                specialization_info,          // pointer to VkSpecializationInfo struct
            },
            pipeline_layout,  // pipeline layout
            VK_NULL_HANDLE,   // base pipeline handle for derived pipeline
            0                 // base pipeline index for derived pipeline
        };

    VkResult result = vkCreateComputePipelines(allocator->current_device(), VK_NULL_HANDLE, 1, &compute_pipeline_info, allocator->callbacks(), compute_pipeline);
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: Failed to create compute pipeline! vkCreateComputePipelines returned " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }

    return halide_error_code_success;
}

int vk_setup_compute_pipeline(void *user_context,
                              VulkanMemoryAllocator *allocator,
                              VulkanShaderBinding *shader_bindings,
                              VulkanDispatchData *dispatch_data,
                              VkShaderModule shader_module,
                              VkPipelineLayout pipeline_layout,
                              VkPipeline *compute_pipeline) {

#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_setup_compute_pipeline (user_context: " << user_context << ", "
        << "entry_point_name: '" << shader_bindings->entry_point_name << "', "
        << "allocator: " << (void *)allocator << ", "
        << "shader_bindings: " << (void *)shader_bindings << ", "
        << "dispatch_data: " << (void *)dispatch_data << ", "
        << "shader_module: " << (void *)shader_module << ", "
        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to setup compute pipeline ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    if (shader_bindings == nullptr) {
        error(user_context) << "Vulkan: Failed to setup compute pipeline ... invalid shader bindings!\n";
        return halide_error_code_generic_error;
    }

    if (dispatch_data == nullptr) {
        error(user_context) << "Vulkan: Failed to setup compute pipeline ... invalid dispatch data!\n";
        return halide_error_code_generic_error;
    }

    VkResult result = VK_SUCCESS;
    const char *entry_point_name = shader_bindings->entry_point_name;
    if (entry_point_name == nullptr) {
        error(user_context) << "Vulkan: Failed to setup compute pipeline ... missing entry point name!\n";
        return halide_error_code_generic_error;
    }

    uint32_t dispatch_constant_index = 0;
    uint32_t dispatch_constant_ids[4] = {0, 0, 0, 0};
    uint32_t dispatch_constant_values[4] = {0, 0, 0, 0};

    // locate the mapping for overriding any dynamic shared memory allocation sizes
    if (shader_bindings->shared_memory_allocations_count && dispatch_data->shared_mem_bytes) {

        uint32_t shared_mem_constant_id = 0;
        uint32_t static_shared_mem_bytes = 0;
        uint32_t shared_mem_type_size = 0;

        for (uint32_t sm = 0; sm < shader_bindings->shared_memory_allocations_count; sm++) {
            VulkanSharedMemoryAllocation *allocation = &(shader_bindings->shared_memory_allocations[sm]);
            if (allocation->constant_id == 0) {
                // static fixed-size allocation
                static_shared_mem_bytes += allocation->type_size * allocation->array_size;
            } else {
                // dynamic allocation
                if (shared_mem_constant_id > 0) {
                    error(user_context) << "Vulkan: Multiple dynamic shared memory allocations found! Only one is supported!\n";
                    result = VK_ERROR_TOO_MANY_OBJECTS;
                    break;
                }
                shared_mem_constant_id = allocation->constant_id;
                shared_mem_type_size = allocation->type_size;
            }
        }
        uint32_t shared_mem_bytes_avail = (dispatch_data->shared_mem_bytes - static_shared_mem_bytes);
#ifdef DEBUG_RUNTIME
        debug(user_context) << "  pipeline uses " << static_shared_mem_bytes << " bytes of static shared memory\n";
        debug(user_context) << "  dispatch requests " << dispatch_data->shared_mem_bytes << " bytes of shared memory\n";
        debug(user_context) << "  dynamic shared memory " << shared_mem_bytes_avail << " bytes available\n";
#endif
        // setup the dynamic array size
        if ((shared_mem_constant_id > 0) && (shared_mem_bytes_avail > 0)) {
            uint32_t dynamic_array_size = (uint32_t)shared_mem_bytes_avail / shared_mem_type_size;
#ifdef DEBUG_RUNTIME
            debug(user_context) << "  setting shared memory to " << (uint32_t)dynamic_array_size << " elements "
                                << "(or " << (uint32_t)shared_mem_bytes_avail << " bytes)\n";
#endif
            // save the shared mem specialization constant in the first slot
            dispatch_constant_ids[dispatch_constant_index] = shared_mem_constant_id;
            dispatch_constant_values[dispatch_constant_index] = dynamic_array_size;
            dispatch_constant_index++;
        }

        // verify the device can actually support the necessary amount of shared memory requested
        if (allocator->current_physical_device_limits().maxComputeSharedMemorySize > 0) {
            uint64_t device_shared_mem_size = allocator->current_physical_device_limits().maxComputeSharedMemorySize;
            if (static_shared_mem_bytes > device_shared_mem_size) {
                error(user_context) << "Vulkan: Amount of static shared memory used exceeds device limit!\n"
                                    << " requested: " << static_shared_mem_bytes << " bytes,"
                                    << " available: " << device_shared_mem_size << " bytes\n";
                return halide_error_code_incompatible_device_interface;
            }
            if (dispatch_data->shared_mem_bytes > device_shared_mem_size) {
                error(user_context) << "Vulkan: Amount of dynamic shared memory used exceeds device limit!\n"
                                    << " requested: " << dispatch_data->shared_mem_bytes << " bytes,"
                                    << " available: " << device_shared_mem_size << " bytes\n";
                return halide_error_code_incompatible_device_interface;
            }
        }
    }
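
    // Worked example (illustrative numbers): with one static 16384-byte allocation
    // and one dynamic allocation of 4-byte elements bound to a specialization
    // constant, a dispatch requesting 20480 bytes of shared memory leaves
    // 20480 - 16384 = 4096 bytes, so the dynamic array size is overridden to
    // 4096 / 4 = 1024 elements.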

    // locate the mapping for overriding any dynamic workgroup local sizes
    if (shader_bindings->dispatch_data.local_size_binding.constant_id[0] != 0) {
        for (uint32_t dim = 0; dim < 3; dim++) {
            dispatch_constant_ids[dispatch_constant_index] = shader_bindings->dispatch_data.local_size_binding.constant_id[dim];
            dispatch_constant_values[dispatch_constant_index] = dispatch_data->local_size[dim];
            dispatch_constant_index++;
        }
    }

    // verify the specialization constants actually exist
    for (uint32_t dc = 0; dc < dispatch_constant_index; dc++) {
        const uint32_t invalid_index = uint32_t(-1);
        uint32_t found_index = invalid_index;
        for (uint32_t sc = 0; sc < shader_bindings->specialization_constants_count; sc++) {
            if (shader_bindings->specialization_constants[sc].constant_id == dispatch_constant_ids[dc]) {
#ifdef DEBUG_RUNTIME
                debug(user_context) << "  binding specialization constant [" << dispatch_constant_ids[dc] << "] "
                                    << "'" << shader_bindings->specialization_constants[sc].constant_name << "' "
                                    << " => " << dispatch_constant_values[dc] << "\n";
#endif
                found_index = sc;
                break;
            }
        }
        if (found_index == invalid_index) {
            error(user_context) << "Vulkan: Failed to locate dispatch constant index for shader binding!\n";
            result = VK_ERROR_INITIALIZATION_FAILED;
        }
    }

    // don't even attempt to create the pipeline layout if we encountered errors in the shader binding
    if (result != VK_SUCCESS) {
        error(user_context) << "Vulkan: Failed to decode shader bindings! " << vk_get_error_name(result) << "\n";
        return halide_error_code_generic_error;
    }

    // Prepare specialization mapping for all dispatch constants
    uint32_t dispatch_constant_count = 0;
    VkSpecializationMapEntry specialization_map_entries[4];
    memset(specialization_map_entries, 0, sizeof(specialization_map_entries));
    for (uint32_t dc = 0; dc < dispatch_constant_index && dc < 4; dc++) {
        specialization_map_entries[dc].constantID = dispatch_constant_ids[dc];
        specialization_map_entries[dc].size = sizeof(uint32_t);
        specialization_map_entries[dc].offset = dc * sizeof(uint32_t);
        dispatch_constant_count++;
    }

    if (dispatch_constant_count > 0) {

        // Prepare specialization info block for the shader stage
        VkSpecializationInfo specialization_info{};
        specialization_info.dataSize = dispatch_constant_count * sizeof(uint32_t);
        specialization_info.mapEntryCount = dispatch_constant_count;
        specialization_info.pMapEntries = specialization_map_entries;
        specialization_info.pData = dispatch_constant_values;

        // Recreate the pipeline with the requested shared memory allocation
        if (shader_bindings->compute_pipeline) {
            int error_code = vk_destroy_compute_pipeline(user_context, allocator, shader_bindings->compute_pipeline);
            if (error_code != halide_error_code_success) {
                error(user_context) << "Vulkan: Failed to destroy compute pipeline!\n";
                return error_code;
            }
            shader_bindings->compute_pipeline = VK_NULL_HANDLE;
        }

        int error_code = vk_create_compute_pipeline(user_context, allocator, entry_point_name, shader_module, pipeline_layout, &specialization_info, &(shader_bindings->compute_pipeline));
        if (error_code != halide_error_code_success) {
            error(user_context) << "Vulkan: Failed to create compute pipeline!\n";
            return error_code;
        }

    } else {

        // Construct and re-use the fixed pipeline
        if (shader_bindings->compute_pipeline == VK_NULL_HANDLE) {
            int error_code = vk_create_compute_pipeline(user_context, allocator, entry_point_name, shader_module, pipeline_layout, nullptr, &(shader_bindings->compute_pipeline));
            if (error_code != halide_error_code_success) {
                error(user_context) << "Vulkan: Failed to create compute pipeline!\n";
                return error_code;
            }
        }
    }

    return halide_error_code_success;
}

int vk_destroy_compute_pipeline(void *user_context,
                                VulkanMemoryAllocator *allocator,
                                VkPipeline compute_pipeline) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_compute_pipeline (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "device: " << (void *)allocator->current_device() << ", "
        << "compute_pipeline: " << (void *)compute_pipeline << ")\n";
#endif
    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to destroy compute pipeline ... invalid allocator pointer!\n";
        return halide_error_code_generic_error;
    }

    vkDestroyPipeline(allocator->current_device(), compute_pipeline, allocator->callbacks());
    return halide_error_code_success;
}

// --------------------------------------------------------------------------

VulkanShaderBinding *vk_decode_shader_bindings(void *user_context, VulkanMemoryAllocator *allocator, const uint32_t *module_ptr, uint32_t module_size) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_decode_shader_bindings (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "module_ptr: " << (void *)module_ptr << ", "
        << "module_size: " << module_size << ")\n";

    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to decode shader bindings ... invalid allocator pointer!\n";
        return nullptr;
    }

    if ((module_ptr == nullptr) || (module_size < (2 * sizeof(uint32_t)))) {
        error(user_context) << "Vulkan: Failed to decode shader bindings ... invalid module buffer!\n";
        return nullptr;
    }

    // Decode the sidecar for the module that lists the descriptor sets
    // corresponding to each entry point contained in the module.
    //
    // Construct a shader binding for each entry point that defines all
    // the buffers, constants, shared memory, and workgroup sizes
    // that are required for execution.
    //
    // Like the SPIR-V code module, each entry is one word (1x uint32_t).
    // Variable length sections are prefixed with their length (ie number of entries).
    //
    // [0] Header word count (total length of header)
    // [1] Number of descriptor sets
    // ... For each descriptor set ...
    // ... [0] Length of entry point name (padded to nearest word size)
    // ....... [*] Entry point string data (padded with null chars)
    // ... [1] Number of uniform buffers for this descriptor set
    // ... [2] Number of storage buffers for this descriptor set
    // ... [3] Number of specialization constants for this descriptor set
    // ....... For each specialization constant ...
    // ....... [0] Length of constant name string (padded to nearest word size)
    // ........... [*] Constant name string data (padded with null chars)
    // ....... [1] Constant id (as used in VkSpecializationMapEntry for binding)
    // ....... [2] Size of data type (in bytes)
    // ... [4] Number of shared memory allocations for this descriptor set
    // ....... For each allocation ...
    // ....... [0] Length of variable name string (padded to nearest word size)
    // ........... [*] Variable name string data (padded with null chars)
    // ....... [1] Constant id to use for overriding array size (zero if it is not bound to a specialization constant)
    // ....... [2] Size of data type (in bytes)
    // ....... [3] Size of array (ie element count)
    // ... [5] Dynamic workgroup dimensions bound to specialization constants
    // ....... [0] Constant id to use for local_size_x (zero if it was statically declared and not bound to a specialization constant)
    // ....... [1] Constant id to use for local_size_y
    // ....... [2] Constant id to use for local_size_z
    //
    // NOTE: See CodeGen_Vulkan_Dev::SPIRV_Emitter::encode_header() for the encoding
    //
    // Both vk_decode_shader_bindings() and vk_compile_shader_module() will
    // need to be updated if the header encoding ever changes!
    //
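    // Illustrative encoding (hypothetical values): a module with a single entry
    // point "my_kernel" (nine chars + null terminator, padded to 3 words), one
    // uniform buffer, two storage buffers, no specialization constants, no shared
    // memory allocations, and a statically declared workgroup size might encode as:
    //
    //   { 13, 1,                         // header word count, descriptor set count
    //     3, 'my_k', 'erne', 'l\0\0\0',  // name length (in words) + padded string data
    //     1, 2,                          // uniform & storage buffer counts
    //     0,                             // specialization constant count
    //     0,                             // shared memory allocation count
    //     0, 0, 0 }                      // local_size_x/y/z constant ids (zero == static)
    //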
    uint32_t module_entries = module_size / sizeof(uint32_t);
    uint32_t idx = 1;  // skip past the header_word_count
    uint32_t shader_count = module_ptr[idx++];
    if (shader_count < 1) {
        error(user_context) << "Vulkan: Failed to decode shader bindings ... no descriptors found!\n";
        return nullptr;  // no descriptors
    }

    // allocate an array of shader bindings (one for each entry point in the module)
    VkSystemAllocationScope alloc_scope = VkSystemAllocationScope::VK_SYSTEM_ALLOCATION_SCOPE_OBJECT;
    size_t shader_bindings_size = shader_count * sizeof(VulkanShaderBinding);
    VulkanShaderBinding *shader_bindings = (VulkanShaderBinding *)vk_host_malloc(user_context, shader_bindings_size, 0, alloc_scope, allocator->callbacks());
    if (shader_bindings == nullptr) {
        error(user_context) << "Vulkan: Failed to allocate shader_bindings! Out of memory!\n";
        return nullptr;
    }
    memset(shader_bindings, 0, shader_bindings_size);

    // decode and fill in the shader binding for each entry point
    for (uint32_t n = 0; (n < shader_count) && (idx < module_entries); n++) {
        halide_debug_assert(user_context, (idx + 8) < module_entries);  // should be at least 8 entries

        // [0] Length of entry point name (padded to nearest word size)
        uint32_t entry_point_name_length = module_ptr[idx++];  // length is number of uint32_t entries

        // [*] Entry point string data (padded with null chars)
        const char *entry_point_name = (const char *)(module_ptr + idx);  // NOTE: module owns string data
        idx += entry_point_name_length;  // skip past string data

        // [1] Number of uniform buffers for this descriptor set
        uint32_t uniform_buffer_count = module_ptr[idx++];

        // [2] Number of storage buffers for this descriptor set
        uint32_t storage_buffer_count = module_ptr[idx++];

        // [3] Number of specialization constants for this descriptor set
        uint32_t specialization_constants_count = module_ptr[idx++];

        // Decode all specialization constants
        VulkanSpecializationConstant *specialization_constants = nullptr;
        if (specialization_constants_count > 0) {

            // Allocate an array to store the decoded specialization constant data
            size_t specialization_constants_size = specialization_constants_count * sizeof(VulkanSpecializationConstant);
            specialization_constants = (VulkanSpecializationConstant *)vk_host_malloc(user_context, specialization_constants_size, 0, alloc_scope, allocator->callbacks());
            if (specialization_constants == nullptr) {
                error(user_context) << "Vulkan: Failed to allocate specialization_constants! Out of memory!\n";
                return nullptr;
            }
            memset(specialization_constants, 0, specialization_constants_size);

            // For each specialization constant ...
            for (uint32_t sc = 0; sc < specialization_constants_count; sc++) {
                halide_debug_assert(user_context, (idx + 4) < module_entries);  // should be at least 4 entries

                // [0] Length of constant name string (padded to nearest word size)
                uint32_t constant_name_length = module_ptr[idx++];

                // [*] Constant name string data (padded with null chars)
                const char *constant_name = (const char *)(module_ptr + idx);
                specialization_constants[sc].constant_name = constant_name;  // NOTE: module owns string data
                idx += constant_name_length;  // skip past string data

                // [1] Constant id (as used in VkSpecializationMapEntry for binding)
                specialization_constants[sc].constant_id = module_ptr[idx++];

                // [2] Size of data type (in bytes)
                specialization_constants[sc].type_size = module_ptr[idx++];
            }
        }

        // [4] Number of shared memory allocations for this descriptor set
        uint32_t shared_memory_allocations_count = module_ptr[idx++];

        // Decode all shared memory allocations ...
        VulkanSharedMemoryAllocation *shared_memory_allocations = nullptr;
        if (shared_memory_allocations_count > 0) {

            // Allocate an array to store the decoded shared memory allocation data
            size_t shared_memory_allocations_size = shared_memory_allocations_count * sizeof(VulkanSharedMemoryAllocation);
            shared_memory_allocations = (VulkanSharedMemoryAllocation *)vk_host_malloc(user_context, shared_memory_allocations_size, 0, alloc_scope, allocator->callbacks());
            if (shared_memory_allocations == nullptr) {
                error(user_context) << "Vulkan: Failed to allocate shared_memory_allocations! Out of memory!\n";
                return nullptr;
            }
            memset(shared_memory_allocations, 0, shared_memory_allocations_size);

            // For each shared memory allocation ...
            for (uint32_t sm = 0; sm < shared_memory_allocations_count && (idx < module_entries); sm++) {
                halide_debug_assert(user_context, (idx + 4) < module_entries);  // should be at least 4 entries

                // [0] Length of variable name string (padded to nearest word size)
                uint32_t variable_name_length = module_ptr[idx++];

                // [*] Variable name string data (padded with null chars)
                const char *variable_name = (const char *)(module_ptr + idx);
                shared_memory_allocations[sm].variable_name = variable_name;  // NOTE: module owns string data
                idx += variable_name_length;  // skip past string data

                // [1] Constant id to use for overriding array size
                shared_memory_allocations[sm].constant_id = module_ptr[idx++];

                // [2] Size of data type (in bytes)
                shared_memory_allocations[sm].type_size = module_ptr[idx++];

                // [3] Size of array (ie element count)
                shared_memory_allocations[sm].array_size = module_ptr[idx++];
            }
        }

        // [5] Dynamic workgroup dimensions bound to specialization constants
        halide_debug_assert(user_context, (idx + 3) < module_entries);  // should be at least 3 entries
        for (uint32_t dim = 0; dim < 3 && (idx < module_entries); dim++) {
            shader_bindings[n].dispatch_data.local_size_binding.constant_id[dim] = module_ptr[idx++];
        }

#ifdef DEBUG_RUNTIME

        debug(user_context) << " [" << n << "] '" << (const char *)entry_point_name << "'\n";

        debug(user_context) << "  uniform_buffer_count=" << uniform_buffer_count << "\n"
                            << "  storage_buffer_count=" << storage_buffer_count << "\n";

        debug(user_context) << "  specialization_constants_count=" << specialization_constants_count << "\n";
        for (uint32_t sc = 0; sc < specialization_constants_count; sc++) {
            debug(user_context) << "   [" << sc << "] "
                                << "constant_name='" << (const char *)specialization_constants[sc].constant_name << "' "
                                << "constant_id=" << specialization_constants[sc].constant_id << " "
                                << "type_size=" << specialization_constants[sc].type_size << "\n";
        }

        debug(user_context) << "  shared_memory_allocations_count=" << shared_memory_allocations_count << "\n";
        for (uint32_t sm = 0; sm < shared_memory_allocations_count; sm++) {
            debug(user_context) << "   [" << sm << "] "
                                << "variable_name='" << (const char *)shared_memory_allocations[sm].variable_name << "' "
                                << "constant_id=" << shared_memory_allocations[sm].constant_id << " "
                                << "type_size=" << shared_memory_allocations[sm].type_size << " "
                                << "array_size=" << shared_memory_allocations[sm].array_size << "\n";
        }
        debug(user_context) << "  local_size_binding=[";
        for (uint32_t dim = 0; dim < 3; dim++) {
            debug(user_context) << shader_bindings[n].dispatch_data.local_size_binding.constant_id[dim] << " ";
        }
        debug(user_context) << "]\n";
#endif
        shader_bindings[n].entry_point_name = (char *)vk_host_malloc(user_context, entry_point_name_length * sizeof(uint32_t), 0, alloc_scope, allocator->callbacks());
        if (shader_bindings[n].entry_point_name == nullptr) {
            error(user_context) << "Vulkan: Failed to allocate entry_point_name! Out of memory!\n";
            return nullptr;
        }

        memcpy(shader_bindings[n].entry_point_name, entry_point_name, entry_point_name_length * sizeof(uint32_t));
        shader_bindings[n].uniform_buffer_count = uniform_buffer_count;
        shader_bindings[n].storage_buffer_count = storage_buffer_count;
        shader_bindings[n].specialization_constants_count = specialization_constants_count;
        shader_bindings[n].specialization_constants = specialization_constants;
        shader_bindings[n].shared_memory_allocations_count = shared_memory_allocations_count;
        shader_bindings[n].shared_memory_allocations = shared_memory_allocations;
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return shader_bindings;
}

int vk_validate_shader_for_device(void *user_context, VulkanMemoryAllocator *allocator,
                                  const VulkanShaderBinding *shader_bindings, uint32_t shader_count) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_validate_shader_for_device (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "shader_bindings: " << (void *)shader_bindings << ", "
        << "shader_count: " << shader_count << ")\n";
#endif

    // validate that the shared memory used is less than the available amount on device
    if (shader_bindings->shared_memory_allocations_count) {

        uint32_t static_shared_mem_bytes = 0;

        for (uint32_t sm = 0; sm < shader_bindings->shared_memory_allocations_count; sm++) {
            VulkanSharedMemoryAllocation *allocation = &(shader_bindings->shared_memory_allocations[sm]);
            if (allocation->constant_id == 0) {
                // static fixed-size allocation
                static_shared_mem_bytes += allocation->type_size * allocation->array_size;
            } else {
                // dynamic allocation (can't determine this until runtime)
            }
        }

        // verify the device can actually support the necessary amount of shared memory requested
        if (allocator->current_physical_device_limits().maxComputeSharedMemorySize > 0) {
            uint64_t device_shared_mem_size = allocator->current_physical_device_limits().maxComputeSharedMemorySize;
            if (static_shared_mem_bytes > device_shared_mem_size) {
                error(user_context) << "Vulkan: Amount of static shared memory used exceeds device limit!\n"
                                    << " requested: " << static_shared_mem_bytes << " bytes,"
                                    << " available: " << device_shared_mem_size << " bytes\n";
                return halide_error_code_incompatible_device_interface;
            }
        }
    }

    // validate the number of descriptor sets used is within the amount supported by the device
    if (allocator->current_physical_device_limits().maxPerStageDescriptorStorageBuffers > 0) {
        uint64_t max_descriptors = allocator->current_physical_device_limits().maxPerStageDescriptorStorageBuffers;
        if (shader_count > max_descriptors) {
            error(user_context) << "Vulkan: Number of required descriptor sets exceeds the amount available for device!\n"
                                << " requested: " << shader_count << ","
                                << " available: " << max_descriptors << "\n";
            return halide_error_code_incompatible_device_interface;
        }
    }
    return halide_error_code_success;
}

VulkanCompilationCacheEntry *vk_compile_kernel_module(void *user_context, VulkanMemoryAllocator *allocator,
                                                      const char *ptr, int size) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_compile_kernel_module (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "device: " << (void *)allocator->current_device() << ", "
        << "module: " << (void *)ptr << ", "
        << "size: " << size << ")\n";

    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    if (allocator == nullptr) {
        debug(user_context) << "Vulkan: Failed to compile kernel module ... invalid allocator pointer!\n";
        return nullptr;
    }

    if ((ptr == nullptr) || (size <= 0)) {
        debug(user_context) << "Vulkan: Failed to compile kernel module ... invalid module!\n";
        return nullptr;
    }

    VkSystemAllocationScope alloc_scope = VkSystemAllocationScope::VK_SYSTEM_ALLOCATION_SCOPE_OBJECT;
    VulkanCompilationCacheEntry *cache_entry = (VulkanCompilationCacheEntry *)vk_host_malloc(user_context, sizeof(VulkanCompilationCacheEntry), 0, alloc_scope, allocator->callbacks());
    if (cache_entry == nullptr) {
        debug(user_context) << "Vulkan: Failed to allocate compilation cache entry! Out of memory!\n";
        return nullptr;
    }
    memset(cache_entry, 0, sizeof(VulkanCompilationCacheEntry));

    // Decode the header and the kernel modules
    const uint32_t *module_header = (const uint32_t *)(ptr);
    if ((size_t)size < sizeof(uint32_t)) {
        debug(user_context) << "Vulkan: Code module size is invalid!\n";
        return nullptr;
    }

    // Extract the number of kernels from the module header
    uint32_t kernel_count = module_header[0];
    debug(user_context) << " kernel_count=" << kernel_count << "\n";

    // Allocate enough space to store the compiled modules
    cache_entry->compiled_modules = (VulkanCompiledShaderModule **)vk_host_malloc(user_context, sizeof(VulkanCompiledShaderModule *) * kernel_count, 0, alloc_scope, allocator->callbacks());
    if (cache_entry->compiled_modules == nullptr) {
        debug(user_context) << "Vulkan: Failed to allocate host memory!\n";
        return nullptr;
    }
    cache_entry->module_count = kernel_count;
    cache_entry->allocator = allocator;

    // Allocate a temp buffer to decode the binary sizes of each "SPIR-V Module"
    uint32_t *binary_sizes = (uint32_t *)vk_host_malloc(user_context, sizeof(uint32_t) * kernel_count, 0, alloc_scope, allocator->callbacks());
    if (binary_sizes == nullptr) {
        debug(user_context) << "Vulkan: Failed to allocate system memory!\n";
        return nullptr;
    }

    // Extract the size of each "SPIR-V Module" for each kernel
    size_t byte_offset = 0;
    uint32_t word_offset = 1;  // skip past the kernel count
    for (uint32_t i = 0; (i < kernel_count) && (byte_offset < (size_t)size); ++i) {
        // Extract binary size
        binary_sizes[i] = module_header[word_offset++];

        // Skip past the kernel name
        uint32_t kernel_name_word_count = module_header[word_offset++];
        const char *kernel_name = (const char *)(module_header + word_offset);
        word_offset += kernel_name_word_count;

        // Compute byte offset for loop range check
        byte_offset = (word_offset * sizeof(uint32_t));
        debug(user_context) << " kernel[" << i << "] name: " << kernel_name << " binary_size: " << binary_sizes[i] << " bytes\n";
    }
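
    // Layout sketch (as assumed by the decode above): the blob is a header of
    // { kernel_count, then per kernel: binary_size, name word count, padded name },
    // followed by each kernel's SPIR-V binary packed back-to-back in the same order.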

    // Compile each "SPIR-V Module" for each kernel
    bool compile_failed = false;
    for (uint32_t i = 0; (i < kernel_count) && (byte_offset < (size_t)size); ++i) {

        // Skip the header and determine the start address of the "SPIR-V Module"
        const uint32_t *spirv_ptr = (const uint32_t *)(ptr + byte_offset);
        size_t spirv_size = binary_sizes[i];

        debug(user_context) << "   spirv_size[" << i << "] = " << spirv_size << " bytes\n";
        debug(user_context) << "   spirv_ptr[" << i << "] = " << spirv_ptr << "\n";

        // Compile the "SPIR-V Module" for the kernel
        cache_entry->compiled_modules[i] = vk_compile_shader_module(user_context, allocator, (const char *)spirv_ptr, (int)spirv_size);
        if (cache_entry->compiled_modules[i] == nullptr) {
            debug(user_context) << "Vulkan: Failed to compile shader module!\n";
            compile_failed = true;
        }

        // Skip to the next "SPIR-V Module"
        byte_offset += binary_sizes[i];
    }

    // Free temp buffer
    vk_host_free(user_context, binary_sizes, allocator->callbacks());

    // Cleanup if compile failed
    if (compile_failed) {
        vk_host_free(user_context, cache_entry->compiled_modules, allocator->callbacks());
        vk_host_free(user_context, cache_entry, allocator->callbacks());
        cache_entry = nullptr;
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return cache_entry;
}

VulkanCompiledShaderModule *vk_compile_shader_module(void *user_context, VulkanMemoryAllocator *allocator,
                                                     const char *ptr, int size) {
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_compile_shader_module (user_context: " << user_context << ", "
        << "allocator: " << (void *)allocator << ", "
        << "device: " << (void *)allocator->current_device() << ", "
        << "module: " << (void *)ptr << ", "
        << "size: " << size << ")\n";

    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    if (allocator == nullptr) {
        error(user_context) << "Vulkan: Failed to compile shader modules ... invalid allocator pointer!\n";
        return nullptr;
    }

    if ((ptr == nullptr) || (size <= 0)) {
        error(user_context) << "Vulkan: Failed to compile shader modules ... invalid program source buffer!\n";
        return nullptr;
    }

    const uint32_t *module_ptr = (const uint32_t *)ptr;
    const uint32_t module_size = (const uint32_t)size;

    halide_debug_assert(user_context, module_ptr != nullptr);
    halide_debug_assert(user_context, module_size >= (2 * sizeof(uint32_t)));

    uint32_t header_word_count = module_ptr[0];
    uint32_t shader_count = module_ptr[1];
    uint32_t header_size = header_word_count * sizeof(uint32_t);

    // skip past the preamble header to the start of the SPIR-V binary
    const uint32_t *binary_ptr = (module_ptr + header_word_count);
    size_t binary_size = (size - header_size);

#ifdef DEBUG_RUNTIME
    debug(user_context) << "Vulkan: Decoding module ("
                        << "module_ptr: " << (void *)module_ptr << ", "
                        << "header_word_count: " << header_word_count << ", "
                        << "header_size: " << header_size << ", "
                        << "binary_ptr: " << (void *)binary_ptr << ", "
                        << "binary_size: " << (uint32_t)binary_size << ")\n";
#endif

    VkShaderModuleCreateInfo shader_info = {
        VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,  // struct type
        nullptr,                      // pointer to structure extending this
        0,                            // flags (currently unused)
        (size_t)binary_size,          // code size in bytes
        (const uint32_t *)binary_ptr  // source
    };

    VkSystemAllocationScope alloc_scope = VkSystemAllocationScope::VK_SYSTEM_ALLOCATION_SCOPE_OBJECT;
    VulkanCompiledShaderModule *compiled_module = (VulkanCompiledShaderModule *)vk_host_malloc(user_context, sizeof(VulkanCompiledShaderModule), 0, alloc_scope, allocator->callbacks());
    if (compiled_module == nullptr) {
        error(user_context) << "Vulkan: Failed to allocate compilation cache entry! Out of memory!\n";
        return nullptr;
    }
    memset(compiled_module, 0, sizeof(VulkanCompiledShaderModule));

    // decode the entry point data and extract the shader bindings
    VulkanShaderBinding *decoded_bindings = vk_decode_shader_bindings(user_context, allocator, module_ptr, module_size);
    if (decoded_bindings == nullptr) {
        error(user_context) << "Vulkan: Failed to decode shader bindings!\n";
        return nullptr;
    }

    // validate that the compiled shader can be executed by the device with the requested resources
    int valid_status = vk_validate_shader_for_device(user_context, allocator, decoded_bindings, shader_count);
    if (valid_status != halide_error_code_success) {
        vk_host_free(user_context, decoded_bindings, allocator->callbacks());
        vk_host_free(user_context, compiled_module, allocator->callbacks());
        return nullptr;
    }

    // save the shader bindings in the cache entry
    compiled_module->shader_bindings = decoded_bindings;
    compiled_module->shader_count = shader_count;

    VkResult result = vkCreateShaderModule(allocator->current_device(), &shader_info, allocator->callbacks(), &(compiled_module->shader_module));
    if ((result != VK_SUCCESS)) {
        error(user_context) << "Vulkan: vkCreateShaderModule Failed! Error returned: " << vk_get_error_name(result) << "\n";
        vk_host_free(user_context, compiled_module->shader_bindings, allocator->callbacks());
        vk_host_free(user_context, compiled_module, allocator->callbacks());
        return nullptr;
    }

    // allocate an array for storing the descriptor set layouts
    if (compiled_module->shader_count) {
        compiled_module->descriptor_set_layouts = (VkDescriptorSetLayout *)vk_host_malloc(user_context, compiled_module->shader_count * sizeof(VkDescriptorSetLayout), 0, alloc_scope, allocator->callbacks());
        if (compiled_module->descriptor_set_layouts == nullptr) {
            error(user_context) << "Vulkan: Failed to allocate descriptor set layouts for cache entry! Out of memory!\n";
            return nullptr;
        }
        memset(compiled_module->descriptor_set_layouts, 0, compiled_module->shader_count * sizeof(VkDescriptorSetLayout));
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return compiled_module;
}

void vk_destroy_compiled_shader_module(VulkanCompiledShaderModule *shader_module, VulkanMemoryAllocator *allocator) {
    void *user_context = nullptr;
#ifdef DEBUG_RUNTIME
    debug(user_context)
        << " vk_destroy_compiled_shader_module (shader_module: "
        << shader_module << ", allocator: " << allocator << ")\n";
#endif

    if (shader_module == nullptr) {
        return;
    }

    if (allocator == nullptr) {
        return;
    }

    if (shader_module->descriptor_set_layouts) {
        for (uint32_t n = 0; n < shader_module->shader_count; n++) {
            debug(user_context) << "  destroying descriptor set layout [" << n << "] " << shader_module->descriptor_set_layouts[n] << "\n";
            vk_destroy_descriptor_set_layout(user_context, allocator, shader_module->descriptor_set_layouts[n]);
            shader_module->descriptor_set_layouts[n] = VK_NULL_HANDLE;
        }
        debug(user_context) << "  destroying descriptor set layout " << (void *)shader_module->descriptor_set_layouts << "\n";
        vk_host_free(user_context, shader_module->descriptor_set_layouts, allocator->callbacks());
        shader_module->descriptor_set_layouts = nullptr;
    }
    if (shader_module->pipeline_layout) {
        debug(user_context) << "  destroying pipeline layout " << (void *)shader_module->pipeline_layout << "\n";
        vk_destroy_pipeline_layout(user_context, allocator, shader_module->pipeline_layout);
        shader_module->pipeline_layout = VK_NULL_HANDLE;
    }

    if (shader_module->shader_bindings) {
#ifdef DEBUG_RUNTIME
        debug(user_context)
            << "  destroying shader bindings ("
            << "shader_module: " << shader_module << ", "
            << "shader_bindings: " << shader_module->shader_bindings << ")\n";
#endif
        for (uint32_t n = 0; n < shader_module->shader_count; n++) {
            debug(user_context) << "  destroying shader binding [" << n << "] ";
            if (shader_module->shader_bindings[n].entry_point_name) {
                debug(user_context) << shader_module->shader_bindings[n].entry_point_name << "\n";
                vk_host_free(user_context, shader_module->shader_bindings[n].entry_point_name, allocator->callbacks());
                shader_module->shader_bindings[n].entry_point_name = nullptr;
            } else {
                debug(user_context) << "<unknown entry point>\n";
            }
            if (shader_module->shader_bindings[n].args_region) {
                debug(user_context) << "  destroying shader binding args regions [" << n << "]\n";
                vk_destroy_scalar_uniform_buffer(user_context, allocator, shader_module->shader_bindings[n].args_region);
                shader_module->shader_bindings[n].args_region = nullptr;
            }
            if (shader_module->shader_bindings[n].descriptor_pool) {
                debug(user_context) << "  destroying shader binding descriptor pool [" << n << "]\n";
                vk_destroy_descriptor_pool(user_context, allocator, shader_module->shader_bindings[n].descriptor_pool);
                shader_module->shader_bindings[n].descriptor_pool = VK_NULL_HANDLE;
            }
            if (shader_module->shader_bindings[n].specialization_constants) {
                debug(user_context) << "  destroying shader binding specialization constants [" << n << "]\n";
                vk_host_free(user_context, shader_module->shader_bindings[n].specialization_constants, allocator->callbacks());
                shader_module->shader_bindings[n].specialization_constants = nullptr;
            }
            if (shader_module->shader_bindings[n].shared_memory_allocations) {
                debug(user_context) << "  destroying shader binding shared memory allocations [" << n << "]\n";
                vk_host_free(user_context, shader_module->shader_bindings[n].shared_memory_allocations, allocator->callbacks());
                shader_module->shader_bindings[n].shared_memory_allocations = nullptr;
            }
            if (shader_module->shader_bindings[n].compute_pipeline) {
                debug(user_context) << "  destroying shader binding compute pipeline [" << n << "]\n";
                vk_destroy_compute_pipeline(user_context, allocator, shader_module->shader_bindings[n].compute_pipeline);
                shader_module->shader_bindings[n].compute_pipeline = VK_NULL_HANDLE;
            }
        }
        vk_host_free(user_context, shader_module->shader_bindings, allocator->callbacks());
        shader_module->shader_bindings = nullptr;
    }
    if (shader_module->shader_module) {
        debug(user_context) << "  destroying shader module " << (void *)shader_module->shader_module << "\n";
        vkDestroyShaderModule(allocator->current_device(), shader_module->shader_module, allocator->callbacks());
        shader_module->shader_module = VK_NULL_HANDLE;
    }
    shader_module->shader_count = 0;
    debug(user_context) << "  Destroyed compiled shader module: " << (void *)shader_module << "\n";
    vk_host_free(user_context, shader_module, allocator->callbacks());
    shader_module = nullptr;
}

void vk_destroy_compilation_cache_entry(VulkanCompilationCacheEntry *cache_entry) {
    void *user_context = nullptr;
    debug(user_context)
        << " vk_destroy_compilation_cache_entry (cache_entry: " << cache_entry << ")\n";

    if (cache_entry == nullptr) {
        return;
    }

    VulkanMemoryAllocator *allocator = cache_entry->allocator;
    if (allocator == nullptr) {
        return;
    }

    debug(user_context)
        << " Destroying " << cache_entry->module_count << " shader modules for cache entry (cache_entry: " << cache_entry << ")\n";

    for (uint32_t m = 0; m < cache_entry->module_count; m++) {
        debug(user_context)
            << "  destroying compiled_module[" << m << "]: " << cache_entry->compiled_modules[m] << "\n";

        VulkanCompiledShaderModule *compiled_module = cache_entry->compiled_modules[m];
        vk_destroy_compiled_shader_module(compiled_module, allocator);
    }

    // release the module array and the cache entry itself
    vk_host_free(user_context, cache_entry->compiled_modules, allocator->callbacks());
    cache_entry->compiled_modules = nullptr;
    cache_entry->module_count = 0;
    cache_entry->allocator = nullptr;
    debug(user_context) << "Vulkan: Destroyed compilation cache entry (cache_entry: " << cache_entry << ")\n";
    vk_host_free(user_context, cache_entry, allocator->callbacks());
    cache_entry = nullptr;
}

int vk_destroy_shader_modules(void *user_context, VulkanMemoryAllocator *allocator) {

    debug(user_context)
        << " vk_destroy_shader_modules (user_context: " << user_context << ")\n";

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif
    if (allocator != nullptr) {
        compilation_cache.delete_context(user_context, allocator->current_device(), vk_destroy_compilation_cache_entry);
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif
    return halide_error_code_success;
}

// --------------------------------------------------------------------------

int vk_do_multidimensional_copy(void *user_context, VkCommandBuffer command_buffer,
                                const device_copy &c, uint64_t src_offset, uint64_t dst_offset,
                                int d, bool from_host, bool to_host) {
    if (d == 0) {

        if ((!from_host && to_host) ||
            (from_host && !to_host) ||
            (!from_host && !to_host)) {

            VkBufferCopy buffer_copy = {
                src_offset,   // srcOffset
                dst_offset,   // dstOffset
                c.chunk_size  // size
            };

            VkBuffer *src_buffer = reinterpret_cast<VkBuffer *>(c.src);
            VkBuffer *dst_buffer = reinterpret_cast<VkBuffer *>(c.dst);
            if (!src_buffer || !dst_buffer) {
                error(user_context) << "Vulkan: Failed to retrieve buffer for device memory!\n";
                return halide_error_code_internal_error;
            }

            vkCmdCopyBuffer(command_buffer, *src_buffer, *dst_buffer, 1, &buffer_copy);

        } else if ((c.dst + dst_offset) != (c.src + src_offset)) {
            // Could reach here if a user called directly into the
            // Vulkan API for a device->host copy on a source buffer
            // with device_dirty = false.
            memcpy((void *)(c.dst + dst_offset), (void *)(c.src + src_offset), c.chunk_size);
        }
    } else {
        // TODO: deal with negative strides. Currently the code in
        // device_buffer_utils.h does not do so either.
        uint64_t src_off = 0, dst_off = 0;
        for (uint64_t i = 0; i < c.extent[d - 1]; i++) {
            int err = vk_do_multidimensional_copy(user_context, command_buffer, c,
                                                  src_offset + src_off,
                                                  dst_offset + dst_off,
                                                  d - 1, from_host, to_host);
            dst_off += c.dst_stride_bytes[d - 1];
            src_off += c.src_stride_bytes[d - 1];
            if (err) {
                return err;
            }
        }
    }
    return halide_error_code_success;
}
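
// Note: for a multi-dimensional copy, each level of the recursion above peels off
// one dimension and applies that dimension's strides, so the innermost (d == 0)
// case issues one VkBufferCopy per contiguous chunk of c.chunk_size bytes.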

int vk_device_crop_from_offset(void *user_context,
                               const struct halide_buffer_t *src,
                               int64_t offset,
                               struct halide_buffer_t *dst) {

    VulkanContext ctx(user_context);
    if (ctx.error != halide_error_code_success) {
        error(user_context) << "Vulkan: Failed to acquire context!\n";
        return ctx.error;
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    if (offset < 0) {
        error(user_context) << "Vulkan: Invalid offset for device crop!\n";
        return halide_error_code_device_crop_failed;
    }

    // get the allocated region for the device
    MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(src->device);
    if (device_region == nullptr) {
        error(user_context) << "Vulkan: Failed to crop region! Invalid device region!\n";
        return halide_error_code_device_crop_failed;
    }

    // create the cropped region from the allocated region
    MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, (uint64_t)offset);
    if ((cropped_region == nullptr) || (cropped_region->handle == nullptr)) {
        error(user_context) << "Vulkan: Failed to crop region! Unable to create memory region!\n";
        return halide_error_code_device_crop_failed;
    }

    // update the destination to the cropped region
    dst->device = (uint64_t)cropped_region;
    dst->device_interface = src->device_interface;

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return halide_error_code_success;
}

// --------------------------------------------------------------------------

}  // namespace
}  // namespace Vulkan
}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide

#endif  // HALIDE_RUNTIME_VULKAN_RESOURCES_H