|
Halide 20.0.0
Halide compiler and libraries
|
Classes | |
| struct | Adams2019Params |
| struct | Anderson2021Params |
| struct | BoundContents |
| struct | Cache |
| struct | CachingOptions |
| class | ExprBranching |
| struct | Filter |
| struct | FunctionDAG |
| struct | GlobalAccessAccumulator |
| struct | GPULoopInfo |
| class | LoadJacobian |
| struct | LocalAccessAccumulator |
| struct | LoopNest |
| class | LoopNestParser |
| struct | MemInfo |
| struct | MemTraits |
| struct | MemTraits< GlobalMem > |
| struct | MemTraits< LocalMem > |
| struct | MemTraits< SharedMem > |
| struct | NoOpMutator |
| struct | OptionalRational |
| class | ParamParser |
| struct | ScopedStatistic |
| struct | ScopedTimer |
| struct | SearchSpace |
| struct | SearchSpaceOptions |
| struct | SharedAccessAccumulator |
| class | Span |
| struct | State |
| class | StateQueue |
| struct | Statistics |
| struct | Strides |
| struct | ThreadInfo |
| struct | ThreadTileOption |
| struct | Timer |
Typedefs | |
| typedef PerfectHashMap< FunctionDAG::Node::Stage, ScheduleFeatures > | StageMapOfScheduleFeatures |
| using | BlockCache = NodeMap< std::map< int, std::vector< IntrusivePtr< const LoopNest > > > > |
| using | Bound = IntrusivePtr< const BoundContents > |
| template<typename T > | |
| using | NodeMap = PerfectHashMap< FunctionDAG::Node, T > |
| template<typename T > | |
| using | StageMap = PerfectHashMap< FunctionDAG::Node::Stage, T > |
| using | Clock = std::chrono::high_resolution_clock |
| template<typename T > | |
| using | Accumulator = typename MemTraits< T >::Accumulator |
| template<typename T > | |
| using | MemInfoType = MemInfo< typename MemTraits< T >::MemInfoType > |
| using | GlobalMemInfo = MemInfoType< GlobalMem > |
| using | SharedMemInfo = MemInfoType< SharedMem > |
| using | LocalMemInfo = MemInfoType< LocalMem > |
| using | LoopNestMap = map< const LoopNest *, pair< const LoopNest *, int > > |
Enumerations | |
| enum class | GPU_parallelism { Block , Thread , Serial , Simd , Parallelized , None } |
| enum class | GPUMemoryType { Global , Shared , Local , Registers , Inlined } |
Variables | |
| constexpr int | kLocalMemoryLimit = 524288 |
| typedef PerfectHashMap< FunctionDAG::Node::Stage, ScheduleFeatures > Halide::Internal::Autoscheduler::StageMapOfScheduleFeatures |
Definition at line 12 of file AutoSchedule.h.
| using Halide::Internal::Autoscheduler::BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest> >> > |
Definition at line 363 of file FunctionDAG.h.
| template<typename T> using Halide::Internal::Autoscheduler::NodeMap = PerfectHashMap<FunctionDAG::Node, T> |
Definition at line 21 of file LoopNest.h.
| template<typename T> using Halide::Internal::Autoscheduler::StageMap = PerfectHashMap<FunctionDAG::Node::Stage, T> |
Definition at line 24 of file LoopNest.h.
| typedef std::chrono::high_resolution_clock Halide::Internal::Autoscheduler::Clock |
| template<typename T> using Halide::Internal::Autoscheduler::Accumulator = typename MemTraits<T>::Accumulator |
Definition at line 53 of file GPUMemInfo.h.
| template<typename T> using Halide::Internal::Autoscheduler::MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType> |
Definition at line 109 of file GPUMemInfo.h.
Definition at line 111 of file GPUMemInfo.h.
Definition at line 112 of file GPUMemInfo.h.
Definition at line 113 of file GPUMemInfo.h.
| Enumerator | |
|---|---|
| Block | |
| Thread | |
| Serial | |
| Simd | |
| Parallelized | |
| None | |
Definition at line 32 of file LoopNest.h.
| Enumerator | |
|---|---|
| Global | |
| Shared | |
| Local | |
| Registers | |
| Inlined | |
Definition at line 44 of file LoopNest.h.
| void Halide::Internal::Autoscheduler::find_and_apply_schedule | ( | FunctionDAG & | dag, |
| const std::vector< Function > & | outputs, | ||
| const Adams2019Params & | params, | ||
| CostModel * | cost_model, | ||
| int | beam_size, | ||
| StageMapOfScheduleFeatures * | schedule_features | ||
| ) |
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_tilings | ( | const vector< int64_t > & | s, |
| int | d, | ||
| int | factor, | ||
| bool | allow_splits | ||
| ) |
| const LoopNest * Halide::Internal::Autoscheduler::deepest_common_ancestor | ( | const std::map< const LoopNest *, std::pair< const LoopNest *, int > > & | parents, |
| const LoopNest * | a, | ||
| const LoopNest * | b | ||
| ) |
| void Halide::Internal::Autoscheduler::compute_loop_nest_parents | ( | std::map< const LoopNest *, std::pair< const LoopNest *, int > > & | parents, |
| const LoopNest * | here, | ||
| int | depth | ||
| ) |
| void Halide::Internal::Autoscheduler::find_and_apply_schedule | ( | FunctionDAG & | dag, |
| const std::vector< Function > & | outputs, | ||
| const Anderson2021Params & | params, | ||
| const Target & | target, | ||
| CostModel * | cost_model, | ||
| int | beam_size, | ||
| StageMapOfScheduleFeatures * | schedule_features | ||
| ) |
| void Halide::Internal::Autoscheduler::sanitize_names | ( | std::string & | str | ) |
| std::string Halide::Internal::Autoscheduler::stringify | ( | GPU_parallelism | label | ) |
| bool Halide::Internal::Autoscheduler::may_subtile | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_shared_memory_limit | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_shared_memory_sm_limit | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_active_block_hardware_limit | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_active_warp_hardware_limit | ( | const Anderson2021Params & | params | ) |
Definition at line 62 of file LoopNest.h.
| double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit_env_var | ( | ) |
| double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit | ( | ) |
| bool Halide::Internal::Autoscheduler::accessed_at_constant_indices | ( | const std::vector< int > & | unrolled, |
| const FunctionDAG::Edge * | e | ||
| ) |
| bool Halide::Internal::Autoscheduler::verify_memoized_features | ( | ) |
| bool Halide::Internal::Autoscheduler::is_memoize_blocks_enabled | ( | ) |
| double Halide::Internal::Autoscheduler::get_stack_memory_adjustment_factor | ( | ) |
| int64_t Halide::Internal::Autoscheduler::get_stack_memory_limit | ( | ) |
| bool Halide::Internal::Autoscheduler::use_adjusted_tilings | ( | ) |
| bool Halide::Internal::Autoscheduler::compute_root_and_inline_only | ( | ) |
| void Halide::Internal::Autoscheduler::deep_copy_loop_nest | ( | LoopNest * | new_loop_nest, |
| const LoopNest * | new_loop_nest_parent, | ||
| const IntrusivePtr< const LoopNest > & | existing_loop_nest, | ||
| const PostCreateMutator & | post_create_mutator | ||
| ) |
Definition at line 50 of file State.h.
References Halide::cast(), and deep_copy_loop_nest().
Referenced by deep_copy_loop_nest() (both overloads).
| LoopNest * Halide::Internal::Autoscheduler::deep_copy_loop_nest | ( | const IntrusivePtr< const LoopNest > & | loop_nest, |
| const PostCreateMutator & | post_create_mutator | ||
| ) |
Definition at line 68 of file State.h.
References Halide::cast(), and deep_copy_loop_nest().
Definition at line 17 of file test.h.
References Halide::cast(), and user_assert.
| void Halide::Internal::Autoscheduler::approx_eq | ( | int | line, |
| const A & | expected, | ||
| const B & | actual, | ||
| float | epsilon | ||
| ) |
Definition at line 25 of file test.h.
References Halide::cast(), and user_assert.
Definition at line 33 of file test.h.
References Halide::cast(), and user_assert.
| bool Halide::Internal::Autoscheduler::equal_to_existing_size | ( | const std::vector< int64_t > & | s, |
| const std::vector< int64_t > & | nums | ||
| ) |
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_serial_tilings | ( | const std::vector< int64_t > & | s, |
| int | d, | ||
| int | last_d, | ||
| int | vectorized_index, | ||
| const std::vector< int > & | vec_dim_serial_sizes, | ||
| bool | filter_small_outer_extents = false, | ||
| bool | allow_inner_ones = false | ||
| ) |
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_tilings | ( | const std::vector< int64_t > & | s, |
| int | d, | ||
| int | factor, | ||
| bool | allow_splits, | ||
| const std::vector< int > & | inner_sizes = std::vector< int >() | ||
| ) |
| void Halide::Internal::Autoscheduler::lowered_dims | ( | const std::vector< int64_t > & | size, |
| int | vector_loop_i, | ||
| std::vector< int64_t > & | lowered_size | ||
| ) |
Moves the vectorized dimension first and removes dimensions of size 1, so that the result reflects the actual thread dimensions when loop nests are lowered.
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_gpu_tilings | ( | const std::vector< std::vector< int64_t > > & | stage_sizes, |
| const std::vector< std::vector< int > > & | pure_dims, | ||
| const std::vector< int64_t > & | max_s, | ||
| int | d, | ||
| const std::vector< int > & | vectorized_indices, | ||
| bool | serial_inner, | ||
| bool | is_compute_root_stage | ||
| ) |