Halide 22.0.0
Halide compiler and libraries
Float16.h
Go to the documentation of this file.
1#ifndef HALIDE_FLOAT16_H
2#define HALIDE_FLOAT16_H
3
5#include <cstdint>
6#include <string>
7
8namespace Halide {
9
10/** Class that provides a type that implements half precision
11 * floating point (IEEE754 2008 binary16) in software.
12 *
13 * This type is enforced to be 16-bits wide and maintains no state
14 * other than the raw IEEE754 binary16 bits so that it can be passed
15 * to code that checks a type's size and used for halide_buffer_t allocation.
16 * */
17struct float16_t {
18
19 static const int mantissa_bits = 10;
20 static const uint16_t sign_mask = 0x8000;
21 static const uint16_t exponent_mask = 0x7c00;
22 static const uint16_t mantissa_mask = 0x03ff;
23
24 /// \name Constructors
25 /// @{
26
27 /** Construct from a float, double, or int using
28 * round-to-nearest-ties-to-even. Out-of-range values become +/-
29 * infinity.
30 */
31 // @{
32 explicit float16_t(float value);
33 explicit float16_t(double value);
34 explicit float16_t(int value);
35 explicit float16_t(int64_t value);
36 explicit float16_t(uint64_t value);
37 // @}
38
39 /** Construct a float16_t with the bits initialised to 0. This represents
40 * positive zero.*/
41 float16_t() = default;
42
43#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
44 /** Construct a float16_t from compiler's built-in _Float16 type. */
45 explicit float16_t(_Float16 value) {
46 memcpy(&data, &value, sizeof(_Float16));
47 }
48#endif
49
50 /// @}
51
52 // Use explicit to avoid accidentally raising the precision
53 /** Cast to float */
54 explicit operator float() const;
55 /** Cast to double */
56 explicit operator double() const;
57 /** Cast to int */
58 explicit operator int() const;
59
60#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
61 /** Cast to compiler's built-in _Float16 type. */
62 explicit operator _Float16() const {
63 _Float16 result;
64 memcpy(&result, &data, sizeof(_Float16));
65 return result;
66 }
67#endif
68
69 /** Get a new float16_t that represents a special value */
70 // @{
76 // @}
77
78 /** Get a new float16_t with the given raw bits
79 *
80 * \param bits The bits conformant to IEEE754 binary16
81 */
83
84 /** Return a new float16_t with a negated sign bit*/
86
87 /** Arithmetic operators. */
88 // @{
94 return (*this = *this + rhs);
95 }
97 return (*this = *this - rhs);
98 }
100 return (*this = *this * rhs);
101 }
103 return (*this = *this / rhs);
104 }
105 // @}
106
107 /** Comparison operators */
108 // @{
109 bool operator==(float16_t rhs) const;
110 bool operator!=(float16_t rhs) const {
111 return !(*this == rhs);
112 }
113 bool operator>(float16_t rhs) const;
114 bool operator<(float16_t rhs) const;
115 bool operator>=(float16_t rhs) const {
116 return (*this > rhs) || (*this == rhs);
117 }
118 bool operator<=(float16_t rhs) const {
119 return (*this < rhs) || (*this == rhs);
120 }
121 // @}
122
123 /** Properties */
124 // @{
125 bool is_nan() const;
126 bool is_infinity() const;
127 bool is_negative() const;
128 bool is_zero() const;
129 // @}
130
131 /** Returns the bits that represent this float16_t.
132 *
133 * An alternative method to access the bits is to cast a pointer
134 * to this instance as a pointer to a uint16_t.
135 **/
137
138private:
139 // The raw bits.
140 uint16_t data = 0;
141};
142
143static_assert(sizeof(float16_t) == 2, "float16_t should occupy two bytes");
144
145} // namespace Halide
146
147template<>
148HALIDE_ALWAYS_INLINE constexpr halide_type_t halide_type_of<Halide::float16_t>() {
150}
151
152namespace Halide {
153
154/** Class that provides a type that implements half precision
155 * floating point using the bfloat16 format.
156 *
157 * This type is enforced to be 16-bits wide and maintains no state
158 * other than the raw bits so that it can be passed to code that checks
159 * a type's size and used for halide_buffer_t allocation. */
161
162 static const int mantissa_bits = 7;
163 static const uint16_t sign_mask = 0x8000;
164 static const uint16_t exponent_mask = 0x7f80;
165 static const uint16_t mantissa_mask = 0x007f;
166
168
169 /// \name Constructors
170 /// @{
171
172 /** Construct from a float, double, or int using
173 * round-to-nearest-ties-to-even. Out-of-range values become +/-
174 * infinity.
175 */
176 // @{
177 explicit bfloat16_t(float value);
178 explicit bfloat16_t(double value);
179 explicit bfloat16_t(int value);
180 explicit bfloat16_t(int64_t value);
181 explicit bfloat16_t(uint64_t value);
182 // @}
183
184 /** Construct a bfloat16_t with the bits initialised to 0. This represents
185 * positive zero.*/
186 bfloat16_t() = default;
187
188 /// @}
189
190 // Use explicit to avoid accidentally raising the precision
191 /** Cast to float */
192 explicit operator float() const;
193 /** Cast to double */
194 explicit operator double() const;
195 /** Cast to int */
196 explicit operator int() const;
197
198 /** Get a new bfloat16_t that represents a special value */
199 // @{
205 // @}
206
207 /** Get a new bfloat16_t with the given raw bits
208 *
209 * \param bits The bits conformant to the bfloat16 format
210 */
212
213 /** Return a new bfloat16_t with a negated sign bit*/
215
216 /** Arithmetic operators. */
217 // @{
223 return (*this = *this + rhs);
224 }
226 return (*this = *this - rhs);
227 }
229 return (*this = *this * rhs);
230 }
232 return (*this = *this / rhs);
233 }
234 // @}
235
236 /** Comparison operators */
237 // @{
238 bool operator==(bfloat16_t rhs) const;
239 bool operator!=(bfloat16_t rhs) const {
240 return !(*this == rhs);
241 }
242 bool operator>(bfloat16_t rhs) const;
243 bool operator<(bfloat16_t rhs) const;
244 bool operator>=(bfloat16_t rhs) const {
245 return (*this > rhs) || (*this == rhs);
246 }
247 bool operator<=(bfloat16_t rhs) const {
248 return (*this < rhs) || (*this == rhs);
249 }
250 // @}
251
252 /** Properties */
253 // @{
254 bool is_nan() const;
255 bool is_infinity() const;
256 bool is_negative() const;
257 bool is_zero() const;
258 // @}
259
260 /** Returns the bits that represent this bfloat16_t.
261 *
262 * An alternative method to access the bits is to cast a pointer
263 * to this instance as a pointer to a uint16_t.
264 **/
266
267private:
268 // The raw bits.
269 uint16_t data = 0;
270};
271
272static_assert(sizeof(bfloat16_t) == 2, "bfloat16_t should occupy two bytes");
273
274} // namespace Halide
275
276template<>
277HALIDE_ALWAYS_INLINE constexpr halide_type_t halide_type_of<Halide::bfloat16_t>() {
279}
280
281#endif
This file declares the routines used by Halide internally in its runtime.
@ halide_type_float
IEEE floating point numbers.
@ halide_type_bfloat
floating point numbers in the bfloat format
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:49
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
unsigned __INT16_TYPE__ uint16_t
void * memcpy(void *s1, const void *s2, size_t n)
Class that provides a type that implements half precision floating point using the bfloat16 format.
Definition: Float16.h:160
bfloat16_t operator-=(bfloat16_t rhs)
Definition: Float16.h:225
bool operator>(bfloat16_t rhs) const
bool operator<=(bfloat16_t rhs) const
Definition: Float16.h:247
static const bfloat16_t zero
Definition: Float16.h:167
bfloat16_t operator-() const
Return a new bfloat16_t with a negated sign bit.
bool operator<(bfloat16_t rhs) const
static const uint16_t exponent_mask
Definition: Float16.h:164
bfloat16_t operator*(bfloat16_t rhs) const
bool is_infinity() const
static const uint16_t sign_mask
Definition: Float16.h:163
uint16_t to_bits() const
Returns the bits that represent this bfloat16_t.
static const bfloat16_t infinity
Definition: Float16.h:167
static const uint16_t mantissa_mask
Definition: Float16.h:165
bool operator==(bfloat16_t rhs) const
Comparison operators.
static const bfloat16_t nan
Definition: Float16.h:167
bfloat16_t operator+(bfloat16_t rhs) const
Arithmetic operators.
bfloat16_t operator*=(bfloat16_t rhs)
Definition: Float16.h:228
static const bfloat16_t negative_zero
Definition: Float16.h:167
bfloat16_t operator-(bfloat16_t rhs) const
bfloat16_t(int64_t value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bfloat16_t operator/=(bfloat16_t rhs)
Definition: Float16.h:231
bool is_negative() const
bfloat16_t()=default
Construct a bfloat16_t with the bits initialised to 0.
bfloat16_t operator/(bfloat16_t rhs) const
static bfloat16_t make_zero()
Get a new bfloat16_t that represents a special value.
static bfloat16_t make_negative_zero()
static const int mantissa_bits
Definition: Float16.h:162
static bfloat16_t make_infinity()
bfloat16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static bfloat16_t make_from_bits(uint16_t bits)
Get a new bfloat16_t with the given raw bits.
bfloat16_t(uint64_t value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool is_zero() const
bfloat16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool is_nan() const
Properties.
static bfloat16_t make_negative_infinity()
bfloat16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator!=(bfloat16_t rhs) const
Definition: Float16.h:239
static const bfloat16_t negative_infinity
Definition: Float16.h:167
bfloat16_t operator+=(bfloat16_t rhs)
Definition: Float16.h:222
bool operator>=(bfloat16_t rhs) const
Definition: Float16.h:244
static bfloat16_t make_nan()
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in software.
Definition: Float16.h:17
static float16_t make_infinity()
float16_t operator/(float16_t rhs) const
bool is_negative() const
float16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static const uint16_t sign_mask
Definition: Float16.h:20
float16_t operator+(float16_t rhs) const
Arithmetic operators.
bool is_zero() const
float16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator>=(float16_t rhs) const
Definition: Float16.h:115
static float16_t make_zero()
Get a new float16_t that represents a special value.
float16_t(uint64_t value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
uint16_t to_bits() const
Returns the bits that represent this float16_t.
bool operator<(float16_t rhs) const
static const uint16_t mantissa_mask
Definition: Float16.h:22
bool operator==(float16_t rhs) const
Comparison operators.
static float16_t make_negative_zero()
static float16_t make_from_bits(uint16_t bits)
Get a new float16_t with the given raw bits.
float16_t(int64_t value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
float16_t operator/=(float16_t rhs)
Definition: Float16.h:102
static float16_t make_nan()
float16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
float16_t()=default
Construct a float16_t with the bits initialised to 0.
static const uint16_t exponent_mask
Definition: Float16.h:21
float16_t operator-(float16_t rhs) const
bool is_nan() const
Properties.
static float16_t make_negative_infinity()
static const int mantissa_bits
Definition: Float16.h:19
bool is_infinity() const
float16_t operator-=(float16_t rhs)
Definition: Float16.h:96
float16_t operator*(float16_t rhs) const
float16_t operator-() const
Return a new float16_t with a negated sign bit.
bool operator!=(float16_t rhs) const
Definition: Float16.h:110
bool operator<=(float16_t rhs) const
Definition: Float16.h:118
bool operator>(float16_t rhs) const
float16_t operator*=(float16_t rhs)
Definition: Float16.h:99
float16_t operator+=(float16_t rhs)
Definition: Float16.h:93
A runtime tag for a type in the halide type system.