Open Model Railroad Network (OpenMRN)
Loading...
Searching...
No Matches
ieeehalfprecision.c
1/******************************************************************************
2 *
3 * Filename: ieeehalfprecision.c
4 * Programmer: James Tursa
5 * Version: 1.0
6 * Date: March 3, 2009
7 * Copyright: (c) 2009 by James Tursa, All Rights Reserved
8 *
9 * This code uses the BSD License:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are
13 * met:
14 *
15 * * Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * * Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the distribution
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * This file contains C code to convert between IEEE double, single, and half
34 * precision floating point formats. The intended use is for standalone C code
35 * that does not rely on MATLAB mex.h. The bit pattern for the half precision
36 * floating point format is stored in a 16-bit unsigned int variable. The half
37 * precision bit pattern definition is:
38 *
39 * 1 bit sign bit
40 * 5 bits exponent, biased by 15
41 * 10 bits mantissa, hidden leading bit, normalized to 1.0
42 *
43 * Special floating point bit patterns recognized and supported:
44 *
45 * All exponent bits zero:
46 * - If all mantissa bits are zero, then number is zero (possibly signed)
47 * - Otherwise, number is a denormalized bit pattern
48 *
49 * All exponent bits set to 1:
50 * - If all mantissa bits are zero, then number is +Infinity or -Infinity
51 * - Otherwise, number is NaN (Not a Number)
52 *
53 * For the denormalized cases, note that 2^(-24) is the smallest number that can
54 * be represented in half precision exactly. 2^(-25) will convert to 2^(-24)
55 * because of the rounding algorithm used, and 2^(-26) is too small and underflows
56 * to zero.
57 *
58 ********************************************************************************/
59
60// Includes -------------------------------------------------------------------
61
62#include <string.h>
63#include <stdint.h>
64
65// Macros ---------------------------------------------------------------------
66
67#define INT16_TYPE int16_t
68#define UINT16_TYPE uint16_t
69#define INT32_TYPE int32_t
70#define UINT32_TYPE uint32_t
71
72// Prototypes -----------------------------------------------------------------
73
74int singles2halfp(void *target, void *source, int numel);
75int doubles2halfp(void *target, void *source, int numel);
76int halfp2singles(void *target, void *source, int numel);
77int halfp2doubles(void *target, void *source, int numel);
78
79//-----------------------------------------------------------------------------
80//
81// Routine: singles2halfp
82//
83// Input: source = Address of 32-bit floating point data to convert
84// numel = Number of values at that address to convert
85//
86// Output: target = Address of 16-bit data to hold output (numel values)
87// return value = 0 if native floating point format is IEEE
88// = 1 if native floating point format is not IEEE
89//
90// Programmer: James Tursa
91//
92//-----------------------------------------------------------------------------
93
94int singles2halfp(void *target, void *source, int numel)
95{
96 UINT16_TYPE *hp = (UINT16_TYPE *) target; // Type pun output as an unsigned 16-bit int
97 UINT32_TYPE *xp = (UINT32_TYPE *) source; // Type pun input as an unsigned 32-bit int
98 UINT16_TYPE hs, he, hm;
99 UINT32_TYPE x, xs, xe, xm;
100 int hes;
101// static int next; // Little Endian adjustment
102 static int checkieee = 1; // Flag to check for IEEE754, Endian, and word size
103 double one = 1.0; // Used for checking IEEE754 floating point format
104 UINT32_TYPE *ip; // Used for checking IEEE754 floating point format
105
106 if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size
107 ip = (UINT32_TYPE *) &one;
108 if( *ip ) { // If Big Endian, then no adjustment
109// next = 0;
110 } else { // If Little Endian, then adjustment will be necessary
111// next = 1;
112 ip++;
113 }
114 if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0
115 return 1; // Floating point bit pattern is not IEEE 754
116 }
117 if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) {
118 return 1; // short is not 16-bits, or long is not 32-bits.
119 }
120 checkieee = 0; // Everything checks out OK
121 }
122
123 if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real)
124 return 0;
125 }
126
127 while( numel-- ) {
128 x = *xp++;
129 if( (x & 0x7FFFFFFFu) == 0 ) { // Signed zero
130 *hp++ = (UINT16_TYPE) (x >> 16); // Return the signed zero
131 } else { // Not zero
132 xs = x & 0x80000000u; // Pick off sign bit
133 xe = x & 0x7F800000u; // Pick off exponent bits
134 xm = x & 0x007FFFFFu; // Pick off mantissa bits
135 if( xe == 0 ) { // Denormal will underflow, return a signed zero
136 *hp++ = (UINT16_TYPE) (xs >> 16);
137 } else if( xe == 0x7F800000u ) { // Inf or NaN (all the exponent bits are set)
138 if( xm == 0 ) { // If mantissa is zero ...
139 *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7C00u); // Signed Inf
140 } else {
141 *hp++ = (UINT16_TYPE) (x >> 16); // NaN, only 1st mantissa bit set
142 }
143 } else { // Normalized number
144 hs = (UINT16_TYPE) (xs >> 16); // Sign bit
145 hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp
146 if( hes >= 0x1F ) { // Overflow
147 *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7C00u); // Signed Inf
148 } else if( hes <= 0 ) { // Underflow
149 if( (14 - hes) > 24 ) { // Mantissa shifted all the way off & no rounding possibility
150 hm = (UINT16_TYPE) 0u; // Set mantissa to zero
151 } else {
152 xm |= 0x00800000u; // Add the hidden leading bit
153 hm = (UINT16_TYPE) (xm >> (14 - hes)); // Mantissa
154 if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding
155 hm += (UINT16_TYPE) 1u; // Round, might overflow into exp bit, but this is OK
156 }
157 *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
158 } else {
159 he = (UINT16_TYPE) (hes << 10); // Exponent
160 hm = (UINT16_TYPE) (xm >> 13); // Mantissa
161 if( xm & 0x00001000u ) // Check for rounding
162 *hp++ = (hs | he | hm) + (UINT16_TYPE) 1u; // Round, might overflow to inf, this is OK
163 else
164 *hp++ = (hs | he | hm); // No rounding
165 }
166 }
167 }
168 }
169 return 0;
170}
171
172//-----------------------------------------------------------------------------
173//
174// Routine: doubles2halfp
175//
176// Input: source = Address of 64-bit floating point data to convert
177// numel = Number of values at that address to convert
178//
179// Output: target = Address of 16-bit data to hold output (numel values)
180// return value = 0 if native floating point format is IEEE
181// = 1 if native floating point format is not IEEE
182//
183// Programmer: James Tursa
184//
185//-----------------------------------------------------------------------------
186
187int doubles2halfp(void *target, void *source, int numel)
188{
189 UINT16_TYPE *hp = (UINT16_TYPE *) target; // Type pun output as an unsigned 16-bit int
190 UINT32_TYPE *xp = (UINT32_TYPE *) source; // Type pun input as an unsigned 32-bit int
191 UINT16_TYPE hs, he, hm;
192 UINT32_TYPE x, xs, xe, xm;
193 int hes;
194 static int next; // Little Endian adjustment
195 static int checkieee = 1; // Flag to check for IEEE754, Endian, and word size
196 double one = 1.0; // Used for checking IEEE754 floating point format
197 UINT32_TYPE *ip; // Used for checking IEEE754 floating point format
198
199 if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size
200 ip = (UINT32_TYPE *) &one;
201 if( *ip ) { // If Big Endian, then no adjustment
202 next = 0;
203 } else { // If Little Endian, then adjustment will be necessary
204 next = 1;
205 ip++;
206 }
207 if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0
208 return 1; // Floating point bit pattern is not IEEE 754
209 }
210 if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) {
211 return 1; // short is not 16-bits, or long is not 32-bits.
212 }
213 checkieee = 0; // Everything checks out OK
214 }
215
216 xp += next; // Little Endian adjustment if necessary
217
218 if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real)
219 return 0;
220 }
221
222 while( numel-- ) {
223 x = *xp++; xp++; // The extra xp++ is to skip over the remaining 32 bits of the mantissa
224 if( (x & 0x7FFFFFFFu) == 0 ) { // Signed zero
225 *hp++ = (UINT16_TYPE) (x >> 16); // Return the signed zero
226 } else { // Not zero
227 xs = x & 0x80000000u; // Pick off sign bit
228 xe = x & 0x7FF00000u; // Pick off exponent bits
229 xm = x & 0x000FFFFFu; // Pick off mantissa bits
230 if( xe == 0 ) { // Denormal will underflow, return a signed zero
231 *hp++ = (UINT16_TYPE) (xs >> 16);
232 } else if( xe == 0x7FF00000u ) { // Inf or NaN (all the exponent bits are set)
233 if( xm == 0 ) { // If mantissa is zero ...
234 *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7C00u); // Signed Inf
235 } else {
236 *hp++ = (UINT16_TYPE) 0xFE00u; // NaN, only 1st mantissa bit set
237 }
238 } else { // Normalized number
239 hs = (UINT16_TYPE) (xs >> 16); // Sign bit
240 hes = ((int)(xe >> 20)) - 1023 + 15; // Exponent unbias the double, then bias the halfp
241 if( hes >= 0x1F ) { // Overflow
242 *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7C00u); // Signed Inf
243 } else if( hes <= 0 ) { // Underflow
244 if( (10 - hes) > 21 ) { // Mantissa shifted all the way off & no rounding possibility
245 hm = (UINT16_TYPE) 0u; // Set mantissa to zero
246 } else {
247 xm |= 0x00100000u; // Add the hidden leading bit
248 hm = (UINT16_TYPE) (xm >> (11 - hes)); // Mantissa
249 if( (xm >> (10 - hes)) & 0x00000001u ) // Check for rounding
250 hm += (UINT16_TYPE) 1u; // Round, might overflow into exp bit, but this is OK
251 }
252 *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
253 } else {
254 he = (UINT16_TYPE) (hes << 10); // Exponent
255 hm = (UINT16_TYPE) (xm >> 10); // Mantissa
256 if( xm & 0x00000200u ) // Check for rounding
257 *hp++ = (hs | he | hm) + (UINT16_TYPE) 1u; // Round, might overflow to inf, this is OK
258 else
259 *hp++ = (hs | he | hm); // No rounding
260 }
261 }
262 }
263 }
264 return 0;
265}
266
267//-----------------------------------------------------------------------------
268//
269// Routine: halfp2singles
270//
271// Input: source = address of 16-bit data to convert
272// numel = Number of values at that address to convert
273//
274// Output: target = Address of 32-bit floating point data to hold output (numel values)
275// return value = 0 if native floating point format is IEEE
276// = 1 if native floating point format is not IEEE
277//
278// Programmer: James Tursa
279//
280//-----------------------------------------------------------------------------
281
282int halfp2singles(void *target, void *source, int numel)
283{
284 UINT16_TYPE *hp = (UINT16_TYPE *) source; // Type pun input as an unsigned 16-bit int
285 UINT32_TYPE *xp = (UINT32_TYPE *) target; // Type pun output as an unsigned 32-bit int
286 UINT16_TYPE h, hs, he, hm;
287 UINT32_TYPE xs, xe, xm;
288 INT32_TYPE xes;
289 int e;
290// static int next; // Little Endian adjustment
291 static int checkieee = 1; // Flag to check for IEEE754, Endian, and word size
292 double one = 1.0; // Used for checking IEEE754 floating point format
293 UINT32_TYPE *ip; // Used for checking IEEE754 floating point format
294
295 if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size
296 ip = (UINT32_TYPE *) &one;
297 if( *ip ) { // If Big Endian, then no adjustment
298// next = 0;
299 } else { // If Little Endian, then adjustment will be necessary
300// next = 1;
301 ip++;
302 }
303 if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0
304 return 1; // Floating point bit pattern is not IEEE 754
305 }
306 if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) {
307 return 1; // short is not 16-bits, or long is not 32-bits.
308 }
309 checkieee = 0; // Everything checks out OK
310 }
311
312 if( source == NULL || target == NULL ) // Nothing to convert (e.g., imag part of pure real)
313 return 0;
314
315 while( numel-- ) {
316 h = *hp++;
317 if( (h & 0x7FFFu) == 0 ) { // Signed zero
318 *xp++ = ((UINT32_TYPE) h) << 16; // Return the signed zero
319 } else { // Not zero
320 hs = h & 0x8000u; // Pick off sign bit
321 he = h & 0x7C00u; // Pick off exponent bits
322 hm = h & 0x03FFu; // Pick off mantissa bits
323 if( he == 0 ) { // Denormal will convert to normalized
324 e = -1; // The following loop figures out how much extra to adjust the exponent
325 do {
326 e++;
327 hm <<= 1;
328 } while( (hm & 0x0400u) == 0 ); // Shift until leading bit overflows into exponent bit
329 xs = ((UINT32_TYPE) hs) << 16; // Sign bit
330 xes = ((INT32_TYPE) (he >> 10)) - 15 + 127 - e; // Exponent unbias the halfp, then bias the single
331 xe = (UINT32_TYPE) (xes << 23); // Exponent
332 xm = ((UINT32_TYPE) (hm & 0x03FFu)) << 13; // Mantissa
333 *xp++ = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
334 } else if( he == 0x7C00u ) { // Inf or NaN (all the exponent bits are set)
335 if( hm == 0 ) { // If mantissa is zero ...
336 *xp++ = (((UINT32_TYPE) hs) << 16) | ((UINT32_TYPE) 0x7F800000u); // Signed Inf
337 } else {
338 *xp++ = ((UINT32_TYPE) h) << 16; // NaN, only 1st mantissa bit set
339 }
340 } else { // Normalized number
341 xs = ((UINT32_TYPE) hs) << 16; // Sign bit
342 xes = ((INT32_TYPE) (he >> 10)) - 15 + 127; // Exponent unbias the halfp, then bias the single
343 xe = (UINT32_TYPE) (xes << 23); // Exponent
344 xm = ((UINT32_TYPE) hm) << 13; // Mantissa
345 *xp++ = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
346 }
347 }
348 }
349 return 0;
350}
351
352//-----------------------------------------------------------------------------
353//
354// Routine: halfp2singles
355//
356// Input: source = address of 16-bit data to convert
357// numel = Number of values at that address to convert
358//
359// Output: target = Address of 32-bit floating point data to hold output (numel values)
360// return value = 0 if native floating point format is IEEE
361// = 1 if native floating point format is not IEEE
362//
363// Programmer: James Tursa
364//
365//-----------------------------------------------------------------------------
366
367int halfp2doubles(void *target, void *source, int numel)
368{
369 UINT16_TYPE *hp = (UINT16_TYPE *) source; // Type pun input as an unsigned 16-bit int
370 UINT32_TYPE *xp = (UINT32_TYPE *) target; // Type pun output as an unsigned 32-bit int
371 UINT16_TYPE h, hs, he, hm;
372 UINT32_TYPE xs, xe, xm;
373 INT32_TYPE xes;
374 int e;
375 static int next; // Little Endian adjustment
376 static int checkieee = 1; // Flag to check for IEEE754, Endian, and word size
377 double one = 1.0; // Used for checking IEEE754 floating point format
378 UINT32_TYPE *ip; // Used for checking IEEE754 floating point format
379
380 if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size
381 ip = (UINT32_TYPE *) &one;
382 if( *ip ) { // If Big Endian, then no adjustment
383 next = 0;
384 } else { // If Little Endian, then adjustment will be necessary
385 next = 1;
386 ip++;
387 }
388 if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0
389 return 1; // Floating point bit pattern is not IEEE 754
390 }
391 if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) {
392 return 1; // short is not 16-bits, or long is not 32-bits.
393 }
394 checkieee = 0; // Everything checks out OK
395 }
396
397 xp += next; // Little Endian adjustment if necessary
398
399 if( source == NULL || target == NULL ) // Nothing to convert (e.g., imag part of pure real)
400 return 0;
401
402 while( numel-- ) {
403 h = *hp++;
404 if( (h & 0x7FFFu) == 0 ) { // Signed zero
405 *xp++ = ((UINT32_TYPE) h) << 16; // Return the signed zero
406 } else { // Not zero
407 hs = h & 0x8000u; // Pick off sign bit
408 he = h & 0x7C00u; // Pick off exponent bits
409 hm = h & 0x03FFu; // Pick off mantissa bits
410 if( he == 0 ) { // Denormal will convert to normalized
411 e = -1; // The following loop figures out how much extra to adjust the exponent
412 do {
413 e++;
414 hm <<= 1;
415 } while( (hm & 0x0400u) == 0 ); // Shift until leading bit overflows into exponent bit
416 xs = ((UINT32_TYPE) hs) << 16; // Sign bit
417 xes = ((INT32_TYPE) (he >> 10)) - 15 + 1023 - e; // Exponent unbias the halfp, then bias the double
418 xe = (UINT32_TYPE) (xes << 20); // Exponent
419 xm = ((UINT32_TYPE) (hm & 0x03FFu)) << 10; // Mantissa
420 *xp++ = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
421 } else if( he == 0x7C00u ) { // Inf or NaN (all the exponent bits are set)
422 if( hm == 0 ) { // If mantissa is zero ...
423 *xp++ = (((UINT32_TYPE) hs) << 16) | ((UINT32_TYPE) 0x7FF00000u); // Signed Inf
424 } else {
425 *xp++ = (UINT32_TYPE) 0xFFF80000u; // NaN, only the 1st mantissa bit set
426 }
427 } else {
428 xs = ((UINT32_TYPE) hs) << 16; // Sign bit
429 xes = ((INT32_TYPE) (he >> 10)) - 15 + 1023; // Exponent unbias the halfp, then bias the double
430 xe = (UINT32_TYPE) (xes << 20); // Exponent
431 xm = ((UINT32_TYPE) hm) << 10; // Mantissa
432 *xp++ = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
433 }
434 }
435 xp++; // Skip over the remaining 32 bits of the mantissa
436 }
437 return 0;
438}
int singles2halfp(void *target, const void *source, int numel)
Converts an IEEE single (float) to a half-precision (float16).
int halfp2singles(void *target, const void *source, int numel)
Converts an half-precision (float16) to an IEEE single (float).