|
|
@ -4,35 +4,51 @@ |
|
|
|
|
|
|
|
|
#pragma once |
|
|
#pragma once |
|
|
|
|
|
|
|
|
|
|
|
#include <cstring> |
|
|
|
|
|
|
|
|
#include "common/common_types.h" |
|
|
#include "common/common_types.h" |
|
|
|
|
|
|
|
|
namespace Pica { |
|
|
namespace Pica { |
|
|
|
|
|
|
|
|
struct float24 { |
|
|
|
|
|
static float24 FromFloat32(float val) { |
|
|
|
|
|
float24 ret; |
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
* Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision |
|
|
|
|
|
* floating point. |
|
|
|
|
|
* |
|
|
|
|
|
* When decoding, format is as follows: |
|
|
|
|
|
* - The first `M` bits are the mantissa |
|
|
|
|
|
* - The next `E` bits are the exponent |
|
|
|
|
|
* - The last bit is the sign bit |
|
|
|
|
|
* |
|
|
|
|
|
* @todo Verify on HW if this conversion is sufficiently accurate. |
|
|
|
|
|
*/ |
|
|
|
|
|
template<unsigned M, unsigned E> |
|
|
|
|
|
struct Float { |
|
|
|
|
|
public: |
|
|
|
|
|
static Float<M, E> FromFloat32(float val) { |
|
|
|
|
|
Float<M, E> ret; |
|
|
ret.value = val; |
|
|
ret.value = val; |
|
|
return ret; |
|
|
return ret; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// 16 bit mantissa, 7 bit exponent, 1 bit sign |
|
|
|
|
|
// TODO: No idea if this works as intended |
|
|
|
|
|
static float24 FromRawFloat24(u32 hex) { |
|
|
|
|
|
float24 ret; |
|
|
|
|
|
if ((hex & 0xFFFFFF) == 0) { |
|
|
|
|
|
ret.value = 0; |
|
|
|
|
|
} else { |
|
|
|
|
|
u32 mantissa = hex & 0xFFFF; |
|
|
|
|
|
u32 exponent = (hex >> 16) & 0x7F; |
|
|
|
|
|
u32 sign = hex >> 23; |
|
|
|
|
|
ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); |
|
|
|
|
|
if (sign) |
|
|
|
|
|
ret.value = -ret.value; |
|
|
|
|
|
} |
|
|
|
|
|
return ret; |
|
|
|
|
|
|
|
|
// Builds a Float<M, E> from a raw bit pattern. The low M bits of `hex` are read as the mantissa, |
// the next E bits as the exponent and bit (M + E) as the sign. These fields are rearranged into |
// a 32-bit pattern that is then copied byte-for-byte into the `value` member. |
static Float<M, E> FromRaw(u32 hex) { |
|
|
|
|
|
Float<M, E> res; |
|
|
|
|
|
|
|
|
|
|
|
// Number of bits in the packed input: mantissa + exponent + one sign bit. |
const int width = M + E + 1; |
|
|
|
|
|
// Added to the stored exponent before it is placed at bit 23 and up. |
// Arithmetically this equals 127 - ((1 << (E - 1)) - 1). |
const int bias = 128 - (1 << (E - 1)); |
|
|
|
|
|
const int exponent = (hex >> M) & ((1 << E) - 1); |
|
|
|
|
|
const unsigned mantissa = hex & ((1 << M) - 1); |
|
|
|
|
|
|
|
|
|
|
|
// True when any bit below the sign bit is set, i.e. the exponent or the mantissa is non-zero. |
if (hex & ((1 << (width - 1)) - 1)) |
|
|
|
|
|
// Sign goes to bit 31, the mantissa is shifted up so its top bit lands on bit 22, and the |
// rebiased exponent starts at bit 23. Assumes u32 is a 32-bit unsigned type so that only |
// bit (E + M) of `hex` survives the shift to bit 31 -- TODO confirm against common_types.h. |
hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23); |
|
|
|
|
|
else |
|
|
|
|
|
// Exponent and mantissa are both zero: keep only the sign bit. Presumably `float` is |
// IEEE 754 binary32 here, in which case this pattern is a signed zero. |
hex = ((hex >> (E + M)) << 31); |
|
|
|
|
|
|
|
|
|
|
|
// Copy the assembled bits into the float member without any numeric conversion. |
std::memcpy(&res.value, &hex, sizeof(float)); |
|
|
|
|
|
|
|
|
|
|
|
return res; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static float24 Zero() { |
|
|
|
|
|
|
|
|
static Float<M, E> Zero() { |
|
|
return FromFloat32(0.f); |
|
|
return FromFloat32(0.f); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@ -41,27 +57,27 @@ struct float24 { |
|
|
return value; |
|
|
return value; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24 operator * (const float24& flt) const { |
|
|
|
|
|
|
|
|
Float<M, E> operator * (const Float<M, E>& flt) const { |
|
|
if ((this->value == 0.f && !std::isnan(flt.value)) || |
|
|
if ((this->value == 0.f && !std::isnan(flt.value)) || |
|
|
(flt.value == 0.f && !std::isnan(this->value))) |
|
|
(flt.value == 0.f && !std::isnan(this->value))) |
|
|
// PICA gives 0 instead of NaN when multiplying by inf |
|
|
// PICA gives 0 instead of NaN when multiplying by inf |
|
|
return Zero(); |
|
|
return Zero(); |
|
|
return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); |
|
|
|
|
|
|
|
|
return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24 operator / (const float24& flt) const { |
|
|
|
|
|
return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); |
|
|
|
|
|
|
|
|
Float<M, E> operator / (const Float<M, E>& flt) const { |
|
|
|
|
|
return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24 operator + (const float24& flt) const { |
|
|
|
|
|
return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); |
|
|
|
|
|
|
|
|
Float<M, E> operator + (const Float<M, E>& flt) const { |
|
|
|
|
|
return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24 operator - (const float24& flt) const { |
|
|
|
|
|
return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); |
|
|
|
|
|
|
|
|
Float<M, E> operator - (const Float<M, E>& flt) const { |
|
|
|
|
|
return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24& operator *= (const float24& flt) { |
|
|
|
|
|
|
|
|
Float<M, E>& operator *= (const Float<M, E>& flt) { |
|
|
if ((this->value == 0.f && !std::isnan(flt.value)) || |
|
|
if ((this->value == 0.f && !std::isnan(flt.value)) || |
|
|
(flt.value == 0.f && !std::isnan(this->value))) |
|
|
(flt.value == 0.f && !std::isnan(this->value))) |
|
|
// PICA gives 0 instead of NaN when multiplying by inf |
|
|
// PICA gives 0 instead of NaN when multiplying by inf |
|
|
@ -70,111 +86,61 @@ struct float24 { |
|
|
return *this; |
|
|
return *this; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24& operator /= (const float24& flt) { |
|
|
|
|
|
|
|
|
Float<M, E>& operator /= (const Float<M, E>& flt) { |
|
|
value /= flt.ToFloat32(); |
|
|
value /= flt.ToFloat32(); |
|
|
return *this; |
|
|
return *this; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24& operator += (const float24& flt) { |
|
|
|
|
|
|
|
|
Float<M, E>& operator += (const Float<M, E>& flt) { |
|
|
value += flt.ToFloat32(); |
|
|
value += flt.ToFloat32(); |
|
|
return *this; |
|
|
return *this; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24& operator -= (const float24& flt) { |
|
|
|
|
|
|
|
|
Float<M, E>& operator -= (const Float<M, E>& flt) { |
|
|
value -= flt.ToFloat32(); |
|
|
value -= flt.ToFloat32(); |
|
|
return *this; |
|
|
return *this; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
float24 operator - () const { |
|
|
|
|
|
return float24::FromFloat32(-ToFloat32()); |
|
|
|
|
|
|
|
|
Float<M, E> operator - () const { |
|
|
|
|
|
return Float<M, E>::FromFloat32(-ToFloat32()); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
bool operator < (const float24& flt) const { |
|
|
|
|
|
|
|
|
bool operator < (const Float<M, E>& flt) const { |
|
|
return ToFloat32() < flt.ToFloat32(); |
|
|
return ToFloat32() < flt.ToFloat32(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
bool operator > (const float24& flt) const { |
|
|
|
|
|
|
|
|
bool operator > (const Float<M, E>& flt) const { |
|
|
return ToFloat32() > flt.ToFloat32(); |
|
|
return ToFloat32() > flt.ToFloat32(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
bool operator >= (const float24& flt) const { |
|
|
|
|
|
|
|
|
bool operator >= (const Float<M, E>& flt) const { |
|
|
return ToFloat32() >= flt.ToFloat32(); |
|
|
return ToFloat32() >= flt.ToFloat32(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
bool operator <= (const float24& flt) const { |
|
|
|
|
|
|
|
|
bool operator <= (const Float<M, E>& flt) const { |
|
|
return ToFloat32() <= flt.ToFloat32(); |
|
|
return ToFloat32() <= flt.ToFloat32(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
bool operator == (const float24& flt) const { |
|
|
|
|
|
|
|
|
bool operator == (const Float<M, E>& flt) const { |
|
|
return ToFloat32() == flt.ToFloat32(); |
|
|
return ToFloat32() == flt.ToFloat32(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
bool operator != (const float24& flt) const { |
|
|
|
|
|
|
|
|
bool operator != (const Float<M, E>& flt) const { |
|
|
return ToFloat32() != flt.ToFloat32(); |
|
|
return ToFloat32() != flt.ToFloat32(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
private: |
|
|
private: |
|
|
// Stored as a regular float, merely for convenience |
|
|
|
|
|
// TODO: Perform proper arithmetic on this! |
|
|
|
|
|
float value; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); |
|
|
|
|
|
|
|
|
|
|
|
struct float16 { |
|
|
|
|
|
// 10 bit mantissa, 5 bit exponent, 1 bit sign |
|
|
|
|
|
// TODO: No idea if this works as intended |
|
|
|
|
|
// Converts a raw bit pattern into a float16. As masked below, bits 0-9 are the mantissa, |
// bits 10-14 the exponent and bit 15 the sign; higher bits of `hex` are ignored. |
// The result is 2^(exponent - 15) * (1 + mantissa / 2^10), negated when the sign bit is set. |
static float16 FromRawFloat16(u32 hex) { |
|
|
|
|
|
float16 ret; |
|
|
|
|
|
// Only a pattern whose low 16 bits are all clear maps to 0. A pattern with just the sign bit |
// set is non-zero under this mask, so it takes the else branch and yields -(2^-15). |
if ((hex & 0xFFFF) == 0) { |
|
|
|
|
|
ret.value = 0; |
|
|
|
|
|
} else { |
|
|
|
|
|
u32 mantissa = hex & 0x3FF; |
|
|
|
|
|
u32 exponent = (hex >> 10) & 0x1F; |
|
|
|
|
|
u32 sign = (hex >> 15) & 1; |
|
|
|
|
|
// Every pattern reaching this point is given an implicit leading 1; there is no separate |
// handling for an all-zero or all-one exponent field. |
ret.value = std::pow(2.0f, (float)exponent - 15.0f) * (1.0f + mantissa * std::pow(2.0f, -10.f)); |
|
|
|
|
|
if (sign) |
|
|
|
|
|
ret.value = -ret.value; |
|
|
|
|
|
} |
|
|
|
|
|
return ret; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
static const unsigned MASK = (1 << (M + E + 1)) - 1; |
|
|
|
|
|
static const unsigned MANTISSA_MASK = (1 << M) - 1; |
|
|
|
|
|
static const unsigned EXPONENT_MASK = (1 << E) - 1; |
|
|
|
|
|
|
|
|
float ToFloat32() const { |
|
|
|
|
|
return value; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
|
// Stored as a regular float, merely for convenience |
|
|
// Stored as a regular float, merely for convenience |
|
|
// TODO: Perform proper arithmetic on this! |
|
|
// TODO: Perform proper arithmetic on this! |
|
|
float value; |
|
|
float value; |
|
|
}; |
|
|
}; |
|
|
|
|
|
|
|
|
// 20-bit floating point value that is decoded from its raw bit pattern and then kept as a |
// regular float. Exposes only decoding (FromRawFloat20) and read access (ToFloat32). |
struct float20 { |
|
|
|
|
|
// 12 bit mantissa, 7 bit exponent, 1 bit sign |
|
|
|
|
|
// TODO: No idea if this works as intended |
|
|
|
|
|
// As masked below, bits 0-11 are the mantissa, bits 12-18 the exponent and bit 19 the sign; |
// higher bits of `hex` are ignored. The result is 2^(exponent - 63) * (1 + mantissa / 2^12), |
// negated when the sign bit is set. |
static float20 FromRawFloat20(u32 hex) { |
|
|
|
|
|
float20 ret; |
|
|
|
|
|
// Only a pattern whose low 20 bits are all clear maps to 0. A pattern with just the sign bit |
// set is non-zero under this mask, so it takes the else branch and yields -(2^-63). |
if ((hex & 0xFFFFF) == 0) { |
|
|
|
|
|
ret.value = 0; |
|
|
|
|
|
} else { |
|
|
|
|
|
u32 mantissa = hex & 0xFFF; |
|
|
|
|
|
u32 exponent = (hex >> 12) & 0x7F; |
|
|
|
|
|
u32 sign = (hex >> 19) & 1; |
|
|
|
|
|
// Every pattern reaching this point is given an implicit leading 1; there is no separate |
// handling for an all-zero or all-one exponent field. |
ret.value = std::pow(2.0f, (float)exponent - 63.0f) * (1.0f + mantissa * std::pow(2.0f, -12.f)); |
|
|
|
|
|
if (sign) |
|
|
|
|
|
ret.value = -ret.value; |
|
|
|
|
|
} |
|
|
|
|
|
return ret; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Returns the stored value unchanged. |
float ToFloat32() const { |
|
|
|
|
|
return value; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
|
// Stored as a regular float, merely for convenience |
|
|
|
|
|
// TODO: Perform proper arithmetic on this! |
|
|
|
|
|
float value; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
using float24 = Float<16, 7>; |
|
|
|
|
|
using float20 = Float<12, 7>; |
|
|
|
|
|
using float16 = Float<10, 5>; |
|
|
|
|
|
|
|
|
} // namespace Pica |
|
|
} // namespace Pica |