Collect value statistics for formats

This notebook computes various statistics for a variety of float formats by exhaustively enumerating their values. Naturally, most of these statistics can be computed directly, and indeed many are already supplied as methods on the FormatInfo class, for example max, smallest_subnormal, etc. However, exhaustive enumeration serves as a useful cross-check against the direct formulae.

Statistics collected

  • name: Format

  • B: Bits in the format

  • P: Precision in bits

  • E: Exponent field width in bits

  • T: Trailing significand field width in bits

  • lt1: Number of values x such that 0 < x < 1

  • gt1: Number of values x such that 1 < x < Inf

  • rt16: True if all finite values are exactly representable in IEEE binary16 (non-finite values are ignored)

  • maxFinite: Largest finite value

  • minFinite: Smallest finite value

  • maxNormal: Largest finite normal value, NaN if all finite values are subnormal

  • minNormal: Smallest positive normal value, NaN if all finite values are subnormal

  • minSubnormal: Smallest positive subnormal value, NaN if no finite values are subnormal

  • maxSubnormal: Largest subnormal value, NaN if no finite values are subnormal

from gfloat import *
from gfloat.formats import *

import pandas
import numpy as np
from IPython.display import HTML


def _max_min_or_nan(vals):
    """Return ``(max, min)`` of a pandas Series, or ``(nan, nan)`` if it is empty."""
    if vals.empty:
        return np.nan, np.nan
    return vals.max(), vals.min()


def collect_stats(fi: FormatInfo):
    """Collect value statistics for one format by exhaustive enumeration.

    Every code point in ``range(2**fi.bits)`` is decoded with ``decode_float``
    and the statistics are computed from the decoded ``fval`` / ``fclass``
    columns.

    Args:
        fi: The format to enumerate.

    Returns:
        A dict with keys ``name``, ``B``, ``P``, ``E``, ``T``, ``lt1``, ``gt1``,
        ``rt16``, ``maxFinite``, ``minFinite``, ``maxNormal``, ``minNormal``,
        ``minSubnormal`` and ``maxSubnormal``.  A max/min entry is NaN when
        the format has no value of the corresponding kind.

    Raises:
        AssertionError: If some finite value does not round-trip through
            binary32 (in which case an ``rt32`` column should be added).
    """
    # Generate all values
    values = [decode_float(fi, i) for i in range(2**fi.bits)]
    df = pandas.DataFrame(values)
    fval = df["fval"]
    fclass = df["fclass"]

    # Compute statistics: lt1, gt1 (both interval ends are excluded)
    total_01 = fval.between(0, 1, inclusive="neither").sum()
    total_1Inf = fval.between(1, np.inf, inclusive="neither").sum()

    # Compute statistics: maxFinite, minFinite
    maxFinite, minFinite = _max_min_or_nan(fval[np.isfinite(fval)])

    # Compute statistics: maxNormal, minNormal (positive normals only)
    is_positive = fval > 0
    maxNormal, minNormal = _max_min_or_nan(
        fval[(fclass == FloatClass.NORMAL) & is_positive]
    )

    # Compute statistics: maxSubnormal, minSubnormal (positive subnormals only)
    maxSubnormal, minSubnormal = _max_min_or_nan(
        fval[(fclass == FloatClass.SUBNORMAL) & is_positive]
    )

    # Compute roundtrips: rt16, rt32.
    # Non-finite values are exempt from the round-trip requirement.
    as_float64 = np.float64(fval)
    non_finite = ~np.isfinite(fval)
    # Narrowing large values overflows to inf; that is an expected
    # "does not round-trip" outcome, not something to warn about.
    with np.errstate(over="ignore"):
        rt16 = ((np.float64(np.float16(fval)) == as_float64) | non_finite).all()
        rt32 = ((np.float64(np.float32(fval)) == as_float64) | non_finite).all()

    # Explicit raise (not ``assert``) so the check survives ``python -O``.
    if not rt32:
        raise AssertionError(
            "Some values do not round-trip through binary32; "
            "rt32 should be included in the table"
        )

    # Assemble the result record
    return dict(
        name=fi.name,
        B=fi.bits,
        P=fi.precision,
        E=fi.expBits,
        T=fi.tSignificandBits,
        lt1=total_01,
        gt1=total_1Inf,
        rt16=rt16,
        maxFinite=maxFinite,
        minFinite=minFinite,
        maxNormal=maxNormal,
        minNormal=minNormal,
        minSubnormal=minSubnormal,
        maxSubnormal=maxSubnormal,
    )


# Formats to tabulate: the tiny and FP8 families plus a few individual formats.
formats_to_check = [
    *tiny_formats,
    *fp8_formats,
    format_info_bfloat16,
    format_info_ocp_int8,
    format_info_ocp_e8m0,
]

# One statistics record per format, gathered into a table (one row each).
stats = list(map(collect_stats, formats_to_check))
df = pandas.DataFrame(stats)
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 4
      1 from gfloat import *
      2 from gfloat.formats import *
----> 4 import pandas
      5 import numpy as np
      6 from IPython.display import HTML

ModuleNotFoundError: No module named 'pandas'

Emit HTML table

def render_float(v):
    """Render a float value for display in the table.

    The value is first formatted with the ``8.5g`` format spec.  That text is
    returned when it contains no ``e`` and parses back to exactly ``v``;
    otherwise the value is rendered with ``float_pow2str``.
    """
    text = format(v, "8.5g")
    renders_exactly = "e" not in text and float(text) == v
    return text if renders_exactly else float_pow2str(v)


# Columns holding float values that are rendered with render_float.
float_columns = (
    "maxFinite",
    "minFinite",
    "maxNormal",
    "minNormal",
    "minSubnormal",
    "maxSubnormal",
)

# Replace each float column with its rendered string form.
for field in float_columns:
    rendered = df[field].map(render_float)
    df[field] = rendered


# Emit the table as HTML, with the index hidden via Styler.hide().
styled = df.style.hide()
HTML(styled.to_html())
name B P E T lt1 gt1 rt16 maxFinite minFinite maxNormal minNormal minSubnormal maxSubnormal
ocp_e2m1 4 2 2 1 1 5 True 6 -6 6 1 0.5 0.5
ocp_e2m3 6 4 2 3 7 23 True 7.5 -7.5 7.5 1 0.125 0.875
ocp_e3m2 6 3 3 2 11 19 True 28 -28 28 0.25 0.0625 0.1875
ocp_e4m3 8 4 4 3 55 70 True 448 -448 448 0.015625 1*2^-9 7/4*2^-7
ocp_e5m2 8 3 5 2 59 63 True 57344 -57344 57344 1*2^-14 1*2^-16 3/2*2^-15
p3109_p1 8 1 7 0 62 63 False 1*2^63 -1*2^63 1*2^63 1*2^-62 nan nan
p3109_p2 8 2 6 1 63 62 False 1*2^31 -1*2^31 1*2^31 1*2^-31 1*2^-32 1*2^-32
p3109_p3 8 3 5 2 63 62 True 49152 -49152 49152 1*2^-15 1*2^-17 3/2*2^-16
p3109_p4 8 4 4 3 63 62 True 224 -224 224 0.0078125 1*2^-10 7/4*2^-8
p3109_p5 8 5 3 4 63 62 True 15 -15 15 0.125 0.0078125 15/8*2^-4
p3109_p6 8 6 2 5 63 62 True 3.875 -3.875 3.875 0.5 0.015625 31/16*2^-2
bfloat16 16 8 8 7 16255 16383 False 255/128*2^127 -255/128*2^127 255/128*2^127 1*2^-126 1*2^-133 127/64*2^-127
ocp_int8 8 8 0 7 63 63 True 127/64*2^0 -2 nan nan 0.015625 127/64*2^0
ocp_e8m0 8 1 8 0 127 127 False 1*2^127 1*2^-127 1*2^127 1*2^-127 nan nan