{
  "models": {
    "lpc_estimator": {
      "source": "pytorchFormants/Estimator/LPC_NN_scaledLoss.pt",
      "architecture": "MLP 350->1024->512->256->4 (sigmoid hidden, linear out)",
      "input": {
        "name": "input",
        "shape": [
          "batch",
          350
        ],
        "dtype": "float32"
      },
      "output": {
        "name": "formants",
        "shape": [
          "batch",
          4
        ],
        "note": "raw output ~ formant_Hz / 1000, i.e. formant frequency in kHz; multiply by 1000 for Hz (per repo convention)"
      },
      "opset": 17,
      "variants": {
        "fp32": {
          "file": "model.onnx",
          "size_mb": 4.067,
          "max_abs_diff": 9.5367431640625e-07,
          "max_rel_diff": 8.044200139987598e-07,
          "mean_abs_diff": 1.9110739231109618e-07,
          "threshold_abs": 0.0001,
          "threshold_rel": 0.001,
          "pass": true
        },
        "fp16": {
          "file": "model_fp16.onnx",
          "size_mb": 2.034,
          "max_abs_diff": 0.0014448165893554688,
          "max_rel_diff": 0.0005444474740661378,
          "mean_abs_diff": 0.0003232601098716259,
          "threshold_abs": 0.005,
          "threshold_rel": 0.05,
          "pass": true
        },
        "int8": {
          "file": "model_int8.onnx",
          "size_mb": 1.027,
          "max_abs_diff": 0.013593912124633789,
          "max_rel_diff": 0.014733956349833927,
          "mean_abs_diff": 0.0028839516919106243,
          "threshold_abs": 0.15,
          "threshold_rel": 0.5,
          "pass": true
        }
      }
    },
    "lpc_tracker": {
      "source": "pytorchFormants/Tracker/LPC_RNN.pt",
      "architecture": "LSTM(350,512) -> LSTM(512,256) -> Linear(256,4)",
      "input": {
        "name": "input",
        "shape": [
          "batch",
          "time",
          350
        ],
        "dtype": "float32"
      },
      "output": {
        "name": "formants",
        "shape": [
          "batch",
          "time",
          4
        ],
        "note": "raw output ~ formant_Hz / 1000, i.e. formant frequency in kHz; multiply by 1000 for Hz (per repo convention)"
      },
      "opset": 17,
      "variants": {
        "fp32": {
          "file": "model.onnx",
          "size_mb": 10.239,
          "max_abs_diff": 1.1920928955078125e-06,
          "max_rel_diff": 4.859505467916354e-06,
          "mean_abs_diff": 1.4127406757324932e-07,
          "threshold_abs": 0.0001,
          "threshold_rel": 0.001,
          "pass": true
        },
        "fp16": {
          "file": "model_fp16.onnx",
          "size_mb": 5.123,
          "max_abs_diff": 0.002070903778076172,
          "max_rel_diff": 0.8997685868740329,
          "mean_abs_diff": 0.00034594033626490273,
          "threshold_abs": 0.005,
          "threshold_rel": 0.05,
          "pass": true
        },
        "int8": {
          "file": "model_int8.onnx",
          "size_mb": 2.584,
          "max_abs_diff": 0.0502011775970459,
          "max_rel_diff": 0.17506545407668223,
          "mean_abs_diff": 0.005834240480326117,
          "threshold_abs": 0.15,
          "threshold_rel": 0.5,
          "pass": true
        }
      }
    },
    "lpc_estimator_torch7": {
      "source": "estimation_model.dat (Torch7 nn.Sequential, ported via torchfile)",
      "architecture": "MLP 350->1024->512->256->4 (sigmoid hidden, linear out) \u2014 same architecture as LPC_NN_scaledLoss.pt, but with different weights",
      "input": {
        "name": "input",
        "shape": [
          "batch",
          350
        ],
        "dtype": "float32"
      },
      "output": {
        "name": "formants",
        "shape": [
          "batch",
          4
        ],
        "note": "raw output ~ formant_Hz / 1000 (\u00d71000 for Hz, per load_estimation_model.lua)"
      },
      "opset": 17,
      "variants": {
        "fp32": {
          "file": "model.onnx",
          "size_mb": 4.067,
          "max_abs_diff": 1.430511474609375e-06,
          "max_rel_diff": 2.605369743393167e-05,
          "mean_abs_diff": 1.5887635527178645e-07,
          "threshold_abs": 0.0001,
          "threshold_rel": 0.001,
          "pass": true
        },
        "fp16": {
          "file": "model_fp16.onnx",
          "size_mb": 2.034,
          "max_abs_diff": 0.0019774436950683594,
          "max_rel_diff": 0.049704955433888615,
          "mean_abs_diff": 0.000250340614002198,
          "threshold_abs": 0.005,
          "threshold_rel": 0.05,
          "pass": true
        },
        "int8": {
          "file": "model_int8.onnx",
          "size_mb": 1.027,
          "max_abs_diff": 0.04240584373474121,
          "max_rel_diff": 4.826714634495662,
          "mean_abs_diff": 0.005278422741594113,
          "threshold_abs": 0.15,
          "threshold_rel": 0.5,
          "pass": true
        }
      },
      "port_fidelity_hz": "max 0.003 Hz drift on real features vs float64 numpy reconstruction of Torch7 forward"
    },
    "lpc_tracker_torch7": {
      "source": "tracking_model.dat (Torch7 nn.Sequential of nn.Sequencer+nn.FastLSTM, ported via torchfile)",
      "architecture": "LSTM(350,512) -> LSTM(512,256) -> Linear(256,4); identical shape to LPC_RNN.pt; different weights (original paper model)",
      "input": {
        "name": "input",
        "shape": [
          "batch",
          "time",
          350
        ],
        "dtype": "float32"
      },
      "output": {
        "name": "formants",
        "shape": [
          "batch",
          "time",
          4
        ],
        "note": "raw output ~ formant_Hz / 1000, i.e. formant frequency in kHz; multiply by 1000 for Hz"
      },
      "opset": 17,
      "variants": {
        "fp32": {
          "file": "model.onnx",
          "size_mb": 10.239,
          "max_abs_diff": 1.0728836059570312e-06,
          "max_rel_diff": 0.0009528932858925629,
          "mean_abs_diff": 5.3642434068024155e-08,
          "threshold_abs": 0.0001,
          "threshold_rel": 0.001,
          "pass": true
        },
        "fp16": {
          "file": "model_fp16.onnx",
          "size_mb": 5.123,
          "max_abs_diff": 0.0017843246459960938,
          "max_rel_diff": 1.2006545622606084,
          "mean_abs_diff": 0.00010792065120767802,
          "threshold_abs": 0.005,
          "threshold_rel": 0.05,
          "pass": true
        },
        "int8": {
          "file": "model_int8.onnx",
          "size_mb": 2.584,
          "max_abs_diff": 0.11648625135421753,
          "max_rel_diff": 72.53312782880165,
          "mean_abs_diff": 0.0050570286225411105,
          "threshold_abs": 0.15,
          "threshold_rel": 0.5,
          "pass": true
        }
      },
      "gate_remap": "Torch7 FastLSTM [i,g,f,o] -> PyTorch nn.LSTM [i,f,g,o]; block perm [0,2,1,3]",
      "bias_convention": "Torch7 i2g.bias -> bias_ih_l0 (permuted); bias_hh_l0 = 0",
      "port_fidelity_hz": "max 0.0001 Hz drift on random input vs float64 numpy FastLSTM reference forward"
    }
  },
  "license": "MIT (DeepFormants repo). Weights derived from MLSpeech/DeepFormants. Intended for local use only; redistribution rights for the converted weights have not been verified.",
  "skipped": {
    "CNN_estimate.pt": "Checkpoint not shipped in the public repo."
  }
}