| { |
| "models": { |
| "lpc_estimator": { |
| "source": "pytorchFormants/Estimator/LPC_NN_scaledLoss.pt", |
| "architecture": "MLP 350->1024->512->256->4 (sigmoid hidden, linear out)", |
| "input": { |
| "name": "input", |
| "shape": [ |
| "batch", |
| 350 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "formants", |
| "shape": [ |
| "batch", |
| 4 |
| ], |
| "note": "raw output ~ formant_Hz / 1000 (per repo convention)" |
| }, |
| "opset": 17, |
| "variants": { |
| "fp32": { |
| "file": "model.onnx", |
| "size_mb": 4.067, |
| "max_abs_diff": 9.5367431640625e-07, |
| "max_rel_diff": 8.044200139987598e-07, |
| "mean_abs_diff": 1.9110739231109618e-07, |
| "threshold_abs": 0.0001, |
| "threshold_rel": 0.001, |
| "pass": true |
| }, |
| "fp16": { |
| "file": "model_fp16.onnx", |
| "size_mb": 2.034, |
| "max_abs_diff": 0.0014448165893554688, |
| "max_rel_diff": 0.0005444474740661378, |
| "mean_abs_diff": 0.0003232601098716259, |
| "threshold_abs": 0.005, |
| "threshold_rel": 0.05, |
| "pass": true |
| }, |
| "int8": { |
| "file": "model_int8.onnx", |
| "size_mb": 1.027, |
| "max_abs_diff": 0.013593912124633789, |
| "max_rel_diff": 0.014733956349833927, |
| "mean_abs_diff": 0.0028839516919106243, |
| "threshold_abs": 0.15, |
| "threshold_rel": 0.5, |
| "pass": true |
| } |
| } |
| }, |
| "lpc_tracker": { |
| "source": "pytorchFormants/Tracker/LPC_RNN.pt", |
| "architecture": "LSTM(350,512) -> LSTM(512,256) -> Linear(256,4)", |
| "input": { |
| "name": "input", |
| "shape": [ |
| "batch", |
| "time", |
| 350 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "formants", |
| "shape": [ |
| "batch", |
| "time", |
| 4 |
| ], |
| "note": "raw output ~ formant_Hz / 1000 (per repo convention)" |
| }, |
| "opset": 17, |
| "variants": { |
| "fp32": { |
| "file": "model.onnx", |
| "size_mb": 10.239, |
| "max_abs_diff": 1.1920928955078125e-06, |
| "max_rel_diff": 4.859505467916354e-06, |
| "mean_abs_diff": 1.4127406757324932e-07, |
| "threshold_abs": 0.0001, |
| "threshold_rel": 0.001, |
| "pass": true |
| }, |
| "fp16": { |
| "file": "model_fp16.onnx", |
| "size_mb": 5.123, |
| "max_abs_diff": 0.002070903778076172, |
| "max_rel_diff": 0.8997685868740329, |
| "mean_abs_diff": 0.00034594033626490273, |
| "threshold_abs": 0.005, |
| "threshold_rel": 0.05, |
| "pass": true |
| }, |
| "int8": { |
| "file": "model_int8.onnx", |
| "size_mb": 2.584, |
| "max_abs_diff": 0.0502011775970459, |
| "max_rel_diff": 0.17506545407668223, |
| "mean_abs_diff": 0.005834240480326117, |
| "threshold_abs": 0.15, |
| "threshold_rel": 0.5, |
| "pass": true |
| } |
| } |
| }, |
| "lpc_estimator_torch7": { |
| "source": "estimation_model.dat (Torch7 nn.Sequential, ported via torchfile)", |
| "architecture": "MLP 350->1024->512->256->4 (sigmoid hidden, linear out) \u2014 same architecture as LPC_NN_scaledLoss.pt, but with different weights", |
| "input": { |
| "name": "input", |
| "shape": [ |
| "batch", |
| 350 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "formants", |
| "shape": [ |
| "batch", |
| 4 |
| ], |
| "note": "raw output ~ formant_Hz / 1000 (\u00d71000 for Hz, per load_estimation_model.lua)" |
| }, |
| "opset": 17, |
| "variants": { |
| "fp32": { |
| "file": "model.onnx", |
| "size_mb": 4.067, |
| "max_abs_diff": 1.430511474609375e-06, |
| "max_rel_diff": 2.605369743393167e-05, |
| "mean_abs_diff": 1.5887635527178645e-07, |
| "threshold_abs": 0.0001, |
| "threshold_rel": 0.001, |
| "pass": true |
| }, |
| "fp16": { |
| "file": "model_fp16.onnx", |
| "size_mb": 2.034, |
| "max_abs_diff": 0.0019774436950683594, |
| "max_rel_diff": 0.049704955433888615, |
| "mean_abs_diff": 0.000250340614002198, |
| "threshold_abs": 0.005, |
| "threshold_rel": 0.05, |
| "pass": true |
| }, |
| "int8": { |
| "file": "model_int8.onnx", |
| "size_mb": 1.027, |
| "max_abs_diff": 0.04240584373474121, |
| "max_rel_diff": 4.826714634495662, |
| "mean_abs_diff": 0.005278422741594113, |
| "threshold_abs": 0.15, |
| "threshold_rel": 0.5, |
| "pass": true |
| } |
| }, |
| "port_fidelity_hz": "max 0.003 Hz drift on real features vs float64 numpy reconstruction of Torch7 forward" |
| }, |
| "lpc_tracker_torch7": { |
| "source": "tracking_model.dat (Torch7 nn.Sequential of nn.Sequencer+nn.FastLSTM, ported via torchfile)", |
| "architecture": "LSTM(350,512) -> LSTM(512,256) -> Linear(256,4); identical shape to LPC_RNN.pt; different weights (original paper model)", |
| "input": { |
| "name": "input", |
| "shape": [ |
| "batch", |
| "time", |
| 350 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "formants", |
| "shape": [ |
| "batch", |
| "time", |
| 4 |
| ], |
| "note": "raw output ~ formant_Hz / 1000" |
| }, |
| "opset": 17, |
| "variants": { |
| "fp32": { |
| "file": "model.onnx", |
| "size_mb": 10.239, |
| "max_abs_diff": 1.0728836059570312e-06, |
| "max_rel_diff": 0.0009528932858925629, |
| "mean_abs_diff": 5.3642434068024155e-08, |
| "threshold_abs": 0.0001, |
| "threshold_rel": 0.001, |
| "pass": true |
| }, |
| "fp16": { |
| "file": "model_fp16.onnx", |
| "size_mb": 5.123, |
| "max_abs_diff": 0.0017843246459960938, |
| "max_rel_diff": 1.2006545622606084, |
| "mean_abs_diff": 0.00010792065120767802, |
| "threshold_abs": 0.005, |
| "threshold_rel": 0.05, |
| "pass": true |
| }, |
| "int8": { |
| "file": "model_int8.onnx", |
| "size_mb": 2.584, |
| "max_abs_diff": 0.11648625135421753, |
| "max_rel_diff": 72.53312782880165, |
| "mean_abs_diff": 0.0050570286225411105, |
| "threshold_abs": 0.15, |
| "threshold_rel": 0.5, |
| "pass": true |
| } |
| }, |
| "gate_remap": "Torch7 FastLSTM [i,g,f,o] -> PyTorch nn.LSTM [i,f,g,o]; block perm [0,2,1,3]", |
| "bias_convention": "Torch7 i2g.bias -> bias_ih_l0 (permuted); bias_hh_l0 = 0", |
| "port_fidelity_hz": "max 0.0001 Hz drift on random input vs float64 numpy FastLSTM reference forward" |
| } |
| }, |
| "license": "MIT (DeepFormants repo). Weights derived from MLSpeech/DeepFormants. Intended for local use only; redistribution rights have not been verified.", |
| "skipped": { |
| "CNN_estimate.pt": "Checkpoint not shipped in the public repo." |
| } |
| } |