{ "models": { "lpc_estimator": { "source": "pytorchFormants/Estimator/LPC_NN_scaledLoss.pt", "architecture": "MLP 350->1024->512->256->4 (sigmoid hidden, linear out)", "input": { "name": "input", "shape": [ "batch", 350 ], "dtype": "float32" }, "output": { "name": "formants", "shape": [ "batch", 4 ], "note": "raw output ~ formant_Hz / 1000 (per repo convention)" }, "opset": 17, "variants": { "fp32": { "file": "model.onnx", "size_mb": 4.067, "max_abs_diff": 9.5367431640625e-07, "max_rel_diff": 8.044200139987598e-07, "mean_abs_diff": 1.9110739231109618e-07, "threshold_abs": 0.0001, "threshold_rel": 0.001, "pass": true }, "fp16": { "file": "model_fp16.onnx", "size_mb": 2.034, "max_abs_diff": 0.0014448165893554688, "max_rel_diff": 0.0005444474740661378, "mean_abs_diff": 0.0003232601098716259, "threshold_abs": 0.005, "threshold_rel": 0.05, "pass": true }, "int8": { "file": "model_int8.onnx", "size_mb": 1.027, "max_abs_diff": 0.013593912124633789, "max_rel_diff": 0.014733956349833927, "mean_abs_diff": 0.0028839516919106243, "threshold_abs": 0.15, "threshold_rel": 0.5, "pass": true } } }, "lpc_tracker": { "source": "pytorchFormants/Tracker/LPC_RNN.pt", "architecture": "LSTM(350,512) -> LSTM(512,256) -> Linear(256,4)", "input": { "name": "input", "shape": [ "batch", "time", 350 ], "dtype": "float32" }, "output": { "name": "formants", "shape": [ "batch", "time", 4 ], "note": "raw output ~ formant_Hz / 1000 (per repo convention)" }, "opset": 17, "variants": { "fp32": { "file": "model.onnx", "size_mb": 10.239, "max_abs_diff": 1.1920928955078125e-06, "max_rel_diff": 4.859505467916354e-06, "mean_abs_diff": 1.4127406757324932e-07, "threshold_abs": 0.0001, "threshold_rel": 0.001, "pass": true }, "fp16": { "file": "model_fp16.onnx", "size_mb": 5.123, "max_abs_diff": 0.002070903778076172, "max_rel_diff": 0.8997685868740329, "mean_abs_diff": 0.00034594033626490273, "threshold_abs": 0.005, "threshold_rel": 0.05, "pass": true }, "int8": { "file": "model_int8.onnx", "size_mb": 2.584, "max_abs_diff": 0.0502011775970459, "max_rel_diff": 0.17506545407668223, "mean_abs_diff": 0.005834240480326117, "threshold_abs": 0.15, "threshold_rel": 0.5, "pass": true } } }, "lpc_estimator_torch7": { "source": "estimation_model.dat (Torch7 nn.Sequential, ported via torchfile)", "architecture": "MLP 350->1024->512->256->4 (sigmoid hidden, linear out) \u2014 identical to LPC_NN_scaledLoss.pt; different weights", "input": { "name": "input", "shape": [ "batch", 350 ], "dtype": "float32" }, "output": { "name": "formants", "shape": [ "batch", 4 ], "note": "raw output ~ formant_Hz / 1000 (\u00d71000 for Hz, per load_estimation_model.lua)" }, "opset": 17, "variants": { "fp32": { "file": "model.onnx", "size_mb": 4.067, "max_abs_diff": 1.430511474609375e-06, "max_rel_diff": 2.605369743393167e-05, "mean_abs_diff": 1.5887635527178645e-07, "threshold_abs": 0.0001, "threshold_rel": 0.001, "pass": true }, "fp16": { "file": "model_fp16.onnx", "size_mb": 2.034, "max_abs_diff": 0.0019774436950683594, "max_rel_diff": 0.049704955433888615, "mean_abs_diff": 0.000250340614002198, "threshold_abs": 0.005, "threshold_rel": 0.05, "pass": true }, "int8": { "file": "model_int8.onnx", "size_mb": 1.027, "max_abs_diff": 0.04240584373474121, "max_rel_diff": 4.826714634495662, "mean_abs_diff": 0.005278422741594113, "threshold_abs": 0.15, "threshold_rel": 0.5, "pass": true } }, "port_fidelity_hz": "max 0.003 Hz drift on real features vs float64 numpy reconstruction of Torch7 forward" }, "lpc_tracker_torch7": { "source": "tracking_model.dat (Torch7 nn.Sequential of nn.Sequencer+nn.FastLSTM, ported via torchfile)", "architecture": "LSTM(350,512) -> LSTM(512,256) -> Linear(256,4); identical shape to LPC_RNN.pt; different weights (original paper model)", "input": { "name": "input", "shape": [ "batch", "time", 350 ], "dtype": "float32" }, "output": { "name": "formants", "shape": [ "batch", "time", 4 ], "note": "raw output ~ formant_Hz / 1000" }, "opset": 17, "variants": { "fp32": { "file": "model.onnx", "size_mb": 10.239, "max_abs_diff": 1.0728836059570312e-06, "max_rel_diff": 0.0009528932858925629, "mean_abs_diff": 5.3642434068024155e-08, "threshold_abs": 0.0001, "threshold_rel": 0.001, "pass": true }, "fp16": { "file": "model_fp16.onnx", "size_mb": 5.123, "max_abs_diff": 0.0017843246459960938, "max_rel_diff": 1.2006545622606084, "mean_abs_diff": 0.00010792065120767802, "threshold_abs": 0.005, "threshold_rel": 0.05, "pass": true }, "int8": { "file": "model_int8.onnx", "size_mb": 2.584, "max_abs_diff": 0.11648625135421753, "max_rel_diff": 72.53312782880165, "mean_abs_diff": 0.0050570286225411105, "threshold_abs": 0.15, "threshold_rel": 0.5, "pass": true } }, "gate_remap": "Torch7 FastLSTM [i,g,f,o] -> PyTorch nn.LSTM [i,f,g,o]; block perm [0,2,1,3]", "bias_convention": "Torch7 i2g.bias -> bias_ih_l0 (permuted); bias_hh_l0 = 0", "port_fidelity_hz": "max 0.0001 Hz drift on random input vs float64 numpy FastLSTM reference forward" } }, "license": "MIT (DeepFormants repo). Weights derived from MLSpeech/DeepFormants. Local use; redistribution not verified.", "skipped": { "CNN_estimate.pt": "Checkpoint not shipped in the public repo." } }