{
  "model": {
    "name": "FormantNet",
    "source": "expdir/mvt33_f6z1sTpF10/weights.sigmoid195-20.143 (TF checkpoint)",
    "architecture": "LSTM(257,512,return_sequences=True) -> Dense(20,sigmoid)",
    "input": {
      "name": "input",
      "shape": ["batch", "time", 257],
      "dtype": "float32",
      "note": "normalized log-spectral envelope, 257 bins (32ms/16kHz window)"
    },
    "output": {
      "name": "dense",
      "shape": ["batch", "time", 20],
      "dtype": "float32",
      "note": "raw sigmoid [0,1]; rescale with FN_model.get_rescale_fn() to get Hz/BW/dB"
    },
    "opset": 15,
    "total_params": 1587220,
    "framework_origin": "tensorflow-macos 2.13 / keras 2.13"
  },
  "variants": {
    "fp32": {
      "file": "formantnet.onnx",
      "size_mb": 6.361,
      "max_abs_diff": 0.0,
      "mean_abs_diff": 0.0,
      "max_rel_diff": 0.0,
      "threshold_abs": 0.0001,
      "pass": true
    },
    "fp16": {
      "file": "formantnet_fp16.onnx",
      "size_mb": 3.184,
      "max_abs_diff": 4.09e-04,
      "mean_abs_diff": 2.49e-05,
      "max_rel_diff": 4.52e-03,
      "threshold_abs": 0.005,
      "pass": true
    },
    "int8": {
      "file": "formantnet_int8.onnx",
      "size_mb": 1.608,
      "max_abs_diff": 9.15e-02,
      "mean_abs_diff": 8.71e-03,
      "max_rel_diff": 0.737,
      "threshold_abs": 0.15,
      "pass": true,
      "note": "max_rel_diff high due to near-zero sigmoid outputs; abs_diff within threshold (same pattern as DeepFormants-onnx int8)"
    }
  },
  "post_processing_outside_onnx": [
    "FFT / spectral envelope extraction (FN_data.py)",
    "Input normalization by training-set mean/std (Normfile)",
    "Output rescaling to Hz/BW/dB (FN_model.get_rescale_fn)",
    "Formant sorting by mean frequency",
    "Binomial smoothing (BIN_SMOOTH_PASSES=10)"
  ],
  "conversion_script": "convert_to_onnx.py",
  "quantization_script": "quantize_onnx.py",
  "validation_script": "validate_onnx.py"
}
|