| { | |
| "model_name": "syncvsr_lrs3_visual_ctc", | |
| "source_checkpoint": "Vox+LRS2+LRS3.ckpt", | |
| "input_layout": "NTCHW", | |
| "input_channels": 1, | |
| "input_height": 88, | |
| "input_width": 88, | |
| "pixel_mean": 0.421, | |
| "pixel_std": 0.165, | |
| "output_layout": "NTV", | |
| "output_is_log_softmax": true, | |
| "blank_index": 0, | |
| "vocab_file": "syncvsr_unigram_units.txt", | |
| "notes": "Exported by tools/export_syncvsr_to_onnx.ipynb. The exported graph contains the encoder + CTC head only -- no attention decoder, no beam search, no LM. Decode its output externally, either greedily or with subword CTC beam search; KenLM rescoring can be applied on top via KenLmScorer." | |
| } |