{ "model_name": "syncvsr_lrs3_visual_ctc", "source_checkpoint": "Vox+LRS2+LRS3.ckpt", "input_layout": "NTCHW", "input_channels": 1, "input_height": 88, "input_width": 88, "pixel_mean": 0.421, "pixel_std": 0.165, "output_layout": "NTV", "output_is_log_softmax": true, "blank_index": 0, "vocab_file": "syncvsr_unigram_units.txt", "notes": "Exported by tools/export_syncvsr_to_onnx.ipynb. The export contains the encoder + CTC head only -- no attention decoder, no beam search, no LM. Decode the output greedily or with subword CTC beam search; KenLM rescoring can be applied on top via KenLmScorer." }