Instructions to use espnet/CI_mini_an4_training_asr_transformer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ESPnet
How to use espnet/CI_mini_an4_training_asr_transformer with ESPnet:
unknown model type (must be text-to-speech or automatic-speech-recognition)
- Notebooks
- Google Colab
- Kaggle
File size: 838 Bytes
e00467a
from __future__ import annotations
from pathlib import Path
from typing import List
def gather_training_text(manifest_path: Path) -> List[str]:
    """Return the text column of every record in a tab-separated manifest.

    Each non-blank line is stripped of surrounding whitespace and split on
    tabs into at most three fields; the third field is collected. Because
    the split is capped at two tabs, any further tabs stay inside the
    collected text.

    Args:
        manifest_path: Location of the manifest file. Anything accepted by
            ``Path(...)`` works, since the value is converted on entry.

    Returns:
        The collected third fields, in file order.

    Raises:
        FileNotFoundError: If ``manifest_path`` is not an existing file.
        ValueError: If a non-blank line has fewer than three fields.
        RuntimeError: If the file yields no records at all.
    """
    source = Path(manifest_path)
    if not source.is_file():
        raise FileNotFoundError(f"Manifest not found: {source}")

    transcripts = []
    with source.open("r", encoding="utf-8") as handle:
        # Strip lazily so the file is still consumed one line at a time.
        for record in map(str.strip, handle):
            if record:
                fields = record.split("\t", maxsplit=2)
                # maxsplit=2 caps the result at three fields, so anything
                # other than three means the record is too short.
                if len(fields) != 3:
                    raise ValueError(f"Invalid manifest line: {record}")
                transcripts.append(fields[-1])

    if transcripts:
        return transcripts
    raise RuntimeError(f"No text found in manifest: {source}")
|