Spaces:
Running on Zero
Running on Zero
| """Speech-to-text (Whisper via faster-whisper). Portuguese by default. | |
| No torch needed — faster-whisper runs on CTranslate2 (CPU by default; set IRIS_STT_DEVICE=cuda to try the GPU, with automatic fallback to CPU). | |
| """ | |
| import os | |
# Process-wide cache for the lazily constructed WhisperModel (None until first use).
_model = None


def _load():
    """Return the shared faster-whisper model, constructing it on first call.

    Configuration is read from the environment the first time a model is built:

    * ``IRIS_STT_MODEL``  - model size/name handed to ``WhisperModel``
      (default ``"small"``).
    * ``IRIS_STT_DEVICE`` - ``"cuda"`` to try the GPU first; any other value
      (default ``"cpu"``) selects the CPU. Compared case-insensitively.

    A failed CUDA initialisation is logged and followed by a CPU attempt
    (int8), so a GPU problem degrades the service instead of breaking it.

    Returns:
        The cached ``WhisperModel`` instance.
    """
    global _model
    if _model is not None:
        return _model

    # Imported lazily so that merely importing this module does not pull in
    # faster-whisper; it is only needed once a transcription is requested.
    from faster_whisper import WhisperModel

    size = os.environ.get("IRIS_STT_MODEL", "small")
    device = os.environ.get("IRIS_STT_DEVICE", "cpu").strip().lower()

    if device == "cuda":
        # CTranslate2 needs CUDA 12 libs (cublas/cudnn). Fall back to CPU if missing.
        try:
            _model = WhisperModel(size, device="cuda", compute_type="float16")
            return _model
        except Exception as exc:
            # Deliberately broad: whatever went wrong on the GPU, CPU is the
            # fallback. Log it so the downgrade is visible to the operator.
            print(f"[stt] CUDA init failed ({exc!r}); falling back to CPU", flush=True)

    _model = WhisperModel(size, device="cpu", compute_type="int8")
    return _model
def transcribe(audio_path: str, language: str = "pt") -> str:
    """Transcribe the audio file at *audio_path*, forcing *language*.

    Args:
        audio_path: Path to an audio file on disk.
        language: Language code passed to the model (default ``"pt"``).

    Returns:
        The segment texts joined by single spaces, or ``""`` when
        *audio_path* is empty or does not point at an existing regular file.
    """
    # isfile (not exists): a directory is never audio, so treat it like a
    # missing file instead of handing it to the model.
    if not audio_path or not os.path.isfile(audio_path):
        print(f"[stt] no audio: {audio_path!r}", flush=True)
        return ""

    segments, info = _load().transcribe(audio_path, language=language)
    # Whisper segment texts typically start with their own space; strip each
    # one (and drop empties) so joining does not produce doubled spaces.
    pieces = (seg.text.strip() for seg in segments)
    text = " ".join(piece for piece in pieces if piece)
    print(f"[stt] {audio_path} ({getattr(info, 'duration', '?')}s) -> {text!r}", flush=True)
    return text
def transcribe_auto(audio_path: str):
    """Transcribe WITHOUT forcing a language.

    Used for choosing the language by voice.

    Args:
        audio_path: Path to an audio file on disk.

    Returns:
        A ``(text, detected_language)`` tuple. When *audio_path* is empty or
        is not an existing regular file the result is ``("", "en")``; ``"en"``
        is also the fallback when the model reports no language.
    """
    # isfile (not exists): a directory is never audio, so treat it like a
    # missing file instead of handing it to the model.
    if not audio_path or not os.path.isfile(audio_path):
        print(f"[stt-auto] no audio: {audio_path!r}", flush=True)
        return "", "en"

    segments, info = _load().transcribe(audio_path, language=None)
    # Whisper segment texts typically start with their own space; strip each
    # one (and drop empties) so joining does not produce doubled spaces.
    pieces = (seg.text.strip() for seg in segments)
    text = " ".join(piece for piece in pieces if piece)
    # `or` also covers a present-but-empty/None language attribute.
    lang = getattr(info, "language", None) or "en"
    print(f"[stt-auto] -> {text!r} (lang={lang})", flush=True)
    return text, lang