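# NOTE(review): the text "Spaces: Sleeping / Sleeping" preceded this file in the
# captured copy. It looks like hosting-page status text rather than
# configuration, so it is commented out here to keep it from breaking parsing.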
---
# Voice model presets for EchoCoach (ASR + TTS).
# Override defaults via ECHOCOACH_ASR_PRESET / ECHOCOACH_TTS_PRESET in .env
#
# NOTE(review): the nesting in this file was rebuilt from a copy whose
# indentation had been flattened. Key order and values are unchanged, but
# confirm the parent of `max_seconds` and `fallback_voice` against the loader.

# Preset names selected when no override is supplied. `asr_preset`,
# `tts_preset` and `realtime_tts_preset` each name a key defined under the
# `asr` / `tts` sections further down.
defaults:
  asr_preset: whisper-cpp-base
  tts_preset: piper-multilingual
  # Realtime streaming TTS for TeacherVoice VoiceOut
  # (set ECHOCOACH_TTS_PRESET to match)
  realtime_tts_preset: vibevoice-realtime-0.5b
  coach_model: minicpm5-1b-language-lesson-hub
  # Listed after `coach_model`; presumably tried in this order when the
  # primary coach model is unavailable - TODO confirm.
  coach_fallbacks:
    - minicpm5-1b-language-lesson-lora
    - minicpm5-1b
  # NOTE(review): presumably a cap on audio length in seconds - confirm.
  max_seconds: 30

# Languages offered, as code + display label.
# Caution: under YAML 1.1 loaders (e.g. PyYAML) bare `no`, `on`, `off`, `y`
# and `n` parse as booleans. If Norwegian is ever added, write it quoted
# (`code: 'no'`, and `'no':` as a voices key).
languages:
  - code: en
    label: English
  - code: fr
    label: French
  - code: de
    label: German
  - code: es
    label: Spanish
  - code: it
    label: Italian
  - code: pt
    label: Portuguese
  - code: nl
    label: Dutch
  - code: pl
    label: Polish
  - code: el
    label: Greek
  - code: ar
    label: Arabic
  - code: ja
    label: Japanese
  - code: zh
    label: Chinese (Mandarin)
  - code: vi
    label: Vietnamese
  - code: ko
    label: Korean

# Speech-recognition presets, keyed by preset name.
asr:
  cohere-transcribe:
    label: Cohere Transcribe 2B (14 languages)
    backend: cohere
    model_id: CohereLabs/cohere-transcribe-03-2026
  whisper-cpp-tiny:
    label: Whisper.cpp tiny (CPU, fast)
    backend: whisper_cpp
    model_size: tiny
  whisper-cpp-base:
    label: Whisper.cpp base (CPU, better WER)
    backend: whisper_cpp
    model_size: base

# Optional omni speech-in/speech-out (GPU; set ECHOCOACH_VOICE_PROFILE=omni)
omni:
  minicpm-o-4.5:
    label: MiniCPM-o 4.5 (GPU omni — falls back to pipeline until wired)
    model_id: openbmb/MiniCPM-o-4_5
    languages: [en, zh]

# Text-to-speech presets, keyed by preset name.
tts:
  piper-multilingual:
    label: Piper TTS (local VoiceOut)
    backend: piper
    # One Piper voice per language code listed in `languages` above.
    voices:
      en: en_US-lessac-medium
      fr: fr_FR-siwis-medium
      de: de_DE-thorsten-medium
      es: es_ES-sharvard-medium
      it: it_IT-riccardo-medium
      pt: pt_BR-faber-medium
      nl: nl_NL-mls-medium
      pl: pl_PL-darkman-medium
      el: el_GR-rapunzelina-low
      ar: ar_JO-kareem-medium
      ja: ja_JP-natsuki-medium
      zh: zh_CN-huayan-medium
      vi: vi_VN-25hours-single
      ko: ko_KR-kss-medium
    # Same voice as `voices.en`; presumably used for codes with no entry in
    # `voices` - TODO confirm.
    fallback_voice: en_US-lessac-medium
  # Microsoft VibeVoice Realtime — streaming TTS, ~300ms to first audio
  # (GPU recommended).
  # https://huggingface.co/microsoft/VibeVoice-Realtime-0.5B
  # English-first; also supports de, fr, it, es, pt, nl, pl, ja, ko
  # (experimental).
  # Requires transformers + torch; falls back to Piper until fully wired in
  # echocoach.
  vibevoice-realtime-0.5b:
    label: VibeVoice Realtime 0.5B (streaming, ~300ms)
    backend: vibevoice
    model_id: microsoft/VibeVoice-Realtime-0.5B
    streaming: true
    realtime: true
    languages: [en, de, fr, it, es, pt, nl, pl, ja, ko]
    fallback_language: en