| """STT (Speech-to-Text) ar Whisper.""" |
|
|
| from __future__ import annotations |
|
|
| import base64 |
| import logging |
| import os |
| import tempfile |
| from contextlib import suppress |
|
|
| from fastapi import APIRouter, HTTPException |
| from pydantic import BaseModel |
|
|
| from maris_core.memory_context import memory_store |
| from maris_core.utils.emotional_context import analyze_emotional_context |
| from maris_core.utils.env import get_hf_model |
|
|
| logger = logging.getLogger(__name__) |
| router = APIRouter() |
|
|
|
|
class SttRequest(BaseModel):
    """Request body accepted by the ``/stt`` endpoint."""

    # Audio payload as a base64 string; the endpoint decodes it and writes
    # the bytes to a temporary ``.wav`` file before running recognition.
    audio_base64: str

    # Optional conversation id. When it is non-blank the endpoint stores the
    # transcript in the memory store under this id.
    session_id: str | None = None

    # Optional persona identifier. Accepted in the payload; nothing in this
    # module reads it.
    persona_id: str | None = None
|
|
|
|
class SttResponse(BaseModel):
    """Response body returned by the ``/stt`` endpoint."""

    # Text recognised from the submitted audio.
    transcript: str

    # Confidence reported for the transcript (model default: 1.0).
    confidence: float = 1.0

    # Emotion label attached to the transcript (model default: "neutral").
    detected_emotion: str = "neutral"

    # Confidence reported for ``detected_emotion`` (model default: 0.0).
    emotion_confidence: float = 0.0

    # Suggested reply style for the caller (model default: "clear_grounded").
    response_style: str = "clear_grounded"
|
|
|
|
# Pipelines already constructed in this process, keyed by model id.
# Constructing a pipeline loads the model, so it is done once per model id
# instead of once per call.
_ASR_PIPELINES = {}


def _build_asr_pipeline(model_id: str):
    """Return the speech-recognition pipeline for ``model_id``.

    The pipeline is created on first use and reused afterwards. Only
    successful builds are cached, so a failing build is retried on the next
    call.

    Args:
        model_id: Model identifier handed to the ``transformers`` pipeline
            factory.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"automatic-speech-recognition"`` task.
    """
    cached = _ASR_PIPELINES.get(model_id)
    if cached is not None:
        return cached

    # Imported lazily so that importing this module does not require
    # ``transformers`` to be importable.
    from transformers import pipeline as hf_pipeline

    # NOTE(review): device=-1 is the transformers convention for CPU.
    asr = hf_pipeline("automatic-speech-recognition", model=model_id, device=-1)
    _ASR_PIPELINES[model_id] = asr
    return asr
|
|
|
|
def _run_asr_on_bytes(asr, audio_bytes: bytes) -> str:
    """Write ``audio_bytes`` to a temporary ``.wav`` file and transcribe it.

    The temporary file is removed whether writing or recognition succeeds or
    fails.

    Args:
        asr: Callable pipeline that accepts a file path.
        audio_bytes: Raw audio content to transcribe.

    Returns:
        The ``"text"`` entry when the pipeline returns a dict, otherwise the
        string form of whatever it returned.
    """
    tmp_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so the
        # pipeline can open it by path; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(audio_bytes)
        result = asr(tmp_path)
        return result["text"] if isinstance(result, dict) else str(result)
    finally:
        if tmp_path is not None:
            with suppress(FileNotFoundError):
                os.unlink(tmp_path)


@router.post("/stt", response_model=SttResponse)
async def transcribe(req: SttRequest) -> SttResponse:
    """Convert base64-encoded audio to text with the configured STT model.

    The transcript is passed to ``analyze_emotional_context`` and, when the
    request carries a non-blank ``session_id``, stored in the memory store as
    a user message with source ``"voice_stt"``.

    Args:
        req: Request holding the base64 audio and optional session/persona ids.

    Returns:
        The transcript together with the detected emotion, its confidence and
        the suggested response style.

    Raises:
        HTTPException: 400 when ``audio_base64`` cannot be decoded or decodes
            to no data; 503 when any later step fails.
    """
    # Payload validation sits outside the broad ``try`` below so that client
    # errors are reported as 400 instead of being turned into a 503.
    try:
        audio_bytes = base64.b64decode(req.audio_base64)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        raise HTTPException(
            status_code=400,
            detail="Nederīgi base64 audio dati.",
        ) from exc
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Audio dati ir tukši.")

    try:
        model_id = get_hf_model("STT_MODEL")
        asr = _build_asr_pipeline(model_id)
        transcript = _run_asr_on_bytes(asr, audio_bytes)

        emotional_context = analyze_emotional_context(transcript)
        session_id = (req.session_id or "").strip()
        if session_id:
            memory_store.remember_message(session_id, "user", transcript, source="voice_stt")
        return SttResponse(
            transcript=transcript,
            # NOTE(review): fixed placeholder value; it is not derived from
            # the pipeline output.
            confidence=0.95,
            detected_emotion=emotional_context.emotion,
            emotion_confidence=emotional_context.confidence,
            response_style=emotional_context.response_style,
        )
    except Exception as exc:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("STT kļūda: %s", exc)
        raise HTTPException(
            status_code=503,
            detail="Maris AI STT nav pieejams bez konfigurēta STT_MODEL.",
        ) from exc
|
|