Speach-To-Text / api /schemas.py
MIP-Tech's picture
Deploy to HF Spaces
0db822c
from typing import List, Optional
from pydantic import BaseModel, Field
class HealthResponse(BaseModel):
    # Health-check payload describing the state of the Whisper and Gemini
    # backends. Documented with `#` comments rather than a docstring because
    # Pydantic publishes a model's docstring as its JSON-schema description,
    # and this model currently exposes none.

    # Overall state; the description below is emitted in the generated schema.
    status: str = Field(
        description="'ok' if Whisper is loaded, 'degraded' otherwise",
    )

    # Per-backend flags.
    whisper_loaded: bool
    gemini_available: bool

    # Presumably the location of the Whisper model — confirm with the producer.
    model_path: str
class TranscriptResponse(BaseModel):
    """Raw Whisper transcript — no post-processing."""

    # Name of the audio file this transcript refers to.
    audio_filename: str

    # Transcript text exactly as produced by Whisper.
    transcript: str
class AutoCorrectedResponse(BaseModel):
    """Whisper transcript + Gemini phonetic/spelling correction (no speaker labels)."""

    # Name of the audio file this response refers to.
    audio_filename: str

    # Uncorrected Whisper output, kept alongside the corrected text.
    transcript: str

    # Required field: no default is declared, so the producer must always
    # supply a value (the raw transcript when Gemini could not be used,
    # per the description).
    corrected_transcript: str = Field(
        description=(
            "Phonetically and orthographically corrected Arabic text. "
            "No speaker labels — single continuous stream. "
            "Falls back to the raw Whisper transcript if Gemini is unavailable."
        ),
    )

    # Defaults to True, so the fallback path has to set it to False explicitly.
    gemini_applied: bool = Field(
        description="False if Gemini was unavailable and the raw transcript was returned as-is.",
        default=True,
    )
class CorrectedTranscriptResponse(BaseModel):
    """Whisper transcript + Gemini speaker-separated, phonetically corrected version."""

    # Name of the audio file this response refers to.
    audio_filename: str

    # Uncorrected Whisper output, kept alongside the corrected text.
    transcript: str

    # Required field. Unlike AutoCorrectedResponse, this model declares no
    # flag indicating whether Gemini was actually applied.
    corrected_transcript: str = Field(
        description=(
            "Speaker-labelled, phonetically corrected Arabic transcript produced by Gemini. "
            "SPEAKER_01 = Agent, SPEAKER_00 = Customer."
        ),
    )
class AnalysisResponse(BaseModel):
    """Full call analysis: transcript + all structured fields from Gemini."""

    # --- Source audio and transcripts -------------------------------------
    audio_filename: str
    transcript: str
    cleaned_transcript: str

    # --- Entities extracted from the call ---------------------------------
    # NOTE(review): these Optional[...] fields declare no default. Pydantic v1
    # treats that as an implicit default of None, while Pydantic v2 treats the
    # field as required-but-nullable — confirm which major version is pinned.
    agent_name: Optional[str]
    customer_name: Optional[str]
    # A list despite the singular name; several units may be reported.
    unit_number: List[str]
    project_name: Optional[str]
    department_mentioned: Optional[str]

    # --- Call classification ----------------------------------------------
    # Typed as a plain str: the two values named in the description are not
    # enforced by validation.
    call_type: str = Field(description="'Inbound' or 'Outbound'")
    # Typed as a plain int: the 1–5 range in the description is not enforced.
    customer_satisfaction: int = Field(
        description="1–5 integer inferred from tone",
    )
    is_urgent: bool

    # --- Follow-up lists (all required; no default is declared) -----------
    pain_points: List[str]
    action_items_promised: List[str]
    next_steps: List[str]