scam / app /api /voice_schemas.py
Gankit12's picture
Relative API URLs, docker-compose port fix, Phase 2 voice, HF deploy guide
6a4a552
"""
Pydantic schemas for Phase 2 voice API endpoints.
Defines request/response models for voice-based interaction with the
ScamShield AI honeypot, including transcription metadata, voice fraud
detection results, and the voice engagement response.
"""
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field
from app.api.schemas import ExtractedIntelligence
class TranscriptionMetadata(BaseModel):
"""Speech-to-text transcription result metadata."""
text: str = Field(
...,
description="Transcribed text from the audio input",
)
language: str = Field(
...,
description="Detected or specified language (ISO 639-1 code)",
)
confidence: float = Field(
...,
ge=0.0,
le=1.0,
description="Transcription confidence score",
)
class VoiceFraudMetadata(BaseModel):
"""Voice fraud detection result metadata (optional feature).
Present in the response only when voice fraud detection is enabled
via the ``VOICE_FRAUD_DETECTION`` configuration flag.
"""
is_synthetic: bool = Field(
...,
description="Whether the voice is classified as synthetic or deepfake",
)
confidence: float = Field(
...,
ge=0.0,
le=1.0,
description="Voice fraud detection confidence score",
)
risk_level: str = Field(
...,
description="Assessed risk level: 'low', 'medium', or 'high'",
)
class VoiceEngageResponse(BaseModel):
"""Response schema for POST /api/v1/voice/engage."""
session_id: str = Field(
...,
description="Session identifier for multi-turn voice conversations",
)
scam_detected: bool = Field(
...,
description="Whether a scam was detected in the transcribed message",
)
scam_confidence: float = Field(
...,
ge=0.0,
le=1.0,
description="Scam detection confidence score",
)
scam_type: Optional[str] = Field(
None,
description="Classified scam type derived from detection indicators",
)
turn_count: int = Field(
...,
ge=0,
description="Current conversation turn number",
)
ai_reply_text: str = Field(
...,
description="AI honeypot agent's text reply to the scammer",
)
ai_reply_audio_url: Optional[str] = Field(
None,
description="URL to the synthesized audio response file",
)
transcription: TranscriptionMetadata = Field(
...,
description="Speech-to-text transcription results",
)
voice_fraud: Optional[VoiceFraudMetadata] = Field(
None,
description="Voice fraud detection results (present when enabled)",
)
extracted_intelligence: Optional[ExtractedIntelligence] = Field(
None,
description="Extracted financial intelligence from the conversation",
)
processing_time_ms: int = Field(
...,
ge=0,
description="Total end-to-end processing time in milliseconds",
)
class VoiceHealthResponse(BaseModel):
"""Response schema for GET /api/v1/voice/health."""
status: str = Field(
...,
description="Overall voice subsystem health status",
)
asr: Optional[Dict[str, Any]] = Field(
None,
description="ASR engine health details (model, device, loaded)",
)
tts: Optional[Dict[str, Any]] = Field(
None,
description="TTS engine health details (engine, loaded)",
)