# NOTE: "Spaces: Sleeping" appears to be page-header residue captured with
# this file; it is not part of the module.
"""
Pydantic models for API response structures.

This module defines the output models for the topic segmentation API,
including topic responses, segmentation results, and metadata structures.
"""
| from typing import List, Optional, Dict, Any, Union | |
| from datetime import datetime | |
| from enum import Enum | |
| from pydantic import BaseModel, Field, ConfigDict, computed_field | |
| from pydantic.types import PositiveInt, NonNegativeFloat | |
| from models.input import SpeakerRole, LanguageCode | |
class TopicCategory(str, Enum):
    """Business categories for extracted topics."""

    # Mixing in ``str`` makes every member compare equal to its raw value,
    # e.g. ``TopicCategory.GENERAL == "general"``. Declaration order below is
    # also the iteration order, so keep it stable.

    # Client needs
    CLIENT_NEEDS_B2B = "client_needs_b2b"
    CLIENT_NEEDS_B2C = "client_needs_b2c"

    # Feedback
    CUSTOMER_FEEDBACK = "customer_feedback"
    EMPLOYEE_FEEDBACK = "employee_feedback"

    # Solution-related findings
    SOLUTION_BARRIERS = "solution_barriers"
    SOLUTION_BENEFITS = "solution_benefits"
    AHA_MOMENTS = "aha_moments"

    # Context and everything else
    COMPANY_INFO = "company_info"
    TECHNICAL_REQUIREMENTS = "technical_requirements"
    ADDITIONAL_COMMENTS = "additional_comments"
    GENERAL = "general"
class ConfidenceLevel(str, Enum):
    """Confidence levels for topic extraction."""

    # Buckets are half-open on the upper side, matching the ``>=`` threshold
    # checks in ``TopicDetail.confidence_level``.
    VERY_HIGH = "very_high"  # 0.9 <= score <= 1.0
    HIGH = "high"  # 0.7 <= score < 0.9
    MEDIUM = "medium"  # 0.5 <= score < 0.7
    LOW = "low"  # 0.3 <= score < 0.5
    VERY_LOW = "very_low"  # 0.0 <= score < 0.3
class ProcessingStatus(str, Enum):
    """Status of the processing request."""

    # Outcomes that carry results.
    SUCCESS = "success"
    PARTIAL_SUCCESS = "partial_success"

    # Outcomes where the request did not complete.
    FAILED = "failed"
    TIMEOUT = "timeout"
    RATE_LIMITED = "rate_limited"
class SpeakerInsight(BaseModel):
    """
    Insights about a specific speaker in the transcript.
    """

    # Undeclared keys are rejected, and values are re-validated whenever an
    # attribute is assigned after construction.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Who is speaking ---
    speaker: str = Field(..., description="Speaker identifier")
    speaker_role: Optional[SpeakerRole] = Field(
        default=None, description="Role of the speaker"
    )

    # --- How much they said ---
    # PositiveInt: a speaker with zero sentences cannot be represented.
    total_sentences: PositiveInt = Field(
        ..., description="Total number of sentences by this speaker"
    )
    total_duration: NonNegativeFloat = Field(
        ..., description="Total speaking time in seconds"
    )

    # --- What they said ---
    topics_mentioned: List[str] = Field(
        default_factory=list,
        description="List of topic names this speaker contributed to",
    )
    key_insights: List[str] = Field(
        default_factory=list, description="Key insights or quotes from this speaker"
    )
    # Free-form payload: no schema is imposed on its keys or values.
    sentiment_analysis: Optional[Dict[str, Any]] = Field(
        default=None, description="Optional sentiment analysis for this speaker"
    )
class TopicDetail(BaseModel):
    """
    Detailed information about an extracted topic.
    Contains the core topic information, timing, speakers,
    and business categorization.
    """

    # Undeclared keys are rejected, and values are re-validated whenever an
    # attribute is assigned after construction.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Core topic information ---
    topic_name: str = Field(
        ..., min_length=1, max_length=200, description="Descriptive name of the topic"
    )
    topic_type: TopicCategory = Field(
        ..., description="Business category of the topic"
    )
    topic_detail: str = Field(
        ...,
        min_length=1,
        max_length=1000,
        description="Detailed description or summary of the topic",
    )

    # --- Timing (seconds) ---
    # NOTE(review): each bound is validated on its own; nothing in this class
    # enforces end_time >= start_time.
    start_time: NonNegativeFloat = Field(
        ..., description="Start time of the topic in seconds"
    )
    end_time: NonNegativeFloat = Field(
        ..., description="End time of the topic in seconds"
    )

    # --- Sentence range (inclusive) ---
    # PositiveInt rejects 0, so indices are presumably 1-based — confirm
    # against the producer of these values.
    start_sentence_index: PositiveInt = Field(
        ..., description="Index of the first sentence in this topic"
    )
    end_sentence_index: PositiveInt = Field(
        ..., description="Index of the last sentence in this topic"
    )

    # --- Speakers ---
    # NOTE(review): primary_speaker is not cross-checked against all_speakers.
    primary_speaker: str = Field(..., description="Primary speaker for this topic")
    all_speakers: List[str] = Field(
        ..., min_length=1, description="All speakers who contributed to this topic"
    )

    # --- Confidence and quality metrics ---
    confidence_score: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence score for topic extraction (0.0 to 1.0)",
    )
    relevance_score: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Business relevance score (0.0 to 1.0)",
    )

    # --- Content analysis ---
    key_phrases: List[str] = Field(
        default_factory=list, description="Key phrases or keywords for this topic"
    )
    # Plain string: the values named in the description are not enforced.
    sentiment: Optional[str] = Field(
        default=None,
        description="Overall sentiment of the topic (positive/negative/neutral)",
    )

    # --- Business insights ---
    actionable_insights: List[str] = Field(
        default_factory=list,
        description="Actionable business insights from this topic",
    )
    related_topics: List[str] = Field(
        default_factory=list, description="Names of related topics in the transcript"
    )

    # --- Additional metadata ---
    metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional metadata for the topic"
    )

    # The helpers below are plain methods: they must be called, and they are
    # not part of the serialized model.
    # NOTE(review): `computed_field` is imported by this module but not applied
    # here — confirm whether these were meant to be computed properties.

    def duration(self) -> float:
        """Return the topic's length in seconds (end time minus start time)."""
        begin, finish = self.start_time, self.end_time
        return finish - begin

    def sentence_count(self) -> int:
        """Return how many sentences the inclusive index range spans."""
        first, last = self.start_sentence_index, self.end_sentence_index
        return last - first + 1

    def confidence_level(self) -> ConfidenceLevel:
        """Return the ``ConfidenceLevel`` bucket for ``confidence_score``."""
        score = self.confidence_score
        # Lower bounds, highest first; the first bound the score reaches wins.
        floors = (
            (0.9, ConfidenceLevel.VERY_HIGH),
            (0.7, ConfidenceLevel.HIGH),
            (0.5, ConfidenceLevel.MEDIUM),
            (0.3, ConfidenceLevel.LOW),
        )
        for floor, level in floors:
            if score >= floor:
                return level
        return ConfidenceLevel.VERY_LOW
class ProcessingMetadata(BaseModel):
    """
    Metadata about the processing request and results.
    """

    # protected_namespaces=() turns off pydantic's reserved "model_" prefix
    # check, which the `model_used` field would otherwise trip.
    model_config = ConfigDict(
        validate_assignment=True, extra="forbid", protected_namespaces=()
    )

    # --- Request information ---
    request_id: str = Field(..., description="Unique identifier for this request")
    timestamp: datetime = Field(..., description="Timestamp when processing started")

    # --- Processing details ---
    model_used: str = Field(..., description="Anthropic model used for processing")
    processing_time: NonNegativeFloat = Field(
        ..., description="Total processing time in seconds"
    )

    # --- Input statistics ---
    # PositiveInt on both counts: empty transcripts cannot be described here.
    total_sentences: PositiveInt = Field(
        ..., description="Total number of sentences processed"
    )
    total_duration: NonNegativeFloat = Field(
        ..., description="Total duration of the transcript in seconds"
    )
    unique_speakers: PositiveInt = Field(
        ..., description="Number of unique speakers in the transcript"
    )

    # --- Output statistics ---
    topics_extracted: int = Field(..., ge=0, description="Number of topics extracted")
    topics_merged: int = Field(
        default=0,
        ge=0,
        description="Number of topics that were merged due to similarity",
    )

    # --- Quality metrics ---
    # average_confidence is a 0-1 fraction; coverage_percentage is 0-100.
    average_confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Average confidence score across all topics",
    )
    coverage_percentage: float = Field(
        ...,
        ge=0.0,
        le=100.0,
        description="Percentage of transcript covered by extracted topics",
    )

    # --- Token usage ---
    tokens_used: Optional[Dict[str, int]] = Field(
        default=None, description="Token usage statistics from Anthropic API"
    )

    # --- Language information ---
    detected_language: Optional[LanguageCode] = Field(
        default=None, description="Detected primary language of the transcript"
    )

    # --- Warnings and notes ---
    warnings: List[str] = Field(
        default_factory=list, description="Any warnings or issues during processing"
    )
    processing_notes: List[str] = Field(
        default_factory=list, description="Additional notes about the processing"
    )
class SegmentationResult(BaseModel):
    """
    Complete result of topic segmentation analysis.
    Contains all extracted topics, speaker insights, and processing metadata.
    """

    # Undeclared keys are rejected, and values are re-validated whenever an
    # attribute is assigned after construction.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Processing status ---
    status: ProcessingStatus = Field(
        ..., description="Overall status of the processing"
    )

    # --- Core results ---
    # Required, but may be an empty list (no minimum length is set).
    topics: List[TopicDetail] = Field(
        ..., description="List of extracted topics with details"
    )

    # --- Speaker analysis ---
    speaker_insights: List[SpeakerInsight] = Field(
        default_factory=list,
        description="Insights about each speaker in the transcript",
    )

    # --- Processing information ---
    metadata: ProcessingMetadata = Field(
        ..., description="Metadata about the processing request and results"
    )

    # --- Summary information ---
    executive_summary: Optional[str] = Field(
        default=None,
        max_length=2000,
        description="Executive summary of the key findings",
    )
    key_takeaways: List[str] = Field(
        default_factory=list, description="Key takeaways and actionable insights"
    )

    # --- Business categorization summary ---
    # Supplied by the caller; this class does not derive it from `topics`.
    category_summary: Dict[TopicCategory, int] = Field(
        default_factory=dict, description="Count of topics by business category"
    )

    # The helpers below are plain methods: they must be called, and they are
    # not part of the serialized model.

    def total_topics(self) -> int:
        """Return the number of entries in ``topics``."""
        extracted = self.topics
        return len(extracted)

    def high_confidence_topics(self) -> int:
        """Return how many topics have a confidence score of at least 0.7."""
        return sum(1 for topic in self.topics if topic.confidence_score >= 0.7)

    def success_rate(self) -> float:
        """Return the mean confidence score over all topics.

        Returns 0.0 when there are no topics.
        """
        scores = [topic.confidence_score for topic in self.topics]
        if not scores:
            return 0.0
        return sum(scores) / len(scores)
class ErrorDetail(BaseModel):
    """
    Detailed error information for failed requests.
    """

    # Undeclared keys are rejected, and values are re-validated whenever an
    # attribute is assigned after construction.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- What went wrong ---
    error_code: str = Field(..., description="Specific error code")
    error_message: str = Field(..., description="Human-readable error message")
    # Plain string: the kinds listed in the description are not enforced.
    error_type: str = Field(
        ..., description="Type of error (validation, processing, api, etc.)"
    )

    # --- Help for the caller ---
    # Maps a field name to the list of messages reported for that field.
    field_errors: Optional[Dict[str, List[str]]] = Field(
        default=None, description="Field-specific validation errors"
    )
    suggestions: List[str] = Field(
        default_factory=list, description="Suggestions for fixing the error"
    )

    # --- When ---
    # Defaults to the moment the instance is built.
    # NOTE(review): datetime.now is called without a tz, so the default is a
    # naive local-time value — confirm that consumers expect that.
    timestamp: datetime = Field(
        default_factory=datetime.now, description="When the error occurred"
    )
class HealthCheckResponse(BaseModel):
    """
    Response model for health check endpoint.
    """

    # protected_namespaces=() turns off pydantic's reserved "model_" prefix
    # check, which the `model_health` field would otherwise trip.
    model_config = ConfigDict(
        validate_assignment=True, extra="forbid", protected_namespaces=()
    )

    # --- Summary ---
    # Free-form string; it is not tied to the ProcessingStatus enum.
    status: str = Field(..., description="Overall health status")
    timestamp: datetime = Field(..., description="Timestamp of the health check")
    # No lower bound is declared on this value.
    uptime_seconds: float = Field(..., description="Service uptime in seconds")

    # --- Dependency status ---
    anthropic_status: Dict[str, Any] = Field(
        ..., description="Anthropic integration status"
    )

    # --- Optional detail (free-form payloads) ---
    model_health: Optional[Dict[str, Any]] = Field(
        default=None, description="Detailed model health information"
    )
    performance_stats: Optional[Dict[str, Any]] = Field(
        default=None, description="Performance statistics"
    )
class ModelStatusResponse(BaseModel):
    """
    Response model for model status endpoint.
    """

    # protected_namespaces=() turns off pydantic's reserved "model_" prefix
    # check, which the `model_health` field would otherwise trip.
    model_config = ConfigDict(
        validate_assignment=True, extra="forbid", protected_namespaces=()
    )

    # --- Model selection ---
    # NOTE(review): nothing here checks that current_model or
    # best_performing_model appears in available_models.
    current_model: str = Field(..., description="Currently active model")
    available_models: List[str] = Field(..., description="List of available models")
    best_performing_model: str = Field(
        ..., description="Currently best performing model"
    )

    # --- Per-model detail ---
    # Outer keys are presumably model names (per the descriptions); the inner
    # dicts are free-form.
    model_health: Dict[str, Dict[str, Any]] = Field(
        ..., description="Health status for each model"
    )
    performance_stats: Dict[str, Dict[str, Any]] = Field(
        ..., description="Performance statistics for each model"
    )

    # --- Freshness ---
    last_updated: datetime = Field(
        ..., description="When the status was last updated"
    )
# Convenience aliases for annotations used by callers of this module.
TopicList = List[TopicDetail]
SpeakerInsightList = List[SpeakerInsight]
# Same class object as ErrorDetail under a second name, not a subclass.
ErrorResponse = ErrorDetail