| | """
|
| | Sentiment Analysis Pydantic Models for MongoDB
|
| | Author: AI Generated
|
| | Created: 2025-11-24
|
| | Purpose: Define schemas for sentiment analysis results
|
| | """
|
| |
|
from datetime import datetime, timezone
from typing import Dict, List, Optional

from bson import ObjectId
from pydantic import BaseModel, ConfigDict, Field
|
| |
|
| |
|
class PyObjectId(ObjectId):
    """ObjectId subclass that Pydantic v2 can validate and serialize.

    Validation accepts either an existing ``ObjectId`` instance or a string
    that ``ObjectId.is_valid`` approves of; serialization emits ``str(value)``.
    """

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type, handler):
        """Build the pydantic-core schema used for fields of this type.

        ``source_type`` and ``handler`` are part of the hook signature and
        are not used here.
        """
        from pydantic_core import core_schema

        # Branch 1: the value already is an ObjectId and passes through as-is.
        already_object_id = core_schema.is_instance_schema(ObjectId)

        # Branch 2: the value must first validate as a string, then it is
        # handed to ``validate`` for conversion.
        parsed_from_string = core_schema.chain_schema(
            [
                core_schema.str_schema(),
                core_schema.no_info_plain_validator_function(cls.validate),
            ]
        )

        # Output side: render the value through ``str``.
        as_string = core_schema.plain_serializer_function_ser_schema(
            lambda oid: str(oid)
        )

        return core_schema.union_schema(
            [already_object_id, parsed_from_string],
            serialization=as_string,
        )

    @classmethod
    def validate(cls, v):
        """Return ``v`` as an ``ObjectId``.

        Args:
            v: An ``ObjectId`` instance or a string representation of one.

        Returns:
            ``v`` itself when it is already an ``ObjectId``; otherwise a new
            ``ObjectId`` built from the string.

        Raises:
            ValueError: If ``v`` is neither an ``ObjectId`` nor a string, or
                if the string is rejected by ``ObjectId.is_valid``.
        """
        if isinstance(v, ObjectId):
            return v
        if not isinstance(v, str):
            raise ValueError(f"Expected ObjectId or string, got {type(v)}")
        if not ObjectId.is_valid(v):
            raise ValueError(f"Invalid ObjectId: {v}")
        return ObjectId(v)
|
| |
|
| |
|
| |
|
class SentimentAnalysisResult(BaseModel):
    """Individual sentiment analysis result for a comment/feedback.

    Each field's meaning is given by its ``description``. The ``id`` field is
    populated from the ``_id`` key (its alias) and, because
    ``populate_by_name`` is enabled, may also be supplied as ``id``.
    """

    # Pydantic v2 style configuration. This replaces the nested
    # ``class Config``, which Pydantic v2 reports as deprecated; the options
    # themselves are unchanged.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    source_id: PyObjectId = Field(..., description="ID of the original comment/post")
    source_type: str = Field(default="UserCommentPost", description="Type of source")

    event_code: str = Field(..., description="Event identifier this comment belongs to")

    sentiment_label: str = Field(..., description="Positive, Negative, or Neutral")
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Model confidence (0-1)")

    key_phrases: List[str] = Field(
        default_factory=list,
        description="Extracted keywords/phrases from the text",
    )

    # Defaults to the current UTC time as a *naive* datetime. This is the same
    # value ``datetime.utcnow()`` produced, without calling that function,
    # which is deprecated since Python 3.12.
    analyzed_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )
|
| |
|
| |
|
class EventInsightReport(BaseModel):
    """High-level insights for an event, generated by LLM.

    Includes Top 5 issues, NPS prediction, and improvement suggestions.
    Each field's meaning is given by its ``description``. The ``id`` field is
    populated from the ``_id`` key (its alias) and, because
    ``populate_by_name`` is enabled, may also be supplied as ``id``.
    """

    # Pydantic v2 style configuration. This replaces the nested
    # ``class Config``, which Pydantic v2 reports as deprecated; the options
    # themselves are unchanged.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    event_code: str = Field(..., description="Reference to EventVersion.EventCode")

    # Defaults to the current UTC time as a *naive* datetime. This is the same
    # value ``datetime.utcnow()`` produced, without calling that function,
    # which is deprecated since Python 3.12.
    report_date: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )
    total_comments: int = Field(0, description="Total number of comments analyzed")
    sentiment_breakdown: Dict[str, int] = Field(
        default_factory=dict,
        description="Count by sentiment: { 'Positive': 50, 'Negative': 10, 'Neutral': 20 }",
    )
    predicted_nps: Optional[float] = Field(None, description="Predicted NPS score (0-100)")
    top_issues: List[str] = Field(
        default_factory=list,
        description="Top 5 recurring issues, e.g., ['Check-in slow', 'Sound quality poor']",
    )
    improvement_suggestions: List[str] = Field(
        default_factory=list,
        description="AI-generated suggestions for improvement",
    )
|
| |
|