""" MongoDB Models for Audience Segmentation Author: AI Generated Created: 2025-11-24 Purpose: Define data models for storing audience segmentation results """ from datetime import datetime from typing import Optional, List, Dict from pydantic import BaseModel, Field from bson import ObjectId class PyObjectId(ObjectId): """Custom ObjectId type for Pydantic v2""" @classmethod def __get_pydantic_core_schema__(cls, source_type, handler): from pydantic_core import core_schema return core_schema.union_schema([ core_schema.is_instance_schema(ObjectId), core_schema.chain_schema([ core_schema.str_schema(), core_schema.no_info_plain_validator_function(cls.validate), ]) ], serialization=core_schema.plain_serializer_function_ser_schema( lambda x: str(x) )) @classmethod def validate(cls, v): if isinstance(v, ObjectId): return v if isinstance(v, str): if not ObjectId.is_valid(v): raise ValueError(f"Invalid ObjectId: {v}") return ObjectId(v) raise ValueError(f"Expected ObjectId or string, got {type(v)}") class AudienceSegment(BaseModel): """ Defines the characteristics of an audience segment. This is the result of K-Means clustering on user behavior data. """ id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id") segment_name: str = Field(..., description="Human-readable segment name, e.g., 'Big Spenders', 'Music Lovers'") description: Optional[str] = Field(None, description="Detailed description of this segment") criteria: Dict = Field(default_factory=dict, description="Statistical criteria: min_spend, max_spend, top_categories, etc.") user_count: int = Field(0, description="Number of users in this segment") last_updated: datetime = Field(default_factory=datetime.utcnow) # Generative AI Output marketing_content: Optional[Dict] = Field( None, description="AI-generated marketing content: { 'email_subject': str, 'email_body': str }" ) class Config: allow_population_by_field_name = True arbitrary_types_allowed = True json_encoders = {ObjectId: str} class UserSegmentAssignment(BaseModel): """ Links a user to their assigned segment. Many-to-one relationship: many users belong to one segment. """ id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id") user_id: PyObjectId = Field(..., description="Reference to User._id") segment_id: PyObjectId = Field(..., description="Reference to AudienceSegment._id") confidence_score: float = Field(..., description="Distance to cluster center (lower is better)") assigned_at: datetime = Field(default_factory=datetime.utcnow) class Config: allow_population_by_field_name = True arbitrary_types_allowed = True json_encoders = {ObjectId: str}