|
|
"""
|
|
|
MongoDB Models for Audience Segmentation
Author: AI Generated
Created: 2025-11-24
Purpose: Define data models for storing audience segmentation results
|
|
|
"""
|
|
|
|
|
|
from datetime import datetime
from typing import Optional, List, Dict

from bson import ObjectId
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
|
|
|
class PyObjectId(ObjectId):
    """BSON ``ObjectId`` subclass that Pydantic v2 can validate and serialize.

    Validation accepts an existing ``ObjectId`` instance, or a string that
    ``ObjectId.is_valid`` accepts. Serialization emits ``str(value)``.
    """

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type, handler):
        """Return the pydantic-core schema used for fields of this type.

        Args:
            source_type: The annotated type being processed (unused).
            handler: Pydantic's schema-generation callback (unused).

        Returns:
            A union schema that accepts ``ObjectId`` instances unchanged and
            otherwise requires a string, which is then passed to ``validate``.
            Values are serialized by calling ``str`` on them.
        """
        # Function-scope import: only needed while a schema is being built.
        from pydantic_core import core_schema

        # Strings are first checked to be strings, then converted by validate().
        from_string = core_schema.chain_schema([
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls.validate),
        ])
        # NOTE(review): no `when_used` is given, so this serializer is not
        # limited to JSON output - confirm that string ids are also wanted from
        # plain `model_dump()` before writing dumped documents to MongoDB.
        to_string = core_schema.plain_serializer_function_ser_schema(
            lambda x: str(x)
        )
        return core_schema.union_schema(
            [
                core_schema.is_instance_schema(ObjectId),
                from_string,
            ],
            serialization=to_string,
        )

    @classmethod
    def __get_pydantic_json_schema__(cls, _core_schema, handler):
        """Describe this type as a plain string in generated JSON Schema.

        The instance check and the plain validator function used in the core
        schema have no JSON Schema representation of their own, so a string
        schema is substituted whenever a JSON Schema is requested.

        Args:
            _core_schema: The core schema built for this type (unused).
            handler: Callback that turns a core schema into JSON Schema.

        Returns:
            The JSON Schema produced by ``handler`` for a string schema.
        """
        from pydantic_core import core_schema

        return handler(core_schema.str_schema())

    @classmethod
    def validate(cls, v):
        """Coerce ``v`` to an ``ObjectId``.

        Args:
            v: An ``ObjectId`` instance or its string form.

        Returns:
            ``v`` itself when it is already an ``ObjectId``, otherwise a new
            ``ObjectId`` built from the string.

        Raises:
            ValueError: If ``v`` is a string that ``ObjectId.is_valid``
                rejects, or is neither an ``ObjectId`` nor a string.
        """
        if isinstance(v, ObjectId):
            return v
        if not isinstance(v, str):
            raise ValueError(f"Expected ObjectId or string, got {type(v)}")
        if not ObjectId.is_valid(v):
            raise ValueError(f"Invalid ObjectId: {v}")
        return ObjectId(v)
|
|
|
|
|
|
|
|
|
class AudienceSegment(BaseModel):
    """
    Defines the characteristics of an audience segment.

    This is the result of K-Means clustering on user behavior data.

    The ``id`` field is stored under the alias ``_id``; the model
    configuration allows it to be populated by either name.
    """

    # Pydantic v2 configuration. `populate_by_name` is the v2 spelling of the
    # v1 `allow_population_by_field_name` option, so that `id=` is accepted in
    # addition to the `_id` alias.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    # A fresh id is generated when none is supplied.
    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    segment_name: str = Field(
        ...,
        description="Human-readable segment name, e.g., 'Big Spenders', 'Music Lovers'",
    )
    description: Optional[str] = Field(
        None,
        description="Detailed description of this segment",
    )
    criteria: Dict = Field(
        default_factory=dict,
        description="Statistical criteria: min_spend, max_spend, top_categories, etc.",
    )
    user_count: int = Field(0, description="Number of users in this segment")
    # datetime.utcnow yields a naive datetime (no tzinfo attached).
    last_updated: datetime = Field(default_factory=datetime.utcnow)

    marketing_content: Optional[Dict] = Field(
        None,
        description="AI-generated marketing content: { 'email_subject': str, 'email_body': str }",
    )
|
|
|
|
|
|
|
|
|
class UserSegmentAssignment(BaseModel):
    """
    Links a user to their assigned segment.

    Many-to-one relationship: many users belong to one segment.

    The ``id`` field is stored under the alias ``_id``; the model
    configuration allows it to be populated by either name.
    """

    # Pydantic v2 configuration. `populate_by_name` is the v2 spelling of the
    # v1 `allow_population_by_field_name` option, so that `id=` is accepted in
    # addition to the `_id` alias.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    # A fresh id is generated when none is supplied.
    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    user_id: PyObjectId = Field(..., description="Reference to User._id")
    segment_id: PyObjectId = Field(
        ...,
        description="Reference to AudienceSegment._id",
    )
    confidence_score: float = Field(
        ...,
        description="Distance to cluster center (lower is better)",
    )
    # datetime.utcnow yields a naive datetime (no tzinfo attached).
    assigned_at: datetime = Field(default_factory=datetime.utcnow)
|
|
|
|