# Aus_F/models/segmentation_models.py
# (hosting-page residue kept for provenance: "minhvtt's picture",
#  "Upload 19 files", commit ea06065 verified)
"""
MongoDB Models for Audience Segmentation
Author: AI Generated
Created: 2025-11-24
Purpose: Define data models for storing audience segmentation results
"""
from datetime import datetime
from typing import Optional, List, Dict

from bson import ObjectId
from pydantic import BaseModel, ConfigDict, Field
class PyObjectId(ObjectId):
    """ObjectId subclass carrying the Pydantic v2 validation/serialization hooks.

    Validation accepts either an existing ``ObjectId`` instance or a string
    that ``ObjectId.is_valid`` approves. Serialization converts the value
    with ``str``.
    """

    @classmethod
    def validate(cls, v):
        """Coerce *v* to an ``ObjectId``.

        Args:
            v: An ``ObjectId`` instance (returned unchanged) or a string
                representation of one.

        Returns:
            The ``ObjectId`` corresponding to *v*.

        Raises:
            ValueError: If *v* is a string that is not a valid ObjectId, or
                is neither an ``ObjectId`` nor a string.
        """
        if isinstance(v, ObjectId):
            return v
        # Guard clause: reject anything that is not a string from here on.
        if not isinstance(v, str):
            raise ValueError(f"Expected ObjectId or string, got {type(v)}")
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError(f"Invalid ObjectId: {v}")

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type, handler):
        """Build the pydantic-core schema used for fields of this type.

        The schema is a union of "already an ObjectId" and "a string run
        through ``validate``", with a plain serializer that stringifies the
        value. ``source_type`` and ``handler`` are not used.
        """
        # Imported locally, so pydantic_core is only needed when Pydantic
        # actually asks for the schema.
        from pydantic_core import core_schema

        already_object_id = core_schema.is_instance_schema(ObjectId)
        from_string = core_schema.chain_schema([
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls.validate),
        ])
        to_string = core_schema.plain_serializer_function_ser_schema(
            lambda oid: str(oid)
        )
        return core_schema.union_schema(
            [already_object_id, from_string],
            serialization=to_string,
        )
class AudienceSegment(BaseModel):
    """
    Defines the characteristics of an audience segment.
    This is the result of K-Means clustering on user behavior data.

    The ``id`` field is exposed under the alias ``_id``; because
    ``populate_by_name`` is enabled it can be supplied under either name
    when constructing the model.
    """

    # Pydantic v2 configuration. The v1-style ``class Config`` key
    # ``allow_population_by_field_name`` was renamed to ``populate_by_name``
    # in v2; the old spelling only triggers a warning and is not applied,
    # so the v2 name is used here.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    # A fresh PyObjectId is generated when no id is supplied.
    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    segment_name: str = Field(..., description="Human-readable segment name, e.g., 'Big Spenders', 'Music Lovers'")
    description: Optional[str] = Field(None, description="Detailed description of this segment")
    criteria: Dict = Field(default_factory=dict, description="Statistical criteria: min_spend, max_spend, top_categories, etc.")
    user_count: int = Field(0, description="Number of users in this segment")
    # NOTE(review): datetime.utcnow() yields a naive datetime and is
    # deprecated since Python 3.12. Left unchanged because switching to an
    # aware timestamp would change the values callers receive.
    last_updated: datetime = Field(default_factory=datetime.utcnow)

    # Generative AI Output
    marketing_content: Optional[Dict] = Field(
        None,
        description="AI-generated marketing content: { 'email_subject': str, 'email_body': str }"
    )
class UserSegmentAssignment(BaseModel):
    """
    Links a user to their assigned segment.
    Many-to-one relationship: many users belong to one segment.

    The ``id`` field is exposed under the alias ``_id``; because
    ``populate_by_name`` is enabled it can be supplied under either name
    when constructing the model.
    """

    # Pydantic v2 configuration. The v1-style ``class Config`` key
    # ``allow_population_by_field_name`` was renamed to ``populate_by_name``
    # in v2; the old spelling only triggers a warning and is not applied,
    # so the v2 name is used here.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    # A fresh PyObjectId is generated when no id is supplied.
    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    user_id: PyObjectId = Field(..., description="Reference to User._id")
    segment_id: PyObjectId = Field(..., description="Reference to AudienceSegment._id")
    confidence_score: float = Field(..., description="Distance to cluster center (lower is better)")
    # NOTE(review): datetime.utcnow() yields a naive datetime and is
    # deprecated since Python 3.12. Left unchanged because switching to an
    # aware timestamp would change the values callers receive.
    assigned_at: datetime = Field(default_factory=datetime.utcnow)