File size: 3,126 Bytes
34b2632
 
 
 
 
 
 
 
 
 
 
 
 
 
ea06065
 
34b2632
ea06065
 
 
 
 
 
 
 
 
 
 
 
 
 
34b2632
 
ea06065
 
 
 
 
 
 
34b2632
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""

MongoDB Models for Audience Segmentation

Author: AI Generated

Created: 2025-11-24

Purpose: Define data models for storing audience segmentation results

"""

from datetime import datetime, timezone
from typing import Optional, List, Dict

from bson import ObjectId
from pydantic import BaseModel, ConfigDict, Field


class PyObjectId(ObjectId):
    """ObjectId subclass usable as a field type in Pydantic v2 models.

    Validation accepts either an existing ``ObjectId`` instance or a string
    that ``ObjectId.is_valid`` accepts. The attached serializer converts the
    value with ``str()``.
    """

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type, handler):
        """Return the pydantic-core schema used for this type.

        The schema is a union of two branches (an ``ObjectId`` instance, or a
        string passed through :meth:`validate`) with a plain string
        serializer attached. ``source_type`` and ``handler`` are unused.
        """
        from pydantic_core import core_schema

        # Branch 1: the value is already an ObjectId -- accepted unchanged.
        already_object_id = core_schema.is_instance_schema(ObjectId)

        # Branch 2: the value is a string -- checked as a str first, then
        # handed to validate() for conversion.
        from_string = core_schema.chain_schema([
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls.validate),
        ])

        to_string = core_schema.plain_serializer_function_ser_schema(
            lambda oid: str(oid)
        )

        return core_schema.union_schema(
            [already_object_id, from_string],
            serialization=to_string,
        )

    @classmethod
    def validate(cls, v):
        """Coerce ``v`` to an ``ObjectId``.

        Returns:
            ``v`` itself when it is already an ``ObjectId``; otherwise a new
            ``ObjectId`` built from the string.

        Raises:
            ValueError: if ``v`` is neither an ``ObjectId`` nor a string, or
                is a string that ``ObjectId.is_valid`` rejects.
        """
        if isinstance(v, ObjectId):
            return v
        if not isinstance(v, str):
            raise ValueError(f"Expected ObjectId or string, got {type(v)}")
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError(f"Invalid ObjectId: {v}")


class AudienceSegment(BaseModel):
    """
    Defines the characteristics of an audience segment.

    This is the result of K-Means clustering on user behavior data.

    The ``id`` field may be supplied either by its field name (``id``) or by
    its alias (``_id``).
    """

    # Pydantic v2 configuration. The v1-style ``class Config`` key
    # ``allow_population_by_field_name`` was renamed to ``populate_by_name``
    # in v2; the old spelling only triggers a warning and is not applied, so
    # ``AudienceSegment(id=...)`` would silently drop the supplied id.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    segment_name: str = Field(..., description="Human-readable segment name, e.g., 'Big Spenders', 'Music Lovers'")
    description: Optional[str] = Field(None, description="Detailed description of this segment")
    criteria: Dict = Field(default_factory=dict, description="Statistical criteria: min_spend, max_spend, top_categories, etc.")
    user_count: int = Field(0, description="Number of users in this segment")
    # Naive UTC timestamp, same value shape as the deprecated
    # ``datetime.utcnow()`` (deprecated since Python 3.12).
    last_updated: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )

    # Generative AI Output
    marketing_content: Optional[Dict] = Field(
        None,
        description="AI-generated marketing content: { 'email_subject': str, 'email_body': str }"
    )


class UserSegmentAssignment(BaseModel):
    """
    Links a user to their assigned segment.

    Many-to-one relationship: many users belong to one segment.

    The ``id`` field may be supplied either by its field name (``id``) or by
    its alias (``_id``).
    """

    # Pydantic v2 configuration. The v1-style ``class Config`` key
    # ``allow_population_by_field_name`` was renamed to ``populate_by_name``
    # in v2; the old spelling only triggers a warning and is not applied, so
    # ``UserSegmentAssignment(id=...)`` would silently drop the supplied id.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    user_id: PyObjectId = Field(..., description="Reference to User._id")
    segment_id: PyObjectId = Field(..., description="Reference to AudienceSegment._id")
    confidence_score: float = Field(..., description="Distance to cluster center (lower is better)")
    # Naive UTC timestamp, same value shape as the deprecated
    # ``datetime.utcnow()`` (deprecated since Python 3.12).
    assigned_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )