File size: 1,571 Bytes
783a952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""Pydantic models used in the ml_module."""
from pydantic import BaseModel, Field
from typing import Dict, Any, Optional
import datetime


def _utcnow() -> datetime.datetime:
    """Return the current time as a timezone-aware datetime in UTC."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)


class ProjectVersions(BaseModel):
    """Integer version counters, one per artifact type.

    ``raw`` defaults to 1; ``processed``, ``model`` and ``evaluation``
    default to 0.
    """

    # NOTE(review): presumably raw starts at 1 because a project begins with
    # an initial raw upload -- confirm against the code that creates projects.
    raw: int = Field(default=1)
    processed: int = Field(default=0)
    model: int = Field(default=0)
    evaluation: int = Field(default=0)


class ProjectArtifacts(BaseModel):
    """Artifact paths for a project, grouped by artifact type.

    ``raw`` is a single optional string that defaults to ``None``. Every
    other group is a string-to-string mapping that defaults to a new empty
    dict for each instance.
    """

    raw: Optional[str] = Field(default=None)

    # NOTE(review): what the mapping keys represent (version label, artifact
    # name, ...) is not visible in this module -- confirm against callers.
    analysis: Dict[str, str] = Field(default_factory=dict)
    processed: Dict[str, str] = Field(default_factory=dict)
    # Phase 4: this mapping also includes training_code paths.
    model: Dict[str, str] = Field(default_factory=dict)
    evaluation: Dict[str, str] = Field(default_factory=dict)


class Project(BaseModel):
    """Metadata record describing one ML project.

    ``project_id``, ``user_id`` and ``project_name`` are required. Every
    other field has a default, so a new project can be built from those
    three values alone.
    """

    # --- Identity (required) ---
    project_id: str
    user_id: str
    project_name: str

    # --- Timestamps ---
    # Each default comes from its own _utcnow() call, so the two values on a
    # freshly built instance are timezone-aware UTC and may differ by a few
    # microseconds.
    created_at: datetime.datetime = Field(default_factory=_utcnow)
    updated_at: datetime.datetime = Field(default_factory=_utcnow)

    # --- Conversational workflow state ---
    current_step: str = "ready_for_analysis"

    # --- Version counters and artifact locations ---
    # Factories give every project its own nested model instances.
    versions: ProjectVersions = Field(default_factory=ProjectVersions)
    artifacts: ProjectArtifacts = Field(default_factory=ProjectArtifacts)

    # --- ML workflow metadata ---
    # NOTE(review): on some pydantic v2 releases a field name starting with
    # "model_" triggers a protected-namespace UserWarning -- confirm which
    # pydantic version this project pins before renaming or configuring.
    model_choice: Optional[str] = Field(default=None)
    target_column: Optional[str] = Field(default=None)
    metadata: Dict[str, Any] = Field(default_factory=dict)