# sirus/backend/ml_module/core/constants.py
# ranilmukesh: Deploy SiRUS SQL Agent backend (783a952)
"""Application-wide constants and simple enums for ml_module."""
from enum import Enum
from typing import Literal
class ProjectStatus(str, Enum):
    """Status values a project can carry.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (for example ``ProjectStatus.RUNNING == "running"``).
    """

    CREATED = "created"
    RUNNING = "running"
    FAILED = "failed"
    COMPLETED = "completed"
class ProjectStep(str, Enum):
    """Valid states for ML workflow progression.

    Members also subclass ``str``, so each one compares equal to its raw
    string value. Declaration order is preserved when iterating the enum.
    """

    # Analysis
    READY_FOR_ANALYSIS = "ready_for_analysis"
    ANALYSIS_IN_PROGRESS = "analysis_in_progress"
    ANALYSIS_COMPLETE = "analysis_complete"

    # Preprocessing
    AWAITING_PREPROCESSING_APPROVAL = "awaiting_preprocessing_approval"
    PREPROCESSING_IN_PROGRESS = "preprocessing_in_progress"
    PREPROCESSING_COMPLETE = "preprocessing_complete"
    AWAITING_PREPROCESSING_FEEDBACK = "awaiting_preprocessing_feedback"

    # Training
    TRAINING_CODE_GENERATED = "training_code_generated"
    AWAITING_TRAINING_CODE_REVIEW = "awaiting_training_code_review"
    AWAITING_TRAINING_CODE_EDIT = "awaiting_training_code_edit"
    TRAINING_IN_PROGRESS = "training_in_progress"
    TRAINING_COMPLETE = "training_complete"

    # Evaluation
    AWAITING_EVALUATION_APPROVAL = "awaiting_evaluation_approval"
    EVALUATION_IN_PROGRESS = "evaluation_in_progress"
    EVALUATION_COMPLETE = "evaluation_complete"
class StoragePaths:
"""Standardized storage path templates for project artifacts."""
# Base structure
RAW_FOLDER = "{tenant_id}/projects/{project_id}/raw"
ANALYSIS_FOLDER = "{tenant_id}/projects/{project_id}/analysis"
PROCESSED_FOLDER = "{tenant_id}/projects/{project_id}/processed"
MODELS_FOLDER = "{tenant_id}/projects/{project_id}/models"
EVALUATION_FOLDER = "{tenant_id}/projects/{project_id}/evaluation"
DRAFTS_FOLDER = "{tenant_id}/projects/{project_id}/drafts"
# General Files
GENERAL_FILES = "{tenant_id}/files/"
# Project metadata
PROJECT_METADATA = "{tenant_id}/projects/{project_id}/project_metadata.json"
# Analysis artifacts
DATA_PROFILE = "{tenant_id}/projects/{project_id}/analysis/data_profile.json"
MISSING_VALUES_REPORT = "{tenant_id}/projects/{project_id}/analysis/missing_values_report.json"
SAMPLE_RAW_HEAD = "{tenant_id}/projects/{project_id}/analysis/sample_raw_head_v{version}.json"
# Processed data artifacts
CLEANED_DATA = "{tenant_id}/projects/{project_id}/processed/cleaned_data_v{version}.csv"
CLEANED_SAMPLE = "{tenant_id}/projects/{project_id}/processed/samples/cleaned_head_v{version}.json"
CHANGE_LOG = "{tenant_id}/projects/{project_id}/processed/change_log_v{version}.json"
# Model artifacts
MODEL_ARTIFACT = "{tenant_id}/projects/{project_id}/models/{model_type}_model_v{version}.joblib"
MODEL_METRICS = "{tenant_id}/projects/{project_id}/models/{model_type}_metrics_v{version}.json"
TRAINING_CODE = "{tenant_id}/projects/{project_id}/models/training_code_v{version}.py"
TRAINING_LOG = "{tenant_id}/projects/{project_id}/models/training_log_v{version}.txt"
# Evaluation artifacts
CONFUSION_MATRIX = "{tenant_id}/projects/{project_id}/evaluation/confusion_matrix_v{version}.json"
FEATURE_IMPORTANCE = "{tenant_id}/projects/{project_id}/evaluation/feature_importance_v{version}.json"
EVALUATION_REPORT = "{tenant_id}/projects/{project_id}/evaluation/evaluation_report_v{version}.md"
EVALUATION_CODE = "{tenant_id}/projects/{project_id}/evaluation/evaluation_code_v{version}.py"
# Draft artifacts
DRAFT_TRAINING_CODE = "{tenant_id}/projects/{project_id}/drafts/training_code_draft_v{version}.py"
class ArtifactTypes:
    """Standard artifact type names for consistent referencing.

    Each attribute is a plain string equal to the lower-case form of the
    attribute name, except ``RAW_DATASET``-style names which follow the same
    rule and ``MISSING_VALUES`` which maps to ``"missing_values"``.
    """

    # Raw data
    RAW_DATASET = "raw_dataset"

    # Analysis
    DATA_PROFILE = "data_profile"
    MISSING_VALUES = "missing_values"
    SAMPLE_RAW_HEAD = "sample_raw_head"

    # Processed
    CLEANED_DATA = "cleaned_data"
    CLEANED_SAMPLE = "cleaned_sample"
    CHANGE_LOG = "change_log"

    # Models
    MODEL_ARTIFACT = "model_artifact"
    MODEL_METRICS = "model_metrics"
    TRAINING_CODE = "training_code"
    TRAINING_LOG = "training_log"

    # Evaluation
    CONFUSION_MATRIX = "confusion_matrix"
    FEATURE_IMPORTANCE = "feature_importance"
    EVALUATION_REPORT = "evaluation_report"
    EVALUATION_CODE = "evaluation_code"

    # Drafts
    DRAFT_TRAINING_CODE = "draft_training_code"
# Literal alias restricting a version-type string to one of four names
VersionType = Literal["raw", "processed", "model", "evaluation"]

# Default prefix string for projects
DEFAULT_PROJECT_PREFIX: str = "proj_"

# Row-count default and upper limit for samples
DEFAULT_SAMPLE_ROWS: int = 20
MAX_SAMPLE_ROWS: int = 100

# Default model type name
DEFAULT_MODEL_TYPE: str = "RandomForest"