"""Application-wide constants and simple enums for ml_module.""" from enum import Enum from typing import Literal class ProjectStatus(str, Enum): CREATED = "created" RUNNING = "running" FAILED = "failed" COMPLETED = "completed" class ProjectStep(str, Enum): """Valid states for ML workflow progression.""" READY_FOR_ANALYSIS = "ready_for_analysis" ANALYSIS_IN_PROGRESS = "analysis_in_progress" ANALYSIS_COMPLETE = "analysis_complete" AWAITING_PREPROCESSING_APPROVAL = "awaiting_preprocessing_approval" PREPROCESSING_IN_PROGRESS = "preprocessing_in_progress" PREPROCESSING_COMPLETE = "preprocessing_complete" AWAITING_PREPROCESSING_FEEDBACK = "awaiting_preprocessing_feedback" TRAINING_CODE_GENERATED = "training_code_generated" AWAITING_TRAINING_CODE_REVIEW = "awaiting_training_code_review" AWAITING_TRAINING_CODE_EDIT = "awaiting_training_code_edit" TRAINING_IN_PROGRESS = "training_in_progress" TRAINING_COMPLETE = "training_complete" AWAITING_EVALUATION_APPROVAL = "awaiting_evaluation_approval" EVALUATION_IN_PROGRESS = "evaluation_in_progress" EVALUATION_COMPLETE = "evaluation_complete" class StoragePaths: """Standardized storage path templates for project artifacts.""" # Base structure RAW_FOLDER = "{tenant_id}/projects/{project_id}/raw" ANALYSIS_FOLDER = "{tenant_id}/projects/{project_id}/analysis" PROCESSED_FOLDER = "{tenant_id}/projects/{project_id}/processed" MODELS_FOLDER = "{tenant_id}/projects/{project_id}/models" EVALUATION_FOLDER = "{tenant_id}/projects/{project_id}/evaluation" DRAFTS_FOLDER = "{tenant_id}/projects/{project_id}/drafts" # General Files GENERAL_FILES = "{tenant_id}/files/" # Project metadata PROJECT_METADATA = "{tenant_id}/projects/{project_id}/project_metadata.json" # Analysis artifacts DATA_PROFILE = "{tenant_id}/projects/{project_id}/analysis/data_profile.json" MISSING_VALUES_REPORT = "{tenant_id}/projects/{project_id}/analysis/missing_values_report.json" SAMPLE_RAW_HEAD = "{tenant_id}/projects/{project_id}/analysis/sample_raw_head_v{version}.json" # Processed data artifacts CLEANED_DATA = "{tenant_id}/projects/{project_id}/processed/cleaned_data_v{version}.csv" CLEANED_SAMPLE = "{tenant_id}/projects/{project_id}/processed/samples/cleaned_head_v{version}.json" CHANGE_LOG = "{tenant_id}/projects/{project_id}/processed/change_log_v{version}.json" # Model artifacts MODEL_ARTIFACT = "{tenant_id}/projects/{project_id}/models/{model_type}_model_v{version}.joblib" MODEL_METRICS = "{tenant_id}/projects/{project_id}/models/{model_type}_metrics_v{version}.json" TRAINING_CODE = "{tenant_id}/projects/{project_id}/models/training_code_v{version}.py" TRAINING_LOG = "{tenant_id}/projects/{project_id}/models/training_log_v{version}.txt" # Evaluation artifacts CONFUSION_MATRIX = "{tenant_id}/projects/{project_id}/evaluation/confusion_matrix_v{version}.json" FEATURE_IMPORTANCE = "{tenant_id}/projects/{project_id}/evaluation/feature_importance_v{version}.json" EVALUATION_REPORT = "{tenant_id}/projects/{project_id}/evaluation/evaluation_report_v{version}.md" EVALUATION_CODE = "{tenant_id}/projects/{project_id}/evaluation/evaluation_code_v{version}.py" # Draft artifacts DRAFT_TRAINING_CODE = "{tenant_id}/projects/{project_id}/drafts/training_code_draft_v{version}.py" class ArtifactTypes: """Standard artifact type names for consistent referencing.""" # Raw data RAW_DATASET = "raw_dataset" # Analysis DATA_PROFILE = "data_profile" MISSING_VALUES = "missing_values" SAMPLE_RAW_HEAD = "sample_raw_head" # Processed CLEANED_DATA = "cleaned_data" CLEANED_SAMPLE = "cleaned_sample" CHANGE_LOG = "change_log" # Models MODEL_ARTIFACT = "model_artifact" MODEL_METRICS = "model_metrics" TRAINING_CODE = "training_code" TRAINING_LOG = "training_log" # Evaluation CONFUSION_MATRIX = "confusion_matrix" FEATURE_IMPORTANCE = "feature_importance" EVALUATION_REPORT = "evaluation_report" EVALUATION_CODE = "evaluation_code" # Drafts DRAFT_TRAINING_CODE = "draft_training_code" # Version types for type safety VersionType = Literal["raw", "processed", "model", "evaluation"] DEFAULT_PROJECT_PREFIX = "proj_" # Default limits and settings DEFAULT_SAMPLE_ROWS = 20 MAX_SAMPLE_ROWS = 100 DEFAULT_MODEL_TYPE = "RandomForest"