File size: 1,963 Bytes
c7d4394 16ec2cf c7d4394 16ec2cf c7d4394 16ec2cf c7d4394 16ec2cf c7d4394 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
"""
models/anomaly-detection/src/entity/artifact_entity.py
Artifact entities for pipeline outputs
"""
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
from pathlib import Path
@dataclass
class DataIngestionArtifact:
"""Artifact from data ingestion step"""
raw_data_path: str
total_records: int
records_from_sqlite: int
records_from_csv: int
ingestion_timestamp: str
is_data_available: bool
@dataclass
class DataValidationArtifact:
"""Artifact from data validation step"""
validated_data_path: str
validation_report_path: str
total_records: int
valid_records: int
invalid_records: int
validation_status: bool
validation_errors: List[Dict[str, Any]]
@dataclass
class DataTransformationArtifact:
"""Artifact from data transformation step"""
transformed_data_path: str
vector_embeddings_path: str
feature_store_path: str
total_records: int
language_distribution: Dict[str, int]
transformation_report: Dict[str, Any]
@dataclass
class ModelTrainerArtifact:
"""Artifact from model training step"""
# Best model info
best_model_name: str
best_model_path: str
best_model_metrics: Dict[str, float]
# All trained models
trained_models: List[Dict[str, Any]]
# MLflow tracking
mlflow_run_id: str
mlflow_experiment_id: str
# Cluster/anomaly results
n_clusters: Optional[int]
n_anomalies: Optional[int]
anomaly_indices: Optional[List[int]]
# Training info
training_duration_seconds: float
optuna_study_name: Optional[str]
@dataclass
class PipelineArtifact:
"""Complete pipeline artifact"""
data_ingestion: DataIngestionArtifact
data_validation: DataValidationArtifact
data_transformation: DataTransformationArtifact
model_trainer: ModelTrainerArtifact
pipeline_run_id: str
pipeline_start_time: str
pipeline_end_time: str
pipeline_status: str
|