from .dataset_analyzer import DatasetAnalyzer
from .problem_inference import ProblemInference
from .strategy_reasoner import StrategyReasoner
from .model_factory import ModelFactory
from .explainability import ExplainabilityEngine
from .deployment_generator import DeploymentGenerator
from .monitoring import MonitoringEngine
from ..utils.logger import logger
from ..utils.validators import DataValidator
from ..utils.model_io import ModelIO

import json
import os


class Orchestrator:
    """End-to-end pipeline coordinator.

    Wires together data validation, dataset analysis, problem-type
    inference, strategy selection, and — when training is requested —
    model building, explainability, model persistence, and generation
    of deployment artifacts (FastAPI app + Dockerfile).
    """

    def __init__(self):
        # Collaborators are constructed once and reused across run() calls.
        self.validator = DataValidator()
        self.analyzer = DatasetAnalyzer()
        self.inferencer = ProblemInference()
        self.reasoner = StrategyReasoner()
        self.model_factory = ModelFactory()
        self.explainer = ExplainabilityEngine()
        self.deployer = DeploymentGenerator()
        self.monitor = MonitoringEngine()
        self.model_io = ModelIO()

    def run(self, df, target_column, train=False):
        """Run the pipeline on ``df`` predicting ``target_column``.

        Parameters
        ----------
        df : pandas.DataFrame
            Input dataset that includes the target column.
        target_column : str
            Name of the column to predict.
        train : bool, optional
            When True, also train a model, compute feature importances,
            save the model under ``exports/models``, and write deployment
            artifacts under ``exports/deployment``.

        Returns
        -------
        dict
            Pipeline results: dataset info, problem type, strategy and
            its trade-off explanation; plus, when ``train`` is True,
            metrics, strategy explanation, and feature importances.
        """
        self.validator.validate_dataframe(df, target_column)
        logger.info("Validation passed")

        dataset_info = self.analyzer.analyze(df, target_column)
        problem_type = self.inferencer.infer(dataset_info, target_column)
        strategy = self.reasoner.decide(dataset_info, problem_type)
        tradeoff_explanation = self.reasoner.explain_tradeoffs(strategy)

        self._log_strategy(dataset_info, strategy, target_column)

        response = {
            "dataset_info": dataset_info,
            "problem_type": problem_type,
            "strategy": strategy,
            "strategy_tradeoffs": tradeoff_explanation,
        }

        if problem_type == "nlp":
            response["nlp_mode"] = "activated"

        if train:
            self._train_and_export(
                df, target_column, dataset_info, problem_type, strategy, response
            )

        return response

    def _log_strategy(self, dataset_info, strategy, target_column):
        """Persist the chosen strategy and dataset characteristics as JSON
        under ``experiments/logs/``."""
        log_data = {
            "dataset_characteristics": dataset_info,
            "chosen_model_family": strategy.get("model_family"),
            "detected_risks": strategy.get("risks", []),
            "confidence_score": strategy.get("confidence", 0),
        }
        os.makedirs("experiments/logs", exist_ok=True)
        # Sanitize the column name so characters such as "/" cannot break
        # the log path (a target named "price/unit" would otherwise crash
        # open()). Alphanumerics plus "-", "_", "." pass through unchanged.
        safe_name = "".join(
            c if c.isalnum() or c in "-_." else "_" for c in target_column
        )
        log_path = os.path.join("experiments/logs", f"{safe_name}_strategy.json")
        with open(log_path, "w", encoding="utf-8") as f:
            # default=str: dataset_info may hold values json cannot
            # serialize natively; stringify rather than fail the run.
            json.dump(log_data, f, indent=4, default=str)

    def _train_and_export(
        self, df, target_column, dataset_info, problem_type, strategy, response
    ):
        """Train a model, attach metrics/explanations to ``response``,
        save the model, and write deployment artifacts."""
        model, metrics = self.model_factory.build_and_train(
            df, target_column, dataset_info, problem_type, strategy
        )
        response["metrics"] = metrics
        response["strategy_explanation"] = self.reasoner.explain_strategy(strategy)

        # SHAP-style explanations are expensive; a 100-row sample keeps
        # the explainability step tractable on large frames.
        X_sample = df.drop(columns=[target_column]).head(100)
        response["feature_importance"] = self.explainer.explain_tabular(
            model, X_sample
        )

        os.makedirs("exports/models", exist_ok=True)
        os.makedirs("exports/deployment", exist_ok=True)

        model_path = "exports/models/trained_model.pkl"
        self.model_io.save(model, model_path)

        # Generate a serving app and container recipe for the saved model.
        fastapi_app = self.deployer.generate_fastapi_app(model_path)
        dockerfile = self.deployer.generate_dockerfile()
        with open("exports/deployment/main.py", "w", encoding="utf-8") as f:
            f.write(fastapi_app)
        with open("exports/deployment/Dockerfile", "w", encoding="utf-8") as f:
            f.write(dockerfile)