# NOTE(review): removed non-source artifacts from a scraped page view
# ("Spaces:"/"Sleeping" chrome, file-size line, commit hash, line-number gutter).
from .dataset_analyzer import DatasetAnalyzer
from .problem_inference import ProblemInference
from .strategy_reasoner import StrategyReasoner
from .model_factory import ModelFactory
from .explainability import ExplainabilityEngine
from .deployment_generator import DeploymentGenerator
from .monitoring import MonitoringEngine
from ..utils.logger import logger
from ..utils.validators import DataValidator
from ..utils.model_io import ModelIO
import json
import os
class Orchestrator:
    """Coordinates the end-to-end pipeline: validation, dataset analysis,
    problem inference, strategy selection, and — when requested — model
    training, explainability, persistence, and deployment artifact generation.
    """

    def __init__(self):
        # One component per pipeline stage; all are stateless collaborators.
        self.validator = DataValidator()
        self.analyzer = DatasetAnalyzer()
        self.inferencer = ProblemInference()
        self.reasoner = StrategyReasoner()
        self.model_factory = ModelFactory()
        self.explainer = ExplainabilityEngine()
        self.deployer = DeploymentGenerator()
        self.monitor = MonitoringEngine()
        self.model_io = ModelIO()

    def run(self, df, target_column, train=False):
        """Run the pipeline on *df* to predict *target_column*.

        Args:
            df: Input DataFrame holding features plus the target column.
            target_column: Name of the target column in *df*.
            train: When True, additionally train a model, compute feature
                importance, save the model, and emit deployment artifacts.

        Returns:
            dict with keys ``dataset_info``, ``problem_type``, ``strategy``,
            ``strategy_tradeoffs`` (plus ``nlp_mode`` for NLP problems, and
            ``metrics`` / ``strategy_explanation`` / ``feature_importance``
            when ``train`` is True).
        """
        self.validator.validate_dataframe(df, target_column)
        logger.info("Validation passed")

        dataset_info = self.analyzer.analyze(df, target_column)
        problem_type = self.inferencer.infer(dataset_info, target_column)
        strategy = self.reasoner.decide(dataset_info, problem_type)
        tradeoff_explanation = self.reasoner.explain_tradeoffs(strategy)

        self._log_strategy(dataset_info, strategy, target_column)

        response = {
            "dataset_info": dataset_info,
            "problem_type": problem_type,
            "strategy": strategy,
            "strategy_tradeoffs": tradeoff_explanation,
        }
        if problem_type == "nlp":
            response["nlp_mode"] = "activated"

        if train:
            self._train_and_export(
                df, target_column, dataset_info, problem_type, strategy, response
            )
        return response

    def _log_strategy(self, dataset_info, strategy, target_column):
        """Persist the chosen strategy and its rationale to a JSON log file."""
        log_data = {
            "dataset_characteristics": dataset_info,
            "chosen_model_family": strategy.get("model_family"),
            "detected_risks": strategy.get("risks", []),
            "confidence_score": strategy.get("confidence", 0),
        }
        os.makedirs("experiments/logs", exist_ok=True)
        # NOTE(review): target_column is used verbatim in the filename;
        # assumes it contains no path separators — confirm upstream validation.
        # default=str stringifies non-JSON-serializable values (e.g. numpy
        # scalars) instead of raising.
        with open(f"experiments/logs/{target_column}_strategy.json", "w") as f:
            json.dump(log_data, f, indent=4, default=str)

    def _train_and_export(self, df, target_column, dataset_info,
                          problem_type, strategy, response):
        """Train a model, enrich *response* in place with metrics and
        explanations, save the model, and write deployment artifacts
        (FastAPI app + Dockerfile)."""
        model, metrics = self.model_factory.build_and_train(
            df, target_column, dataset_info, problem_type, strategy
        )
        response["metrics"] = metrics
        response["strategy_explanation"] = self.reasoner.explain_strategy(strategy)

        # Cap at 100 rows to keep the SHAP-style explanation cheap.
        X_sample = df.drop(columns=[target_column]).head(100)
        response["feature_importance"] = self.explainer.explain_tabular(model, X_sample)

        # Persist the trained model for the generated service to load.
        os.makedirs("exports/models", exist_ok=True)
        os.makedirs("exports/deployment", exist_ok=True)
        model_path = "exports/models/trained_model.pkl"
        self.model_io.save(model, model_path)

        # Emit deployment artifacts pointing at the saved model.
        fastapi_app = self.deployer.generate_fastapi_app(model_path)
        dockerfile = self.deployer.generate_dockerfile()
        with open("exports/deployment/main.py", "w") as f:
            f.write(fastapi_app)
        with open("exports/deployment/Dockerfile", "w") as f:
            f.write(dockerfile)