diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..bf8d473627e596b7d8106a03ccf5b1f479763d52 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.10-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 7860 + +CMD ["uvicorn", "backend.api.main:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/README.md b/README.md index 0865c248155a04070be01c2345ad47a5c9adbcdb..dacb45d754fd08da15dcde22d65cc519fc1832c7 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,62 @@ ---- -title: ModelSmith AI -emoji: 🐨 -colorFrom: pink -colorTo: indigo -sdk: docker -pinned: false -license: apache-2.0 -short_description: Intelligent system that designs, explains and deploys ML sol ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# ModelSmith AI + +An intelligent ML platform that automates tabular classification and regression tasks. It analyzes datasets, recommends optimal strategies, trains models, and provides explanations. 
+ +## Features + +- **Dataset Analysis**: Automatic detection of data types, missing values, and potential issues +- **Strategy Reasoning**: Intelligent model selection based on dataset characteristics +- **Automated Training**: End-to-end model training with preprocessing pipelines +- **Explainability**: SHAP-based feature importance explanations +- **FastAPI Backend**: RESTful API for seamless integration + +## Supported Scope + +- **Task**: Tabular classification and regression +- **Input**: CSV files with ≥1200 rows +- **Target**: Binary or multiclass classification, regression +- **Features**: At least 2 usable features after preprocessing + +## API Endpoints + +- `POST /analyze`: Analyze dataset and get strategy recommendations +- `POST /train`: Train a model on the dataset +- `POST /explain`: Get model explanations and feature importance +- `POST /predict`: Make predictions with trained model +- `GET /health`: Health check + +## Deployment + +This project is designed for deployment on Hugging Face Spaces using Docker. 
+ +### Files for Deployment + +- `Dockerfile` +- `requirements.txt` +- `backend/` (entire directory) + +### Running Locally + +```bash +pip install -r requirements.txt +uvicorn backend.api.main:app --host 0.0.0.0 --port 7860 +``` + +## Limitations + +- NLP functionality is disabled +- Requires datasets with ≥1200 rows +- CPU-only, no GPU support +- Stateless API (models saved temporarily) + +## Architecture + +- **Orchestrator**: Main workflow coordinator +- **Dataset Analyzer**: Data profiling and preprocessing +- **Strategy Reasoner**: Model selection logic +- **Model Factory**: Training and evaluation +- **Explainability Engine**: SHAP explanations + +## License + +MIT License diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/__pycache__/__init__.cpython-310.pyc b/backend/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56a647137f638ed7959089f900d5f66ed3f11b26 Binary files /dev/null and b/backend/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/__pycache__/__init__.cpython-313.pyc b/backend/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0d3c2f907882b9f974e2134f73aab26f8ede7a5 Binary files /dev/null and b/backend/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/api/__init__.py b/backend/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/api/__pycache__/__init__.cpython-310.pyc b/backend/api/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d61d7060e8ebbcf4a3e7446c3f8caca4b85a50dc Binary files /dev/null and b/backend/api/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/api/__pycache__/__init__.cpython-313.pyc 
# backend/api/main.py — HTTP surface for the ModelSmith orchestrator.
from fastapi import FastAPI, UploadFile, File, HTTPException
import pandas as pd
from backend.core.orchestrator import Orchestrator

app = FastAPI()
orchestrator = Orchestrator()


def _read_dataframe(upload: UploadFile) -> pd.DataFrame:
    """Parse the uploaded CSV payload into a pandas DataFrame."""
    return pd.read_csv(upload.file)


@app.post("/analyze")
async def analyze_dataset(file: UploadFile = File(...), target_column: str = "target"):
    """Profile the uploaded dataset and return the recommended strategy.

    Runs the orchestrator without training and reshapes its output into
    the frontend-facing response schema.
    """
    try:
        frame = _read_dataframe(file)
        outcome = orchestrator.run(frame, target_column)

        profile = outcome.get("dataset_info", {})
        plan = outcome.get("strategy", {})

        return {
            "columns": list(frame.columns),
            "dataTypes": profile.get("data_types", {}),
            "risks": profile.get("risks", []),
            "problemType": outcome.get("problem_type"),
            "confidence": plan.get("confidence", 0),
            "strategy": plan,
        }
    except Exception as exc:
        # All failures surface as 400s with the raw message as detail.
        raise HTTPException(status_code=400, detail=str(exc))


@app.post("/train")
async def train_model(file: UploadFile = File(...), target_column: str = "target"):
    """Train a model on the uploaded dataset and return metrics/metadata."""
    try:
        frame = _read_dataframe(file)
        outcome = orchestrator.run(frame, target_column, train=True)

        return {
            "strategy": outcome.get("strategy", {}),
            "metrics": outcome.get("metrics", {}),
            "model_path": outcome.get("model_path", "/path/to/model.pkl"),
            "training_time": outcome.get("training_time", 0),
            "model_id": outcome.get("model_id", "trained_model_123"),
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))


@app.post("/explain")
async def explain_model(file: UploadFile = File(...), target_column: str = "target"):
    """Train on the uploaded dataset and return explanation artifacts."""
    try:
        frame = _read_dataframe(file)
        outcome = orchestrator.run(frame, target_column, train=True)

        return {
            "strategy_explanation": outcome.get("strategy_explanation"),
            "metrics": outcome.get("metrics", {}),
            "feature_importance": outcome.get("feature_importance", []),
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))


@app.post("/predict")
async def predict(data: dict):
    """Score a single JSON record with the last persisted model."""
    try:
        # Model is reloaded from disk on every request (stateless API).
        model = orchestrator.model_io.load("exports/models/trained_model.pkl")
        frame = pd.DataFrame([data])
        preds = model.predict(frame)
        return {"prediction": preds.tolist()}
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))


@app.get("/health")
def health():
    """Liveness probe for the Space/container."""
    return {"status": "ok"}
0000000000000000000000000000000000000000..b2daa0a97297759e68a0eaa4b4f4746fe2f0e3d9 Binary files /dev/null and b/backend/core/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/core/__pycache__/__init__.cpython-313.pyc b/backend/core/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27593cc25df5e9a4f4d2563c7987370690b6947f Binary files /dev/null and b/backend/core/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/core/__pycache__/dataset_analyzer.cpython-310.pyc b/backend/core/__pycache__/dataset_analyzer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e9fd554dd57cfbf881cd4522ca1374233e92a295 Binary files /dev/null and b/backend/core/__pycache__/dataset_analyzer.cpython-310.pyc differ diff --git a/backend/core/__pycache__/dataset_analyzer.cpython-313.pyc b/backend/core/__pycache__/dataset_analyzer.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30ab6f434666ecf7ab70cec8fca1afcac6c18f68 Binary files /dev/null and b/backend/core/__pycache__/dataset_analyzer.cpython-313.pyc differ diff --git a/backend/core/__pycache__/deployment_generator.cpython-310.pyc b/backend/core/__pycache__/deployment_generator.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d2112e0beaf786796915ecf31af1847a1c9c5ee Binary files /dev/null and b/backend/core/__pycache__/deployment_generator.cpython-310.pyc differ diff --git a/backend/core/__pycache__/deployment_generator.cpython-313.pyc b/backend/core/__pycache__/deployment_generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a139953edc675d4ceb519873c90bd4c193ba5394 Binary files /dev/null and b/backend/core/__pycache__/deployment_generator.cpython-313.pyc differ diff --git a/backend/core/__pycache__/explainability.cpython-310.pyc b/backend/core/__pycache__/explainability.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..7a7f108fd98d316c8cf09a1a1995cbaa8caedd8c Binary files /dev/null and b/backend/core/__pycache__/explainability.cpython-310.pyc differ diff --git a/backend/core/__pycache__/explainability.cpython-313.pyc b/backend/core/__pycache__/explainability.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d73352e3d1b09d22aa3ece379e6296cfe9de9a8 Binary files /dev/null and b/backend/core/__pycache__/explainability.cpython-313.pyc differ diff --git a/backend/core/__pycache__/model_factory.cpython-310.pyc b/backend/core/__pycache__/model_factory.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..673dada15c46c8f5c1f2d9cb3db77f66162196a6 Binary files /dev/null and b/backend/core/__pycache__/model_factory.cpython-310.pyc differ diff --git a/backend/core/__pycache__/model_factory.cpython-313.pyc b/backend/core/__pycache__/model_factory.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fe2c65959a59ade827aaf018437116fbeea6878 Binary files /dev/null and b/backend/core/__pycache__/model_factory.cpython-313.pyc differ diff --git a/backend/core/__pycache__/monitoring.cpython-310.pyc b/backend/core/__pycache__/monitoring.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..863936a6551e3458c51e8332a194b1e4dba81ac0 Binary files /dev/null and b/backend/core/__pycache__/monitoring.cpython-310.pyc differ diff --git a/backend/core/__pycache__/monitoring.cpython-313.pyc b/backend/core/__pycache__/monitoring.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..422a8e1e499896091167a001bc63cfa02a57f4f0 Binary files /dev/null and b/backend/core/__pycache__/monitoring.cpython-313.pyc differ diff --git a/backend/core/__pycache__/orchestrator.cpython-310.pyc b/backend/core/__pycache__/orchestrator.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f0eea49856241b776f15753c5df344b9ea220cb4 Binary files /dev/null and b/backend/core/__pycache__/orchestrator.cpython-310.pyc differ diff --git a/backend/core/__pycache__/orchestrator.cpython-313.pyc b/backend/core/__pycache__/orchestrator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e892a6d25878052d281e4e6f904a66b8ddc66c88 Binary files /dev/null and b/backend/core/__pycache__/orchestrator.cpython-313.pyc differ diff --git a/backend/core/__pycache__/problem_inference.cpython-310.pyc b/backend/core/__pycache__/problem_inference.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e15075dc9c9502e3a002a09be17e694c62ec920a Binary files /dev/null and b/backend/core/__pycache__/problem_inference.cpython-310.pyc differ diff --git a/backend/core/__pycache__/problem_inference.cpython-313.pyc b/backend/core/__pycache__/problem_inference.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59e3c787f484c9acc4469c82de6122599a0a2ef4 Binary files /dev/null and b/backend/core/__pycache__/problem_inference.cpython-313.pyc differ diff --git a/backend/core/__pycache__/strategy_reasoner.cpython-310.pyc b/backend/core/__pycache__/strategy_reasoner.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33e91aec1ae03b3d4318ec91c35c4b448a186fea Binary files /dev/null and b/backend/core/__pycache__/strategy_reasoner.cpython-310.pyc differ diff --git a/backend/core/__pycache__/strategy_reasoner.cpython-313.pyc b/backend/core/__pycache__/strategy_reasoner.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff593d66f7d41f0e8331b7629d36d974d311120b Binary files /dev/null and b/backend/core/__pycache__/strategy_reasoner.cpython-313.pyc differ diff --git a/backend/core/dataset_analyzer.py b/backend/core/dataset_analyzer.py new file mode 100644 index 
# backend/core/dataset_analyzer.py — dataset profiling and cleanup.
import pandas as pd
import numpy as np
from backend.utils.logger import logger


def convert_numpy_types(obj):
    """Recursively convert numpy scalars/arrays to native Python types.

    Needed so analysis results can be JSON-serialized by FastAPI.
    np.integer / np.floating already cover the concrete int64/float32
    subclasses, so listing them separately is redundant.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.bool_):
        return bool(obj)
    elif isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    else:
        return obj


class DatasetAnalyzer:
    """Cleans a raw DataFrame and produces a JSON-safe profile dict."""

    def analyze(self, df: pd.DataFrame, target_column: str = None) -> dict:
        """Profile *df* and return dataset characteristics.

        Cleaning (all-null columns, duplicate rows, constant columns) is
        applied to a local copy only; the caller's frame is not mutated.

        Raises:
            ValueError: if fewer than 2 usable features remain.
        """
        logger.info("Starting dataset analysis...")

        # Drop columns that carry no information at all.
        null_columns = df.columns[df.isnull().all()]
        if len(null_columns) > 0:
            logger.warning(f"Removing all-null columns: {list(null_columns)}")
            df = df.drop(columns=null_columns)

        # Drop exact duplicate rows.
        duplicate_rows = df.duplicated().sum()
        if duplicate_rows > 0:
            logger.warning(f"Removing {duplicate_rows} duplicate rows")
            df = df.drop_duplicates()

        # Drop single-valued (constant) columns.
        constant_columns = [col for col in df.columns if df[col].nunique() == 1]
        if len(constant_columns) > 0:
            logger.warning(f"Removing constant columns: {constant_columns}")
            df = df.drop(columns=constant_columns)

        # Require at least 2 usable (non-target) features after cleanup.
        usable_features = [col for col in df.columns if col != target_column]
        if len(usable_features) < 2:
            raise ValueError(f"Insufficient features: only {len(usable_features)} usable features after preprocessing, need at least 2")

        # Compute the overall missing-value ratio once; it feeds both the
        # reported metric and the sparsity flag.
        missing_ratio = df.isnull().mean().mean()

        info = {}
        info["num_rows"] = df.shape[0]
        info["num_columns"] = df.shape[1]
        info["missing_ratio"] = missing_ratio
        info["row_count"] = df.shape[0]  # kept alongside num_rows for API compatibility
        info["high_dimensional"] = bool(df.shape[1] > 50)
        info["small_data"] = bool(df.shape[0] < 1200)
        info["sparse_data"] = bool(missing_ratio > 0.4)

        # Numeric vs. categorical split (target excluded from features).
        # select_dtypes(include/exclude "number") partitions every column,
        # so the two lists together always cover all non-target features —
        # which is why no second feature-count check is needed here.
        all_numeric_cols = df.select_dtypes(include="number").columns.tolist()
        all_categorical_cols = df.select_dtypes(exclude="number").columns.tolist()
        info["numeric_cols"] = [col for col in all_numeric_cols if col != target_column]
        info["categorical_cols"] = [col for col in all_categorical_cols if col != target_column]

        # Per-column distinct-value counts.
        cardinality = {col: df[col].nunique() for col in df.columns}
        info["cardinality"] = cardinality

        # Target-specific checks: numeric targets with many distinct values
        # are treated as regression, everything else as classification.
        if target_column and target_column in df.columns:
            target = df[target_column]
            unique_vals = target.nunique()
            if target.dtype in ['int64', 'float64'] and unique_vals > 10:
                info["target_type"] = "regression"
                info["class_distribution"] = None
                info["imbalance"] = None
            else:
                info["target_type"] = "classification"
                value_counts = target.value_counts(normalize=True)
                info["class_distribution"] = value_counts.to_dict()
                # Majority class above 80% is flagged as imbalance.
                info["imbalance"] = bool(value_counts.max() > 0.8)
        else:
            info["target_type"] = None
            info["class_distribution"] = None
            info["imbalance"] = None

        # NLP detection heuristic: categorical columns whose mean string
        # length exceeds 30 chars are treated as free text. (On an empty
        # frame the mean is NaN, which compares False — no text columns.)
        text_columns = []
        for col in info["categorical_cols"]:
            if df[col].astype(str).str.len().mean() > 30:
                text_columns.append(col)
        info["text_columns"] = text_columns
        info["possible_nlp"] = len(text_columns) > 0

        return convert_numpy_types(info)
# backend/core/deployment_generator.py + backend/core/explainability.py
import numpy as np


class DeploymentGenerator:
    """Emits deployment artifacts (FastAPI service + Dockerfile) for a saved model."""

    def generate_fastapi_app(self, model_path):
        """Return source code for a minimal FastAPI prediction service.

        Args:
            model_path: path to the joblib-serialized model, embedded
                verbatim into the generated source.
        """
        template = f'''
from fastapi import FastAPI
import joblib
import pandas as pd

app = FastAPI()
model = joblib.load("{model_path}")

@app.post("/predict")
async def predict(data: dict):
    df = pd.DataFrame([data])
    preds = model.predict(df)
    return {{"prediction": preds.tolist()}}
'''
        return template

    def generate_dockerfile(self):
        """Return a Dockerfile that serves the generated FastAPI app."""
        return '''
FROM python:3.9
WORKDIR /app
COPY . /app
RUN pip install fastapi uvicorn pandas scikit-learn joblib
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
'''


class ExplainabilityEngine:
    """SHAP-based global feature-importance computation for tabular pipelines."""

    def explain_tabular(self, model_pipeline, X_sample):
        """Return mean-|SHAP| global importance per transformed feature.

        Args:
            model_pipeline: sklearn Pipeline with named steps
                "preprocessor" and "model".
            X_sample: DataFrame of raw rows to explain.

        Raises:
            ValueError: on empty input, empty transformed output, or a
                failed SHAP computation.
        """
        if X_sample.empty:
            raise ValueError("Sample data is empty, cannot compute explanations")

        # Deferred import: shap is a heavy optional dependency; importing
        # it lazily lets deployment-artifact generation (and the rest of
        # this module) work even when shap is not installed.
        import shap

        # Extract trained model and preprocessor
        preprocessor = model_pipeline.named_steps["preprocessor"]
        model = model_pipeline.named_steps["model"]

        X_transformed = preprocessor.transform(X_sample)

        if X_transformed.shape[0] == 0:
            raise ValueError("Transformed sample data is empty after preprocessing")

        explainer = shap.Explainer(model, X_transformed)
        shap_values = explainer(X_transformed, check_additivity=False)

        if shap_values is None or shap_values.values is None:
            raise ValueError("SHAP computation failed")

        # Global importance = mean absolute SHAP value per feature.
        global_importance = np.abs(shap_values.values).mean(axis=0).tolist()

        if len(global_importance) == 0:
            raise ValueError("No feature importance computed")

        return global_importance
# --- backend/core/model_factory.py ---
# Builds, trains, evaluates and persists tabular models.
import os
from sklearn.model_selection import train_test_split
from ..tabular.pipelines import build_preprocessing_pipeline
from ..tabular.trainers import train_model
from ..tabular.evaluators import evaluate_model
from ..nlp.trainers import TextClassifier
from ..nlp.evaluators import evaluate_nlp_model
from ..utils.model_io import ModelIO

class ModelFactory:
    """Coordinates split / preprocess / train / evaluate / save for one dataset."""

    def __init__(self):
        # Handles model (de)serialization to disk.
        self.model_io = ModelIO()

    def build_and_train(self, df, target_column, dataset_info, problem_type, strategy):
        """Train a model on *df* and return ``(model, metrics)``.

        Raises:
            ValueError: if the dataset is under 1200 rows, or if
                problem_type is "nlp" (disabled in this version).
        """
        if dataset_info["small_data"]:
            raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")

        if problem_type == "nlp":
            # NLP path is intentionally disabled; the imports above remain
            # for when it is re-enabled.
            raise ValueError("NLP functionality is not supported in this version.")
        else:
            # Tabular path: 80/20 split with a fixed seed for reproducibility.
            X = df.drop(columns=[target_column])
            y = df[target_column]

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            pipeline = build_preprocessing_pipeline(dataset_info["numeric_cols"], dataset_info["categorical_cols"])
            # NOTE(review): the pipeline is fitted here and then handed to
            # train_model, which presumably fits the final estimator —
            # confirm train_model does not refit the preprocessor.
            pipeline.fit(X_train, y_train)

            model = train_model(pipeline, X_train, y_train, problem_type, strategy)
            metrics = evaluate_model(model, X_test, y_test, problem_type)

            # Persist the trained model at a fixed path (stateless API).
            self.model_io.save(model, "exports/models/trained_model.pkl")

            return model, metrics


# --- backend/core/monitoring.py ---
import numpy as np

class MonitoringEngine:
    """Simple per-feature drift detection on summary statistics."""

    def detect_drift(self, train_stats, new_data_stats, threshold=0.2):
        """Flag features whose stat moved more than *threshold* from training.

        Features missing from *new_data_stats* fall back to their training
        value, so they are never flagged.
        """
        drift_flags = {}
        for feature in train_stats:
            if abs(train_stats[feature] - new_data_stats.get(feature, train_stats[feature])) > threshold:
                drift_flags[feature] = True
        return drift_flags


# --- backend/core/orchestrator.py ---
# End-to-end workflow: validate -> analyze -> infer -> decide -> (train).
from .dataset_analyzer import DatasetAnalyzer
from .problem_inference import ProblemInference
from .strategy_reasoner import StrategyReasoner
from .model_factory import ModelFactory
from .explainability import ExplainabilityEngine
from .deployment_generator import DeploymentGenerator
from .monitoring import MonitoringEngine
from ..utils.logger import logger
from ..utils.validators import DataValidator
from ..utils.model_io import ModelIO
import json
import os

class Orchestrator:
    """Wires together every pipeline stage and exposes a single run() entry point."""

    def __init__(self):
        self.validator = DataValidator()
        self.analyzer = DatasetAnalyzer()
        self.inferencer = ProblemInference()
        self.reasoner = StrategyReasoner()
        self.model_factory = ModelFactory()
        self.explainer = ExplainabilityEngine()
        self.deployer = DeploymentGenerator()
        self.monitor = MonitoringEngine()
        self.model_io = ModelIO()

    def run(self, df, target_column, train=False):
        """Run the full workflow on *df* and return a response dict.

        With ``train=False`` only analysis/strategy is performed; with
        ``train=True`` a model is trained, explained, saved, and
        deployment artifacts are written under ``exports/``.

        Side effects: writes a strategy log under ``experiments/logs``
        on every call, and model/deployment files when training.
        """
        self.validator.validate_dataframe(df, target_column)
        logger.info("Validation passed")
        dataset_info = self.analyzer.analyze(df, target_column)
        problem_type = self.inferencer.infer(dataset_info, target_column)
        strategy = self.reasoner.decide(dataset_info, problem_type)

        tradeoff_explanation = self.reasoner.explain_tradeoffs(strategy)

        # Log strategy behavior — one JSON file per target column.
        log_data = {
            "dataset_characteristics": dataset_info,
            "chosen_model_family": strategy.get("model_family"),
            "detected_risks": strategy.get("risks", []),
            "confidence_score": strategy.get("confidence", 0)
        }
        os.makedirs("experiments/logs", exist_ok=True)
        with open(f"experiments/logs/{target_column}_strategy.json", "w") as f:
            json.dump(log_data, f, indent=4, default=str)

        response = {
            "dataset_info": dataset_info,
            "problem_type": problem_type,
            "strategy": strategy,
            "strategy_tradeoffs": tradeoff_explanation
        }

        if problem_type == "nlp":
            response["nlp_mode"] = "activated"

        if train:
            model, metrics = self.model_factory.build_and_train(
                df, target_column, dataset_info, problem_type, strategy
            )
            response["metrics"] = metrics

            explanation = self.reasoner.explain_strategy(strategy)
            response["strategy_explanation"] = explanation

            # SHAP is expensive: explain at most the first 100 rows.
            X_sample = df.drop(columns=[target_column]).head(100)  # Sample for SHAP
            feature_importance = self.explainer.explain_tabular(model, X_sample)
            response["feature_importance"] = feature_importance

            # Save the trained model.
            # NOTE(review): ModelFactory.build_and_train already saved the
            # model to the same path — this second save looks redundant;
            # confirm before removing either.
            os.makedirs("exports/models", exist_ok=True)
            os.makedirs("exports/deployment", exist_ok=True)
            model_path = "exports/models/trained_model.pkl"
            self.model_io.save(model, model_path)

            # Generate deployment artifacts (FastAPI app + Dockerfile).
            fastapi_app = self.deployer.generate_fastapi_app(model_path)
            dockerfile = self.deployer.generate_dockerfile()

            with open("exports/deployment/main.py", "w") as f:
                f.write(fastapi_app)
            with open("exports/deployment/Dockerfile", "w") as f:
                f.write(dockerfile)

        return response


# --- backend/core/problem_inference.py ---
class ProblemInference:
    """Maps dataset characteristics to a problem type label."""

    def infer(self, dataset_info, target_column):
        """Return "nlp", "classification", "regression", or "unknown".

        Text columns take precedence over the target-based decision; with
        a target, a populated class_distribution means classification,
        otherwise regression (matching DatasetAnalyzer's output).
        """
        if dataset_info.get("possible_nlp"):
            return "nlp"

        if target_column:
            return "classification" if dataset_info.get("class_distribution") else "regression"

        return "unknown"
# --- backend/core/strategy_reasoner.py ---
class StrategyReasoner:
    """Chooses a model family from dataset risks and explains the choice."""

    def decide(self, dataset_info, problem_type):
        """Return a strategy dict: model_family, reason, risks, confidence.

        Confidence starts at 1.0 and is reduced by a penalty per detected
        risk, floored at 0.1 (penalty is capped at 0.9).

        Raises:
            ValueError: for an unrecognized problem_type. (Previously an
                unknown type fell through every branch and crashed with a
                NameError on ``model_family``.)
        """
        strategy = {}
        risks = []
        score = 0.0

        # Accumulate risk flags and their confidence penalties.
        if dataset_info.get("small_data"):
            risks.append("small_dataset")
            score += 0.1

        if dataset_info.get("high_dimensional"):
            risks.append("high_dimensionality")
            score += 0.1

        if dataset_info.get("imbalance"):
            risks.append("class_imbalance")
            score += 0.2

        if dataset_info.get("sparse_data"):
            risks.append("high_missingness")
            score += 0.2

        if problem_type == "classification":
            # All classification branches currently pick tree ensembles;
            # the branches are kept so each risk gets its own rationale.
            if "small_dataset" in risks:
                model_family = "tree_ensemble"
                reason = "Small datasets benefit from simpler models"
            elif "high_dimensionality" in risks:
                model_family = "tree_ensemble"
                reason = "Tree ensembles handle high-dimensional data better"
            else:
                model_family = "tree_ensemble"
                reason = "Tree ensembles handle complexity well"

        elif problem_type == "regression":
            if "high_dimensionality" in risks:
                model_family = "tree_ensemble"
                reason = "Tree ensembles handle high-dimensional data better"
            else:
                model_family = "linear_or_tree"
                reason = "Balances interpretability and accuracy"

        elif problem_type == "nlp":
            model_family = "transformer"
            reason = "Transformers best capture language semantics"

        else:
            # Explicit failure instead of the NameError the original code
            # produced when no branch matched.
            raise ValueError(f"Unsupported problem type: {problem_type}")

        strategy["model_family"] = model_family
        strategy["reason"] = reason
        strategy["risks"] = risks
        strategy["confidence"] = round(1 - min(score, 0.9), 2)

        return strategy

    def explain_strategy(self, strategy):
        """Return a one/two-sentence explanation of the chosen strategy."""
        explanation = f"Selected {strategy['model_family']} models because: {strategy['reason']}."
        if strategy.get("risks"):
            explanation += f" Identified risks: {', '.join(strategy['risks'])}."
        return explanation

    def explain_tradeoffs(self, strategy):
        """Return an explanation including risks and the confidence score."""
        explanation = f"Chose {strategy['model_family']} due to: {strategy['reason']}."
        if strategy.get("risks"):
            explanation += f" Risks detected: {', '.join(strategy['risks'])}."
        explanation += f" Confidence score: {strategy.get('confidence')}."
        return explanation


# --- backend/experiments/benchmark_runner.py ---
import time

class BenchmarkRunner:
    """Runs the orchestrator over a set of datasets, timing each run."""

    def run(self, orchestrator, datasets):
        """Return one result dict per dataset (strategy, metrics, time, error).

        Failures are captured per dataset instead of aborting the suite.
        """
        results = []
        for name, (df, target) in datasets.items():
            start = time.time()
            try:
                output = orchestrator.run(df, target, train=True)
                end = time.time()

                results.append({
                    "dataset": name,
                    "strategy": output.get("strategy"),
                    "metrics": output.get("metrics"),
                    "time": round(end - start, 2),
                    "error": None
                })
            except Exception as e:
                end = time.time()
                results.append({
                    "dataset": name,
                    "strategy": None,
                    "metrics": None,
                    "time": round(end - start, 2),
                    "error": str(e)
                })
        return results


# --- backend/experiments/run_benchmarks.py ---
import os

def main():
    """Run the benchmark suite over the bundled real-world datasets.

    Wrapped in a function and guarded below: the original script read
    CSVs and wrote result files at import time, which broke any module
    that merely imported it.
    """
    import sys
    import json
    import pandas as pd

    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
    from backend.core.orchestrator import Orchestrator

    base = os.path.join(os.path.dirname(__file__), '..', '..', 'datasets', 'real_world')
    datasets = {
        "titanic": (pd.read_csv(os.path.join(base, 'titanic.csv')), "Survived"),
        "credit_default": (pd.read_csv(os.path.join(base, 'credit_default.csv')), "default.payment.next.month"),
        "house_prices": (pd.read_csv(os.path.join(base, 'house_prices.csv')), "Price"),
        "telecom_churn": (pd.read_csv(os.path.join(base, 'telecom_churn.csv')), "Churn"),
        "news_classification": (pd.read_csv(os.path.join(base, 'news_classification.csv')), "label"),
    }

    orchestrator = Orchestrator()
    runner = BenchmarkRunner()
    results = runner.run(orchestrator, datasets)

    print("Benchmark Results:")
    for result in results:
        print(result)

    # Save results to file
    with open("experiments/benchmark_results.json", "w") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()
# --- backend/nlp/embeddings.py ---
# Thin wrapper around sentence-transformers for text embeddings.
from sentence_transformers import SentenceTransformer

class EmbeddingEngine:
    """Encodes texts into dense vectors with a sentence-transformers model."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        # Downloads the model on first use if not cached locally.
        self.model = SentenceTransformer(model_name)

    def encode(self, texts):
        """Return embeddings for *texts* (list of strings)."""
        return self.model.encode(texts)


# --- backend/nlp/evaluators.py ---
import torch
from transformers import Trainer
from ..nlp.trainers import TextDataset

def evaluate_nlp_model(model, tokenizer, texts, labels):
    """Evaluate a sequence-classification model; return {"accuracy": float}.

    Tokenizes with truncation/padding to 512 tokens, runs prediction
    through a bare Trainer, and computes argmax accuracy against *labels*.
    """
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
    dataset = TextDataset(encodings, labels)

    trainer = Trainer(model=model)
    predictions = trainer.predict(dataset)
    preds = torch.argmax(torch.tensor(predictions.predictions), axis=1)

    accuracy = (preds == torch.tensor(labels)).float().mean().item()
    return {"accuracy": accuracy}


# --- backend/nlp/preprocess.py ---
import re

class NLPPreprocessor:
    """Basic text normalization prior to tokenization."""

    def clean(self, text: str):
        """Lowercase, strip non-alphanumerics, and collapse whitespace."""
        text = text.lower()
        text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
        text = re.sub(r"\s+", " ", text)
        return text.strip()


# --- backend/nlp/trainers.py ---
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from ..nlp.preprocess import NLPPreprocessor

class TextDataset(Dataset):
    """torch Dataset pairing tokenizer encodings with integer labels."""

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # One tensor slice per encoding key (input_ids, attention_mask, ...).
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

class TextClassifier:
    """Fine-tunes a pretrained transformer for sequence classification."""

    def __init__(self, model_name="distilbert-base-uncased", num_labels=2):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        self.preprocessor = NLPPreprocessor()

    def train(self, texts, labels):
        """Fine-tune on (texts, labels) and return the trained model.

        Runs 3 epochs with no eval loop and no checkpointing.
        """
        # Clean texts
        texts = [self.preprocessor.clean(text) for text in texts]

        # Tokenize
        encodings = self.tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")

        # Create dataset
        dataset = TextDataset(encodings, labels)

        # Training arguments
        # NOTE(review): `evaluation_strategy` was renamed `eval_strategy`
        # in newer transformers releases — confirm against the pinned
        # transformers version in requirements.txt.
        training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=3,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=64,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            save_steps=500,
            evaluation_strategy="no",
            save_strategy="no",
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=dataset,
        )

        trainer.train()
        return self.model
0000000000000000000000000000000000000000..ba7f083e1520674cab223b608e7aa40d5e8727e8 Binary files /dev/null and b/backend/tabular/__pycache__/evaluators.cpython-313.pyc differ diff --git a/backend/tabular/__pycache__/pipelines.cpython-310.pyc b/backend/tabular/__pycache__/pipelines.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e2d11267b237ddd21e6d7631d17bd4bcb18386a Binary files /dev/null and b/backend/tabular/__pycache__/pipelines.cpython-310.pyc differ diff --git a/backend/tabular/__pycache__/pipelines.cpython-313.pyc b/backend/tabular/__pycache__/pipelines.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7afa666e830e35a4f939826900348f10e7c49c86 Binary files /dev/null and b/backend/tabular/__pycache__/pipelines.cpython-313.pyc differ diff --git a/backend/tabular/__pycache__/trainers.cpython-310.pyc b/backend/tabular/__pycache__/trainers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d97f70bf42b571ce732944dab6883a1c7157d18 Binary files /dev/null and b/backend/tabular/__pycache__/trainers.cpython-310.pyc differ diff --git a/backend/tabular/__pycache__/trainers.cpython-313.pyc b/backend/tabular/__pycache__/trainers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..978ddb3f0d4061a5b75368cddecb61d74f0ef36c Binary files /dev/null and b/backend/tabular/__pycache__/trainers.cpython-313.pyc differ diff --git a/backend/tabular/evaluators.py b/backend/tabular/evaluators.py new file mode 100644 index 0000000000000000000000000000000000000000..d9c22b3a2d4f146811a8bfe8998a26d59a848241 --- /dev/null +++ b/backend/tabular/evaluators.py @@ -0,0 +1,27 @@ +from sklearn.metrics import accuracy_score, f1_score, mean_squared_error +import numpy as np + + +def evaluate_model(model, X_test, y_test, problem_type): + preds = model.predict(X_test) + + if problem_type == "classification": + acc = accuracy_score(y_test, preds) + f1 = 
f1_score(y_test, preds, average="weighted") + if np.isnan(acc) or acc == 0 or np.isnan(f1) or f1 == 0: + raise ValueError("Invalid metrics computed for classification") + return { + "accuracy": acc, + "f1": f1 + } + else: + rmse = np.sqrt(mean_squared_error(y_test, preds)) + if np.isnan(rmse) or np.isinf(rmse): + raise ValueError("Invalid metrics computed for regression") + return { + "rmse": rmse + } + + + + diff --git a/backend/tabular/pipelines.py b/backend/tabular/pipelines.py new file mode 100644 index 0000000000000000000000000000000000000000..bc1f4d6d01b8a72e2895ba5547a25f35a8db2c05 --- /dev/null +++ b/backend/tabular/pipelines.py @@ -0,0 +1,29 @@ +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler, OneHotEncoder +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer + + +def build_preprocessing_pipeline(numeric_features, categorical_features): + numeric_transformer = Pipeline(steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()) + ]) + + categorical_transformer = Pipeline(steps=[ + ("imputer", SimpleImputer(strategy="most_frequent")), + ("encoder", OneHotEncoder(handle_unknown="ignore")) + ]) + + preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features) + ] + ) + + return preprocessor + + + + diff --git a/backend/tabular/trainers.py b/backend/tabular/trainers.py new file mode 100644 index 0000000000000000000000000000000000000000..e75a5f422ff28a9eae8875563c1347f016a7fcce --- /dev/null +++ b/backend/tabular/trainers.py @@ -0,0 +1,34 @@ +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.pipeline import Pipeline + + +def train_model(preprocessor, X, y, problem_type, strategy): + if strategy["model_family"] == "tree_ensemble": + if problem_type == 
"classification": + model = RandomForestClassifier(n_estimators=100) + else: + model = RandomForestRegressor(n_estimators=100) + elif strategy["model_family"] == "linear_or_tree": + if problem_type == "classification": + model = LogisticRegression() + else: + model = LinearRegression() + else: + # Fallback to RandomForest if strategy is not recognized + if problem_type == "classification": + model = RandomForestClassifier(n_estimators=100) + else: + model = RandomForestRegressor(n_estimators=100) + + clf = Pipeline(steps=[ + ("preprocessor", preprocessor), + ("model", model) + ]) + + clf.fit(X, y) + return clf + + + + diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/utils/__pycache__/__init__.cpython-310.pyc b/backend/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4c7f8734763f7b1e1d9ce4de11c615e5591a3f20 Binary files /dev/null and b/backend/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/__init__.cpython-313.pyc b/backend/utils/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6caf59f34c7c43f60b705c452aafc8f01b0a33ea Binary files /dev/null and b/backend/utils/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/utils/__pycache__/logger.cpython-310.pyc b/backend/utils/__pycache__/logger.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cacbb67d7c318906d88a971e022f35d87f2a5970 Binary files /dev/null and b/backend/utils/__pycache__/logger.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/logger.cpython-313.pyc b/backend/utils/__pycache__/logger.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9c2a7862043b8cb278a67006b6d3ee5ff8c3762 Binary files /dev/null and 
b/backend/utils/__pycache__/logger.cpython-313.pyc differ diff --git a/backend/utils/__pycache__/model_io.cpython-310.pyc b/backend/utils/__pycache__/model_io.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0136a31ecc63746f511c793e56be08264e35fa85 Binary files /dev/null and b/backend/utils/__pycache__/model_io.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/model_io.cpython-313.pyc b/backend/utils/__pycache__/model_io.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..718985b934c6b1acb8f29483587358c941e910c0 Binary files /dev/null and b/backend/utils/__pycache__/model_io.cpython-313.pyc differ diff --git a/backend/utils/__pycache__/validators.cpython-310.pyc b/backend/utils/__pycache__/validators.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c9c69ff5c4e6e053f2dd5c42b6349df8789a253 Binary files /dev/null and b/backend/utils/__pycache__/validators.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/validators.cpython-313.pyc b/backend/utils/__pycache__/validators.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5ba11574bdf1bc14aee64407371394b1d40032a Binary files /dev/null and b/backend/utils/__pycache__/validators.cpython-313.pyc differ diff --git a/backend/utils/logger.py b/backend/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..013d3ddbec12984e75d07e87cfe24318f6f4af4d --- /dev/null +++ b/backend/utils/logger.py @@ -0,0 +1,12 @@ +import logging + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s" +) + +logger = logging.getLogger("modelsmith") + + + + diff --git a/backend/utils/model_io.py b/backend/utils/model_io.py new file mode 100644 index 0000000000000000000000000000000000000000..ef0062ca107403329195a18dc4455b7c1728817a --- /dev/null +++ b/backend/utils/model_io.py @@ -0,0 +1,12 @@ +import joblib + +class 
ModelIO: + def save(self, model, path): + joblib.dump(model, path) + + def load(self, path): + return joblib.load(path) + + + + diff --git a/backend/utils/validators.py b/backend/utils/validators.py new file mode 100644 index 0000000000000000000000000000000000000000..0a89dbdd461651fce276e627c7087097569f176c --- /dev/null +++ b/backend/utils/validators.py @@ -0,0 +1,18 @@ +import pandas as pd + +class DataValidator: + def validate_dataframe(self, df: pd.DataFrame, target_column: str): + if df.empty: + raise ValueError("Uploaded dataset is empty.") + + if target_column not in df.columns: + raise ValueError(f"Target column '{target_column}' not found in dataset.") + + if df.shape[1] < 2: + raise ValueError("Dataset must have at least 2 columns.") + + return True + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cb52599f925bb49a612c4c964d02d689779bca7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +fastapi +uvicorn +pandas +numpy +scikit-learn +shap +python-multipart