diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..bf8d473627e596b7d8106a03ccf5b1f479763d52 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.10-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 7860 + +CMD ["uvicorn", "backend.api.main:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/README.md b/README.md index 0865c248155a04070be01c2345ad47a5c9adbcdb..dacb45d754fd08da15dcde22d65cc519fc1832c7 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,62 @@ ---- -title: ModelSmith AI -emoji: 🐨 -colorFrom: pink -colorTo: indigo -sdk: docker -pinned: false -license: apache-2.0 -short_description: Intelligent system that designs, explains and deploys ML sol ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# ModelSmith AI + +An intelligent ML platform that automates tabular classification and regression tasks. It analyzes datasets, recommends optimal strategies, trains models, and provides explanations. 
+ +## Features + +- **Dataset Analysis**: Automatic detection of data types, missing values, and potential issues +- **Strategy Reasoning**: Intelligent model selection based on dataset characteristics +- **Automated Training**: End-to-end model training with preprocessing pipelines +- **Explainability**: SHAP-based feature importance explanations +- **FastAPI Backend**: RESTful API for seamless integration + +## Supported Scope + +- **Task**: Tabular classification and regression +- **Input**: CSV files with ≥1200 rows +- **Target**: Binary or multiclass classification, regression +- **Features**: At least 2 usable features after preprocessing + +## API Endpoints + +- `POST /analyze`: Analyze dataset and get strategy recommendations +- `POST /train`: Train a model on the dataset +- `POST /explain`: Get model explanations and feature importance +- `POST /predict`: Make predictions with trained model +- `GET /health`: Health check + +## Deployment + +This project is designed for deployment on Hugging Face Spaces using Docker. 
+ +### Files for Deployment + +- `Dockerfile` +- `requirements.txt` +- `backend/` (entire directory) + +### Running Locally + +```bash +pip install -r requirements.txt +uvicorn backend.api.main:app --host 0.0.0.0 --port 7860 +``` + +## Limitations + +- NLP functionality is disabled +- Requires datasets with ≥1200 rows +- CPU-only, no GPU support +- Stateless API (models saved temporarily) + +## Architecture + +- **Orchestrator**: Main workflow coordinator +- **Dataset Analyzer**: Data profiling and preprocessing +- **Strategy Reasoner**: Model selection logic +- **Model Factory**: Training and evaluation +- **Explainability Engine**: SHAP explanations + +## License + +MIT License diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/__pycache__/__init__.cpython-310.pyc b/backend/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56a647137f638ed7959089f900d5f66ed3f11b26 Binary files /dev/null and b/backend/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/__pycache__/__init__.cpython-313.pyc b/backend/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0d3c2f907882b9f974e2134f73aab26f8ede7a5 Binary files /dev/null and b/backend/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/api/__init__.py b/backend/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/api/__pycache__/__init__.cpython-310.pyc b/backend/api/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d61d7060e8ebbcf4a3e7446c3f8caca4b85a50dc Binary files /dev/null and b/backend/api/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/api/__pycache__/__init__.cpython-313.pyc 
# backend/api/main.py — HTTP surface for the ModelSmith orchestrator.
from fastapi import FastAPI, UploadFile, File, HTTPException
import pandas as pd
from backend.core.orchestrator import Orchestrator

app = FastAPI()
orchestrator = Orchestrator()


def _read_dataframe(upload: UploadFile) -> pd.DataFrame:
    """Parse the uploaded CSV payload into a pandas DataFrame."""
    return pd.read_csv(upload.file)


@app.post("/analyze")
async def analyze_dataset(file: UploadFile = File(...), target_column: str = "target"):
    """Profile the uploaded dataset and return the recommended strategy.

    Runs the orchestrator without training and reshapes its output into
    the frontend-facing response schema.
    """
    try:
        frame = _read_dataframe(file)
        outcome = orchestrator.run(frame, target_column)

        profile = outcome.get("dataset_info", {})
        plan = outcome.get("strategy", {})

        return {
            "columns": list(frame.columns),
            "dataTypes": profile.get("data_types", {}),
            "risks": profile.get("risks", []),
            "problemType": outcome.get("problem_type"),
            "confidence": plan.get("confidence", 0),
            "strategy": plan,
        }
    except Exception as exc:
        # All failures surface as 400s with the raw message as detail.
        raise HTTPException(status_code=400, detail=str(exc))


@app.post("/train")
async def train_model(file: UploadFile = File(...), target_column: str = "target"):
    """Train a model on the uploaded dataset and return metrics/metadata."""
    try:
        frame = _read_dataframe(file)
        outcome = orchestrator.run(frame, target_column, train=True)

        return {
            "strategy": outcome.get("strategy", {}),
            "metrics": outcome.get("metrics", {}),
            "model_path": outcome.get("model_path", "/path/to/model.pkl"),
            "training_time": outcome.get("training_time", 0),
            "model_id": outcome.get("model_id", "trained_model_123"),
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))


@app.post("/explain")
async def explain_model(file: UploadFile = File(...), target_column: str = "target"):
    """Train on the uploaded dataset and return explanation artifacts."""
    try:
        frame = _read_dataframe(file)
        outcome = orchestrator.run(frame, target_column, train=True)

        return {
            "strategy_explanation": outcome.get("strategy_explanation"),
            "metrics": outcome.get("metrics", {}),
            "feature_importance": outcome.get("feature_importance", []),
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))


@app.post("/predict")
async def predict(data: dict):
    """Score a single JSON record with the last persisted model."""
    try:
        # Model is reloaded from disk on every request (stateless API).
        model = orchestrator.model_io.load("exports/models/trained_model.pkl")
        frame = pd.DataFrame([data])
        preds = model.predict(frame)
        return {"prediction": preds.tolist()}
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))


@app.get("/health")
def health():
    """Liveness probe for the Space/container."""
    return {"status": "ok"}
0000000000000000000000000000000000000000..b2daa0a97297759e68a0eaa4b4f4746fe2f0e3d9 Binary files /dev/null and b/backend/core/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/core/__pycache__/__init__.cpython-313.pyc b/backend/core/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27593cc25df5e9a4f4d2563c7987370690b6947f Binary files /dev/null and b/backend/core/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/core/__pycache__/dataset_analyzer.cpython-310.pyc b/backend/core/__pycache__/dataset_analyzer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e9fd554dd57cfbf881cd4522ca1374233e92a295 Binary files /dev/null and b/backend/core/__pycache__/dataset_analyzer.cpython-310.pyc differ diff --git a/backend/core/__pycache__/dataset_analyzer.cpython-313.pyc b/backend/core/__pycache__/dataset_analyzer.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30ab6f434666ecf7ab70cec8fca1afcac6c18f68 Binary files /dev/null and b/backend/core/__pycache__/dataset_analyzer.cpython-313.pyc differ diff --git a/backend/core/__pycache__/deployment_generator.cpython-310.pyc b/backend/core/__pycache__/deployment_generator.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d2112e0beaf786796915ecf31af1847a1c9c5ee Binary files /dev/null and b/backend/core/__pycache__/deployment_generator.cpython-310.pyc differ diff --git a/backend/core/__pycache__/deployment_generator.cpython-313.pyc b/backend/core/__pycache__/deployment_generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a139953edc675d4ceb519873c90bd4c193ba5394 Binary files /dev/null and b/backend/core/__pycache__/deployment_generator.cpython-313.pyc differ diff --git a/backend/core/__pycache__/explainability.cpython-310.pyc b/backend/core/__pycache__/explainability.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..7a7f108fd98d316c8cf09a1a1995cbaa8caedd8c Binary files /dev/null and b/backend/core/__pycache__/explainability.cpython-310.pyc differ diff --git a/backend/core/__pycache__/explainability.cpython-313.pyc b/backend/core/__pycache__/explainability.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d73352e3d1b09d22aa3ece379e6296cfe9de9a8 Binary files /dev/null and b/backend/core/__pycache__/explainability.cpython-313.pyc differ diff --git a/backend/core/__pycache__/model_factory.cpython-310.pyc b/backend/core/__pycache__/model_factory.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..673dada15c46c8f5c1f2d9cb3db77f66162196a6 Binary files /dev/null and b/backend/core/__pycache__/model_factory.cpython-310.pyc differ diff --git a/backend/core/__pycache__/model_factory.cpython-313.pyc b/backend/core/__pycache__/model_factory.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fe2c65959a59ade827aaf018437116fbeea6878 Binary files /dev/null and b/backend/core/__pycache__/model_factory.cpython-313.pyc differ diff --git a/backend/core/__pycache__/monitoring.cpython-310.pyc b/backend/core/__pycache__/monitoring.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..863936a6551e3458c51e8332a194b1e4dba81ac0 Binary files /dev/null and b/backend/core/__pycache__/monitoring.cpython-310.pyc differ diff --git a/backend/core/__pycache__/monitoring.cpython-313.pyc b/backend/core/__pycache__/monitoring.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..422a8e1e499896091167a001bc63cfa02a57f4f0 Binary files /dev/null and b/backend/core/__pycache__/monitoring.cpython-313.pyc differ diff --git a/backend/core/__pycache__/orchestrator.cpython-310.pyc b/backend/core/__pycache__/orchestrator.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f0eea49856241b776f15753c5df344b9ea220cb4 Binary files /dev/null and b/backend/core/__pycache__/orchestrator.cpython-310.pyc differ diff --git a/backend/core/__pycache__/orchestrator.cpython-313.pyc b/backend/core/__pycache__/orchestrator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e892a6d25878052d281e4e6f904a66b8ddc66c88 Binary files /dev/null and b/backend/core/__pycache__/orchestrator.cpython-313.pyc differ diff --git a/backend/core/__pycache__/problem_inference.cpython-310.pyc b/backend/core/__pycache__/problem_inference.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e15075dc9c9502e3a002a09be17e694c62ec920a Binary files /dev/null and b/backend/core/__pycache__/problem_inference.cpython-310.pyc differ diff --git a/backend/core/__pycache__/problem_inference.cpython-313.pyc b/backend/core/__pycache__/problem_inference.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59e3c787f484c9acc4469c82de6122599a0a2ef4 Binary files /dev/null and b/backend/core/__pycache__/problem_inference.cpython-313.pyc differ diff --git a/backend/core/__pycache__/strategy_reasoner.cpython-310.pyc b/backend/core/__pycache__/strategy_reasoner.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33e91aec1ae03b3d4318ec91c35c4b448a186fea Binary files /dev/null and b/backend/core/__pycache__/strategy_reasoner.cpython-310.pyc differ diff --git a/backend/core/__pycache__/strategy_reasoner.cpython-313.pyc b/backend/core/__pycache__/strategy_reasoner.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff593d66f7d41f0e8331b7629d36d974d311120b Binary files /dev/null and b/backend/core/__pycache__/strategy_reasoner.cpython-313.pyc differ diff --git a/backend/core/dataset_analyzer.py b/backend/core/dataset_analyzer.py new file mode 100644 index 
# backend/core/dataset_analyzer.py — dataset profiling and cleanup.
import pandas as pd
import numpy as np
from backend.utils.logger import logger


def convert_numpy_types(obj):
    """Recursively convert numpy scalars/arrays to native Python types.

    Needed so analysis results can be JSON-serialized by FastAPI.
    np.integer / np.floating already cover the concrete int64/float32
    subclasses, so listing them separately is redundant.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.bool_):
        return bool(obj)
    elif isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    else:
        return obj


class DatasetAnalyzer:
    """Cleans a raw DataFrame and produces a JSON-safe profile dict."""

    def analyze(self, df: pd.DataFrame, target_column: str = None) -> dict:
        """Profile *df* and return dataset characteristics.

        Cleaning (all-null columns, duplicate rows, constant columns) is
        applied to a local copy only; the caller's frame is not mutated.

        Raises:
            ValueError: if fewer than 2 usable features remain.
        """
        logger.info("Starting dataset analysis...")

        # Drop columns that carry no information at all.
        null_columns = df.columns[df.isnull().all()]
        if len(null_columns) > 0:
            logger.warning(f"Removing all-null columns: {list(null_columns)}")
            df = df.drop(columns=null_columns)

        # Drop exact duplicate rows.
        duplicate_rows = df.duplicated().sum()
        if duplicate_rows > 0:
            logger.warning(f"Removing {duplicate_rows} duplicate rows")
            df = df.drop_duplicates()

        # Drop single-valued (constant) columns.
        constant_columns = [col for col in df.columns if df[col].nunique() == 1]
        if len(constant_columns) > 0:
            logger.warning(f"Removing constant columns: {constant_columns}")
            df = df.drop(columns=constant_columns)

        # Require at least 2 usable (non-target) features after cleanup.
        usable_features = [col for col in df.columns if col != target_column]
        if len(usable_features) < 2:
            raise ValueError(f"Insufficient features: only {len(usable_features)} usable features after preprocessing, need at least 2")

        # Compute the overall missing-value ratio once; it feeds both the
        # reported metric and the sparsity flag.
        missing_ratio = df.isnull().mean().mean()

        info = {}
        info["num_rows"] = df.shape[0]
        info["num_columns"] = df.shape[1]
        info["missing_ratio"] = missing_ratio
        info["row_count"] = df.shape[0]  # kept alongside num_rows for API compatibility
        info["high_dimensional"] = bool(df.shape[1] > 50)
        info["small_data"] = bool(df.shape[0] < 1200)
        info["sparse_data"] = bool(missing_ratio > 0.4)

        # Numeric vs. categorical split (target excluded from features).
        # select_dtypes(include/exclude "number") partitions every column,
        # so the two lists together always cover all non-target features —
        # which is why no second feature-count check is needed here.
        all_numeric_cols = df.select_dtypes(include="number").columns.tolist()
        all_categorical_cols = df.select_dtypes(exclude="number").columns.tolist()
        info["numeric_cols"] = [col for col in all_numeric_cols if col != target_column]
        info["categorical_cols"] = [col for col in all_categorical_cols if col != target_column]

        # Per-column distinct-value counts.
        cardinality = {col: df[col].nunique() for col in df.columns}
        info["cardinality"] = cardinality

        # Target-specific checks: numeric targets with many distinct values
        # are treated as regression, everything else as classification.
        if target_column and target_column in df.columns:
            target = df[target_column]
            unique_vals = target.nunique()
            if target.dtype in ['int64', 'float64'] and unique_vals > 10:
                info["target_type"] = "regression"
                info["class_distribution"] = None
                info["imbalance"] = None
            else:
                info["target_type"] = "classification"
                value_counts = target.value_counts(normalize=True)
                info["class_distribution"] = value_counts.to_dict()
                # Majority class above 80% is flagged as imbalance.
                info["imbalance"] = bool(value_counts.max() > 0.8)
        else:
            info["target_type"] = None
            info["class_distribution"] = None
            info["imbalance"] = None

        # NLP detection heuristic: categorical columns whose mean string
        # length exceeds 30 chars are treated as free text. (On an empty
        # frame the mean is NaN, which compares False — no text columns.)
        text_columns = []
        for col in info["categorical_cols"]:
            if df[col].astype(str).str.len().mean() > 30:
                text_columns.append(col)
        info["text_columns"] = text_columns
        info["possible_nlp"] = len(text_columns) > 0

        return convert_numpy_types(info)
# backend/core/deployment_generator.py + backend/core/explainability.py
import numpy as np


class DeploymentGenerator:
    """Emits deployment artifacts (FastAPI service + Dockerfile) for a saved model."""

    def generate_fastapi_app(self, model_path):
        """Return source code for a minimal FastAPI prediction service.

        Args:
            model_path: path to the joblib-serialized model, embedded
                verbatim into the generated source.
        """
        template = f'''
from fastapi import FastAPI
import joblib
import pandas as pd

app = FastAPI()
model = joblib.load("{model_path}")

@app.post("/predict")
async def predict(data: dict):
    df = pd.DataFrame([data])
    preds = model.predict(df)
    return {{"prediction": preds.tolist()}}
'''
        return template

    def generate_dockerfile(self):
        """Return a Dockerfile that serves the generated FastAPI app."""
        return '''
FROM python:3.9
WORKDIR /app
COPY . /app
RUN pip install fastapi uvicorn pandas scikit-learn joblib
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
'''


class ExplainabilityEngine:
    """SHAP-based global feature-importance computation for tabular pipelines."""

    def explain_tabular(self, model_pipeline, X_sample):
        """Return mean-|SHAP| global importance per transformed feature.

        Args:
            model_pipeline: sklearn Pipeline with named steps
                "preprocessor" and "model".
            X_sample: DataFrame of raw rows to explain.

        Raises:
            ValueError: on empty input, empty transformed output, or a
                failed SHAP computation.
        """
        if X_sample.empty:
            raise ValueError("Sample data is empty, cannot compute explanations")

        # Deferred import: shap is a heavy optional dependency; importing
        # it lazily lets deployment-artifact generation (and the rest of
        # this module) work even when shap is not installed.
        import shap

        # Extract trained model and preprocessor
        preprocessor = model_pipeline.named_steps["preprocessor"]
        model = model_pipeline.named_steps["model"]

        X_transformed = preprocessor.transform(X_sample)

        if X_transformed.shape[0] == 0:
            raise ValueError("Transformed sample data is empty after preprocessing")

        explainer = shap.Explainer(model, X_transformed)
        shap_values = explainer(X_transformed, check_additivity=False)

        if shap_values is None or shap_values.values is None:
            raise ValueError("SHAP computation failed")

        # Global importance = mean absolute SHAP value per feature.
        global_importance = np.abs(shap_values.values).mean(axis=0).tolist()

        if len(global_importance) == 0:
            raise ValueError("No feature importance computed")

        return global_importance
# --- backend/core/model_factory.py ---
# Builds, trains, evaluates and persists tabular models.
import os
from sklearn.model_selection import train_test_split
from ..tabular.pipelines import build_preprocessing_pipeline
from ..tabular.trainers import train_model
from ..tabular.evaluators import evaluate_model
from ..nlp.trainers import TextClassifier
from ..nlp.evaluators import evaluate_nlp_model
from ..utils.model_io import ModelIO

class ModelFactory:
    """Coordinates split / preprocess / train / evaluate / save for one dataset."""

    def __init__(self):
        # Handles model (de)serialization to disk.
        self.model_io = ModelIO()

    def build_and_train(self, df, target_column, dataset_info, problem_type, strategy):
        """Train a model on *df* and return ``(model, metrics)``.

        Raises:
            ValueError: if the dataset is under 1200 rows, or if
                problem_type is "nlp" (disabled in this version).
        """
        if dataset_info["small_data"]:
            raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")

        if problem_type == "nlp":
            # NLP path is intentionally disabled; the imports above remain
            # for when it is re-enabled.
            raise ValueError("NLP functionality is not supported in this version.")
        else:
            # Tabular path: 80/20 split with a fixed seed for reproducibility.
            X = df.drop(columns=[target_column])
            y = df[target_column]

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            pipeline = build_preprocessing_pipeline(dataset_info["numeric_cols"], dataset_info["categorical_cols"])
            # NOTE(review): the pipeline is fitted here and then handed to
            # train_model, which presumably fits the final estimator —
            # confirm train_model does not refit the preprocessor.
            pipeline.fit(X_train, y_train)

            model = train_model(pipeline, X_train, y_train, problem_type, strategy)
            metrics = evaluate_model(model, X_test, y_test, problem_type)

            # Persist the trained model at a fixed path (stateless API).
            self.model_io.save(model, "exports/models/trained_model.pkl")

            return model, metrics


# --- backend/core/monitoring.py ---
import numpy as np

class MonitoringEngine:
    """Simple per-feature drift detection on summary statistics."""

    def detect_drift(self, train_stats, new_data_stats, threshold=0.2):
        """Flag features whose stat moved more than *threshold* from training.

        Features missing from *new_data_stats* fall back to their training
        value, so they are never flagged.
        """
        drift_flags = {}
        for feature in train_stats:
            if abs(train_stats[feature] - new_data_stats.get(feature, train_stats[feature])) > threshold:
                drift_flags[feature] = True
        return drift_flags


# --- backend/core/orchestrator.py ---
# End-to-end workflow: validate -> analyze -> infer -> decide -> (train).
from .dataset_analyzer import DatasetAnalyzer
from .problem_inference import ProblemInference
from .strategy_reasoner import StrategyReasoner
from .model_factory import ModelFactory
from .explainability import ExplainabilityEngine
from .deployment_generator import DeploymentGenerator
from .monitoring import MonitoringEngine
from ..utils.logger import logger
from ..utils.validators import DataValidator
from ..utils.model_io import ModelIO
import json
import os

class Orchestrator:
    """Wires together every pipeline stage and exposes a single run() entry point."""

    def __init__(self):
        self.validator = DataValidator()
        self.analyzer = DatasetAnalyzer()
        self.inferencer = ProblemInference()
        self.reasoner = StrategyReasoner()
        self.model_factory = ModelFactory()
        self.explainer = ExplainabilityEngine()
        self.deployer = DeploymentGenerator()
        self.monitor = MonitoringEngine()
        self.model_io = ModelIO()

    def run(self, df, target_column, train=False):
        """Run the full workflow on *df* and return a response dict.

        With ``train=False`` only analysis/strategy is performed; with
        ``train=True`` a model is trained, explained, saved, and
        deployment artifacts are written under ``exports/``.

        Side effects: writes a strategy log under ``experiments/logs``
        on every call, and model/deployment files when training.
        """
        self.validator.validate_dataframe(df, target_column)
        logger.info("Validation passed")
        dataset_info = self.analyzer.analyze(df, target_column)
        problem_type = self.inferencer.infer(dataset_info, target_column)
        strategy = self.reasoner.decide(dataset_info, problem_type)

        tradeoff_explanation = self.reasoner.explain_tradeoffs(strategy)

        # Log strategy behavior — one JSON file per target column.
        log_data = {
            "dataset_characteristics": dataset_info,
            "chosen_model_family": strategy.get("model_family"),
            "detected_risks": strategy.get("risks", []),
            "confidence_score": strategy.get("confidence", 0)
        }
        os.makedirs("experiments/logs", exist_ok=True)
        with open(f"experiments/logs/{target_column}_strategy.json", "w") as f:
            json.dump(log_data, f, indent=4, default=str)

        response = {
            "dataset_info": dataset_info,
            "problem_type": problem_type,
            "strategy": strategy,
            "strategy_tradeoffs": tradeoff_explanation
        }

        if problem_type == "nlp":
            response["nlp_mode"] = "activated"

        if train:
            model, metrics = self.model_factory.build_and_train(
                df, target_column, dataset_info, problem_type, strategy
            )
            response["metrics"] = metrics

            explanation = self.reasoner.explain_strategy(strategy)
            response["strategy_explanation"] = explanation

            # SHAP is expensive: explain at most the first 100 rows.
            X_sample = df.drop(columns=[target_column]).head(100)  # Sample for SHAP
            feature_importance = self.explainer.explain_tabular(model, X_sample)
            response["feature_importance"] = feature_importance

            # Save the trained model.
            # NOTE(review): ModelFactory.build_and_train already saved the
            # model to the same path — this second save looks redundant;
            # confirm before removing either.
            os.makedirs("exports/models", exist_ok=True)
            os.makedirs("exports/deployment", exist_ok=True)
            model_path = "exports/models/trained_model.pkl"
            self.model_io.save(model, model_path)

            # Generate deployment artifacts (FastAPI app + Dockerfile).
            fastapi_app = self.deployer.generate_fastapi_app(model_path)
            dockerfile = self.deployer.generate_dockerfile()

            with open("exports/deployment/main.py", "w") as f:
                f.write(fastapi_app)
            with open("exports/deployment/Dockerfile", "w") as f:
                f.write(dockerfile)

        return response


# --- backend/core/problem_inference.py ---
class ProblemInference:
    """Maps dataset characteristics to a problem type label."""

    def infer(self, dataset_info, target_column):
        """Return "nlp", "classification", "regression", or "unknown".

        Text columns take precedence over the target-based decision; with
        a target, a populated class_distribution means classification,
        otherwise regression (matching DatasetAnalyzer's output).
        """
        if dataset_info.get("possible_nlp"):
            return "nlp"

        if target_column:
            return "classification" if dataset_info.get("class_distribution") else "regression"

        return "unknown"
# --- backend/core/strategy_reasoner.py ---
class StrategyReasoner:
    """Chooses a model family from dataset risks and explains the choice."""

    def decide(self, dataset_info, problem_type):
        """Return a strategy dict: model_family, reason, risks, confidence.

        Confidence starts at 1.0 and is reduced by a penalty per detected
        risk, floored at 0.1 (penalty is capped at 0.9).

        Raises:
            ValueError: for an unrecognized problem_type. (Previously an
                unknown type fell through every branch and crashed with a
                NameError on ``model_family``.)
        """
        strategy = {}
        risks = []
        score = 0.0

        # Accumulate risk flags and their confidence penalties.
        if dataset_info.get("small_data"):
            risks.append("small_dataset")
            score += 0.1

        if dataset_info.get("high_dimensional"):
            risks.append("high_dimensionality")
            score += 0.1

        if dataset_info.get("imbalance"):
            risks.append("class_imbalance")
            score += 0.2

        if dataset_info.get("sparse_data"):
            risks.append("high_missingness")
            score += 0.2

        if problem_type == "classification":
            # All classification branches currently pick tree ensembles;
            # the branches are kept so each risk gets its own rationale.
            if "small_dataset" in risks:
                model_family = "tree_ensemble"
                reason = "Small datasets benefit from simpler models"
            elif "high_dimensionality" in risks:
                model_family = "tree_ensemble"
                reason = "Tree ensembles handle high-dimensional data better"
            else:
                model_family = "tree_ensemble"
                reason = "Tree ensembles handle complexity well"

        elif problem_type == "regression":
            if "high_dimensionality" in risks:
                model_family = "tree_ensemble"
                reason = "Tree ensembles handle high-dimensional data better"
            else:
                model_family = "linear_or_tree"
                reason = "Balances interpretability and accuracy"

        elif problem_type == "nlp":
            model_family = "transformer"
            reason = "Transformers best capture language semantics"

        else:
            # Explicit failure instead of the NameError the original code
            # produced when no branch matched.
            raise ValueError(f"Unsupported problem type: {problem_type}")

        strategy["model_family"] = model_family
        strategy["reason"] = reason
        strategy["risks"] = risks
        strategy["confidence"] = round(1 - min(score, 0.9), 2)

        return strategy

    def explain_strategy(self, strategy):
        """Return a one/two-sentence explanation of the chosen strategy."""
        explanation = f"Selected {strategy['model_family']} models because: {strategy['reason']}."
        if strategy.get("risks"):
            explanation += f" Identified risks: {', '.join(strategy['risks'])}."
        return explanation

    def explain_tradeoffs(self, strategy):
        """Return an explanation including risks and the confidence score."""
        explanation = f"Chose {strategy['model_family']} due to: {strategy['reason']}."
        if strategy.get("risks"):
            explanation += f" Risks detected: {', '.join(strategy['risks'])}."
        explanation += f" Confidence score: {strategy.get('confidence')}."
        return explanation


# --- backend/experiments/benchmark_runner.py ---
import time

class BenchmarkRunner:
    """Runs the orchestrator over a set of datasets, timing each run."""

    def run(self, orchestrator, datasets):
        """Return one result dict per dataset (strategy, metrics, time, error).

        Failures are captured per dataset instead of aborting the suite.
        """
        results = []
        for name, (df, target) in datasets.items():
            start = time.time()
            try:
                output = orchestrator.run(df, target, train=True)
                end = time.time()

                results.append({
                    "dataset": name,
                    "strategy": output.get("strategy"),
                    "metrics": output.get("metrics"),
                    "time": round(end - start, 2),
                    "error": None
                })
            except Exception as e:
                end = time.time()
                results.append({
                    "dataset": name,
                    "strategy": None,
                    "metrics": None,
                    "time": round(end - start, 2),
                    "error": str(e)
                })
        return results


# --- backend/experiments/run_benchmarks.py ---
import os

def main():
    """Run the benchmark suite over the bundled real-world datasets.

    Wrapped in a function and guarded below: the original script read
    CSVs and wrote result files at import time, which broke any module
    that merely imported it.
    """
    import sys
    import json
    import pandas as pd

    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
    from backend.core.orchestrator import Orchestrator

    base = os.path.join(os.path.dirname(__file__), '..', '..', 'datasets', 'real_world')
    datasets = {
        "titanic": (pd.read_csv(os.path.join(base, 'titanic.csv')), "Survived"),
        "credit_default": (pd.read_csv(os.path.join(base, 'credit_default.csv')), "default.payment.next.month"),
        "house_prices": (pd.read_csv(os.path.join(base, 'house_prices.csv')), "Price"),
        "telecom_churn": (pd.read_csv(os.path.join(base, 'telecom_churn.csv')), "Churn"),
        "news_classification": (pd.read_csv(os.path.join(base, 'news_classification.csv')), "label"),
    }

    orchestrator = Orchestrator()
    runner = BenchmarkRunner()
    results = runner.run(orchestrator, datasets)

    print("Benchmark Results:")
    for result in results:
        print(result)

    # Save results to file
    with open("experiments/benchmark_results.json", "w") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()
# --- backend/nlp/embeddings.py ---
# Thin wrapper around sentence-transformers for text embeddings.
from sentence_transformers import SentenceTransformer

class EmbeddingEngine:
    """Encodes texts into dense vectors with a sentence-transformers model."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        # Downloads the model on first use if not cached locally.
        self.model = SentenceTransformer(model_name)

    def encode(self, texts):
        """Return embeddings for *texts* (list of strings)."""
        return self.model.encode(texts)


# --- backend/nlp/evaluators.py ---
import torch
from transformers import Trainer
from ..nlp.trainers import TextDataset

def evaluate_nlp_model(model, tokenizer, texts, labels):
    """Evaluate a sequence-classification model; return {"accuracy": float}.

    Tokenizes with truncation/padding to 512 tokens, runs prediction
    through a bare Trainer, and computes argmax accuracy against *labels*.
    """
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
    dataset = TextDataset(encodings, labels)

    trainer = Trainer(model=model)
    predictions = trainer.predict(dataset)
    preds = torch.argmax(torch.tensor(predictions.predictions), axis=1)

    accuracy = (preds == torch.tensor(labels)).float().mean().item()
    return {"accuracy": accuracy}


# --- backend/nlp/preprocess.py ---
import re

class NLPPreprocessor:
    """Basic text normalization prior to tokenization."""

    def clean(self, text: str):
        """Lowercase, strip non-alphanumerics, and collapse whitespace."""
        text = text.lower()
        text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
        text = re.sub(r"\s+", " ", text)
        return text.strip()


# --- backend/nlp/trainers.py ---
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from ..nlp.preprocess import NLPPreprocessor

class TextDataset(Dataset):
    """torch Dataset pairing tokenizer encodings with integer labels."""

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # One tensor slice per encoding key (input_ids, attention_mask, ...).
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

class TextClassifier:
    """Fine-tunes a pretrained transformer for sequence classification."""

    def __init__(self, model_name="distilbert-base-uncased", num_labels=2):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        self.preprocessor = NLPPreprocessor()

    def train(self, texts, labels):
        """Fine-tune on (texts, labels) and return the trained model.

        Runs 3 epochs with no eval loop and no checkpointing.
        """
        # Clean texts
        texts = [self.preprocessor.clean(text) for text in texts]

        # Tokenize
        encodings = self.tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")

        # Create dataset
        dataset = TextDataset(encodings, labels)

        # Training arguments
        # NOTE(review): `evaluation_strategy` was renamed `eval_strategy`
        # in newer transformers releases — confirm against the pinned
        # transformers version in requirements.txt.
        training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=3,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=64,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            save_steps=500,
            evaluation_strategy="no",
            save_strategy="no",
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=dataset,
        )

        trainer.train()
        return self.model
0000000000000000000000000000000000000000..ba7f083e1520674cab223b608e7aa40d5e8727e8 Binary files /dev/null and b/backend/tabular/__pycache__/evaluators.cpython-313.pyc differ diff --git a/backend/tabular/__pycache__/pipelines.cpython-310.pyc b/backend/tabular/__pycache__/pipelines.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e2d11267b237ddd21e6d7631d17bd4bcb18386a Binary files /dev/null and b/backend/tabular/__pycache__/pipelines.cpython-310.pyc differ diff --git a/backend/tabular/__pycache__/pipelines.cpython-313.pyc b/backend/tabular/__pycache__/pipelines.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7afa666e830e35a4f939826900348f10e7c49c86 Binary files /dev/null and b/backend/tabular/__pycache__/pipelines.cpython-313.pyc differ diff --git a/backend/tabular/__pycache__/trainers.cpython-310.pyc b/backend/tabular/__pycache__/trainers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d97f70bf42b571ce732944dab6883a1c7157d18 Binary files /dev/null and b/backend/tabular/__pycache__/trainers.cpython-310.pyc differ diff --git a/backend/tabular/__pycache__/trainers.cpython-313.pyc b/backend/tabular/__pycache__/trainers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..978ddb3f0d4061a5b75368cddecb61d74f0ef36c Binary files /dev/null and b/backend/tabular/__pycache__/trainers.cpython-313.pyc differ diff --git a/backend/tabular/evaluators.py b/backend/tabular/evaluators.py new file mode 100644 index 0000000000000000000000000000000000000000..d9c22b3a2d4f146811a8bfe8998a26d59a848241 --- /dev/null +++ b/backend/tabular/evaluators.py @@ -0,0 +1,27 @@ +from sklearn.metrics import accuracy_score, f1_score, mean_squared_error +import numpy as np + + +def evaluate_model(model, X_test, y_test, problem_type): + preds = model.predict(X_test) + + if problem_type == "classification": + acc = accuracy_score(y_test, preds) + f1 = 
f1_score(y_test, preds, average="weighted") + if np.isnan(acc) or acc == 0 or np.isnan(f1) or f1 == 0: + raise ValueError("Invalid metrics computed for classification") + return { + "accuracy": acc, + "f1": f1 + } + else: + rmse = np.sqrt(mean_squared_error(y_test, preds)) + if np.isnan(rmse) or np.isinf(rmse): + raise ValueError("Invalid metrics computed for regression") + return { + "rmse": rmse + } + + + + diff --git a/backend/tabular/pipelines.py b/backend/tabular/pipelines.py new file mode 100644 index 0000000000000000000000000000000000000000..bc1f4d6d01b8a72e2895ba5547a25f35a8db2c05 --- /dev/null +++ b/backend/tabular/pipelines.py @@ -0,0 +1,29 @@ +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler, OneHotEncoder +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer + + +def build_preprocessing_pipeline(numeric_features, categorical_features): + numeric_transformer = Pipeline(steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()) + ]) + + categorical_transformer = Pipeline(steps=[ + ("imputer", SimpleImputer(strategy="most_frequent")), + ("encoder", OneHotEncoder(handle_unknown="ignore")) + ]) + + preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features) + ] + ) + + return preprocessor + + + + diff --git a/backend/tabular/trainers.py b/backend/tabular/trainers.py new file mode 100644 index 0000000000000000000000000000000000000000..e75a5f422ff28a9eae8875563c1347f016a7fcce --- /dev/null +++ b/backend/tabular/trainers.py @@ -0,0 +1,34 @@ +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.pipeline import Pipeline + + +def train_model(preprocessor, X, y, problem_type, strategy): + if strategy["model_family"] == "tree_ensemble": + if problem_type == 
"classification": + model = RandomForestClassifier(n_estimators=100) + else: + model = RandomForestRegressor(n_estimators=100) + elif strategy["model_family"] == "linear_or_tree": + if problem_type == "classification": + model = LogisticRegression() + else: + model = LinearRegression() + else: + # Fallback to RandomForest if strategy is not recognized + if problem_type == "classification": + model = RandomForestClassifier(n_estimators=100) + else: + model = RandomForestRegressor(n_estimators=100) + + clf = Pipeline(steps=[ + ("preprocessor", preprocessor), + ("model", model) + ]) + + clf.fit(X, y) + return clf + + + + diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/utils/__pycache__/__init__.cpython-310.pyc b/backend/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4c7f8734763f7b1e1d9ce4de11c615e5591a3f20 Binary files /dev/null and b/backend/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/__init__.cpython-313.pyc b/backend/utils/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6caf59f34c7c43f60b705c452aafc8f01b0a33ea Binary files /dev/null and b/backend/utils/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/utils/__pycache__/logger.cpython-310.pyc b/backend/utils/__pycache__/logger.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cacbb67d7c318906d88a971e022f35d87f2a5970 Binary files /dev/null and b/backend/utils/__pycache__/logger.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/logger.cpython-313.pyc b/backend/utils/__pycache__/logger.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9c2a7862043b8cb278a67006b6d3ee5ff8c3762 Binary files /dev/null and 
b/backend/utils/__pycache__/logger.cpython-313.pyc differ diff --git a/backend/utils/__pycache__/model_io.cpython-310.pyc b/backend/utils/__pycache__/model_io.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0136a31ecc63746f511c793e56be08264e35fa85 Binary files /dev/null and b/backend/utils/__pycache__/model_io.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/model_io.cpython-313.pyc b/backend/utils/__pycache__/model_io.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..718985b934c6b1acb8f29483587358c941e910c0 Binary files /dev/null and b/backend/utils/__pycache__/model_io.cpython-313.pyc differ diff --git a/backend/utils/__pycache__/validators.cpython-310.pyc b/backend/utils/__pycache__/validators.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c9c69ff5c4e6e053f2dd5c42b6349df8789a253 Binary files /dev/null and b/backend/utils/__pycache__/validators.cpython-310.pyc differ diff --git a/backend/utils/__pycache__/validators.cpython-313.pyc b/backend/utils/__pycache__/validators.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5ba11574bdf1bc14aee64407371394b1d40032a Binary files /dev/null and b/backend/utils/__pycache__/validators.cpython-313.pyc differ diff --git a/backend/utils/logger.py b/backend/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..013d3ddbec12984e75d07e87cfe24318f6f4af4d --- /dev/null +++ b/backend/utils/logger.py @@ -0,0 +1,12 @@ +import logging + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s" +) + +logger = logging.getLogger("modelsmith") + + + + diff --git a/backend/utils/model_io.py b/backend/utils/model_io.py new file mode 100644 index 0000000000000000000000000000000000000000..ef0062ca107403329195a18dc4455b7c1728817a --- /dev/null +++ b/backend/utils/model_io.py @@ -0,0 +1,12 @@ +import joblib + +class 
ModelIO: + def save(self, model, path): + joblib.dump(model, path) + + def load(self, path): + return joblib.load(path) + + + + diff --git a/backend/utils/validators.py b/backend/utils/validators.py new file mode 100644 index 0000000000000000000000000000000000000000..0a89dbdd461651fce276e627c7087097569f176c --- /dev/null +++ b/backend/utils/validators.py @@ -0,0 +1,18 @@ +import pandas as pd + +class DataValidator: + def validate_dataframe(self, df: pd.DataFrame, target_column: str): + if df.empty: + raise ValueError("Uploaded dataset is empty.") + + if target_column not in df.columns: + raise ValueError(f"Target column '{target_column}' not found in dataset.") + + if df.shape[1] < 2: + raise ValueError("Dataset must have at least 2 columns.") + + return True + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cb52599f925bb49a612c4c964d02d689779bca7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +fastapi +uvicorn +pandas +numpy +scikit-learn +shap +python-multipart