Che237 committed on
Commit
7f42f86
·
verified ·
1 Parent(s): 94487f7

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +393 -634
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- 🔐 CyberForge AI - ML Training & Inference Platform
3
- Hugging Face Spaces deployment for training cybersecurity ML models
4
  """
5
 
6
  import gradio as gr
@@ -8,12 +8,12 @@ import pandas as pd
8
  import numpy as np
9
  import json
10
  import os
11
- import joblib
 
12
  from pathlib import Path
13
  from datetime import datetime
14
  import logging
15
  from typing import Dict, List, Any, Optional, Tuple
16
- import asyncio
17
 
18
  # ML Libraries
19
  from sklearn.model_selection import train_test_split, cross_val_score
@@ -21,12 +21,10 @@ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier,
21
  from sklearn.linear_model import LogisticRegression
22
  from sklearn.preprocessing import StandardScaler, LabelEncoder
23
  from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
24
- import torch
25
- import torch.nn as nn
26
- from transformers import AutoTokenizer, AutoModel
27
 
28
  # Hugging Face Hub
29
- from huggingface_hub import HfApi, hf_hub_download, upload_file, create_repo
30
 
31
  logging.basicConfig(level=logging.INFO)
32
  logger = logging.getLogger(__name__)
@@ -41,6 +39,8 @@ MODELS_DIR.mkdir(exist_ok=True)
41
  DATASETS_DIR = Path("./datasets")
42
  DATASETS_DIR.mkdir(exist_ok=True)
43
 
 
 
44
  # Model types available for training
45
  MODEL_TYPES = {
46
  "Random Forest": RandomForestClassifier,
@@ -64,709 +64,468 @@ SECURITY_TASKS = [
64
  ]
65
 
66
  # ============================================================================
67
- # MODEL REGISTRY
68
  # ============================================================================
69
 
70
class ModelRegistry:
    """In-memory + on-disk registry of trained models.

    Fitted estimators and scalers are cached in dicts; metadata (artifact
    paths, metrics, timestamps) is persisted to MODELS_DIR/registry.json so
    registered models survive a process restart.
    """

    def __init__(self):
        self.models = {}    # model_id -> fitted estimator (in-memory cache)
        self.scalers = {}   # model_id -> fitted scaler (in-memory cache)
        self.metadata = {}  # model_id -> persisted registry entry
        self.registry_file = MODELS_DIR / "registry.json"
        self._load_registry()

    def _load_registry(self):
        """Load registry metadata from disk, tolerating a missing or corrupt file."""
        self.metadata = {}
        if self.registry_file.exists():
            try:
                with open(self.registry_file, 'r') as f:
                    self.metadata = json.load(f)
            except (json.JSONDecodeError, OSError) as e:
                # A corrupt registry file must not prevent the app from starting.
                logger.warning(f"Could not read registry file: {e}")

    def _save_registry(self):
        """Persist registry metadata to disk."""
        with open(self.registry_file, 'w') as f:
            json.dump(self.metadata, f, indent=2, default=str)

    def register_model(self, model_id: str, model, scaler, metrics: Dict):
        """Cache, serialize, and record a freshly trained model.

        Returns:
            The model_id, for caller convenience.
        """
        self.models[model_id] = model
        self.scalers[model_id] = scaler

        # Serialize estimator and scaler next to the registry file.
        model_path = MODELS_DIR / f"{model_id}_model.pkl"
        scaler_path = MODELS_DIR / f"{model_id}_scaler.pkl"
        joblib.dump(model, model_path)
        joblib.dump(scaler, scaler_path)

        self.metadata[model_id] = {
            "created_at": datetime.now().isoformat(),
            "metrics": metrics,
            "model_path": str(model_path),
            "scaler_path": str(scaler_path),
            "status": "ready"
        }
        self._save_registry()

        return model_id

    def get_model(self, model_id: str):
        """Return (model, scaler) for model_id, or (None, None) if unavailable.

        Checks the in-memory cache first, then falls back to loading the
        pickled artifacts referenced by the metadata. Missing or unreadable
        artifact files now yield (None, None) instead of raising.
        """
        if model_id in self.models:
            return self.models[model_id], self.scalers[model_id]

        if model_id in self.metadata:
            try:
                model = joblib.load(self.metadata[model_id]["model_path"])
                scaler = joblib.load(self.metadata[model_id]["scaler_path"])
            except (OSError, KeyError) as e:
                # Registry entry exists but its artifacts are gone or corrupt.
                logger.warning(f"Could not load model '{model_id}': {e}")
                return None, None
            self.models[model_id] = model
            self.scalers[model_id] = scaler
            return model, scaler

        return None, None

    def list_models(self) -> List[Dict]:
        """Return all registry entries, each flattened with its id."""
        return [{"id": k, **v} for k, v in self.metadata.items()]


# Global registry shared by all Gradio handlers.
model_registry = ModelRegistry()
139
-
140
- # ============================================================================
141
- # TRAINING FUNCTIONS
142
- # ============================================================================
143
-
144
def prepare_dataset(file, task_type: str) -> Tuple[pd.DataFrame, str]:
    """Load an uploaded dataset file into a DataFrame.

    Args:
        file: Gradio file handle (its .name attribute is the temp file path).
        task_type: Security task label (currently unused by loading logic).

    Returns:
        (DataFrame, status message) on success; (None, error message) on failure.
    """
    try:
        if file is None:
            return None, "No file uploaded"

        # Dispatch on the file extension, case-insensitively, so that e.g.
        # ".CSV" uploads are accepted (the original endswith() check was
        # case-sensitive and rejected them).
        suffix = Path(file.name).suffix.lower()
        readers = {
            '.csv': pd.read_csv,
            '.json': pd.read_json,
            '.parquet': pd.read_parquet,
        }
        reader = readers.get(suffix)
        if reader is None:
            return None, f"Unsupported file format: {file.name}"
        df = reader(file.name)

        logger.info(f"Loaded dataset with shape: {df.shape}")
        return df, f"✅ Loaded dataset with {len(df)} samples and {len(df.columns)} features"

    except Exception as e:
        logger.error(f"Error loading dataset: {e}")
        return None, f"❌ Error: {str(e)}"
166
-
167
-
168
def train_model(
    file,
    task_type: str,
    model_type: str,
    target_column: str,
    test_size: float,
    model_name: str,
    progress=gr.Progress()
) -> Tuple[str, str, str]:
    """Train an ML model on an uploaded dataset and register it.

    Args:
        file: Gradio file handle for the uploaded dataset.
        task_type: Security task label (stored as metadata; does not change training).
        model_type: Key into MODEL_TYPES selecting the estimator class.
        target_column: Name of the label column in the dataset.
        test_size: Fraction of rows held out for evaluation.
        model_name: Prefix used to build the generated model id.
        progress: Gradio progress reporter.

    Returns:
        (training summary markdown, classification report text, model id);
        on failure the first element is an error message and the rest are "".
    """
    try:
        progress(0, desc="Loading dataset...")

        df, msg = prepare_dataset(file, task_type)
        if df is None:
            return msg, "", ""

        progress(0.1, desc="Preparing features...")

        if target_column not in df.columns:
            return f"❌ Target column '{target_column}' not found in dataset. Available: {list(df.columns)}", "", ""

        X = df.drop(columns=[target_column])
        y = df[target_column]

        # Label-encode every categorical feature column in place.
        for col in X.select_dtypes(include=['object', 'category']).columns:
            le = LabelEncoder()
            X[col] = le.fit_transform(X[col].astype(str))

        # Encode a non-numeric target as integer class labels.
        if y.dtype == 'object' or y.dtype.name == 'category':
            le = LabelEncoder()
            y = le.fit_transform(y.astype(str))

        # NOTE(review): NaNs are imputed with 0 — confirm this suits the
        # expected feature distributions.
        X = X.fillna(0)

        progress(0.2, desc="Splitting data...")

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )

        progress(0.3, desc="Scaling features...")

        # Fit the scaler on the training split only to avoid test leakage.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        progress(0.4, desc=f"Training {model_type}...")

        if model_type not in MODEL_TYPES:
            return f"Unknown model type: {model_type}", "", ""

        model_class = MODEL_TYPES[model_type]

        if model_type == "Isolation Forest (Anomaly)":
            # Unsupervised: fit on features only. predict() returns -1 for
            # outliers / 1 for inliers; map -1 -> 1 and 1 -> 0 so the metrics
            # below treat outliers as the positive class.
            model = model_class(contamination=0.1, random_state=42, n_estimators=100)
            model.fit(X_train_scaled)
            y_pred = model.predict(X_test_scaled)
            y_pred = np.where(y_pred == -1, 1, 0)  # Convert to binary
        else:
            model = model_class(random_state=42)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

        progress(0.7, desc="Evaluating model...")

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')

        metrics = {
            "accuracy": float(accuracy),
            "f1_score": float(f1),
            "model_type": model_type,
            "task_type": task_type,
            "samples": len(df),
            "features": len(X.columns),
        }

        progress(0.85, desc="Saving model...")

        model_id = f"{model_name}_{task_type.lower().replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        model_registry.register_model(model_id, model, scaler, metrics)

        progress(1.0, desc="Complete!")

        training_log = f"""
## 🎯 Training Complete!

**Model ID:** `{model_id}`
**Task:** {task_type}
**Model Type:** {model_type}

### 📊 Dataset Info
- Samples: {len(df):,}
- Features: {len(X.columns)}
- Train/Test Split: {int((1-test_size)*100)}/{int(test_size*100)}

### 📈 Metrics
- **Accuracy:** {accuracy:.4f} ({accuracy*100:.2f}%)
- **F1 Score:** {f1:.4f}

### 💾 Model Saved
- Path: `{MODELS_DIR / f'{model_id}_model.pkl'}`
"""

        # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit propagate.
        try:
            report = classification_report(y_test, y_pred)
        except Exception:
            report = "Classification report not available for this model type"

        return training_log, report, model_id

    except Exception as e:
        logger.error(f"Training error: {e}")
        import traceback
        return f"❌ Training failed: {str(e)}\n\n{traceback.format_exc()}", "", ""
300
-
301
-
302
def list_trained_models() -> str:
    """Render a markdown summary of every model in the registry."""
    entries = model_registry.list_models()

    if not entries:
        return "No models trained yet. Upload a dataset and train a model to get started!"

    # Assemble one markdown section per model and join once at the end.
    sections = ["## 🤖 Trained Models\n\n"]
    for entry in entries:
        metrics = entry.get('metrics', {})
        sections.append(f"""
### {entry['id']}
- **Created:** {entry.get('created_at', 'Unknown')}
- **Accuracy:** {metrics.get('accuracy', 0):.4f}
- **F1 Score:** {metrics.get('f1_score', 0):.4f}
- **Status:** {entry.get('status', 'Unknown')}

---
""")
    return "".join(sections)
321
-
322
-
323
def run_inference(model_id: str, input_data: str) -> str:
    """Run predictions for JSON-encoded samples with a registered model.

    Args:
        model_id: Registry id of the model to use.
        input_data: JSON string — a single object or a list of objects, each
            mapping feature name -> value.

    Returns:
        JSON string with one result per sample, or a "❌ ..." error message.
    """
    try:
        model, scaler = model_registry.get_model(model_id)

        if model is None:
            return f"❌ Model '{model_id}' not found"

        # Parse input data (expect JSON format); a lone object is wrapped
        # into a one-element batch.
        try:
            data = json.loads(input_data)
            if isinstance(data, dict):
                data = [data]
            df = pd.DataFrame(data)
        except json.JSONDecodeError:
            return "❌ Invalid JSON input. Please provide data in JSON format."

        # Scale with the scaler fitted at training time, then predict.
        X_scaled = scaler.transform(df.fillna(0))
        predictions = model.predict(X_scaled)

        # Include class probabilities when the estimator supports them.
        # Was a bare `except:`; narrowed to Exception so interrupts propagate.
        try:
            probabilities = model.predict_proba(X_scaled)
            results = []
            for i, (pred, probs) in enumerate(zip(predictions, probabilities)):
                results.append({
                    "sample": i,
                    "prediction": int(pred),
                    "confidence": float(max(probs)),
                    "probabilities": probs.tolist()
                })
        except Exception:
            results = [{"sample": i, "prediction": int(p)} for i, p in enumerate(predictions)]

        return json.dumps(results, indent=2)

    except Exception as e:
        logger.error(f"Inference error: {e}")
        return f"❌ Inference failed: {str(e)}"
363
-
364
-
365
- # ============================================================================
366
- # HUGGING FACE INTEGRATION
367
- # ============================================================================
368
-
369
def upload_model_to_hub(model_id: str, repo_id: str, hf_token: str) -> str:
    """Push a registered model's artifacts (model, scaler, metadata) to the Hub."""
    try:
        if not hf_token:
            return "❌ Hugging Face token required for upload"

        model, scaler = model_registry.get_model(model_id)
        if model is None:
            return f"❌ Model '{model_id}' not found"

        api = HfApi(token=hf_token)

        # Create repo if it doesn't exist
        try:
            create_repo(repo_id, token=hf_token, repo_type="model", exist_ok=True)
        except Exception as e:
            logger.warning(f"Repo creation note: {e}")

        # Write the registry metadata to a sidecar JSON file first, then
        # push all three artifacts in one pass.
        metadata = model_registry.metadata.get(model_id, {})
        metadata_json = json.dumps(metadata, indent=2, default=str)
        with open(MODELS_DIR / f"{model_id}_metadata.json", 'w') as f:
            f.write(metadata_json)

        artifact_names = (
            f"{model_id}_model.pkl",
            f"{model_id}_scaler.pkl",
            f"{model_id}_metadata.json",
        )
        for artifact_name in artifact_names:
            upload_file(
                path_or_fileobj=str(MODELS_DIR / artifact_name),
                path_in_repo=artifact_name,
                repo_id=repo_id,
                token=hf_token,
                repo_type="model"
            )

        return f"""
## Model Uploaded Successfully!

**Model ID:** `{model_id}`
**Repository:** `{repo_id}`
**URL:** https://huggingface.co/{repo_id}

### Files Uploaded:
- `{model_id}_model.pkl`
- `{model_id}_scaler.pkl`
- `{model_id}_metadata.json`

You can now use this model from the Hub!
"""

    except Exception as e:
        logger.error(f"Upload error: {e}")
        return f"❌ Upload failed: {str(e)}"
440
-
441
 
442
def download_model_from_hub(repo_id: str, model_filename: str, hf_token: str) -> str:
    """Download a model (and its scaler, if present) from the Hub and register it.

    Args:
        repo_id: Hub repository id, e.g. "username/repo-name".
        model_filename: Filename of the pickled model, "<id>_model.pkl".
        hf_token: Optional token for private repos ("" means anonymous).

    Returns:
        Markdown success message, or a "❌ ..." error message.
    """
    try:
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename=model_filename,
            token=hf_token if hf_token else None
        )

        # The scaler is optional; a missing file is not an error.
        # Was a bare `except:`; narrowed to Exception.
        scaler_filename = model_filename.replace("_model.pkl", "_scaler.pkl")
        try:
            scaler_path = hf_hub_download(
                repo_id=repo_id,
                filename=scaler_filename,
                token=hf_token if hf_token else None
            )
        except Exception:
            scaler_path = None

        # SECURITY: joblib.load unpickles arbitrary code — only download
        # from repositories you trust.
        model = joblib.load(model_path)
        # NOTE(review): the fallback StandardScaler() is *unfitted*, so
        # inference with it will fail until fit — confirm intended.
        scaler = joblib.load(scaler_path) if scaler_path else StandardScaler()

        # Register in the in-memory caches so inference can use it immediately.
        model_id = model_filename.replace("_model.pkl", "")
        model_registry.models[model_id] = model
        model_registry.scalers[model_id] = scaler

        return f"""
## Model Downloaded Successfully!

**Model ID:** `{model_id}`
**Source:** `{repo_id}`

The model is now available for inference.
"""

    except Exception as e:
        logger.error(f"Download error: {e}")
        return f"❌ Download failed: {str(e)}"
482
-
483
-
484
- # ============================================================================
485
- # API ENDPOINTS (For Backend Integration)
486
- # ============================================================================
487
-
488
def api_predict(model_id: str, features: Dict) -> Dict:
    """API endpoint: predict a single sample.

    Args:
        model_id: Registry id of the model to use.
        features: Mapping of feature name -> value for one sample.

    Returns:
        Dict with prediction, optional confidence, and timestamp; on any
        failure an {"error": ...} dict (this endpoint never raises).
    """
    try:
        model, scaler = model_registry.get_model(model_id)
        if model is None:
            return {"error": f"Model '{model_id}' not found"}

        df = pd.DataFrame([features])
        X_scaled = scaler.transform(df.fillna(0))
        prediction = model.predict(X_scaled)[0]

        # Not every estimator exposes predict_proba (e.g. IsolationForest).
        # Was a bare `except:`; narrowed to Exception.
        try:
            proba = model.predict_proba(X_scaled)[0]
            confidence = float(max(proba))
        except Exception:
            confidence = None

        return {
            "model_id": model_id,
            "prediction": int(prediction),
            "confidence": confidence,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        return {"error": str(e)}
513
-
514
-
515
def api_batch_predict(model_id: str, batch_data: List[Dict]) -> List[Dict]:
    """API endpoint: run api_predict over each item and collect the results."""
    return [api_predict(model_id, sample) for sample in batch_data]
522
 
 
 
 
 
 
 
 
 
 
 
 
 
523
 
524
  # ============================================================================
525
  # GRADIO INTERFACE
526
  # ============================================================================
527
 
528
# Custom CSS applied to the whole Blocks app.
custom_css = """
.gradio-container {
    font-family: 'Inter', sans-serif;
}
.main-title {
    text-align: center;
    color: #1a1a2e;
    margin-bottom: 20px;
}
.tab-content {
    padding: 20px;
}
"""

# Build interface: five tabs (train / inference / models / hub / API docs),
# each wired to the handler functions defined above.
with gr.Blocks(css=custom_css, title="CyberForge AI - ML Training Platform") as demo:
    gr.Markdown("""
# 🔐 CyberForge AI - ML Training Platform

**Train, Deploy, and Serve Cybersecurity ML Models**

This platform enables you to:
- 📊 Upload and train models on cybersecurity datasets
- 🚀 Deploy models to Hugging Face Hub
- 🔗 Integrate with your backend via API
- 🤖 Run inference on trained models
""")

    with gr.Tabs():
        # ==================== TRAINING TAB ====================
        with gr.TabItem("🎯 Train Model"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Dataset Configuration")

                    train_file = gr.File(
                        label="Upload Dataset (CSV, JSON, or Parquet)",
                        file_types=[".csv", ".json", ".parquet"]
                    )

                    task_type = gr.Dropdown(
                        choices=SECURITY_TASKS,
                        value="Malware Detection",
                        label="Security Task Type"
                    )

                    model_type = gr.Dropdown(
                        choices=list(MODEL_TYPES.keys()),
                        value="Random Forest",
                        label="Model Type"
                    )

                    target_column = gr.Textbox(
                        label="Target Column Name",
                        placeholder="e.g., 'label', 'is_malicious', 'attack_type'"
                    )

                    test_size = gr.Slider(
                        minimum=0.1,
                        maximum=0.4,
                        value=0.2,
                        step=0.05,
                        label="Test Size"
                    )

                    model_name = gr.Textbox(
                        label="Model Name",
                        placeholder="e.g., 'malware_detector_v1'",
                        value="cyberforge_model"
                    )

                    train_btn = gr.Button("🚀 Train Model", variant="primary")

                with gr.Column(scale=1):
                    gr.Markdown("### Training Results")
                    training_output = gr.Markdown()
                    classification_report_output = gr.Textbox(
                        label="Classification Report",
                        lines=10
                    )
                    trained_model_id = gr.Textbox(
                        label="Trained Model ID",
                        interactive=False
                    )

            # Wire the training button to train_model (outputs: log, report, id).
            train_btn.click(
                fn=train_model,
                inputs=[train_file, task_type, model_type, target_column, test_size, model_name],
                outputs=[training_output, classification_report_output, trained_model_id]
            )

        # ==================== INFERENCE TAB ====================
        with gr.TabItem("🔮 Run Inference"):
            with gr.Row():
                with gr.Column():
                    inference_model_id = gr.Textbox(
                        label="Model ID",
                        placeholder="Enter the model ID to use"
                    )

                    inference_input = gr.Textbox(
                        label="Input Data (JSON format)",
                        placeholder='[{"feature1": 0.5, "feature2": 1.2, ...}]',
                        lines=5
                    )

                    inference_btn = gr.Button("🔮 Run Inference", variant="primary")

                with gr.Column():
                    inference_output = gr.Textbox(
                        label="Predictions",
                        lines=10
                    )

            inference_btn.click(
                fn=run_inference,
                inputs=[inference_model_id, inference_input],
                outputs=[inference_output]
            )

        # ==================== MODELS TAB ====================
        with gr.TabItem("🤖 Models"):
            gr.Markdown("### Trained Models")

            refresh_btn = gr.Button("🔄 Refresh Models List")
            models_list = gr.Markdown()

            refresh_btn.click(
                fn=list_trained_models,
                outputs=[models_list]
            )

            # Auto-refresh on load
            demo.load(
                fn=list_trained_models,
                outputs=[models_list]
            )

        # ==================== HUB TAB ====================
        with gr.TabItem("☁️ Hugging Face Hub"):
            gr.Markdown("### Upload & Download Models")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### Upload to Hub")
                    upload_model_id = gr.Textbox(
                        label="Model ID to Upload"
                    )
                    upload_repo_id = gr.Textbox(
                        label="Hub Repository ID",
                        placeholder="username/repo-name"
                    )
                    upload_token = gr.Textbox(
                        label="Hugging Face Token",
                        type="password"
                    )
                    upload_btn = gr.Button("⬆️ Upload Model", variant="primary")
                    upload_result = gr.Markdown()

                with gr.Column():
                    gr.Markdown("#### Download from Hub")
                    download_repo_id = gr.Textbox(
                        label="Hub Repository ID",
                        placeholder="username/repo-name"
                    )
                    download_filename = gr.Textbox(
                        label="Model Filename",
                        placeholder="model_name_model.pkl"
                    )
                    download_token = gr.Textbox(
                        label="Hugging Face Token (optional)",
                        type="password"
                    )
                    download_btn = gr.Button("⬇️ Download Model", variant="secondary")
                    download_result = gr.Markdown()

            upload_btn.click(
                fn=upload_model_to_hub,
                inputs=[upload_model_id, upload_repo_id, upload_token],
                outputs=[upload_result]
            )

            download_btn.click(
                fn=download_model_from_hub,
                inputs=[download_repo_id, download_filename, download_token],
                outputs=[download_result]
            )

        # ==================== API TAB ====================
        # Static documentation only — no event handlers in this tab.
        with gr.TabItem("🔗 API Integration"):
            gr.Markdown("""
### API Integration Guide

Your backend can integrate with this Space using the Gradio Client library or direct API calls.

#### Python Client Example:

```python
from gradio_client import Client

# Connect to your Space
client = Client("Che237/cyberforge")

# Run inference
result = client.predict(
    model_id="your_model_id",
    input_data='[{"feature1": 0.5, "feature2": 1.2}]',
    api_name="/run_inference"
)
print(result)
```

#### API Endpoints:

| Endpoint | Description |
|----------|-------------|
| `/train_model` | Train a new model |
| `/run_inference` | Run predictions |
| `/list_trained_models` | List available models |
| `/upload_model_to_hub` | Upload model to Hub |

#### Backend Integration (Node.js):

```javascript
const { Client } = require("@gradio/client");

async function runPrediction(modelId, features) {
    const client = await Client.connect("Che237/cyberforge");
    const result = await client.predict("/run_inference", {
        model_id: modelId,
        input_data: JSON.stringify([features])
    });
    return JSON.parse(result.data);
}
```
""")
765
-
766
- # Launch the demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
 
1
  """
2
+ CyberForge AI - ML Training & Inference Platform
3
+ Hugging Face Spaces deployment with Notebook execution support
4
  """
5
 
6
  import gradio as gr
 
8
  import numpy as np
9
  import json
10
  import os
11
+ import subprocess
12
+ import sys
13
  from pathlib import Path
14
  from datetime import datetime
15
  import logging
16
  from typing import Dict, List, Any, Optional, Tuple
 
17
 
18
  # ML Libraries
19
  from sklearn.model_selection import train_test_split, cross_val_score
 
21
  from sklearn.linear_model import LogisticRegression
22
  from sklearn.preprocessing import StandardScaler, LabelEncoder
23
  from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
24
+ import joblib
 
 
25
 
26
  # Hugging Face Hub
27
+ from huggingface_hub import HfApi, hf_hub_download, upload_file
28
 
29
  logging.basicConfig(level=logging.INFO)
30
  logger = logging.getLogger(__name__)
 
39
  DATASETS_DIR = Path("./datasets")
40
  DATASETS_DIR.mkdir(exist_ok=True)
41
 
42
+ NOTEBOOKS_DIR = Path("./notebooks")
43
+
44
  # Model types available for training
45
  MODEL_TYPES = {
46
  "Random Forest": RandomForestClassifier,
 
64
  ]
65
 
66
  # ============================================================================
67
+ # NOTEBOOK EXECUTION
68
  # ============================================================================
69
 
70
def get_available_notebooks() -> List[str]:
    """Return the sorted names of all *.ipynb files in NOTEBOOKS_DIR.

    An absent notebooks directory yields an empty list.
    """
    if not NOTEBOOKS_DIR.exists():
        return []
    return sorted(path.name for path in NOTEBOOKS_DIR.glob("*.ipynb"))
79
+
80
def read_notebook_content(notebook_name: str) -> str:
    """Render a notebook's cells as a markdown document.

    Markdown cells are emitted verbatim; code cells are wrapped in fenced
    python blocks with a 1-based cell header.

    Args:
        notebook_name: Filename relative to NOTEBOOKS_DIR.

    Returns:
        Markdown string, or an error message if the file is missing/unreadable.
    """
    notebook_path = NOTEBOOKS_DIR / notebook_name
    if not notebook_path.exists():
        return f"Notebook not found: {notebook_name}"

    try:
        # .ipynb files are UTF-8 JSON per the nbformat spec; be explicit so
        # this does not break on platforms with a non-UTF-8 locale default.
        with open(notebook_path, 'r', encoding='utf-8') as f:
            nb = json.load(f)

        output = f"# {notebook_name}\n\n"

        for i, cell in enumerate(nb.get('cells', []), 1):
            cell_type = cell.get('cell_type', 'code')
            source = ''.join(cell.get('source', []))

            if cell_type == 'markdown':
                output += f"{source}\n\n"
            else:
                output += f"### Cell {i} (Python)\n```python\n{source}\n```\n\n"

        return output
    except Exception as e:
        return f"Error reading notebook: {str(e)}"
104
+
105
def execute_notebook(notebook_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Execute a notebook via nbconvert in a subprocess and collect its outputs.

    Args:
        notebook_name: Filename relative to NOTEBOOKS_DIR.
        progress: Gradio progress reporter.

    Returns:
        (status message, collected text output). Per-cell timeout is 600 s;
        the whole subprocess is killed after 900 s.
    """
    notebook_path = NOTEBOOKS_DIR / notebook_name
    output_path = NOTEBOOKS_DIR / f"output_{notebook_name}"

    if not notebook_path.exists():
        return f"Error: Notebook not found: {notebook_name}", ""

    progress(0.1, desc="Starting notebook execution...")

    try:
        # Execute notebook using nbconvert; list-form argv avoids the shell.
        cmd = [
            sys.executable, "-m", "nbconvert",
            "--to", "notebook",
            "--execute",
            "--output", str(output_path.name),
            "--ExecutePreprocessor.timeout=600",
            "--ExecutePreprocessor.kernel_name=python3",
            str(notebook_path)
        ]

        progress(0.3, desc="Executing cells...")

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=str(NOTEBOOKS_DIR),
            timeout=900
        )

        progress(0.8, desc="Processing output...")

        if result.returncode == 0:
            # Read the executed copy back and harvest text outputs per cell.
            if output_path.exists():
                # .ipynb is UTF-8 JSON; be explicit about the encoding.
                with open(output_path, 'r', encoding='utf-8') as f:
                    executed_nb = json.load(f)

                outputs = []
                for i, cell in enumerate(executed_nb.get('cells', []), 1):
                    if cell.get('cell_type') == 'code':
                        cell_outputs = cell.get('outputs', [])
                        for out in cell_outputs:
                            # Stream outputs carry 'text'; rich outputs carry a
                            # mime-keyed 'data' dict — take text/plain if present.
                            if 'text' in out:
                                text = ''.join(out['text'])
                                outputs.append(f"Cell {i}:\n{text}")
                            elif 'data' in out:
                                if 'text/plain' in out['data']:
                                    text = ''.join(out['data']['text/plain'])
                                    outputs.append(f"Cell {i}:\n{text}")

                progress(1.0, desc="Complete!")
                return "Notebook executed successfully!", "\n\n".join(outputs)
            else:
                return "Notebook executed but output file not found", result.stdout
        else:
            return f"Execution failed:\n{result.stderr}", result.stdout

    except subprocess.TimeoutExpired:
        return "Error: Notebook execution timed out (15 min limit)", ""
    except Exception as e:
        return f"Error executing notebook: {str(e)}", ""
169
+
170
def run_notebook_cell(notebook_name: str, cell_number: int) -> str:
    """Execute one code cell from a notebook inside this process.

    SECURITY: the cell source is run with exec() in this app's global
    namespace — it can read/overwrite module globals and do anything the
    server process can. Only expose this with trusted notebooks.

    Args:
        notebook_name: Filename relative to NOTEBOOKS_DIR.
        cell_number: 1-based index among *code* cells only.

    Returns:
        Markdown-formatted captured stdout/stderr, or an "Error: ..." message.
    """
    notebook_path = NOTEBOOKS_DIR / notebook_name

    if not notebook_path.exists():
        return f"Error: Notebook not found"

    try:
        # .ipynb is UTF-8 JSON; be explicit about the encoding.
        with open(notebook_path, 'r', encoding='utf-8') as f:
            nb = json.load(f)

        # Only code cells count toward the user-facing cell numbering.
        cells = [c for c in nb.get('cells', []) if c.get('cell_type') == 'code']

        if cell_number < 1 or cell_number > len(cells):
            return f"Error: Cell {cell_number} not found. Available: 1-{len(cells)}"

        cell = cells[cell_number - 1]
        source = ''.join(cell.get('source', []))

        # Execute the code, capturing both output streams.
        import io
        from contextlib import redirect_stdout, redirect_stderr

        stdout_capture = io.StringIO()
        stderr_capture = io.StringIO()

        with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
            try:
                # Runs in globals() on purpose so later cells can see earlier
                # cells' names — but see the SECURITY note above.
                exec(source, globals())
            except Exception as e:
                return f"Error: {str(e)}"

        output = stdout_capture.getvalue()
        errors = stderr_capture.getvalue()

        result = f"### Cell {cell_number} Output:\n"
        if output:
            result += f"```\n{output}\n```\n"
        if errors:
            result += f"\n**Warnings/Errors:**\n```\n{errors}\n```"
        if not output and not errors:
            result += "*(No output)*"

        return result

    except Exception as e:
        return f"Error: {str(e)}"
217
+
218
+ # ============================================================================
219
+ # MODEL TRAINING (existing functionality)
220
+ # ============================================================================
221
+
222
class SecurityModelTrainer:
    """Train and evaluate scikit-learn models for cybersecurity tasks.

    Holds a StandardScaler and LabelEncoder that are (re)fitted on each
    prepare_data call, so one trainer instance handles one dataset at a time.
    """

    def __init__(self):
        self.scaler = StandardScaler()          # fitted in prepare_data
        self.label_encoder = LabelEncoder()     # fitted if the target is textual
        self.models = {}                        # reserved for trained models

    def prepare_data(self, df: pd.DataFrame, target_col: str = 'label') -> Tuple:
        """Split df into scaled train/test arrays.

        Raises:
            ValueError: if target_col is absent, or no numeric feature
                columns remain after filtering.
        """
        if target_col not in df.columns:
            raise ValueError(f"Target column '{target_col}' not found")

        X = df.drop(columns=[target_col])
        y = df[target_col]

        # Keep only numeric feature columns (non-numeric columns are DROPPED,
        # not encoded) and impute NaNs with 0.
        X = X.select_dtypes(include=[np.number]).fillna(0)
        if X.shape[1] == 0:
            # Fail with a clear message instead of an opaque sklearn error.
            raise ValueError("No numeric feature columns found in dataset")

        # Encode a textual target as integer class labels.
        if y.dtype == 'object':
            y = self.label_encoder.fit_transform(y)

        X_scaled = self.scaler.fit_transform(X)

        return train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    def train_model(self, model_type: str, X_train, y_train):
        """Instantiate and fit the estimator selected by model_type.

        Raises:
            ValueError: if model_type is not a key of MODEL_TYPES.
        """
        if model_type not in MODEL_TYPES:
            raise ValueError(f"Unknown model type: {model_type}")

        model_class = MODEL_TYPES[model_type]

        if model_type == "Isolation Forest (Anomaly)":
            model = model_class(contamination=0.1, random_state=42)
        else:
            model = model_class(random_state=42)

        # IsolationForest ignores y during fit, so one call works for both.
        model.fit(X_train, y_train)
        return model

    def evaluate_model(self, model, X_test, y_test) -> Dict:
        """Return accuracy and weighted F1 for model on the test split.

        NOTE(review): IsolationForest.predict returns -1/1, not the 0/1
        labels in y_test, so these metrics are not meaningful for the
        anomaly model — confirm whether a -1 -> 1 remap is intended.
        """
        y_pred = model.predict(X_test)

        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'f1_score': f1_score(y_test, y_pred, average='weighted', zero_division=0)
        }

        return metrics


# Module-level trainer shared by the Gradio handlers.
trainer = SecurityModelTrainer()
 
 
 
275
 
276
def train_model_from_data(data_file, model_type: str, task: str, progress=gr.Progress()):
    """Train a model from an uploaded CSV and save it under MODELS_DIR.

    Returns (markdown summary, saved model path, metrics JSON), or an error
    message with (None, None) on failure.
    """
    if data_file is None:
        return "Please upload a CSV file", None, None

    progress(0.1, desc="Loading data...")

    try:
        df = pd.read_csv(data_file.name)
        progress(0.3, desc="Preparing data...")

        X_train, X_test, y_train, y_test = trainer.prepare_data(df)

        progress(0.5, desc=f"Training {model_type}...")
        model = trainer.train_model(model_type, X_train, y_train)

        progress(0.8, desc="Evaluating model...")
        metrics = trainer.evaluate_model(model, X_test, y_test)

        # Persist under a slug built from the task and model names.
        task_slug = task.lower().replace(' ', '_')
        type_slug = model_type.lower().replace(' ', '_')
        model_path = MODELS_DIR / f"{task_slug}_{type_slug}.pkl"
        joblib.dump(model, model_path)

        progress(1.0, desc="Complete!")

        result = f"""
## Training Complete!

**Task:** {task}
**Model:** {model_type}
**Samples:** {len(df)}

### Metrics
- Accuracy: {metrics['accuracy']:.4f}
- F1 Score: {metrics['f1_score']:.4f}

**Model saved to:** {model_path}
"""

        return result, str(model_path), json.dumps(metrics, indent=2)

    except Exception as e:
        return f"Error: {str(e)}", None, None
 
 
320
 
321
def run_inference(model_file, features_text: str):
    """Run a single prediction with an uploaded, pickled model.

    Args:
        model_file: Uploaded .pkl — either a plain path string (Gradio 4.x)
            or a tempfile-like object with a ``.name`` attribute.
        features_text: JSON object mapping feature names to numeric values.
            Insertion order must match the column order used at training
            time, since only the values are forwarded to the model.

    Returns:
        Pretty-printed JSON with 'prediction', 'features_used' and, when the
        model supports predict_proba, 'confidence'; or an error string.
    """
    if model_file is None:
        return "Please upload a model file"

    try:
        # Accept both a str path and a file-like object across Gradio versions.
        model_path = model_file if isinstance(model_file, str) else model_file.name
        model = joblib.load(model_path)

        features = json.loads(features_text)
        if not isinstance(features, dict):
            # list(features.values()) below requires a mapping; fail clearly.
            return 'Error: features must be a JSON object, e.g. {"url_length": 50}'
        X = np.array([list(features.values())])

        prediction = model.predict(X)[0]

        result = {
            'prediction': int(prediction),
            'features_used': len(features)
        }

        # Report the winning class probability when the model exposes it.
        if hasattr(model, 'predict_proba'):
            proba = model.predict_proba(X)[0]
            result['confidence'] = float(max(proba))

        return json.dumps(result, indent=2)

    except Exception as e:
        return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
def list_trained_models():
    """Render a markdown bullet list of every saved .pkl model in MODELS_DIR."""
    saved = list(MODELS_DIR.glob("*.pkl"))
    if not saved:
        return "No trained models found"

    # One bullet per artifact, with its on-disk size in kilobytes.
    entries = [
        f"- **{path.name}** ({path.stat().st_size / 1024:.1f} KB)\n"
        for path in saved
    ]
    return "## Trained Models\n\n" + "".join(entries)
360
 
361
  # ============================================================================
362
  # GRADIO INTERFACE
363
  # ============================================================================
364
 
365
def create_interface():
    """Build and return the Gradio Blocks app.

    Tabs: notebook execution, model training, inference, saved-model listing,
    and static API documentation. Event handlers reference the module-level
    callbacks (train_model_from_data, run_inference, notebook helpers).
    """

    with gr.Blocks(title="CyberForge AI", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔐 CyberForge AI - ML Training Platform

        Train cybersecurity ML models and run Jupyter notebooks on Hugging Face.
        """)

        with gr.Tabs():
            # ============ NOTEBOOKS TAB ============
            with gr.TabItem("📓 Notebooks"):
                gr.Markdown("""
                ### Run ML Pipeline Notebooks
                Execute the CyberForge ML notebooks directly in the cloud.
                """)

                with gr.Row():
                    with gr.Column(scale=1):
                        # Scan the notebook directory once and reuse the result
                        # for both the choices and the initial value.
                        notebooks = get_available_notebooks()
                        notebook_dropdown = gr.Dropdown(
                            choices=notebooks,
                            label="Select Notebook",
                            value=notebooks[0] if notebooks else None
                        )

                        refresh_btn = gr.Button("🔄 Refresh List")
                        view_btn = gr.Button("👁 View Content", variant="secondary")
                        execute_btn = gr.Button("▶ Execute Notebook", variant="primary")

                        gr.Markdown("### Run Single Cell")
                        cell_number = gr.Number(label="Cell Number", value=1, minimum=1)
                        run_cell_btn = gr.Button("Run Cell")

                    with gr.Column(scale=2):
                        notebook_status = gr.Markdown("Select a notebook to view or execute.")
                        notebook_output = gr.Markdown("", label="Output")

                def refresh_notebooks():
                    # Re-scan on demand so notebooks uploaded after startup appear.
                    notebooks = get_available_notebooks()
                    return gr.update(choices=notebooks, value=notebooks[0] if notebooks else None)

                refresh_btn.click(refresh_notebooks, outputs=notebook_dropdown)
                view_btn.click(read_notebook_content, inputs=notebook_dropdown, outputs=notebook_output)
                execute_btn.click(execute_notebook, inputs=notebook_dropdown, outputs=[notebook_status, notebook_output])
                run_cell_btn.click(run_notebook_cell, inputs=[notebook_dropdown, cell_number], outputs=notebook_output)

            # ============ TRAIN MODEL TAB ============
            with gr.TabItem("🎯 Train Model"):
                gr.Markdown("""
                ### Train a Security ML Model
                Upload your dataset and train a model for threat detection.
                """)

                with gr.Row():
                    with gr.Column():
                        task_dropdown = gr.Dropdown(
                            choices=SECURITY_TASKS,
                            label="Security Task",
                            value="Phishing Detection"
                        )
                        model_dropdown = gr.Dropdown(
                            choices=list(MODEL_TYPES.keys()),
                            label="Model Type",
                            value="Random Forest"
                        )
                        data_upload = gr.File(label="Upload Training Data (CSV)", file_types=[".csv"])
                        train_btn = gr.Button("🚀 Train Model", variant="primary")

                    with gr.Column():
                        train_output = gr.Markdown("Upload data and click Train to begin.")
                        # Hidden outputs: kept for programmatic/API consumers.
                        model_path_output = gr.Textbox(label="Model Path", visible=False)
                        metrics_output = gr.Textbox(label="Metrics JSON", visible=False)

                train_btn.click(
                    train_model_from_data,
                    inputs=[data_upload, model_dropdown, task_dropdown],
                    outputs=[train_output, model_path_output, metrics_output]
                )

            # ============ INFERENCE TAB ============
            with gr.TabItem("🔍 Inference"):
                gr.Markdown("""
                ### Run Model Inference
                Load a trained model and make predictions.
                """)

                with gr.Row():
                    with gr.Column():
                        model_upload = gr.File(label="Upload Model (.pkl)")
                        features_input = gr.Textbox(
                            label="Features (JSON)",
                            value='{"url_length": 50, "has_https": 1, "digit_count": 5}',
                            lines=5
                        )
                        predict_btn = gr.Button("🎯 Predict", variant="primary")

                    with gr.Column():
                        prediction_output = gr.Textbox(label="Prediction Result", lines=10)

                predict_btn.click(run_inference, inputs=[model_upload, features_input], outputs=prediction_output)

            # ============ MODELS TAB ============
            with gr.TabItem("📦 Models"):
                gr.Markdown("### Trained Models")
                models_list = gr.Markdown(list_trained_models())
                refresh_models_btn = gr.Button("🔄 Refresh")
                refresh_models_btn.click(list_trained_models, outputs=models_list)

            # ============ API TAB ============
            with gr.TabItem("🔌 API"):
                gr.Markdown("""
                ## API Integration

                ### Python Client

                ```python
                from huggingface_hub import InferenceClient

                client = InferenceClient("Che237/cyberforge")

                # Make prediction
                result = client.predict(
                    model_name="phishing_detection",
                    features={"url_length": 50, "has_https": 1}
                )
                print(result)
                ```

                ### REST API

                ```bash
                curl -X POST https://che237-cyberforge.hf.space/api/predict \\
                  -H "Content-Type: application/json" \\
                  -d '{"model_name": "phishing_detection", "features": {"url_length": 50}}'
                ```

                ### Notebook Execution

                The notebooks in this Space implement the complete CyberForge ML pipeline:

                | # | Notebook | Purpose |
                |---|----------|---------|
                | 00 | environment_setup | System validation |
                | 01 | data_acquisition | Data collection |
                | 02 | feature_engineering | Feature extraction |
                | 03 | model_training | Train models |
                | 04 | agent_intelligence | AI reasoning |
                | 05 | model_validation | Testing |
                | 06 | backend_integration | API packaging |
                | 07 | deployment_artifacts | Deployment |
                """)

        gr.Markdown("""
        ---
        **CyberForge AI** | [GitHub](https://github.com/Che237/cyberforge) | [Datasets](https://huggingface.co/datasets/Che237/cyberforge-datasets)
        """)

    return demo
524
+
525
+ # ============================================================================
526
+ # MAIN
527
+ # ============================================================================
528
+
529
if __name__ == "__main__":
    # Build the Gradio app and serve it; launch() blocks, which is what
    # Hugging Face Spaces expects from the entry script.
    demo = create_interface()
    demo.launch()