LorenzoNava committed on
Commit
6792b7f
·
1 Parent(s): 18ef60c

feat: Add production-grade Gradio training interface with real-time monitoring

Features:
- Real-time training progress with live metrics
- Interactive hyperparameter configuration (defaults: 10 epochs, batch size 16)
- Live visualization with Plotly (loss, accuracy, F1 score, learning rate)
- Thread-safe training state management (see the sketch after this list)
- Automatic model export to local directory
- Training logs streaming
- GPU/CPU automatic detection
- Early stopping and checkpoint management
- Production-ready error handling
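
The real-time features above share one pattern: training runs in a daemon thread that mutates a lock-guarded state object, and a `gr.Timer` polls that state every 2 seconds to refresh the UI. A minimal sketch of that pattern (simplified from app.py; the counter loop below is a stand-in for the real training job):

```python
import threading
import time

import gradio as gr

state = {"step": 0}
lock = threading.Lock()

def fake_training_job():
    # Stand-in for train_model(): mutate shared state under the lock
    for _ in range(100):
        time.sleep(0.5)
        with lock:
            state["step"] += 1

def start():
    threading.Thread(target=fake_training_job, daemon=True).start()
    return "Training started"

def poll():
    # Called by the timer; reads state under the same lock
    with lock:
        return f"step {state['step']}"

with gr.Blocks() as demo:
    start_btn = gr.Button("Start")
    status = gr.Textbox(label="Status")
    start_btn.click(start, outputs=status)
    gr.Timer(value=2).tick(fn=poll, outputs=status)

demo.launch()
```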

Optimal hyperparameters:
- Epochs: 10 (for best quality)
- Batch size: 16 (effective: 64 with gradient accumulation)
- Learning rate: 2e-5 with cosine schedule
- Warmup ratio: 0.1
- Gradient accumulation: 4 steps
- Early stopping: patience of 5 evaluations
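
How these settings combine, as a quick sanity check (a sketch; the sample count is a hypothetical placeholder, and app.py logs the true step count at train start):

```python
# How the commit's hyperparameters interact. The sample count below is a
# hypothetical placeholder; the real dataset has ~300K CVE-CWE pairs.
per_device_batch = 16
grad_accum_steps = 4
effective_batch = per_device_batch * grad_accum_steps   # 64

num_samples = 300_000                                   # placeholder
num_epochs = 10
steps_per_epoch = num_samples // effective_batch        # 4687 optimizer steps
total_steps = steps_per_epoch * num_epochs              # 46870

warmup_ratio = 0.1
warmup_steps = int(total_steps * warmup_ratio)          # 4687: LR ramps up, then
                                                        # decays on a cosine curve
print(effective_batch, total_steps, warmup_steps)
```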

Model will be exported to:
/Users/lorenzo/Documents/Claude Code/projects/mcps/mcp-cwe-identifier/models/deberta-cwe-final
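
Once training completes, the exported model can be loaded for inference along the lines below (an untested sketch with the absolute paths shortened; note that app.py writes `cwe_label_mapping.json` to the training output directory `models/deberta-cwe`, not to the export directory):

```python
# Minimal inference sketch, assuming training completed with the default
# paths from app.py's TrainingConfig (shortened here to relative paths).
import json

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_dir = "models/deberta-cwe-final"                      # exported model
mapping_path = "models/deberta-cwe/cwe_label_mapping.json"  # saved during training

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir).eval()
with open(mapping_path) as f:
    id_to_cwe = json.load(f)["id_to_cwe"]                   # JSON keys are strings

text = "Buffer overflow in the parser allows remote code execution."  # example CVE text
inputs = tokenizer(text, truncation=True, max_length=256, return_tensors="pt")
with torch.no_grad():
    pred_id = model(**inputs).logits.argmax(dim=-1).item()
print(id_to_cwe[str(pred_id)])                              # e.g. "CWE-787"
```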

Files changed (3)
  1. .gitignore +58 -0
  2. app.py +802 -0
  3. requirements.txt +24 -0
.gitignore ADDED
@@ -0,0 +1,58 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environments
+ venv/
+ env/
+ ENV/
+
+ # Model checkpoints and outputs
+ models/
+ checkpoints/
+ outputs/
+ *.pt
+ *.pth
+ *.bin
+ *.safetensors
+
+ # Logs
+ logs/
+ *.log
+ wandb/
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Jupyter
+ .ipynb_checkpoints/
+
+ # Temporary files
+ tmp/
+ temp/
app.py ADDED
@@ -0,0 +1,802 @@
+ #!/usr/bin/env python3
+ """
+ DeBERTa CWE Classification - Fine-Tuning Interface
+ ====================================================
+
+ Production-grade Gradio interface for training DeBERTa models
+ on the CVE-CWE classification task with real-time monitoring.
+
+ Features:
+ - Real-time training progress with live metrics
+ - Interactive hyperparameter configuration
+ - GPU/CPU automatic detection
+ - Checkpoint management and recovery
+ - Model export to local directory
+ - Training logs streaming
+ - Performance visualization
+
+ Author: Berghem - Smart Information Security
+ License: MIT
+ """
+
+ import os
+ import sys
+ import json
+ import time
+ import threading
+ from pathlib import Path
+ from typing import Dict, List, Optional, Tuple
+ from dataclasses import dataclass, asdict
+ import queue
+ import warnings
+
+ import gradio as gr
+ import torch
+ import numpy as np
+ import pandas as pd
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ from datasets import load_dataset, Dataset
+ from transformers import (
+     AutoTokenizer,
+     DebertaV2Tokenizer,
+     AutoModelForSequenceClassification,
+     TrainingArguments,
+     Trainer,
+     TrainerCallback,
+     EarlyStoppingCallback,
+ )
+ from sklearn.metrics import accuracy_score, f1_score, classification_report
+
+ warnings.filterwarnings('ignore')
+
+ # ============================================================================
+ # CONFIGURATION
+ # ============================================================================
+
+ @dataclass
+ class TrainingConfig:
+     """Training configuration with optimal defaults"""
+
+     # Model selection
+     model_name: str = "microsoft/deberta-v3-base"  # base, large, small
+
+     # Dataset
+     dataset_name: str = "stasvinokur/cve-and-cwe-dataset-1999-2025"
+     max_length: int = 256
+
+     # Training hyperparameters (OPTIMAL SETTINGS)
+     num_epochs: int = 10  # More epochs for better quality
+     batch_size: int = 16  # Larger batch size for stability
+     learning_rate: float = 2e-5
+     weight_decay: float = 0.01
+     warmup_ratio: float = 0.1
+     gradient_accumulation_steps: int = 4  # Effective batch size: 64
+
+     # Optimization
+     max_grad_norm: float = 1.0
+     adam_epsilon: float = 1e-8
+     lr_scheduler_type: str = "cosine"  # Smoother decay than linear
+
+     # Evaluation and checkpointing
+     eval_steps: int = 500
+     save_steps: int = 500
+     logging_steps: int = 50
+     save_total_limit: int = 3
+
+     # Early stopping
+     early_stopping_patience: int = 5
+     early_stopping_threshold: float = 0.001
+
+     # Output
+     output_dir: str = "/Users/lorenzo/Documents/Claude Code/projects/mcps/mcp-cwe-identifier/models/deberta-cwe"
+     local_export_dir: str = "/Users/lorenzo/Documents/Claude Code/projects/mcps/mcp-cwe-identifier/models"
+
+     # Hardware
+     use_fp16: bool = True  # Mixed precision for speed
+     dataloader_num_workers: int = 4
+
+     def to_dict(self) -> dict:
+         return asdict(self)
+
+
+ # Model configurations
+ MODEL_CONFIGS = {
+     "DeBERTa-v3-Small (44M params, fast)": "microsoft/deberta-v3-small",
+     "DeBERTa-v3-Base (86M params, recommended)": "microsoft/deberta-v3-base",
+     "DeBERTa-v3-Large (435M params, best quality)": "microsoft/deberta-v3-large",
+ }
+
+ # ============================================================================
+ # TRAINING STATE MANAGEMENT
+ # ============================================================================
+
+ class TrainingState:
+     """Thread-safe training state management"""
+
+     def __init__(self):
+         self.is_training = False
+         self.current_epoch = 0
+         self.total_epochs = 0
+         self.current_step = 0
+         self.total_steps = 0
+         self.train_loss = []
+         self.eval_loss = []
+         self.eval_accuracy = []
+         self.eval_f1 = []
+         self.learning_rates = []
+         self.logs = []
+         self.best_accuracy = 0.0
+         self.best_f1 = 0.0
+         self.training_start_time = None
+         self.training_end_time = None
+         self.lock = threading.Lock()
+         self.log_queue = queue.Queue()
+
+     def reset(self):
+         """Reset state for new training run"""
+         with self.lock:
+             self.is_training = False
+             self.current_epoch = 0
+             self.current_step = 0
+             self.train_loss = []
+             self.eval_loss = []
+             self.eval_accuracy = []
+             self.eval_f1 = []
+             self.learning_rates = []
+             self.logs = []
+             self.best_accuracy = 0.0
+             self.best_f1 = 0.0
+             self.training_start_time = None
+             self.training_end_time = None
+
+     def add_log(self, message: str):
+         """Add log message"""
+         timestamp = time.strftime("%H:%M:%S")
+         log_entry = f"[{timestamp}] {message}"
+         with self.lock:
+             self.logs.append(log_entry)
+             self.log_queue.put(log_entry)
+
+     def get_logs(self) -> str:
+         """Get all logs as string"""
+         with self.lock:
+             return "\n".join(self.logs[-100:])  # Last 100 lines
+
+     def get_progress(self) -> Dict:
+         """Get current progress"""
+         with self.lock:
+             elapsed = 0
+             if self.training_start_time:
+                 end_time = self.training_end_time or time.time()
+                 elapsed = end_time - self.training_start_time
+
+             return {
+                 "is_training": self.is_training,
+                 "epoch": f"{self.current_epoch}/{self.total_epochs}",
+                 "step": f"{self.current_step}/{self.total_steps}",
+                 "progress": self.current_step / max(self.total_steps, 1),
+                 "elapsed_time": f"{elapsed/60:.1f} min",
+                 "best_accuracy": f"{self.best_accuracy*100:.2f}%",
+                 "best_f1": f"{self.best_f1*100:.2f}%",
+             }
+
+ # Global training state
+ training_state = TrainingState()
+
+ # ============================================================================
+ # GRADIO CALLBACK FOR REAL-TIME UPDATES
+ # ============================================================================
+
+ class GradioProgressCallback(TrainerCallback):
+     """Custom callback that streams progress to the Gradio UI"""
+
+     def __init__(self, state: TrainingState):
+         self.state = state
+
+     def on_train_begin(self, args, state, control, **kwargs):
+         self.state.training_start_time = time.time()
+         self.state.is_training = True
+         self.state.total_epochs = int(args.num_train_epochs)
+         self.state.total_steps = state.max_steps
+         self.state.add_log("🚀 Training started!")
+         self.state.add_log(f"📊 Total epochs: {self.state.total_epochs}")
+         self.state.add_log(f"📈 Total steps: {self.state.total_steps}")
+
+     def on_epoch_begin(self, args, state, control, **kwargs):
+         self.state.current_epoch = int(state.epoch) if state.epoch else 0
+         self.state.add_log(f"\n{'='*60}")
+         self.state.add_log(f"📊 Epoch {self.state.current_epoch + 1}/{self.state.total_epochs}")
+         self.state.add_log(f"{'='*60}")
+
+     def on_log(self, args, state, control, logs=None, **kwargs):
+         if logs:
+             self.state.current_step = state.global_step
+
+             # Training loss
+             if "loss" in logs:
+                 self.state.train_loss.append((state.global_step, logs["loss"]))
+                 self.state.add_log(f"📉 Step {state.global_step}: Loss = {logs['loss']:.4f}")
+
+             # Learning rate
+             if "learning_rate" in logs:
+                 self.state.learning_rates.append((state.global_step, logs["learning_rate"]))
+
+             # Evaluation metrics
+             if "eval_loss" in logs:
+                 self.state.eval_loss.append((state.global_step, logs["eval_loss"]))
+                 self.state.add_log(f"📊 Evaluation Loss: {logs['eval_loss']:.4f}")
+
+             if "eval_accuracy" in logs:
+                 self.state.eval_accuracy.append((state.global_step, logs["eval_accuracy"]))
+                 self.state.best_accuracy = max(self.state.best_accuracy, logs["eval_accuracy"])
+                 self.state.add_log(f"🎯 Evaluation Accuracy: {logs['eval_accuracy']*100:.2f}%")
+
+             if "eval_f1_weighted" in logs:
+                 self.state.eval_f1.append((state.global_step, logs["eval_f1_weighted"]))
+                 self.state.best_f1 = max(self.state.best_f1, logs["eval_f1_weighted"])
+                 self.state.add_log(f"🎯 Evaluation F1 (weighted): {logs['eval_f1_weighted']*100:.2f}%")
+
+     def on_epoch_end(self, args, state, control, **kwargs):
+         elapsed = time.time() - self.state.training_start_time
+         self.state.add_log(f"✅ Epoch {self.state.current_epoch + 1} completed")
+         self.state.add_log(f"⏱️ Time elapsed: {elapsed/60:.1f} minutes")
+
+     def on_train_end(self, args, state, control, **kwargs):
+         self.state.training_end_time = time.time()
+         self.state.is_training = False
+         total_time = self.state.training_end_time - self.state.training_start_time
+         self.state.add_log(f"\n{'='*60}")
+         self.state.add_log("✅ TRAINING COMPLETED!")
+         self.state.add_log(f"{'='*60}")
+         self.state.add_log(f"⏱️ Total time: {total_time/60:.1f} minutes")
+         self.state.add_log(f"🎯 Best Accuracy: {self.state.best_accuracy*100:.2f}%")
+         self.state.add_log(f"🎯 Best F1 Score: {self.state.best_f1*100:.2f}%")
+
+ # ============================================================================
+ # DATASET PREPARATION
+ # ============================================================================
+
+ class CVECWEDataset:
+     """Prepare the CVE→CWE dataset for training"""
+
+     def __init__(self, tokenizer, config: TrainingConfig):
+         self.tokenizer = tokenizer
+         self.config = config
+         self.cwe_to_id = {}
+         self.id_to_cwe = {}
+
+     def load_and_prepare(self) -> Tuple[Dict[str, Dataset], int]:
+         """Load and prepare dataset"""
+         training_state.add_log("📦 Loading dataset...")
+
+         try:
+             dataset = load_dataset(self.config.dataset_name)
+             training_state.add_log(f"✅ Dataset loaded: {len(dataset['train']):,} training samples")
+         except Exception as e:
+             training_state.add_log(f"❌ Failed to load dataset: {e}")
+             raise
+
+         # Build CWE label mapping
+         training_state.add_log("🏷️ Building CWE label mapping...")
+         self._build_label_mapping(dataset['train'])
+         num_labels = len(self.cwe_to_id)
+         training_state.add_log(f"✅ Found {num_labels} unique CWE classes")
+
+         # Tokenize
+         training_state.add_log("🔤 Tokenizing dataset...")
+         tokenized = self._tokenize_dataset(dataset)
+         training_state.add_log("✅ Dataset prepared successfully")
+
+         return tokenized, num_labels
+
+     def _build_label_mapping(self, dataset):
+         """Build CWE → ID mapping"""
+         all_cwes = set()
+
+         for example in dataset:
+             cwe = example.get('CWE-ID')
+             if cwe and isinstance(cwe, str) and cwe.startswith('CWE-'):
+                 all_cwes.add(cwe)
+
+         sorted_cwes = sorted(all_cwes)
+         self.cwe_to_id = {cwe: idx for idx, cwe in enumerate(sorted_cwes)}
+         self.id_to_cwe = {idx: cwe for cwe, idx in self.cwe_to_id.items()}
+
+         # Save mapping (json.dump turns the integer keys of id_to_cwe into strings)
+         mapping_file = Path(self.config.output_dir) / "cwe_label_mapping.json"
+         mapping_file.parent.mkdir(parents=True, exist_ok=True)
+         with open(mapping_file, 'w') as f:
+             json.dump({
+                 'cwe_to_id': self.cwe_to_id,
+                 'id_to_cwe': self.id_to_cwe,
+                 'num_labels': len(self.cwe_to_id)
+             }, f, indent=2)
+
+     def _tokenize_dataset(self, dataset):
+         """Tokenize dataset"""
+
+         def tokenize_function(examples):
+             descriptions = examples.get('DESCRIPTION', [])
+             cwes = examples.get('CWE-ID', [])
+
+             labels = [
+                 self.cwe_to_id.get(cwe, -1) if cwe and cwe.startswith('CWE-') else -1
+                 for cwe in cwes
+             ]
+
+             tokenized = self.tokenizer(
+                 descriptions,
+                 truncation=True,
+                 padding='max_length',
+                 max_length=self.config.max_length,
+                 return_tensors=None
+             )
+
+             tokenized['labels'] = labels
+             return tokenized
+
+         tokenized = dataset.map(
+             tokenize_function,
+             batched=True,
+             desc="Tokenizing",
+             remove_columns=dataset['train'].column_names
+         )
+
+         # Filter out rows whose CWE was not in the label mapping (label -1)
+         tokenized = tokenized.filter(lambda x: x['labels'] >= 0)
+
+         return tokenized
+
+ # ============================================================================
+ # TRAINING FUNCTION
+ # ============================================================================
+
+ def compute_metrics(eval_pred):
+     """Compute evaluation metrics"""
+     predictions, labels = eval_pred
+     predictions = np.argmax(predictions, axis=1)
+
+     accuracy = accuracy_score(labels, predictions)
+     f1_macro = f1_score(labels, predictions, average='macro', zero_division=0)
+     f1_weighted = f1_score(labels, predictions, average='weighted', zero_division=0)
+
+     return {
+         'accuracy': accuracy,
+         'f1_macro': f1_macro,
+         'f1_weighted': f1_weighted,
+     }
+
+
+ def train_model(config: TrainingConfig):
+     """Main training function"""
+
+     try:
+         # Reset state
+         training_state.reset()
+
+         # Device detection
+         if torch.cuda.is_available():
+             device = "cuda"
+             device_name = torch.cuda.get_device_name(0)  # already includes the vendor on recent GPUs
+         elif torch.backends.mps.is_available():
+             device = "mps"
+             device_name = "Apple Silicon (M-series)"
+         else:
+             device = "cpu"
+             device_name = "CPU"
+
+         training_state.add_log(f"🖥️ Device: {device_name}")
+
+         # Load tokenizer
+         training_state.add_log(f"📚 Loading tokenizer: {config.model_name}")
+         tokenizer = DebertaV2Tokenizer.from_pretrained(config.model_name)
+
+         # Prepare dataset
+         dataset_prep = CVECWEDataset(tokenizer, config)
+         tokenized_dataset, num_labels = dataset_prep.load_and_prepare()
+
+         # Load model
+         training_state.add_log(f"🤖 Loading model: {config.model_name}")
+         training_state.add_log(f"🎯 Output classes: {num_labels} CWEs")
+
+         model = AutoModelForSequenceClassification.from_pretrained(
+             config.model_name,
+             num_labels=num_labels,
+             problem_type="single_label_classification"
+         )
+
+         # Count parameters
+         total_params = sum(p.numel() for p in model.parameters())
+         trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+         training_state.add_log(f"📊 Total parameters: {total_params:,}")
+         training_state.add_log(f"📊 Trainable parameters: {trainable_params:,}")
+
+         # Training arguments
+         training_args = TrainingArguments(
+             output_dir=config.output_dir,
+             num_train_epochs=config.num_epochs,
+             per_device_train_batch_size=config.batch_size,
+             per_device_eval_batch_size=config.batch_size * 2,
+             learning_rate=config.learning_rate,
+             weight_decay=config.weight_decay,
+             warmup_ratio=config.warmup_ratio,
+             gradient_accumulation_steps=config.gradient_accumulation_steps,
+             max_grad_norm=config.max_grad_norm,
+             adam_epsilon=config.adam_epsilon,
+             lr_scheduler_type=config.lr_scheduler_type,
+             fp16=config.use_fp16 and device == "cuda",
+             logging_dir=f"{config.output_dir}/logs",
+             logging_steps=config.logging_steps,
+             logging_first_step=True,
+             eval_strategy="steps",
+             eval_steps=config.eval_steps,
+             save_strategy="steps",
+             save_steps=config.save_steps,
+             save_total_limit=config.save_total_limit,
+             load_best_model_at_end=True,
+             metric_for_best_model="accuracy",
+             greater_is_better=True,
+             report_to="none",
+             dataloader_num_workers=config.dataloader_num_workers,
+         )
+
+         # Initialize trainer
+         trainer = Trainer(
+             model=model,
+             args=training_args,
+             train_dataset=tokenized_dataset["train"],
+             eval_dataset=tokenized_dataset.get("validation") or tokenized_dataset.get("test"),
+             tokenizer=tokenizer,
+             compute_metrics=compute_metrics,
+             callbacks=[
+                 GradioProgressCallback(training_state),
+                 EarlyStoppingCallback(
+                     early_stopping_patience=config.early_stopping_patience,
+                     early_stopping_threshold=config.early_stopping_threshold,
+                 )
+             ]
+         )
+
+         # Train
+         training_state.add_log("\n" + "="*60)
+         training_state.add_log("🚀 STARTING TRAINING")
+         training_state.add_log("="*60)
+
+         train_result = trainer.train()
+
+         # Save final model
+         training_state.add_log("\n💾 Saving final model...")
+         trainer.save_model(config.output_dir)
+         tokenizer.save_pretrained(config.output_dir)
+
+         # Save to local export directory
+         local_model_dir = Path(config.local_export_dir) / "deberta-cwe-final"
+         local_model_dir.mkdir(parents=True, exist_ok=True)
+         trainer.save_model(str(local_model_dir))
+         tokenizer.save_pretrained(str(local_model_dir))
+         training_state.add_log(f"✅ Model exported to: {local_model_dir}")
+
+         # Save metrics
+         metrics_file = Path(config.output_dir) / "training_metrics.json"
+         with open(metrics_file, 'w') as f:
+             json.dump(train_result.metrics, f, indent=2)
+
+         # Final evaluation
+         if "test" in tokenized_dataset or "validation" in tokenized_dataset:
+             test_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("validation")
+             eval_results = trainer.evaluate(test_dataset)
+
+             training_state.add_log("\n" + "="*60)
+             training_state.add_log("📊 FINAL EVALUATION RESULTS")
+             training_state.add_log("="*60)
+             training_state.add_log(f"✅ Accuracy: {eval_results['eval_accuracy']*100:.2f}%")
+             training_state.add_log(f"✅ F1 Score (macro): {eval_results['eval_f1_macro']*100:.2f}%")
+             training_state.add_log(f"✅ F1 Score (weighted): {eval_results['eval_f1_weighted']*100:.2f}%")
+
+             eval_file = Path(config.output_dir) / "evaluation_results.json"
+             with open(eval_file, 'w') as f:
+                 json.dump(eval_results, f, indent=2)
+
+         training_state.add_log("\n✅ Training completed successfully!")
+
+     except Exception as e:
+         training_state.add_log(f"\n❌ Training failed: {str(e)}")
+         training_state.is_training = False
+         raise
+
+ # ============================================================================
+ # VISUALIZATION FUNCTIONS
+ # ============================================================================
+
+ def create_metrics_plot():
+     """Create interactive metrics plot"""
+     if not training_state.train_loss and not training_state.eval_accuracy:
+         # Empty plot
+         fig = go.Figure()
+         fig.add_annotation(
+             text="Training not started yet",
+             xref="paper", yref="paper",
+             x=0.5, y=0.5, showarrow=False,
+             font=dict(size=20, color="gray")
+         )
+         fig.update_layout(
+             title="Training Metrics",
+             xaxis_title="Step",
+             yaxis_title="Value",
+             template="plotly_white",
+             height=400
+         )
+         return fig
+
+     # Create subplots
+     fig = make_subplots(
+         rows=2, cols=2,
+         subplot_titles=("Training Loss", "Evaluation Accuracy", "Evaluation F1 Score", "Learning Rate"),
+         vertical_spacing=0.12,
+         horizontal_spacing=0.1
+     )
+
+     # Training loss
+     if training_state.train_loss:
+         steps, losses = zip(*training_state.train_loss)
+         fig.add_trace(
+             go.Scatter(x=steps, y=losses, mode='lines', name='Train Loss', line=dict(color='red')),
+             row=1, col=1
+         )
+
+     # Evaluation accuracy
+     if training_state.eval_accuracy:
+         steps, accs = zip(*training_state.eval_accuracy)
+         fig.add_trace(
+             go.Scatter(x=steps, y=accs, mode='lines+markers', name='Eval Accuracy', line=dict(color='blue')),
+             row=1, col=2
+         )
+
+     # Evaluation F1
+     if training_state.eval_f1:
+         steps, f1s = zip(*training_state.eval_f1)
+         fig.add_trace(
+             go.Scatter(x=steps, y=f1s, mode='lines+markers', name='Eval F1', line=dict(color='green')),
+             row=2, col=1
+         )
+
+     # Learning rate
+     if training_state.learning_rates:
+         steps, lrs = zip(*training_state.learning_rates)
+         fig.add_trace(
+             go.Scatter(x=steps, y=lrs, mode='lines', name='Learning Rate', line=dict(color='orange')),
+             row=2, col=2
+         )
+
+     fig.update_layout(
+         showlegend=False,
+         template="plotly_white",
+         height=600,
+         title_text="Training Metrics Dashboard",
+         title_font_size=20
+     )
+
+     # Update axes labels
+     fig.update_xaxes(title_text="Step", row=2, col=1)
+     fig.update_xaxes(title_text="Step", row=2, col=2)
+     fig.update_yaxes(title_text="Loss", row=1, col=1)
+     fig.update_yaxes(title_text="Accuracy", row=1, col=2)
+     fig.update_yaxes(title_text="F1 Score", row=2, col=1)
+     fig.update_yaxes(title_text="LR", row=2, col=2)
+
+     return fig
+
+
+ def create_progress_info():
+     """Create progress information HTML"""
+     progress = training_state.get_progress()
+
+     if progress["is_training"]:
+         status_color = "green"
+         status_text = "🟢 TRAINING IN PROGRESS"
+     else:
+         status_color = "gray"
+         status_text = "⚪ READY"
+
+     html = f"""
+     <div style="padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                 border-radius: 10px; color: white; font-family: 'Arial', sans-serif;">
+         <h2 style="margin: 0 0 15px 0; font-size: 24px;">{status_text}</h2>
+         <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 15px;">
+             <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
+                 <div style="font-size: 12px; opacity: 0.8;">EPOCH</div>
+                 <div style="font-size: 24px; font-weight: bold;">{progress['epoch']}</div>
+             </div>
+             <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
+                 <div style="font-size: 12px; opacity: 0.8;">STEP</div>
+                 <div style="font-size: 24px; font-weight: bold;">{progress['step']}</div>
+             </div>
+             <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
+                 <div style="font-size: 12px; opacity: 0.8;">TIME ELAPSED</div>
+                 <div style="font-size: 24px; font-weight: bold;">{progress['elapsed_time']}</div>
+             </div>
+             <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
+                 <div style="font-size: 12px; opacity: 0.8;">BEST ACCURACY</div>
+                 <div style="font-size: 24px; font-weight: bold;">{progress['best_accuracy']}</div>
+             </div>
+             <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
+                 <div style="font-size: 12px; opacity: 0.8;">BEST F1 SCORE</div>
+                 <div style="font-size: 24px; font-weight: bold;">{progress['best_f1']}</div>
+             </div>
+             <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
+                 <div style="font-size: 12px; opacity: 0.8;">PROGRESS</div>
+                 <div style="font-size: 24px; font-weight: bold;">{progress['progress']*100:.1f}%</div>
+             </div>
+         </div>
+     </div>
+     """
+
+     return html
+
+ # ============================================================================
+ # GRADIO INTERFACE
+ # ============================================================================
+
+ def start_training(model_choice, epochs, batch_size, learning_rate, warmup_ratio,
+                    grad_accum, use_early_stopping):
+     """Start training in a background thread"""
+
+     if training_state.is_training:
+         return "❌ Training already in progress!"
+
+     # Update config
+     config = TrainingConfig()
+     config.model_name = MODEL_CONFIGS[model_choice]
+     config.num_epochs = int(epochs)
+     config.batch_size = int(batch_size)
+     config.learning_rate = float(learning_rate)
+     config.warmup_ratio = float(warmup_ratio)
+     config.gradient_accumulation_steps = int(grad_accum)
+
+     if not use_early_stopping:
+         config.early_stopping_patience = 999  # Effectively disabled
+
+     # Start training in background thread
+     thread = threading.Thread(target=train_model, args=(config,), daemon=True)
+     thread.start()
+
+     return "✅ Training started! Check the logs and metrics below for progress."
+
+
+ def update_ui():
+     """Update UI with current state"""
+     return (
+         create_progress_info(),
+         create_metrics_plot(),
+         training_state.get_logs(),
+         gr.update(interactive=not training_state.is_training),  # Enable/disable start button
+     )
+
+
+ # Build Gradio interface
+ with gr.Blocks(title="DeBERTa CWE Classification Training", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("""
+     # 🚀 DeBERTa CWE Classification - Fine-Tuning Dashboard
+
+     Train state-of-the-art DeBERTa models for CVE→CWE classification with real-time monitoring.
+
+     **Dataset:** stasvinokur/cve-and-cwe-dataset-1999-2025 (~300K CVE-CWE pairs)
+
+     **Task:** Single-label classification of vulnerabilities to Common Weakness Enumeration (CWE) classes
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### ⚙️ Training Configuration")
+
+             model_choice = gr.Dropdown(
+                 choices=list(MODEL_CONFIGS.keys()),
+                 value="DeBERTa-v3-Base (86M params, recommended)",
+                 label="Model Architecture",
+                 info="Larger models = better quality but slower training"
+             )
+
+             epochs = gr.Slider(
+                 minimum=1, maximum=20, value=10, step=1,
+                 label="Number of Epochs",
+                 info="Recommended: 10 for optimal quality"
+             )
+
+             batch_size = gr.Slider(
+                 minimum=4, maximum=32, value=16, step=4,
+                 label="Batch Size per Device",
+                 info="Larger = faster training, more memory"
+             )
+
+             learning_rate = gr.Slider(
+                 minimum=1e-6, maximum=1e-4, value=2e-5, step=1e-6,
+                 label="Learning Rate",
+                 info="Default: 2e-5 (recommended for DeBERTa)"
+             )
+
+             warmup_ratio = gr.Slider(
+                 minimum=0.0, maximum=0.3, value=0.1, step=0.01,
+                 label="Warmup Ratio",
+                 info="Fraction of training for LR warmup"
+             )
+
+             grad_accum = gr.Slider(
+                 minimum=1, maximum=8, value=4, step=1,
+                 label="Gradient Accumulation Steps",
+                 info="Effective batch size = batch_size × this value"
+             )
+
+             use_early_stopping = gr.Checkbox(
+                 value=True,
+                 label="Enable Early Stopping",
+                 info="Stop if no improvement for 5 evaluations"
+             )
+
+             start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
+             status_msg = gr.Textbox(label="Status", interactive=False)
+
+             gr.Markdown("""
+             ### 📊 Expected Training Time
+             - **Base model (GPU):** ~2-3 hours
+             - **Base model (CPU):** ~10-12 hours
+             - **Large model (GPU):** ~6-8 hours
+
+             ### 💾 Output Location
+             Model will be saved to:
+             `/Users/lorenzo/Documents/Claude Code/projects/mcps/mcp-cwe-identifier/models/deberta-cwe-final`
+             """)
+
+         with gr.Column(scale=2):
+             gr.Markdown("### 📈 Training Progress")
+
+             progress_html = gr.HTML(create_progress_info())
+             metrics_plot = gr.Plot(create_metrics_plot())
+
+             gr.Markdown("### 📝 Training Logs")
+             logs_box = gr.Textbox(
+                 label="Live Training Logs",
+                 lines=15,
+                 max_lines=20,
+                 interactive=False,
+                 show_copy_button=True
+             )
+
+     # Event handlers
+     start_btn.click(
+         fn=start_training,
+         inputs=[model_choice, epochs, batch_size, learning_rate, warmup_ratio,
+                 grad_accum, use_early_stopping],
+         outputs=status_msg
+     )
+
+     # Auto-refresh UI every 2 seconds using a timer
+     refresh_timer = gr.Timer(value=2, active=True)
+     refresh_timer.tick(
+         fn=update_ui,
+         outputs=[progress_html, metrics_plot, logs_box, start_btn]
+     )
+
+     gr.Markdown("""
+     ---
+     ### 🎯 Next Steps After Training
+     1. **Test Model:** Use the trained model for CWE prediction
+     2. **Integrate:** Update MCP server to use the new model
+     3. **Benchmark:** Compare against existing models
+     4. **Deploy:** Push to production environment
+
+     **Developed by:** Berghem - Smart Information Security | **License:** MIT
+     """)
+
+ # ============================================================================
+ # LAUNCH
+ # ============================================================================
+
+ if __name__ == "__main__":
+     demo.queue()  # Enable queuing for better concurrency
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         show_error=True
+     )
requirements.txt ADDED
@@ -0,0 +1,24 @@
+ # Core ML/DL frameworks
+ torch>=2.0.0
+ transformers>=4.41.0  # app.py uses the eval_strategy argument, added in 4.41
+ datasets>=2.14.0
+ tokenizers>=0.15.0
+
+ # Gradio for UI
+ gradio==5.49.1
+
+ # Data processing and visualization
+ numpy>=1.24.0
+ pandas>=2.0.0
+ plotly>=5.18.0
+ scikit-learn>=1.3.0
+
+ # Required by the transformers Trainer
+ accelerate>=0.24.0
+
+ # For better performance
+ sentencepiece>=0.1.99
+ protobuf>=3.20.0
+
+ # Utils
+ tqdm>=4.66.0
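
Quick start (assuming a local checkout containing these three files): install the pinned dependencies with `pip install -r requirements.txt`, then run `python app.py`; per app.py, the dashboard binds to 0.0.0.0 on port 7860.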