AI Agent committed on
Commit
a0098d0
·
0 Parent(s):

Deploy to Spaces

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +15 -0
  2. Dockerfile +69 -0
  3. README.md +59 -0
  4. app.py +16 -0
  5. backend/__init__.py +1 -0
  6. backend/__pycache__/__init__.cpython-312.pyc +0 -0
  7. backend/api/__init__.py +6 -0
  8. backend/api/__pycache__/__init__.cpython-312.pyc +0 -0
  9. backend/api/__pycache__/main.cpython-312.pyc +0 -0
  10. backend/api/main.py +61 -0
  11. backend/api/routes/__init__.py +1 -0
  12. backend/api/routes/__pycache__/__init__.cpython-312.pyc +0 -0
  13. backend/api/routes/__pycache__/analysis.cpython-312.pyc +0 -0
  14. backend/api/routes/__pycache__/models.cpython-312.pyc +0 -0
  15. backend/api/routes/__pycache__/quantization.cpython-312.pyc +0 -0
  16. backend/api/routes/__pycache__/system.cpython-312.pyc +0 -0
  17. backend/api/routes/analysis.py +249 -0
  18. backend/api/routes/models.py +411 -0
  19. backend/api/routes/quantization.py +366 -0
  20. backend/api/routes/system.py +64 -0
  21. backend/core/__init__.py +6 -0
  22. backend/core/__pycache__/__init__.cpython-312.pyc +0 -0
  23. backend/core/__pycache__/model_loader.cpython-312.pyc +0 -0
  24. backend/core/__pycache__/model_manager.cpython-312.pyc +0 -0
  25. backend/core/__pycache__/quantizer.cpython-312.pyc +0 -0
  26. backend/core/__pycache__/system_checker.cpython-312.pyc +0 -0
  27. backend/core/__pycache__/visualization.cpython-312.pyc +0 -0
  28. backend/core/model_loader.py +411 -0
  29. backend/core/model_manager.py +247 -0
  30. backend/core/quantizer.py +605 -0
  31. backend/core/system_checker.py +299 -0
  32. backend/core/visualization.py +277 -0
  33. backend/requirements.txt +11 -0
  34. docker-compose.yml +78 -0
  35. frontend/.gitignore +24 -0
  36. frontend/README.md +16 -0
  37. frontend/eslint.config.js +29 -0
  38. frontend/index.html +13 -0
  39. frontend/package-lock.json +0 -0
  40. frontend/package.json +35 -0
  41. frontend/public/vite.svg +1 -0
  42. frontend/src/App.css +42 -0
  43. frontend/src/App.jsx +82 -0
  44. frontend/src/assets/react.svg +1 -0
  45. frontend/src/components/Layout.jsx +297 -0
  46. frontend/src/index.css +751 -0
  47. frontend/src/main.jsx +10 -0
  48. frontend/src/pages/Analysis.jsx +483 -0
  49. frontend/src/pages/Dashboard.jsx +412 -0
  50. frontend/src/pages/ModelLoader.jsx +775 -0
.dockerignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ node_modules
2
+ dist
3
+ build
4
+ .git
5
+ .gitignore
6
+ venv
7
+ env
8
+ __pycache__
9
+ *.pyc
10
+ *.pyo
11
+ *.pyd
12
+ .DS_Store
13
+ .env
14
+ site-packages
15
+ .gemini
Dockerfile ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Multi-stage Dockerfile for Neural Network Quantizer
# Stage 1 builds the React frontend; Stage 2 serves it with the FastAPI backend.

# ============================================
# Stage 1: Build Frontend
# ============================================
FROM node:20-alpine AS frontend-build

WORKDIR /app/frontend

# Copy package manifests first so the npm install layer is cached
COPY frontend/package*.json ./

# Install dependencies reproducibly from the lockfile
RUN npm ci

# Copy frontend source
COPY frontend/ ./

# Build production bundle (output lands in /app/frontend/dist)
RUN npm run build

# ============================================
# Stage 2: Python Backend + Frontend
# ============================================
FROM python:3.11-slim

# Set environment variables.
# (Removed leftover GRADIO_SERVER_NAME/GRADIO_SERVER_PORT: the app is served by
# uvicorn/FastAPI — see CMD below — so the Gradio variables were dead config.)
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Install system dependencies (curl is required by HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy backend requirements
COPY backend/requirements.txt ./requirements.txt

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy backend code
COPY backend/ ./backend/

# Copy frontend build from stage 1
COPY --from=frontend-build /app/frontend/dist ./frontend/dist

# Copy HuggingFace Spaces entry point
COPY app.py ./

# Create non-root user (uid 1000 is the HF Spaces convention)
RUN useradd -m -u 1000 user
USER user

# Expose port (must match README app_port and the uvicorn --port below)
EXPOSE 7860

# Health check against the API's own health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/api/health || exit 1

# Start the application
CMD ["python", "-m", "uvicorn", "backend.api.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Neural Network Quantizer
3
+ emoji: ⚡
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ app_port: 7860
10
+ ---
11
+
12
+ # Neural Network Weight Quantizer
13
+
14
+ Quantize neural network weights to lower precision formats (INT8, INT4, NF4) with interactive visualizations.
15
+
16
+ ## Features
17
+
18
+ - 🔢 Multi-bit quantization (4-bit, 8-bit)
19
+ - 📊 Interactive weight visualizations
20
+ - 🤗 HuggingFace model support (optional)
21
+ - ⚡ GPU acceleration (when available)
22
+ - 📈 Quantization error analysis
23
+ - 🔄 Method comparison (INT8 vs INT4 vs NF4)
24
+
25
+ ## Quick Start
26
+
27
+ 1. Use the **Quantizer** tab to test on random weights
28
+ 2. Compare different methods in the **Analysis** tab
29
+ 3. Optionally load a HuggingFace model in the **Models** tab
30
+
31
+ ## API
32
+
33
+ The backend exposes a REST API at `/api`:
34
+
35
+ - `GET /api/system/info` - System capabilities
36
+ - `POST /api/quantize/weights` - Quantize custom weights
37
+ - `POST /api/models/load` - Load HuggingFace model
38
+ - `POST /api/analysis/compare` - Compare methods
39
+
40
+ ## 🚀 Deployment
41
+
42
+ ### Hugging Face Spaces
43
+ This project is configured for **Hugging Face Spaces** using the Docker SDK.
44
+
45
+ 1. Create a new Space on [Hugging Face](https://huggingface.co/new-space).
46
+ 2. Select **Docker** as the SDK.
47
+ 3. Push this repository to your Space:
48
+ ```bash
49
+ git remote add space https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
50
+ git push space main
51
+ ```
52
+
53
+ ### Docker
54
+ Run locally with Docker:
55
+ ```bash
56
+ docker build -t quantizer .
57
+ docker run -p 7860:7860 quantizer
58
+ ```
59
+ Open `http://localhost:7860`.
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
HuggingFace Spaces Entry Point
This file serves as the entry point for HuggingFace Spaces deployment.
It starts the FastAPI application which serves both the API and the React frontend.
"""

import uvicorn
from backend.api.main import app

if __name__ == "__main__":
    # Spaces expects the server on 0.0.0.0:7860 (matches Dockerfile EXPOSE / README app_port)
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
backend/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Backend package init"""
backend/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (204 Bytes). View file
 
backend/api/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ API Package Init
3
+ """
4
+ from .main import app
5
+
6
+ __all__ = ["app"]
backend/api/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (275 Bytes). View file
 
backend/api/__pycache__/main.cpython-312.pyc ADDED
Binary file (2.82 kB). View file
 
backend/api/main.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
FastAPI Main Application
Neural Network Weight Quantizer API

Wires up the API routers, CORS, a health endpoint, and (when a production
frontend build exists) static serving of the React SPA.
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pathlib import Path

from .routes import quantization, models, analysis, system

# Create FastAPI app
app = FastAPI(
    title="Neural Network Quantizer API",
    description="API for quantizing neural network weights to lower precision formats",
    version="1.0.0",
    docs_url="/api/docs",
    openapi_url="/api/openapi.json"
)

# CORS configuration.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is the
# wildcard-credentials combination browsers refuse; restrict origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(system.router, prefix="/api/system", tags=["System"])
app.include_router(models.router, prefix="/api/models", tags=["Models"])
app.include_router(quantization.router, prefix="/api/quantize", tags=["Quantization"])
app.include_router(analysis.router, prefix="/api/analysis", tags=["Analysis"])


# Health check (also used by the Dockerfile HEALTHCHECK)
@app.get("/api/health")
async def health_check():
    return {"status": "healthy", "service": "quantizer-api"}


# Serve frontend in production (dist/ only exists after `npm run build`)
FRONTEND_DIR = Path(__file__).parent.parent.parent / "frontend" / "dist"

if FRONTEND_DIR.exists():
    app.mount("/assets", StaticFiles(directory=FRONTEND_DIR / "assets"), name="assets")

    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        """Serve a real static file when it exists; otherwise fall back to
        index.html so client-side SPA routing works."""
        # Resolve and confine to FRONTEND_DIR so a crafted path (e.g. "../..")
        # cannot escape the frontend build directory.
        file_path = (FRONTEND_DIR / full_path).resolve()
        if str(file_path).startswith(str(FRONTEND_DIR.resolve())) and file_path.is_file():
            return FileResponse(file_path)
        return FileResponse(FRONTEND_DIR / "index.html")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
backend/api/routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Routes package"""
backend/api/routes/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (209 Bytes). View file
 
backend/api/routes/__pycache__/analysis.cpython-312.pyc ADDED
Binary file (11.5 kB). View file
 
backend/api/routes/__pycache__/models.cpython-312.pyc ADDED
Binary file (17.9 kB). View file
 
backend/api/routes/__pycache__/quantization.cpython-312.pyc ADDED
Binary file (15.9 kB). View file
 
backend/api/routes/__pycache__/system.cpython-312.pyc ADDED
Binary file (2.77 kB). View file
 
backend/api/routes/analysis.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Analysis Routes
3
+ Weight analysis and visualization endpoints
4
+ """
5
+
6
+ from fastapi import APIRouter, HTTPException
7
+ from pydantic import BaseModel
8
+ from typing import Optional, Dict, Any, List
9
+ import torch
10
+
11
+ from backend.core.model_loader import model_loader
12
+ from backend.core.visualization import visualizer
13
+ from backend.core.quantizer import (
14
+ QuantizationConfig, QuantizationMethod, QuantizationMode,
15
+ get_quantizer
16
+ )
17
+
18
+ router = APIRouter()
19
+
20
+
class AnalyzeLayerRequest(BaseModel):
    """Request to analyze a specific layer"""
    layer_name: str


class CompareQuantizationRequest(BaseModel):
    """Compare different quantization methods on same weights"""
    layer_name: Optional[str] = None  # when unset, random weights are generated
    in_features: int = 64
    out_features: int = 128
    methods: List[str] = ["int8", "int4", "nf4"]
32
+
33
+
@router.get("/weights/{layer_name}")
async def get_weight_analysis(layer_name: str) -> Dict[str, Any]:
    """Return summary statistics plus heatmap/histogram visualizations for one
    layer's weight tensor. 404s when no model is loaded or the layer is unknown."""
    if model_loader is None or model_loader.get_model() is None:
        raise HTTPException(status_code=404, detail="No model loaded")

    weights = model_loader.get_layer_weights(layer_name)
    if weights is None:
        raise HTTPException(status_code=404, detail=f"Layer not found: {layer_name}")

    # Work on a 1-D view for order statistics; torch.quantile needs float input.
    flat = weights.flatten()
    flat_f = flat.float()

    quantile_levels = {
        "1%": 0.01, "5%": 0.05, "25%": 0.25, "50%": 0.50,
        "75%": 0.75, "95%": 0.95, "99%": 0.99,
    }

    stats = {
        "shape": list(weights.shape),
        "dtype": str(weights.dtype),
        "num_params": int(weights.numel()),
        "memory_mb": weights.numel() * weights.element_size() / (1024 * 1024),
        "min": float(weights.min()),
        "max": float(weights.max()),
        "mean": float(weights.mean()),
        "std": float(weights.std()),
        "median": float(torch.median(flat)),
        "sparsity": float((weights == 0).sum() / weights.numel()),
        "abs_mean": float(weights.abs().mean()),
        "percentiles": {
            label: float(torch.quantile(flat_f, q))
            for label, q in quantile_levels.items()
        },
    }

    return {
        "layer_name": layer_name,
        "stats": stats,
        "visualizations": {
            "heatmap": visualizer.to_dict(
                visualizer.weight_heatmap(weights, f"Weights: {layer_name}")
            ),
            "histogram": visualizer.to_dict(
                visualizer.weight_histogram(weights, "Weight Distribution")
            ),
        },
    }
89
+
90
+
@router.post("/compare")
async def compare_quantization_methods(request: CompareQuantizationRequest) -> Dict[str, Any]:
    """Quantize one weight tensor with several methods and report their
    error/memory trade-offs side by side."""
    # Source the weights: a named layer of the loaded model, else random data.
    if request.layer_name and model_loader and model_loader.get_model():
        weights = model_loader.get_layer_weights(request.layer_name)
        if weights is None:
            raise HTTPException(status_code=404, detail=f"Layer not found: {request.layer_name}")
        source = f"layer:{request.layer_name}"
    else:
        weights = torch.randn(request.out_features, request.in_features)
        source = "random"

    # Quantizers expect a 2-D matrix: lift 1-D, flatten trailing dims of N-D.
    if weights.dim() == 1:
        weights = weights.unsqueeze(0)
    elif weights.dim() > 2:
        weights = weights.reshape(weights.shape[0], -1)

    method_map = {
        "int8": QuantizationMethod.INT8,
        "int4": QuantizationMethod.INT4,
        "nf4": QuantizationMethod.NF4,
    }

    comparison = []
    for method_name in request.methods:
        method = method_map.get(method_name)
        if method is None:
            continue  # silently skip unrecognized method names

        config = QuantizationConfig(
            bits=8 if method_name == "int8" else 4,
            method=method,
            group_size=128 if method_name in ["int4", "nf4"] else None
        )

        try:
            result = get_quantizer(config).quantize(weights)
            comparison.append({
                "method": method_name,
                "bits": config.bits,
                "max_error": result.max_error,
                "mean_error": result.mean_error,
                "memory_savings_percent": result.memory_savings_percent,
                "histogram": visualizer.to_dict(
                    visualizer.weight_histogram(
                        result.quantized_weights.float(),
                        f"{method_name.upper()} Distribution"
                    )
                )
            })
        except Exception as e:
            # Record per-method failures instead of failing the whole comparison.
            comparison.append({"method": method_name, "error": str(e)})

    return {
        "source": source,
        "original_shape": list(weights.shape),
        "original_stats": {
            "min": float(weights.min()),
            "max": float(weights.max()),
            "mean": float(weights.mean()),
            "std": float(weights.std())
        },
        "comparison": comparison
    }
165
+
166
+
@router.get("/model-summary")
async def get_model_summary() -> Dict[str, Any]:
    """Aggregate per-layer parameter counts for the loaded model and estimate
    memory footprints at FP32 / INT8 / INT4 precision."""
    if model_loader is None or model_loader.get_model() is None:
        raise HTTPException(status_code=404, detail="No model loaded")

    model_info = model_loader.get_model_info()
    if model_info is None:
        raise HTTPException(status_code=500, detail="Failed to get model info")

    # Single pass: accumulate totals while building the per-layer table.
    layer_stats = []
    total_params = 0
    quantizable_params = 0
    for layer in model_info.layers:
        total_params += layer.num_params
        if layer.is_quantizable:
            quantizable_params += layer.num_params
        layer_stats.append({
            "name": layer.name,
            "type": layer.module_type,
            "params": layer.num_params,
            "params_mb": layer.num_params * 4 / (1024 * 1024),  # Assuming FP32
            "quantizable": layer.is_quantizable
        })

    # Largest layers first
    layer_stats.sort(key=lambda entry: entry["params"], reverse=True)

    # Non-quantizable layers stay at 4 bytes/param in the estimates below.
    non_quant = total_params - quantizable_params
    return {
        "model_name": model_info.name,
        "architecture": model_info.architecture,
        "total_params": total_params,
        "total_params_billions": total_params / 1e9,
        "quantizable_params": quantizable_params,
        "quantizable_percent": quantizable_params / total_params * 100 if total_params > 0 else 0,
        "memory_fp32_gb": total_params * 4 / (1024**3),
        "memory_int8_estimate_gb": quantizable_params * 1 / (1024**3) + non_quant * 4 / (1024**3),
        "memory_int4_estimate_gb": quantizable_params * 0.5 / (1024**3) + non_quant * 4 / (1024**3),
        "top_layers": layer_stats[:20]  # Top 20 largest layers
    }
212
+
213
+
@router.get("/outliers/{layer_name}")
async def detect_outliers(layer_name: str, threshold: float = 3.0) -> Dict[str, Any]:
    """Report weights lying more than `threshold` standard deviations from the
    layer mean — candidates for clipping before quantization."""
    if model_loader is None or model_loader.get_model() is None:
        raise HTTPException(status_code=404, detail="No model loaded")

    weights = model_loader.get_layer_weights(layer_name)
    if weights is None:
        raise HTTPException(status_code=404, detail=f"Layer not found: {layer_name}")

    flat = weights.flatten()
    mean = flat.mean()
    std = flat.std()

    # Outlier = |w - mean| > threshold * std
    outlier_mask = (flat - mean).abs() > threshold * std
    num_outliers = outlier_mask.sum().item()
    outlier_values = flat[outlier_mask].tolist()[:100]  # Limit to 100

    # >1% outliers is treated as problematic for plain quantization.
    heavy_tail = num_outliers > flat.numel() * 0.01
    return {
        "layer_name": layer_name,
        "threshold": threshold,
        "total_weights": int(flat.numel()),
        "num_outliers": num_outliers,
        "outlier_percent": num_outliers / flat.numel() * 100,
        "mean": float(mean),
        "std": float(std),
        "outlier_range": {
            "below": float(mean - threshold * std),
            "above": float(mean + threshold * std)
        },
        "sample_outliers": outlier_values,
        "recommendation": "Consider clipping or mixed-precision for this layer" if heavy_tail else "Layer is suitable for quantization"
    }
backend/api/routes/models.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model Routes with Download Progress Streaming
3
+ Supports HuggingFace Spaces with proper cache management
4
+ """
5
+
6
+ from fastapi import APIRouter, HTTPException, BackgroundTasks
7
+ from fastapi.responses import StreamingResponse
8
+ from pydantic import BaseModel
9
+ from typing import Optional, Dict, Any, List
10
+ import torch
11
+ import asyncio
12
+ import json
13
+ import traceback
14
+ import time
15
+ from backend.core.model_loader import model_loader
16
+
17
+ from backend.core.model_manager import (
18
+ get_download_progress, set_download_progress, clear_download_progress,
19
+ get_cached_models, cleanup_old_models, delete_model_cache,
20
+ get_cache_stats, ensure_sample_models, start_cleanup_scheduler,
21
+ SAMPLE_MODELS
22
+ )
23
+
24
+ router = APIRouter()
25
+
26
+
class LoadModelRequest(BaseModel):
    """Request to load a model"""
    model_name: str
    dtype: str = "auto"
    device: str = "auto"
    # NOTE(review): defaulting trust_remote_code to True executes arbitrary code
    # shipped in the model repo — consider False for untrusted model IDs.
    trust_remote_code: bool = True


class DeleteModelRequest(BaseModel):
    """Request to delete a cached model"""
    model_name: str


# Module-level state for the currently loaded model/tokenizer
_loaded_model = None
_loaded_tokenizer = None
_model_name = None

# Kick off the periodic cache cleanup as soon as this routes module is imported
start_cleanup_scheduler()
47
+
48
+
49
+ def _get_device():
50
+ """Get best available device"""
51
+ if torch.cuda.is_available():
52
+ return "cuda"
53
+ elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
54
+ return "mps"
55
+ return "cpu"
56
+
57
+
58
+ def _get_torch_dtype(dtype_str: str, device: str):
59
+ """Convert dtype string to torch dtype"""
60
+ if dtype_str == "auto":
61
+ if device == "cuda":
62
+ return torch.float16
63
+ return torch.float32
64
+
65
+ dtype_map = {
66
+ "fp32": torch.float32,
67
+ "float32": torch.float32,
68
+ "fp16": torch.float16,
69
+ "float16": torch.float16,
70
+ "bf16": torch.bfloat16,
71
+ "bfloat16": torch.bfloat16,
72
+ }
73
+ return dtype_map.get(dtype_str, torch.float32)
74
+
75
+
async def _load_model_with_progress(model_name: str, dtype: str, device: str, trust_remote_code: bool):
    """Async generator: load a HuggingFace model while yielding progress dicts.

    Yields events of "type" "progress" (with phase/percent/message), "error"
    (with error text), or a final "complete" carrying model_info. Stores the
    loaded model/tokenizer in module state and syncs the global model_loader.
    """
    global _loaded_model, _loaded_tokenizer, _model_name

    try:
        from transformers import AutoModel, AutoTokenizer, AutoConfig
    except ImportError:
        yield {"type": "error", "error": "transformers library not installed"}
        return

    try:
        # Phase 1: fetch config — also validates that the model ID exists.
        yield {"type": "progress", "phase": "config", "percent": 5, "message": "Fetching model configuration..."}

        try:
            config = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code)
        except Exception as e:
            yield {"type": "error", "error": f"Model not found: {str(e)}", "suggestion": "Check the model ID is correct"}
            return

        # Phase 2: resolve target device and dtype.
        actual_device = device if device != "auto" else _get_device()
        torch_dtype = _get_torch_dtype(dtype, actual_device)

        yield {"type": "progress", "phase": "download", "percent": 10, "message": f"Downloading model to {actual_device}..."}

        # Record state so the polling endpoint (/progress/{model_name}) can report it.
        set_download_progress(model_name, {
            "status": "downloading",
            "percent": 10,
            "message": "Downloading model files..."
        })

        # Phase 3: download/load the weights; retry without low_cpu_mem_usage
        # since that flag needs accelerate and can fail on some models.
        try:
            model = AutoModel.from_pretrained(
                model_name,
                torch_dtype=torch_dtype,
                trust_remote_code=trust_remote_code,
                low_cpu_mem_usage=True
            )
            yield {"type": "progress", "phase": "download", "percent": 70, "message": "Model downloaded successfully"}
        except Exception:
            try:
                model = AutoModel.from_pretrained(
                    model_name,
                    torch_dtype=torch_dtype,
                    trust_remote_code=trust_remote_code
                )
                yield {"type": "progress", "phase": "download", "percent": 70, "message": "Model downloaded (fallback mode)"}
            except Exception as e2:
                yield {"type": "error", "error": f"Failed to load model: {str(e2)}"}
                clear_download_progress(model_name)
                return

        # Phase 4: move to target device; models dispatched via hf_device_map
        # are already placed. A failed move degrades gracefully to CPU.
        yield {"type": "progress", "phase": "device", "percent": 80, "message": f"Moving model to {actual_device}..."}

        if actual_device != "cpu" and not hasattr(model, 'hf_device_map'):
            try:
                model = model.to(actual_device)
            except Exception:
                actual_device = "cpu"
                model = model.to("cpu")

        model.eval()

        # Phase 5: tokenizer is best-effort — some checkpoints ship without one.
        yield {"type": "progress", "phase": "tokenizer", "percent": 90, "message": "Loading tokenizer..."}

        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote_code)
        except Exception:
            tokenizer = None

        # Persist in module state.
        _loaded_model = model
        _loaded_tokenizer = tokenizer
        _model_name = model_name

        # Keep the global model loader in sync for the analysis routes.
        if model_loader:
            model_loader.register_model(model, model_name, tokenizer)

        # Summarize the loaded model.
        num_params = sum(p.numel() for p in model.parameters())
        memory_mb = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024 * 1024)

        quantizable_layers = [
            name for name, module in model.named_modules()
            if any(t in module.__class__.__name__ for t in ["Linear", "Conv1d", "Conv2d"])
        ]

        # Phase 6: done — clear polling state and emit the final event.
        clear_download_progress(model_name)

        yield {
            "type": "complete",
            "percent": 100,
            "model_info": {
                "name": model_name,
                "architecture": model.config.architectures[0] if hasattr(model.config, 'architectures') and model.config.architectures else "Unknown",
                "num_params": num_params,
                "num_params_millions": round(num_params / 1e6, 2),
                "memory_mb": round(memory_mb, 2),
                "device": str(next(model.parameters()).device),
                "dtype": str(next(model.parameters()).dtype),
                "num_quantizable_layers": len(quantizable_layers),
                "has_tokenizer": tokenizer is not None,
                "is_sample": model_name in SAMPLE_MODELS
            }
        }

    except Exception as e:
        clear_download_progress(model_name)
        yield {"type": "error", "error": str(e), "traceback": traceback.format_exc()}
193
+
194
+
@router.post("/load")
async def load_model(request: LoadModelRequest) -> Dict[str, Any]:
    """Load a model (non-streaming version for simple requests)"""
    # Drain the progress generator; only the terminal event matters here.
    final = None
    async for update in _load_model_with_progress(
        request.model_name, request.dtype, request.device, request.trust_remote_code
    ):
        final = update

    if final and final.get("type") == "complete":
        return {"success": True, "model_info": final["model_info"]}
    if final and final.get("type") == "error":
        return {"success": False, "error": final.get("error"), "suggestion": final.get("suggestion")}
    return {"success": False, "error": "Unknown error"}


@router.post("/load/stream")
async def load_model_stream(request: LoadModelRequest):
    """Load a model with Server-Sent Events for progress updates"""

    async def event_generator():
        # Each progress dict becomes one SSE "data:" frame.
        async for update in _load_model_with_progress(
            request.model_name, request.dtype, request.device, request.trust_remote_code
        ):
            yield f"data: {json.dumps(update)}\n\n"
            await asyncio.sleep(0.1)  # Small delay between events

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        }
    )


@router.get("/progress/{model_name}")
async def get_model_progress(model_name: str) -> Dict[str, Any]:
    """Get download progress for a model (polling endpoint)"""
    progress = get_download_progress(model_name)
    if not progress:
        return {"downloading": False}
    return {"downloading": True, **progress}


@router.get("/status")
async def get_loading_status() -> Dict[str, Any]:
    """Get current model loading status"""
    return {
        "model_loaded": _loaded_model is not None,
        "model_name": _model_name,
        "has_tokenizer": _loaded_tokenizer is not None
    }
250
+
251
+
@router.get("/info")
async def get_model_info() -> Dict[str, Any]:
    """Get information about the currently loaded model"""
    if _loaded_model is None:
        return {"loaded": False, "message": "No model loaded"}

    num_params = sum(p.numel() for p in _loaded_model.parameters())
    memory_mb = sum(p.numel() * p.element_size() for p in _loaded_model.parameters()) / (1024 * 1024)

    return {
        "loaded": True,
        "name": _model_name,
        "num_params": num_params,
        "num_params_millions": round(num_params / 1e6, 2),
        "memory_mb": round(memory_mb, 2),
        "device": str(next(_loaded_model.parameters()).device),
        "dtype": str(next(_loaded_model.parameters()).dtype)
    }


@router.get("/layers")
async def get_layers() -> Dict[str, Any]:
    """Get list of layers in the loaded model"""
    if _loaded_model is None:
        return {"error": "No model loaded", "layers": []}

    layers = []
    quantizable_names = []

    for name, module in _loaded_model.named_modules():
        # Skip the root module, which is named "".
        if not name:
            continue

        module_type = module.__class__.__name__
        is_quantizable = any(t in module_type for t in ["Linear", "Conv1d", "Conv2d", "Embedding"])

        # Only modules with a weight tensor are reported.
        shape = None
        num_params = 0
        if hasattr(module, 'weight') and module.weight is not None:
            shape = list(module.weight.shape)
            num_params = module.weight.numel()

        if num_params > 0:
            layers.append({
                "name": name,
                "type": module_type,
                "shape": shape,
                "params": num_params,
                "quantizable": is_quantizable
            })

        if is_quantizable:
            quantizable_names.append(name)

    return {
        "total_layers": len(layers),
        "quantizable_count": len(quantizable_names),
        "quantizable_layers": quantizable_names,
        "layers": layers
    }


@router.post("/unload")
async def unload_model() -> Dict[str, Any]:
    """Unload the current model and free memory"""
    global _loaded_model, _loaded_tokenizer, _model_name

    # Drop all module-level references so the model can be collected.
    _loaded_model = None
    _loaded_tokenizer = None
    _model_name = None

    # Sync with global module loader
    if model_loader:
        model_loader.unload()

    import gc
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return {"success": True, "message": "Model unloaded"}
339
+
340
+
341
+ # ============================================
342
+ # Cache Management Endpoints
343
+ # ============================================
344
+
@router.get("/cache")
async def get_cache_info() -> Dict[str, Any]:
    """Get information about cached models"""
    return get_cache_stats()


@router.post("/cache/cleanup")
async def trigger_cleanup(hours: float = 4.0) -> Dict[str, Any]:
    """Manually trigger cache cleanup"""
    result = cleanup_old_models(hours)
    return {
        "success": True,
        "deleted_count": len(result["deleted"]),
        "kept_count": len(result["kept"]),
        **result
    }


@router.delete("/cache/{model_name:path}")
async def delete_cached_model(model_name: str) -> Dict[str, Any]:
    """Delete a specific model from cache"""
    # Sample models are protected — they must stay available for quick testing.
    if model_name in SAMPLE_MODELS:
        return {"success": False, "error": "Cannot delete sample models"}

    return {"success": delete_model_cache(model_name), "model_name": model_name}


# ============================================
# Example Models
# ============================================

@router.get("/examples")
async def get_example_models() -> Dict[str, Any]:
    """Get list of example models for testing"""
    sample_entries = [
        {"id": model, "is_default": True, "description": "Pre-cached for quick testing"}
        for model in SAMPLE_MODELS
    ]
    return {
        "sample_models": sample_entries,
        "small_models": [
            {"id": "gpt2", "size": "124M", "description": "GPT-2 base model"},
            {"id": "distilbert-base-uncased", "size": "66M", "description": "DistilBERT for NLP"},
            {"id": "prajjwal1/bert-tiny", "size": "4.4M", "description": "Tiny BERT for testing"},
            {"id": "microsoft/DialoGPT-small", "size": "124M", "description": "Small conversational model"},
        ],
        "medium_models": [
            {"id": "gpt2-medium", "size": "355M", "description": "GPT-2 medium"},
            {"id": "bert-base-uncased", "size": "110M", "description": "BERT base model"},
        ],
        "cleanup_policy": "Non-sample models are deleted after 4 hours of inactivity",
        "note": "Sample models are always available for quick testing"
    }


# Helper functions for other routes
def get_loaded_model():
    """Return the module-level loaded model (or None)."""
    return _loaded_model


def get_layer_weights_tensor(layer_name: str):
    """Return a detached clone of the named layer's weight tensor, or None
    when no model is loaded or the layer has no weight."""
    if _loaded_model is None:
        return None
    for name, module in _loaded_model.named_modules():
        if name == layer_name and hasattr(module, 'weight'):
            return module.weight.data.clone()
    return None
backend/api/routes/quantization.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Quantization Routes
3
+ Core quantization API endpoints
4
+ """
5
+
6
+ from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
7
+ from pydantic import BaseModel
8
+ from typing import Optional, Dict, Any, List
9
+ import torch
10
+ import asyncio
11
+ import json
12
+
13
+ from backend.core.quantizer import (
14
+ QuantizationConfig, QuantizationMethod, QuantizationMode,
15
+ INT8Quantizer, INT4Quantizer, NF4Quantizer, get_quantizer
16
+ )
17
+ from backend.core.model_loader import model_loader
18
+ from backend.core.visualization import visualizer
19
+
20
+ router = APIRouter()
21
+
22
+
23
class QuantizeWeightsRequest(BaseModel):
    """Request to quantize custom weights.

    Describes a synthetic (out_features x in_features) matrix to generate
    server-side plus the quantization settings to apply to it. No real
    model is required for this request.
    """
    in_features: int = 64
    out_features: int = 128
    bits: int = 8  # 4 or 8
    method: str = "int8"  # int8, int4, nf4
    mode: str = "symmetric"  # symmetric, asymmetric
    group_size: Optional[int] = None  # None = quantizer default grouping (see QuantizationConfig)
    weight_pattern: str = "random"  # random, eye, ones, alternating, gradient
    dtype: str = "float32"  # float32, float16, bfloat16; unknown values fall back to float32
33
+
34
+
35
class QuantizeLayerRequest(BaseModel):
    """Request to quantize a specific layer from loaded model.

    ``layer_name`` is the dotted module path as reported by the model
    info endpoint; quantization settings mirror QuantizeWeightsRequest.
    """
    layer_name: str
    bits: int = 8  # 4 or 8
    method: str = "int8"  # int8, int4, nf4
    mode: str = "symmetric"  # symmetric, asymmetric
    group_size: Optional[int] = None  # None = quantizer default grouping
42
+
43
+
44
class QuantizeModelRequest(BaseModel):
    """Request to quantize entire model.

    Layer selection: ``layers_to_include`` (when given) restricts the set,
    otherwise all quantizable layers are used; ``layers_to_skip`` is then
    subtracted from that set.
    """
    bits: int = 8  # 4 or 8
    method: str = "int8"  # int8, int4, nf4
    mode: str = "symmetric"  # symmetric, asymmetric
    group_size: Optional[int] = None
    layers_to_skip: List[str] = []  # safe: pydantic copies mutable defaults per instance
    layers_to_include: Optional[List[str]] = None  # None = all quantizable
52
+
53
+
54
+ def _generate_weights(pattern: str, out_features: int, in_features: int,
55
+ dtype: torch.dtype) -> torch.Tensor:
56
+ """Generate weights based on pattern"""
57
+ if pattern == "random":
58
+ return torch.randn((out_features, in_features), dtype=dtype)
59
+ elif pattern == "eye":
60
+ weights = torch.zeros((out_features, in_features), dtype=dtype)
61
+ min_dim = min(out_features, in_features)
62
+ weights[:min_dim, :min_dim] = torch.eye(min_dim, dtype=dtype)
63
+ return weights
64
+ elif pattern == "ones":
65
+ return torch.ones((out_features, in_features), dtype=dtype)
66
+ elif pattern == "alternating":
67
+ weights = torch.ones((out_features, in_features), dtype=dtype)
68
+ for i in range(out_features):
69
+ for j in range(in_features):
70
+ if (i + j) % 2 == 1:
71
+ weights[i, j] = -1.0
72
+ return weights
73
+ elif pattern == "gradient":
74
+ x = torch.linspace(-1, 1, in_features)
75
+ y = torch.linspace(-1, 1, out_features)
76
+ xx, yy = torch.meshgrid(x, y, indexing='ij')
77
+ return (xx + yy).t().to(dtype)
78
+ else:
79
+ return torch.randn((out_features, in_features), dtype=dtype)
80
+
81
+
82
def _get_quantizer_from_config(request) -> tuple:
    """Translate request strings into a QuantizationConfig and its quantizer.

    Unrecognized ``method``/``mode`` strings fall back to INT8 / symmetric.
    Returns (quantizer, config).
    """
    methods = {
        "int8": QuantizationMethod.INT8,
        "int4": QuantizationMethod.INT4,
        "nf4": QuantizationMethod.NF4,
    }
    modes = {
        "symmetric": QuantizationMode.SYMMETRIC,
        "asymmetric": QuantizationMode.ASYMMETRIC,
    }
    config = QuantizationConfig(
        bits=request.bits,
        method=methods.get(request.method, QuantizationMethod.INT8),
        mode=modes.get(request.mode, QuantizationMode.SYMMETRIC),
        group_size=request.group_size,
    )
    return get_quantizer(config), config
103
+
104
+
105
@router.post("/weights")
async def quantize_custom_weights(request: QuantizeWeightsRequest) -> Dict[str, Any]:
    """
    Quantize custom generated weights.
    This endpoint works without loading a real model.

    Generates a weight matrix from the requested pattern, quantizes and
    dequantizes it, and returns error statistics plus heatmap/histogram
    visualizations of each stage.
    """
    # Resolve the requested floating-point dtype (unknown names -> float32).
    dtype = {
        "float32": torch.float32,
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
    }.get(request.dtype, torch.float32)

    # Build the synthetic weight matrix.
    weights = _generate_weights(
        request.weight_pattern, request.out_features, request.in_features, dtype
    )

    # Quantize, then round-trip back to floats for comparison plots.
    quantizer, config = _get_quantizer_from_config(request)
    result = quantizer.quantize(weights)
    dequantized = quantizer.dequantize(result)
    quantized_f32 = result.quantized_weights.float()

    def heatmap(tensor, title):
        return visualizer.to_dict(visualizer.weight_heatmap(tensor, title))

    def histogram(tensor, title):
        return visualizer.to_dict(visualizer.weight_histogram(tensor, title))

    return {
        "success": True,
        "config": config.to_dict(),
        "stats": {
            "original_shape": list(weights.shape),
            "quantized_shape": list(result.quantized_weights.shape),
            "scales_shape": list(result.scales.shape),
            "max_error": result.max_error,
            "mean_error": result.mean_error,
            "memory_savings_percent": result.memory_savings_percent,
            "original_dtype": str(weights.dtype),
            "quantized_dtype": str(result.quantized_weights.dtype),
        },
        "visualizations": {
            "original_heatmap": heatmap(weights, "Original Weights"),
            "quantized_heatmap": heatmap(quantized_f32, f"Quantized Weights ({request.bits}-bit)"),
            "dequantized_heatmap": heatmap(dequantized, "Dequantized Weights"),
            "error_heatmap": heatmap((weights - dequantized).abs(), "Quantization Error"),
            "original_histogram": histogram(weights, "Original Distribution"),
            "quantized_histogram": histogram(quantized_f32, "Quantized Distribution"),
            "scales_histogram": visualizer.to_dict(visualizer.scales_histogram(result.scales)),
        },
    }
182
+
183
+
184
@router.post("/layer")
async def quantize_layer(request: QuantizeLayerRequest) -> Dict[str, Any]:
    """
    Quantize a specific layer from the loaded model.
    Requires a model to be loaded first.

    Returns 400 when no model is loaded, 404 when the layer name does not
    match, otherwise the same stats/visualizations payload shape as the
    /weights endpoint (plus "layer_name").
    """
    if model_loader is None or model_loader.get_model() is None:
        raise HTTPException(
            status_code=400,
            detail="No model loaded. Load a model first or use /quantize/weights for custom weights."
        )

    # Get layer weights
    weights = model_loader.get_layer_weights(request.layer_name)
    if weights is None:
        raise HTTPException(status_code=404, detail=f"Layer not found: {request.layer_name}")

    # Ensure 2D: 1-D tensors become a single row; higher-rank tensors are
    # flattened to (dim0, rest). Heatmaps therefore show the flattened view.
    original_shape = weights.shape
    if len(weights.shape) == 1:
        weights = weights.unsqueeze(0)
    elif len(weights.shape) > 2:
        weights = weights.reshape(weights.shape[0], -1)

    # Get quantizer
    quantizer, config = _get_quantizer_from_config(request)

    # Quantize, then round-trip for error visualization
    result = quantizer.quantize(weights)
    dequantized = quantizer.dequantize(result)

    # Generate Visualizations
    original_hist = visualizer.to_dict(visualizer.weight_histogram(weights, "Original Distribution"))
    quantized_hist = visualizer.to_dict(visualizer.weight_histogram(result.quantized_weights.float(), "Quantized Distribution"))
    scales_hist = visualizer.to_dict(visualizer.scales_histogram(result.scales))

    return {
        "success": True,
        "layer_name": request.layer_name,
        "config": config.to_dict(),
        "stats": {
            # original_shape is the pre-flattening shape; dtype is unchanged
            # by the reshape, so str(weights.dtype) still reports the original.
            "original_shape": list(original_shape),
            "quantized_shape": list(result.quantized_weights.shape),
            "scales_shape": list(result.scales.shape),
            "max_error": result.max_error,
            "mean_error": result.mean_error,
            "memory_savings_percent": result.memory_savings_percent,
            "original_dtype": str(weights.dtype),
            "quantized_dtype": str(result.quantized_weights.dtype)
        },
        "visualizations": {
            "original_heatmap": visualizer.to_dict(
                visualizer.weight_heatmap(weights, f"Original: {request.layer_name}")
            ),
            "quantized_heatmap": visualizer.to_dict(
                visualizer.weight_heatmap(result.quantized_weights.float(), f"Quantized ({request.bits}-bit)")
            ),
            "dequantized_heatmap": visualizer.to_dict(
                visualizer.weight_heatmap(dequantized, "Dequantized Weights")
            ),
            "error_heatmap": visualizer.to_dict(
                visualizer.weight_heatmap((weights - dequantized).abs(), "Error")
            ),
            "original_histogram": original_hist,
            "quantized_histogram": quantized_hist,
            "scales_histogram": scales_hist
        }
    }
252
+
253
+
254
@router.post("/model")
async def quantize_model(request: QuantizeModelRequest) -> Dict[str, Any]:
    """
    Quantize all quantizable layers in the loaded model.
    Returns summary statistics for all layers.

    Layer selection: ``layers_to_include`` (when given) restricts the set,
    otherwise every quantizable layer is used; ``layers_to_skip`` is then
    removed. Per-layer failures are recorded inline ("error" key) instead
    of aborting the whole request.
    """
    if model_loader is None or model_loader.get_model() is None:
        raise HTTPException(
            status_code=400,
            detail="No model loaded. This feature requires a loaded model."
        )

    model_info = model_loader.get_model_info()
    if model_info is None:
        raise HTTPException(status_code=500, detail="Failed to get model info")

    # Determine layers to quantize
    if request.layers_to_include:
        layers_to_quantize = request.layers_to_include
    else:
        layers_to_quantize = model_info.quantizable_layers

    # Remove skipped layers
    layers_to_quantize = [l for l in layers_to_quantize if l not in request.layers_to_skip]

    # Get quantizer
    quantizer, config = _get_quantizer_from_config(request)

    # Quantize each layer, accumulating byte totals for the summary
    results = []
    total_memory_saved = 0
    total_original_size = 0

    for layer_name in layers_to_quantize:
        weights = model_loader.get_layer_weights(layer_name)
        if weights is None:
            # Layer has no weight tensor (or vanished); silently skipped.
            continue

        # Handle non-2D weights: 1-D -> single row, rank>2 -> (dim0, rest)
        original_shape = weights.shape
        if len(weights.shape) == 1:
            weights = weights.unsqueeze(0)
        elif len(weights.shape) > 2:
            weights = weights.reshape(weights.shape[0], -1)

        try:
            result = quantizer.quantize(weights)

            # Savings derived from the quantizer-reported percentage applied
            # to the pre-quantization byte size of this layer.
            original_bytes = weights.numel() * weights.element_size()
            total_original_size += original_bytes
            total_memory_saved += original_bytes * (result.memory_savings_percent / 100)

            results.append({
                "layer": layer_name,
                "shape": list(original_shape),
                "max_error": result.max_error,
                "mean_error": result.mean_error,
                "memory_savings_percent": result.memory_savings_percent
            })
        except Exception as e:
            # Record the failure for this layer and keep going.
            results.append({
                "layer": layer_name,
                "error": str(e)
            })

    return {
        "success": True,
        "config": config.to_dict(),
        "summary": {
            "layers_quantized": len([r for r in results if "error" not in r]),
            "layers_failed": len([r for r in results if "error" in r]),
            "total_memory_saved_mb": total_memory_saved / (1024 * 1024),
            "average_memory_savings_percent": (total_memory_saved / total_original_size * 100) if total_original_size > 0 else 0
        },
        "layers": results
    }
330
+
331
+
332
+ # WebSocket for real-time progress
333
@router.websocket("/stream")
async def quantization_stream(websocket: WebSocket):
    """WebSocket endpoint for streaming quantization progress.

    Protocol: client sends a JSON request; server replies with a series of
    {"type": "progress", "progress": 0-100, "message": ...} frames followed
    by a final {"type": "complete", ...} frame, then waits for the next
    request on the same connection.

    NOTE(review): the progress loop below is simulated — no quantization is
    performed here yet, and the parsed request is currently unused.
    """
    await websocket.accept()

    try:
        while True:
            # Receive quantization request. json.loads doubles as payload
            # validation: a malformed frame raises and closes the socket.
            data = await websocket.receive_text()
            request_data = json.loads(data)

            # Process and send updates
            await websocket.send_json({
                "type": "progress",
                "progress": 0,
                "message": "Starting quantization..."
            })

            # Simulate progress (in real implementation, this would be actual quantization)
            for i in range(0, 101, 10):
                await asyncio.sleep(0.1)
                await websocket.send_json({
                    "type": "progress",
                    "progress": i,
                    "message": f"Processing... {i}%"
                })

            await websocket.send_json({
                "type": "complete",
                "message": "Quantization complete"
            })

    except WebSocketDisconnect:
        # Client hung up; nothing to clean up.
        pass
backend/api/routes/system.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ System Routes
3
+ Hardware detection and system information
4
+ """
5
+
6
+ from fastapi import APIRouter
7
+ from typing import Dict, Any
8
+
9
+ from backend.core.system_checker import system_checker, check_model_requirements
10
+
11
+ router = APIRouter()
12
+
13
+
14
@router.get("/info")
async def get_system_info() -> Dict[str, Any]:
    """
    Get complete system information including GPU, RAM, and capabilities.

    Thin wrapper over system_checker.to_dict(); presumably serves cached
    detection results — use GET /refresh to force re-detection.
    """
    return system_checker.to_dict()
20
+
21
+
22
@router.get("/capabilities")
async def get_capabilities() -> Dict[str, Any]:
    """
    Get system capabilities for quantization tasks: capability tier,
    recommended sizes, and available accelerators.
    """
    info = system_checker.check()
    gpu_summaries = [
        {"name": gpu.name, "memory_gb": gpu.total_memory_gb}
        for gpu in info.gpus
    ]
    return {
        "capability": info.capability.value,
        "recommended_batch_size": info.recommended_batch_size,
        "max_model_size": info.max_model_size,
        "cuda_available": info.cuda_available,
        "mps_available": info.mps_available,
        "gpus": gpu_summaries,
    }
42
+
43
+
44
@router.post("/check-model")
async def check_model_requirements_endpoint(
    model_params_billions: float,
    dtype: str = "fp16"
) -> Dict[str, Any]:
    """
    Check if system can handle a model of specified size.

    Pure delegation to core.system_checker.check_model_requirements.
    (Scalar parameters on a POST route are read from the query string
    by FastAPI, not from a JSON body.)

    Args:
        model_params_billions: Model size in billions of parameters
        dtype: Data type (fp32, fp16, int8, int4)
    """
    return check_model_requirements(model_params_billions, dtype)
57
+
58
+
59
@router.get("/refresh")
async def refresh_system_info() -> Dict[str, Any]:
    """
    Force refresh system information.

    Returns the same serialized schema as GET /info. The previous
    implementation returned the raw dataclass ``__dict__``, which leaks
    nested dataclass/Enum objects and diverges from the /info payload.
    """
    # Re-run detection, then serialize through the same path as /info.
    # NOTE(review): assumes to_dict() reflects the result of the preceding
    # check() call — confirm system_checker caches its last check.
    system_checker.check(force_refresh=True)
    return system_checker.to_dict()
backend/core/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ PyTorch Neural Network Quantizer - Backend Core Package
3
+ Multi-bit quantization engine supporting 4-bit, 8-bit, NF4, and GPTQ methods.
4
+ """
5
+
6
+ __version__ = "1.0.0"
backend/core/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (357 Bytes). View file
 
backend/core/__pycache__/model_loader.cpython-312.pyc ADDED
Binary file (16.1 kB). View file
 
backend/core/__pycache__/model_manager.cpython-312.pyc ADDED
Binary file (10.9 kB). View file
 
backend/core/__pycache__/quantizer.cpython-312.pyc ADDED
Binary file (30.2 kB). View file
 
backend/core/__pycache__/system_checker.cpython-312.pyc ADDED
Binary file (11.2 kB). View file
 
backend/core/__pycache__/visualization.cpython-312.pyc ADDED
Binary file (13.1 kB). View file
 
backend/core/model_loader.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HuggingFace Model Loader
3
+ Loads models from HuggingFace Hub or local files with memory-efficient options.
4
+ """
5
+
6
+ import torch
7
+ import gc
8
+ from pathlib import Path
9
+ from typing import Optional, Dict, Any, List, Tuple, Union, TYPE_CHECKING
10
+ from dataclasses import dataclass
11
+ from enum import Enum
12
+
13
+ try:
14
+ from transformers import (
15
+ AutoModel, AutoModelForCausalLM, AutoModelForSequenceClassification,
16
+ AutoTokenizer, AutoConfig
17
+ )
18
+ HAS_TRANSFORMERS = True
19
+ except ImportError:
20
+ HAS_TRANSFORMERS = False
21
+
22
+ if TYPE_CHECKING:
23
+ from transformers import PreTrainedModel
24
+
25
+ from .system_checker import system_checker, check_model_requirements
26
+
27
+
28
class ModelType(Enum):
    """Supported model types (selects the AutoModel* class used in ModelLoader.load)"""
    CAUSAL_LM = "causal_lm"                              # AutoModelForCausalLM
    SEQUENCE_CLASSIFICATION = "sequence_classification"  # AutoModelForSequenceClassification
    GENERIC = "generic"                                  # AutoModel
33
+
34
+
35
@dataclass
class LayerInfo:
    """Information about a model layer (one entry per module with parameters)"""
    name: str                         # dotted module path from named_modules()
    module_type: str                  # module class name, e.g. "Linear"
    shape: Optional[Tuple[int, ...]]  # weight shape; None if the module has no weight
    num_params: int                   # weight + bias element count
    dtype: str                        # weight dtype as a string, or "N/A"
    is_quantizable: bool              # module_type matched ModelLoader.QUANTIZABLE_TYPES
44
+
45
+
46
@dataclass
class ModelInfo:
    """Complete model information, produced by ModelLoader._analyze_model"""
    name: str                      # model id or local path the model was loaded from
    model_type: ModelType
    architecture: str              # config.architectures[0], or "Unknown"
    num_params: int                # summed over modules that carry parameters
    num_params_billions: float
    hidden_size: int               # from config (default 768 if absent)
    num_layers: int                # from config (default 12 if absent)
    vocab_size: Optional[int]
    dtype: str                     # dtype of the model's first parameter
    memory_footprint_gb: float     # parameter bytes total, rounded to 2 decimals
    layers: List[LayerInfo]
    quantizable_layers: List[str]  # names of layers eligible for quantization
61
+
62
+
63
class ModelLoader:
    """
    Load and inspect HuggingFace models with memory-efficient options.
    Provides layer-by-layer analysis for selective quantization.

    Holds at most one model at a time; loading a new model unloads the
    previous one first.
    """

    # Layer types that can be quantized. Matched by case-sensitive substring
    # against the module's class name in _analyze_model.
    QUANTIZABLE_TYPES = (
        "Linear",
        "Conv1d",
        "Conv2d",
        "Embedding"
    )

    def __init__(self):
        # Fail fast when transformers is missing; the module-level singleton
        # below is set to None in that case instead of raising at import time.
        if not HAS_TRANSFORMERS:
            raise ImportError(
                "transformers library not installed. "
                "Install with: pip install transformers"
            )
        self._loaded_model = None  # Optional[PreTrainedModel]
        self._model_info: Optional[ModelInfo] = None
        self._tokenizer = None

    def check_requirements(self, model_name: str, dtype: str = "fp16") -> Dict[str, Any]:
        """Check if system can load the model before attempting.

        Fetches only the config (not the weights); estimates the parameter
        count from hidden size / layers / vocab when the config does not
        state it. Errors are reported in the return dict, not raised.
        """
        try:
            config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

            # Estimate parameters
            if hasattr(config, 'num_parameters'):
                num_params = config.num_parameters
            else:
                # Estimate from config
                hidden = getattr(config, 'hidden_size', 768)
                layers = getattr(config, 'num_hidden_layers', 12)
                vocab = getattr(config, 'vocab_size', 30000)
                num_params = self._estimate_params(hidden, layers, vocab)

            params_billions = num_params / 1e9
            return check_model_requirements(params_billions, dtype)

        except Exception as e:
            return {
                "can_load": False,
                "error": str(e),
                "warnings": [f"Failed to fetch model config: {str(e)}"]
            }

    def _estimate_params(self, hidden: int, layers: int, vocab: int) -> int:
        """Estimate parameter count from config"""
        # Rough estimate: embeddings + transformer layers
        embedding_params = vocab * hidden
        # Each layer: attention (4 * hidden^2) + FFN (8 * hidden^2)
        layer_params = layers * (12 * hidden * hidden)
        return embedding_params + layer_params

    def load(self, model_name: str,
             model_type: ModelType = ModelType.GENERIC,
             dtype: str = "auto",
             device: str = "auto",
             trust_remote_code: bool = True,
             low_memory: bool = False) -> Tuple[Any, Optional[Any]]:
        """
        Load a model from HuggingFace Hub or local path.

        Args:
            model_name: HuggingFace model ID or local path
            model_type: Type of model to load
            dtype: Data type ("auto", "fp32", "fp16", "bf16")
            device: Device to load to ("auto", "cuda", "cpu", "mps")
            trust_remote_code: Allow custom code from model repos
            low_memory: Use memory-efficient loading

        Returns:
            Tuple of (model, tokenizer) -- tokenizer is None when its load fails.
        """
        # Clear previous model
        self.unload()

        # Determine device: prefer CUDA, then Apple MPS, then CPU.
        if device == "auto":
            sys_info = system_checker.check()
            if sys_info.cuda_available:
                device = "cuda"
            elif sys_info.mps_available:
                device = "mps"
            else:
                device = "cpu"

        # Determine dtype
        if dtype == "auto":
            if device == "cuda":
                dtype = "fp16"
            elif device == "mps":
                dtype = "fp32"  # MPS has limited bf16 support
            else:
                dtype = "fp32"

        torch_dtype = {
            "fp32": torch.float32,
            "fp16": torch.float16,
            "bf16": torch.bfloat16
        }.get(dtype, torch.float32)

        # Load config first
        config = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code)

        # Select model class
        if model_type == ModelType.CAUSAL_LM:
            model_class = AutoModelForCausalLM
        elif model_type == ModelType.SEQUENCE_CLASSIFICATION:
            model_class = AutoModelForSequenceClassification
        else:
            model_class = AutoModel

        # Load model
        load_kwargs = {
            "pretrained_model_name_or_path": model_name,
            "torch_dtype": torch_dtype,
            "trust_remote_code": trust_remote_code,
        }

        if low_memory:
            load_kwargs["low_cpu_mem_usage"] = True
            if device == "cuda":
                # device_map="auto" lets accelerate place weights; in that
                # case we must not .to(device) the model again below.
                load_kwargs["device_map"] = "auto"

        model = model_class.from_pretrained(**load_kwargs)

        if not low_memory and device != "cpu":
            model = model.to(device)

        model.eval()

        # Load tokenizer; some checkpoints ship without one, so failure is
        # tolerated and the tokenizer stays None.
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                model_name, trust_remote_code=trust_remote_code
            )
        except Exception:
            tokenizer = None

        self._loaded_model = model
        self._tokenizer = tokenizer
        self._model_info = self._analyze_model(model, model_name, model_type)

        return model, tokenizer

    def load_weights_only(self, model_name: str) -> Dict[str, torch.Tensor]:
        """
        Load only the state dict without instantiating the model.
        More memory efficient for inspection.

        Tries the single-file safetensors checkpoint first, then falls back
        to pytorch_model.bin. Sharded checkpoints are not handled here.
        """
        from safetensors import safe_open
        from huggingface_hub import hf_hub_download

        try:
            # Try safetensors first
            path = hf_hub_download(model_name, "model.safetensors")
            weights = {}
            with safe_open(path, framework="pt") as f:
                for key in f.keys():
                    weights[key] = f.get_tensor(key)
            return weights
        except Exception:
            # Fallback to torch
            # NOTE(review): torch.load on a downloaded .bin unpickles
            # arbitrary data from the hub; consider weights_only=True where
            # the installed torch supports it.
            try:
                path = hf_hub_download(model_name, "pytorch_model.bin")
                return torch.load(path, map_location="cpu")
            except Exception as e:
                raise RuntimeError(f"Failed to load weights: {str(e)}")

    def _analyze_model(self, model: Any, name: str,
                       model_type: ModelType) -> ModelInfo:
        """Analyze model structure and extract layer information.

        Walks named_modules(), recording every module that directly owns a
        weight tensor. NOTE(review): parameters tied across two modules
        (e.g. shared input/output embeddings) are counted once per module
        here, while the memory figure below uses model.parameters() which
        de-duplicates -- the two totals can disagree for tied models.
        """
        layers = []
        quantizable_layers = []
        total_params = 0

        for layer_name, module in model.named_modules():
            if not layer_name:
                # Skip the root module itself (empty name).
                continue

            # Get module info
            module_type = module.__class__.__name__

            # Check if quantizable
            is_quantizable = any(
                qt in module_type for qt in self.QUANTIZABLE_TYPES
            )

            # Get shape and params for leaf modules
            shape = None
            num_params = 0
            dtype = "N/A"

            if hasattr(module, 'weight') and module.weight is not None:
                shape = tuple(module.weight.shape)
                num_params = module.weight.numel()
                dtype = str(module.weight.dtype)
                if hasattr(module, 'bias') and module.bias is not None:
                    num_params += module.bias.numel()

            if num_params > 0:
                total_params += num_params
                layers.append(LayerInfo(
                    name=layer_name,
                    module_type=module_type,
                    shape=shape,
                    num_params=num_params,
                    dtype=dtype,
                    is_quantizable=is_quantizable
                ))

                if is_quantizable:
                    quantizable_layers.append(layer_name)

        # Get config info
        config = model.config
        hidden_size = getattr(config, 'hidden_size', 768)
        num_layers = getattr(config, 'num_hidden_layers', 12)
        vocab_size = getattr(config, 'vocab_size', None)

        # Calculate memory
        memory_gb = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024**3)

        return ModelInfo(
            name=name,
            model_type=model_type,
            architecture=config.architectures[0] if hasattr(config, 'architectures') and config.architectures else "Unknown",
            num_params=total_params,
            num_params_billions=total_params / 1e9,
            hidden_size=hidden_size,
            num_layers=num_layers,
            vocab_size=vocab_size,
            dtype=str(next(model.parameters()).dtype),
            memory_footprint_gb=round(memory_gb, 2),
            layers=layers,
            quantizable_layers=quantizable_layers
        )

    def register_model(self, model: Any, name: str, tokenizer: Any = None):
        """Register an externally loaded model (replaces any current model info)"""
        self._loaded_model = model
        self._tokenizer = tokenizer
        self._model_info = self._analyze_model(model, name, ModelType.GENERIC)

    def get_layer_weights(self, layer_name: str) -> Optional[torch.Tensor]:
        """Get weights from a specific layer.

        Returns a clone of the weight tensor, or None when the name does not
        match any module. Raises RuntimeError if no model is loaded.
        """
        if self._loaded_model is None:
            raise RuntimeError("No model loaded")

        for name, module in self._loaded_model.named_modules():
            if name == layer_name:
                if hasattr(module, 'weight'):
                    return module.weight.data.clone()
        return None

    def set_layer_weights(self, layer_name: str, weights: torch.Tensor):
        """Set weights for a specific layer.

        Moves the tensor to the layer's current device. Raises RuntimeError
        if no model is loaded, ValueError if the layer name does not match.
        """
        if self._loaded_model is None:
            raise RuntimeError("No model loaded")

        for name, module in self._loaded_model.named_modules():
            if name == layer_name:
                if hasattr(module, 'weight'):
                    module.weight.data = weights.to(module.weight.device)
                return
        raise ValueError(f"Layer not found: {layer_name}")

    def get_model_info(self) -> Optional[ModelInfo]:
        """Get current model information"""
        return self._model_info

    def get_model(self) -> Optional[Any]:
        """Get loaded model"""
        return self._loaded_model

    def get_tokenizer(self):
        """Get loaded tokenizer"""
        return self._tokenizer

    def unload(self):
        """Unload model and free memory"""
        if self._loaded_model is not None:
            del self._loaded_model
            self._loaded_model = None

        if self._tokenizer is not None:
            del self._tokenizer
            self._tokenizer = None

        self._model_info = None

        # Force garbage collection, then release cached CUDA blocks.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def to_dict(self) -> Optional[Dict[str, Any]]:
        """Convert model info to dictionary (None when no model is loaded)"""
        if self._model_info is None:
            return None

        info = self._model_info
        return {
            "name": info.name,
            "model_type": info.model_type.value,
            "architecture": info.architecture,
            "num_params": info.num_params,
            "num_params_billions": round(info.num_params_billions, 3),
            "hidden_size": info.hidden_size,
            "num_layers": info.num_layers,
            "vocab_size": info.vocab_size,
            "dtype": info.dtype,
            "memory_footprint_gb": info.memory_footprint_gb,
            "num_quantizable_layers": len(info.quantizable_layers),
            "quantizable_layers": info.quantizable_layers,
            "layers": [
                {
                    "name": layer.name,
                    "module_type": layer.module_type,
                    "shape": layer.shape,
                    "num_params": layer.num_params,
                    "dtype": layer.dtype,
                    "is_quantizable": layer.is_quantizable
                }
                for layer in info.layers
            ]
        }
394
+
395
+
396
# Global instance
# None when transformers is not installed, so importers can feature-detect
# with "model_loader is None".
model_loader = ModelLoader() if HAS_TRANSFORMERS else None
398
+
399
+
400
def load_model(model_name: str, **kwargs) -> Tuple[Any, Any]:
    """Convenience function to load a model.

    Delegates to the global ModelLoader; raises ImportError when the
    transformers package (and thus the loader singleton) is unavailable.
    """
    if model_loader is None:
        raise ImportError("transformers not available")
    return model_loader.load(model_name, **kwargs)
405
+
406
+
407
def get_model_info() -> Optional[Dict[str, Any]]:
    """Get current model information.

    Returns the loaded model's info dict, or None when transformers is
    unavailable or no model has been loaded yet.
    """
    if model_loader is None:
        return None
    return model_loader.to_dict()
backend/core/model_manager.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model Manager with Download Progress, Caching, and Auto-Cleanup
3
+ Designed to work with HuggingFace Spaces disk storage
4
+ """
5
+
6
+ import os
7
+ import time
8
+ import shutil
9
+ import asyncio
10
+ import threading
11
+ from pathlib import Path
12
+ from typing import Optional, Dict, Any, Callable
13
+ from dataclasses import dataclass
14
+ from datetime import datetime, timedelta
15
+
16
# HuggingFace cache directory - works on Spaces
# (HF_HOME may be a str from the environment or a Path from the fallback;
# Path() below accepts either.)
HF_CACHE_DIR = os.environ.get("HF_HOME", Path.home() / ".cache" / "huggingface")
MODEL_CACHE_DIR = Path(HF_CACHE_DIR) / "hub"

# Sample models that should always be available (tiny models for quick testing).
# These are exempt from auto-cleanup.
SAMPLE_MODELS = [
    "prajjwal1/bert-tiny",  # 4.4MB - Perfect for testing
]

# Auto-cleanup interval (4 hours)
CLEANUP_INTERVAL_HOURS = 4

# Track download progress
# _download_progress: model name -> latest progress snapshot (dict with "timestamp")
# _cleanup_thread: background cleanup thread handle, once started
_download_progress: Dict[str, Dict[str, Any]] = {}
_cleanup_thread: Optional[threading.Thread] = None
31
+
32
+
33
@dataclass
class DownloadProgress:
    """Track download progress for a model"""
    model_name: str
    status: str  # "pending", "downloading", "extracting", "complete", "error"
    current_file: str     # file currently being transferred
    files_completed: int
    total_files: int
    bytes_downloaded: int
    total_bytes: int
    speed_mbps: float     # transfer rate; exact unit (Mb vs MB per s) set by the producer
    eta_seconds: int
    error: Optional[str] = None  # populated when status == "error"
46
+
47
+
48
def get_download_progress(model_name: str) -> Optional[Dict[str, Any]]:
    """Get current download progress for a model.

    Returns the latest snapshot stored by set_download_progress (including
    its "timestamp" key), or None when no download is tracked for the model.
    """
    return _download_progress.get(model_name)
51
+
52
+
53
def set_download_progress(model_name: str, progress: Dict[str, Any]):
    """Record the latest progress snapshot for a model, stamped with the current time."""
    entry = dict(progress)
    entry["timestamp"] = time.time()
    _download_progress[model_name] = entry
59
+
60
+
61
def clear_download_progress(model_name: str):
    """Drop the progress record for a model; a no-op when none is tracked."""
    _download_progress.pop(model_name, None)
65
+
66
+
67
def get_cached_models() -> list:
    """Get list of models currently in cache.

    Scans the HuggingFace hub cache for "models--{org}--{name}" directories
    and returns, per model: name, path, size in MB, last-access time (ISO
    format), and whether it is a protected sample model.
    """
    cached = []

    if not MODEL_CACHE_DIR.exists():
        return cached

    for item in MODEL_CACHE_DIR.iterdir():
        if item.is_dir() and item.name.startswith("models--"):
            # Parse "models--org--name" back into "org/name"
            parts = item.name.replace("models--", "").split("--")
            if len(parts) >= 2:
                model_name = f"{parts[0]}/{parts[1]}"
            else:
                model_name = parts[0]

            # Total on-disk size of all files under the model directory
            size_mb = sum(f.stat().st_size for f in item.rglob("*") if f.is_file()) / (1024 * 1024)

            # Last access time, falling back to "now" if stat fails.
            # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            try:
                last_access = item.stat().st_atime
            except OSError:
                last_access = time.time()

            cached.append({
                "name": model_name,
                "path": str(item),
                "size_mb": round(size_mb, 2),
                "last_access": datetime.fromtimestamp(last_access).isoformat(),
                "is_sample": model_name in SAMPLE_MODELS
            })

    return cached
101
+
102
+
103
def cleanup_old_models(max_age_hours: float = CLEANUP_INTERVAL_HOURS):
    """
    Delete cached models whose last access is older than ``max_age_hours``.
    Sample models are always preserved.

    Returns:
        Dict with "deleted" and "kept" lists of model names.
    """
    if not MODEL_CACHE_DIR.exists():
        return {"deleted": [], "kept": []}

    deleted, kept = [], []
    cutoff_time = time.time() - max_age_hours * 3600

    for item in MODEL_CACHE_DIR.iterdir():
        if not (item.is_dir() and item.name.startswith("models--")):
            continue

        # Recover the "org/name" repo id from the cache directory name.
        parts = item.name.replace("models--", "").split("--")
        model_name = parts[0] if len(parts) < 2 else f"{parts[0]}/{parts[1]}"

        # The bundled sample models must survive every cleanup pass.
        if model_name in SAMPLE_MODELS:
            kept.append(model_name)
            continue

        try:
            # NOTE(review): relies on atime, which some filesystems mount
            # with noatime -- confirm this tracks real usage in deployment.
            if item.stat().st_atime < cutoff_time:
                shutil.rmtree(item)
                deleted.append(model_name)
            else:
                kept.append(model_name)
        except Exception as e:
            # Stat/delete failures are recorded but never abort the sweep.
            kept.append(f"{model_name} (error: {str(e)})")

    return {"deleted": deleted, "kept": kept}
141
+
142
+
143
def delete_model_cache(model_name: str) -> bool:
    """Remove a single model's directory from the HF hub cache.

    Args:
        model_name: HuggingFace repo id, e.g. "org/model".

    Returns:
        True if the cache directory existed and was deleted; False if the
        model is a protected sample model, is not cached, or deletion failed.
    """
    if model_name in SAMPLE_MODELS:
        return False  # Don't delete sample models

    # Hub cache layout stores "org/model" as "models--org--model".
    cache_name = f"models--{model_name.replace('/', '--')}"
    cache_path = MODEL_CACHE_DIR / cache_name

    if cache_path.exists():
        try:
            shutil.rmtree(cache_path)
            return True
        except OSError:
            # Narrowed from a bare except: shutil.rmtree raises OSError on
            # filesystem failures; KeyboardInterrupt/SystemExit must propagate.
            return False
    return False
159
+
160
+
161
def ensure_sample_models():
    """
    Ensure sample models are downloaded.
    Called on startup to pre-download tiny test models.

    Only the config is fetched (fast, tiny file); a missing transformers
    install is tolerated so the rest of the service can still run.
    """
    try:
        # Fix: AutoModel was imported here but never used; loading only
        # AutoConfig is all that's needed to confirm availability.
        from transformers import AutoConfig

        for model_name in SAMPLE_MODELS:
            try:
                # Just load config first (fast); this also triggers the
                # download into the cache when the model is missing.
                AutoConfig.from_pretrained(model_name)
                print(f"[ModelManager] Sample model '{model_name}' is available")
            except Exception as e:
                print(f"[ModelManager] Sample model '{model_name}' not cached: {e}")
    except ImportError:
        print("[ModelManager] transformers not installed, skipping sample model check")
178
+
179
+
180
def start_cleanup_scheduler():
    """Start the background daemon thread that periodically purges stale models."""
    global _cleanup_thread

    # Idempotent: never spawn a second scheduler while one is alive.
    if _cleanup_thread is not None and _cleanup_thread.is_alive():
        return

    def _run_forever():
        while True:
            # Sleep first so startup is never delayed by a cleanup pass.
            time.sleep(CLEANUP_INTERVAL_HOURS * 3600)
            try:
                outcome = cleanup_old_models()
                if outcome["deleted"]:
                    print(f"[ModelManager] Cleaned up models: {outcome['deleted']}")
            except Exception as e:
                # The scheduler must outlive any single failed sweep.
                print(f"[ModelManager] Cleanup error: {e}")

    _cleanup_thread = threading.Thread(target=_run_forever, daemon=True)
    _cleanup_thread.start()
    print(f"[ModelManager] Cleanup scheduler started (every {CLEANUP_INTERVAL_HOURS} hours)")
200
+
201
+
202
def get_cache_stats() -> Dict[str, Any]:
    """Summarize the model cache: counts, total size, and per-model entries."""
    models = get_cached_models()
    sample_count = sum(1 for entry in models if entry["is_sample"])
    total_size = sum(entry["size_mb"] for entry in models)

    stats = {
        "cache_dir": str(MODEL_CACHE_DIR),
        "total_models": len(models),
        "sample_models": sample_count,
        "total_size_mb": round(total_size, 2),
        "cleanup_interval_hours": CLEANUP_INTERVAL_HOURS,
        "models": models,
    }
    return stats
216
+
217
+
218
+ # Progress callback for HuggingFace downloads
219
class DownloadProgressCallback:
    """Progress hook for HuggingFace downloads.

    Instances are callable with (bytes_so_far, total_bytes, filename) and
    publish a throttled progress dict through set_download_progress().
    """

    def __init__(self, model_name: str):
        self.model_name = model_name
        self.start_time = time.time()
        self.last_update = 0

    def __call__(self, current: int, total: int, filename: str = ""):
        now = time.time()

        # Publish at most twice per second to keep polling traffic low.
        if now - self.last_update < 0.5:
            return
        self.last_update = now

        elapsed = now - self.start_time
        if elapsed > 0:
            speed = current / elapsed  # bytes/sec averaged since start
        else:
            speed = 0
        eta = int((total - current) / speed) if speed > 0 else 0
        percent = round(100 * current / total, 1) if total > 0 else 0

        # NOTE(review): the "speed_mbps" value is actually MiB/s (bytes / 2^20),
        # not megabits -- key name kept for API compatibility.
        set_download_progress(self.model_name, {
            "status": "downloading",
            "current_file": filename,
            "bytes_downloaded": current,
            "total_bytes": total,
            "percent": percent,
            "speed_mbps": round(speed / (1024 * 1024), 2),
            "eta_seconds": eta,
        })
backend/core/quantizer.py ADDED
@@ -0,0 +1,605 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Multi-bit Weight Quantization Engine
3
+ Supports INT8, INT4, NF4, and GPTQ-style quantization methods.
4
+ """
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import numpy as np
10
+ from typing import Optional, Tuple, Dict, Any, Literal
11
+ from dataclasses import dataclass
12
+ from enum import Enum
13
+
14
+
15
class QuantizationMethod(Enum):
    """Supported quantization methods.

    The value strings are the serialized form used by
    QuantizationConfig.to_dict() and API payloads.
    """
    INT8 = "int8"  # 8-bit integer quantization (per-channel or grouped)
    INT4 = "int4"  # 4-bit integer quantization (grouped, packed two per byte)
    NF4 = "nf4"    # Normal Float 4-bit (QLoRA style codebook)
    GPTQ = "gptq"  # GPTQ reconstruction-based (no quantizer class in this module; get_quantizer raises for it)
21
+
22
+
23
class QuantizationMode(Enum):
    """How the quantization range is anchored."""
    SYMMETRIC = "symmetric"    # Range: [-max, max]; no zero point needed
    ASYMMETRIC = "asymmetric"  # Range: [min, max]; uses a zero point
27
+
28
+
29
@dataclass
class QuantizationConfig:
    """Configuration for quantization process.

    Note: ``bits`` is informational; the effective bit width is set by
    ``method`` (INT8 -> 8, INT4/NF4 -> 4).
    """
    bits: int = 8                                        # nominal bit width
    method: QuantizationMethod = QuantizationMethod.INT8
    mode: QuantizationMode = QuantizationMode.SYMMETRIC
    group_size: Optional[int] = None  # None = per-channel, else group quantization
    use_double_quant: bool = False    # Double quantization for scales (not consumed by the quantizers in this module)
    compute_dtype: torch.dtype = torch.float32

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (enums and dtype become strings)."""
        return {
            "bits": self.bits,
            "method": self.method.value,
            "mode": self.mode.value,
            "group_size": self.group_size,
            "use_double_quant": self.use_double_quant,
            "compute_dtype": str(self.compute_dtype)
        }
48
+
49
+
50
@dataclass
class QuantizationResult:
    """Result of quantization operation."""
    quantized_weights: torch.Tensor      # int8 storage; for INT4/NF4 two 4-bit values are packed per byte
    scales: torch.Tensor                 # per-channel or per-group scale factors
    zero_points: Optional[torch.Tensor]  # None for symmetric and NF4 quantization
    original_shape: Tuple[int, ...]      # weight shape before padding/packing
    config: QuantizationConfig           # configuration used to produce this result
    max_error: float                     # max abs reconstruction error vs. the original weights
    mean_error: float                    # mean abs reconstruction error
    memory_savings_percent: float        # size reduction including scale overhead
61
+
62
+
63
class BaseQuantizer:
    """Common interface plus shared error/size metrics for all quantizers."""

    def __init__(self, config: QuantizationConfig):
        self.config = config

    def quantize(self, weights: torch.Tensor) -> QuantizationResult:
        """Quantize a 2-D weight tensor; implemented by subclasses."""
        raise NotImplementedError

    def dequantize(self, result: QuantizationResult) -> torch.Tensor:
        """Reconstruct floating-point weights from a result; implemented by subclasses."""
        raise NotImplementedError

    def _calculate_error(self, original: torch.Tensor, dequantized: torch.Tensor) -> Tuple[float, float]:
        """Return (max abs error, mean abs error) between original and reconstruction."""
        diff = torch.abs(original - dequantized)
        return float(diff.max()), float(diff.mean())

    def _calculate_memory_savings(self, original: torch.Tensor, quantized: torch.Tensor,
                                  scales: torch.Tensor) -> float:
        """Percent size reduction of (quantized + scales) relative to the original tensor."""
        def nbytes(t: torch.Tensor) -> int:
            return t.numel() * t.element_size()

        compressed = nbytes(quantized) + nbytes(scales)
        return 100 * (1 - compressed / nbytes(original))
90
+
91
+
92
class INT8Quantizer(BaseQuantizer):
    """8-bit integer quantization (W8A16).

    Supports per-channel symmetric/asymmetric quantization and group-wise
    symmetric quantization (when config.group_size is set).
    """

    def __init__(self, config: Optional[QuantizationConfig] = None):
        if config is None:
            config = QuantizationConfig(bits=8, method=QuantizationMethod.INT8)
        super().__init__(config)

    def quantize(self, weights: torch.Tensor) -> QuantizationResult:
        """
        Quantize weights to INT8 precision.

        Args:
            weights: Tensor of shape (out_features, in_features)

        Returns:
            QuantizationResult with int8 weights and scales
        """
        original_shape = weights.shape
        w_fp32 = weights.clone().to(torch.float32)

        if self.config.group_size is not None:
            # Group quantization (symmetric, per-group scales)
            return self._quantize_grouped(w_fp32, original_shape)

        # Per-channel quantization
        if self.config.mode == QuantizationMode.SYMMETRIC:
            # Symmetric: scale = max(|w|) / 127, no zero point
            scales = w_fp32.abs().max(dim=-1).values / 127
            scales = scales.clamp(min=1e-8)  # Avoid division by zero
            zero_points = None

            int8_weights = torch.round(w_fp32 / scales.unsqueeze(1)).clamp(-128, 127).to(torch.int8)
        else:
            # Asymmetric: map [w_min, w_max] onto the signed range [-128, 127].
            #
            # Fix: the previous implementation computed q = round(w/s + zp)
            # with zp in [0, 255], which lands in [0, 255] and was then
            # clamped to [-128, 127] -- saturating the upper half of the
            # range and corrupting dequantization. The range is now extended
            # to include 0 (standard affine-quantization nudging) and the
            # zero point is kept signed, so q always fits in int8 and the
            # dequant formula (q - zp) * s is exact.
            w_min = w_fp32.min(dim=-1).values.clamp(max=0.0)
            w_max = w_fp32.max(dim=-1).values.clamp(min=0.0)

            scales = (w_max - w_min) / 255
            scales = scales.clamp(min=1e-8)
            # Signed zero point in [-128, 127]: q = w/s + zp, w = (q - zp) * s
            zero_points = (torch.round(-w_min / scales) - 128).clamp(-128, 127).to(torch.int32)

            int8_weights = torch.round(w_fp32 / scales.unsqueeze(1) + zero_points.unsqueeze(1).float())
            int8_weights = int8_weights.clamp(-128, 127).to(torch.int8)

        # Round-trip to measure quantization error
        dequantized = self.dequantize_weights(int8_weights, scales, zero_points)
        max_error, mean_error = self._calculate_error(weights, dequantized)
        memory_savings = self._calculate_memory_savings(weights, int8_weights, scales)

        return QuantizationResult(
            quantized_weights=int8_weights,
            scales=scales,
            zero_points=zero_points,
            original_shape=original_shape,
            config=self.config,
            max_error=max_error,
            mean_error=mean_error,
            memory_savings_percent=memory_savings
        )

    def _quantize_grouped(self, weights: torch.Tensor, original_shape: Tuple[int, ...]) -> QuantizationResult:
        """Quantize with group-wise (symmetric) scaling.

        Args:
            weights: float32 tensor (out_features, in_features).
            original_shape: shape before padding, used to trim the result.
        """
        out_features, in_features = weights.shape
        group_size = self.config.group_size

        # Pad the input dimension up to a multiple of group_size
        if in_features % group_size != 0:
            pad_size = group_size - (in_features % group_size)
            weights = F.pad(weights, (0, pad_size))
            in_features = weights.shape[1]

        # Reshape so each group gets its own scale
        num_groups = in_features // group_size
        weights_grouped = weights.reshape(out_features, num_groups, group_size)

        scales = weights_grouped.abs().max(dim=-1).values / 127
        scales = scales.clamp(min=1e-8)

        int8_weights = torch.round(weights_grouped / scales.unsqueeze(-1))
        int8_weights = int8_weights.clamp(-128, 127).to(torch.int8)
        int8_weights = int8_weights.reshape(out_features, in_features)

        # Trim padding back off
        int8_weights = int8_weights[:, :original_shape[1]]
        scales = scales.reshape(out_features, num_groups)

        # Round-trip to measure quantization error
        dequantized = self.dequantize_weights(int8_weights, scales, None, group_size)
        max_error, mean_error = self._calculate_error(
            weights[:, :original_shape[1]], dequantized
        )
        memory_savings = self._calculate_memory_savings(
            weights[:, :original_shape[1]], int8_weights, scales
        )

        return QuantizationResult(
            quantized_weights=int8_weights,
            scales=scales,
            zero_points=None,
            original_shape=original_shape,
            config=self.config,
            max_error=max_error,
            mean_error=mean_error,
            memory_savings_percent=memory_savings
        )

    def dequantize_weights(self, int8_weights: torch.Tensor, scales: torch.Tensor,
                           zero_points: Optional[torch.Tensor] = None,
                           group_size: Optional[int] = None) -> torch.Tensor:
        """Dequantize INT8 weights back to floating point.

        Args:
            int8_weights: (out_features, in_features) int8 tensor.
            scales: per-channel (out,) or per-group (out, num_groups) scales.
            zero_points: signed per-channel zero points for asymmetric mode.
            group_size: columns per group when group quantization was used.
        """
        if group_size is not None:
            # Group dequantization: broadcast each group's scale across its columns
            out_features, in_features = int8_weights.shape
            num_groups = scales.shape[1]

            scales_expanded = scales.unsqueeze(-1).expand(-1, -1, group_size)
            scales_expanded = scales_expanded.reshape(out_features, -1)[:, :in_features]

            return int8_weights.float() * scales_expanded

        if zero_points is not None:
            # Asymmetric: w = (q - zp) * s
            return (int8_weights.float() - zero_points.unsqueeze(1).float()) * scales.unsqueeze(1)

        # Symmetric: w = q * s
        return int8_weights.float() * scales.unsqueeze(1)

    def dequantize(self, result: QuantizationResult) -> torch.Tensor:
        """Dequantize from a QuantizationResult."""
        return self.dequantize_weights(
            result.quantized_weights,
            result.scales,
            result.zero_points,
            result.config.group_size
        )
234
+
235
+
236
class INT4Quantizer(BaseQuantizer):
    """4-bit integer quantization (W4A16) with group-wise scaling.

    Symmetric mode stores signed nibbles in [-8, 7]; asymmetric mode stores
    unsigned nibbles in [0, 15] plus per-group zero points. Two nibbles are
    packed into each int8 byte for storage.
    """

    def __init__(self, config: Optional[QuantizationConfig] = None):
        if config is None:
            config = QuantizationConfig(bits=4, method=QuantizationMethod.INT4, group_size=128)
        super().__init__(config)

    def quantize(self, weights: torch.Tensor) -> QuantizationResult:
        """
        Quantize weights to INT4 precision.
        Uses group quantization for better accuracy.

        Args:
            weights: Tensor of shape (out_features, in_features)

        Returns:
            QuantizationResult with packed int4 weights and scales
        """
        original_shape = weights.shape
        w_fp32 = weights.clone().to(torch.float32)
        out_features, in_features = w_fp32.shape

        group_size = self.config.group_size or 128

        # Pad the input dimension to a multiple of the group size
        if in_features % group_size != 0:
            pad_size = group_size - (in_features % group_size)
            w_fp32 = F.pad(w_fp32, (0, pad_size))
            in_features = w_fp32.shape[1]

        num_groups = in_features // group_size
        weights_grouped = w_fp32.reshape(out_features, num_groups, group_size)

        if self.config.mode == QuantizationMode.SYMMETRIC:
            # Signed nibbles: range [-8, 7], scale anchored at |w|max / 7
            scales = weights_grouped.abs().max(dim=-1).values / 7
            scales = scales.clamp(min=1e-8)
            zero_points = None

            int4_weights = torch.round(weights_grouped / scales.unsqueeze(-1))
            int4_weights = int4_weights.clamp(-8, 7).to(torch.int8)  # Store as int8
        else:
            # Unsigned nibbles: range [0, 15] with a per-group zero point
            w_min = weights_grouped.min(dim=-1).values
            w_max = weights_grouped.max(dim=-1).values

            scales = (w_max - w_min) / 15
            scales = scales.clamp(min=1e-8)
            zero_points = torch.round(-w_min / scales).clamp(0, 15).to(torch.int8)

            int4_weights = torch.round(weights_grouped / scales.unsqueeze(-1) + zero_points.unsqueeze(-1))
            int4_weights = int4_weights.clamp(0, 15).to(torch.int8)

        # Undo the grouping and drop any padding columns
        int4_weights = int4_weights.reshape(out_features, in_features)
        int4_weights = int4_weights[:, :original_shape[1]]

        # Pack two nibbles per byte for memory efficiency
        packed_weights = self._pack_int4(int4_weights)

        # Round-trip (pre-packing) to measure quantization error
        dequantized = self.dequantize_weights(int4_weights, scales, zero_points, group_size)
        dequantized = dequantized[:, :original_shape[1]]

        max_error, mean_error = self._calculate_error(weights, dequantized)

        # Memory savings: packed nibbles are half the int8 footprint
        original_bytes = weights.numel() * weights.element_size()
        packed_bytes = packed_weights.numel() * packed_weights.element_size()
        scales_bytes = scales.numel() * scales.element_size()
        memory_savings = 100 * (1 - (packed_bytes + scales_bytes) / original_bytes)

        return QuantizationResult(
            quantized_weights=packed_weights,
            scales=scales.reshape(out_features, num_groups),
            zero_points=zero_points.reshape(out_features, num_groups) if zero_points is not None else None,
            original_shape=original_shape,
            config=self.config,
            max_error=max_error,
            mean_error=mean_error,
            memory_savings_percent=memory_savings
        )

    def _pack_int4(self, int4_weights: torch.Tensor) -> torch.Tensor:
        """Pack two 4-bit values into each int8 byte (low nibble first)."""
        out_features, in_features = int4_weights.shape

        # Pad to an even column count so every byte holds two nibbles
        if in_features % 2 != 0:
            int4_weights = F.pad(int4_weights, (0, 1))
            in_features += 1

        reshaped = int4_weights.reshape(out_features, in_features // 2, 2)
        # Masking with 0x0F keeps only the nibble; this also encodes
        # negative (two's-complement) values correctly for symmetric mode.
        packed = (reshaped[:, :, 0] & 0x0F) | ((reshaped[:, :, 1] & 0x0F) << 4)
        return packed.to(torch.int8)

    def _unpack_int4(self, packed_weights: torch.Tensor, original_in_features: int,
                     signed: bool = True) -> torch.Tensor:
        """Unpack int8 bytes back into 4-bit values.

        Args:
            packed_weights: (out_features, ceil(in/2)) packed tensor.
            original_in_features: column count before packing/padding.
            signed: when True, nibbles > 7 are sign-extended to [-8, -1]
                (symmetric storage). Asymmetric storage is unsigned [0, 15]
                and must pass signed=False.
        """
        out_features = packed_weights.shape[0]

        low = packed_weights & 0x0F
        high = (packed_weights >> 4) & 0x0F

        if signed:
            # Sign-extend nibbles for the symmetric [-8, 7] representation
            low = torch.where(low > 7, low - 16, low)
            high = torch.where(high > 7, high - 16, high)

        # Interleave low/high nibbles back into column order
        unpacked = torch.stack([low, high], dim=-1).reshape(out_features, -1)
        return unpacked[:, :original_in_features]

    def dequantize_weights(self, int4_weights: torch.Tensor, scales: torch.Tensor,
                           zero_points: Optional[torch.Tensor] = None,
                           group_size: Optional[int] = None) -> torch.Tensor:
        """Dequantize (already unpacked) INT4 values back to floating point."""
        out_features, in_features = int4_weights.shape
        group_size = group_size or self.config.group_size or 128
        num_groups = scales.shape[1] if scales.dim() > 1 else 1

        # Broadcast per-group scales across their group's columns
        scales_flat = scales.reshape(out_features, num_groups)
        scales_expanded = scales_flat.unsqueeze(-1).expand(-1, -1, group_size)
        scales_expanded = scales_expanded.reshape(out_features, -1)[:, :in_features]

        if zero_points is not None:
            zp_flat = zero_points.reshape(out_features, num_groups)
            zp_expanded = zp_flat.unsqueeze(-1).expand(-1, -1, group_size)
            zp_expanded = zp_expanded.reshape(out_features, -1)[:, :in_features]
            return (int4_weights.float() - zp_expanded.float()) * scales_expanded

        return int4_weights.float() * scales_expanded

    def dequantize(self, result: QuantizationResult) -> torch.Tensor:
        """Dequantize from a QuantizationResult (handles packed weights).

        Fix: nibbles are sign-extended only for symmetric results. The
        previous code always sign-extended, corrupting every asymmetric
        value in [8, 15] (zero_points present implies unsigned storage).
        """
        unpacked = self._unpack_int4(
            result.quantized_weights,
            result.original_shape[1],
            signed=result.zero_points is None,
        )
        return self.dequantize_weights(
            unpacked,
            result.scales,
            result.zero_points,
            result.config.group_size
        )
383
+
384
+
385
class NF4Quantizer(BaseQuantizer):
    """
    Normal Float 4-bit quantization (NF4).
    Uses a fixed codebook optimized for normally distributed weights: each
    weight becomes a 4-bit index into the codebook plus a per-group absmax
    scale.
    """

    # NF4 codebook: values optimized for a normal distribution
    # (the 16 levels used by QLoRA / bitsandbytes).
    NF4_CODEBOOK = torch.tensor([
        -1.0, -0.6961928009986877, -0.5250730514526367, -0.39491748809814453,
        -0.28444138169288635, -0.18477343022823334, -0.09105003625154495, 0.0,
        0.07958029955625534, 0.16093020141124725, 0.24611008348274231,
        0.33791524171829224, 0.44070982933044434, 0.5626170039176941,
        0.7229568362236023, 1.0
    ])

    def __init__(self, config: Optional[QuantizationConfig] = None):
        if config is None:
            config = QuantizationConfig(bits=4, method=QuantizationMethod.NF4, group_size=64)
        super().__init__(config)

        # Fix: use the full-precision class codebook. Previously a second,
        # 4-decimal truncated copy was defined here, duplicating
        # NF4_CODEBOOK and losing precision on every dequantization.
        self.codebook = self.NF4_CODEBOOK

    def quantize(self, weights: torch.Tensor) -> QuantizationResult:
        """Quantize weights using the NF4 codebook.

        Args:
            weights: Tensor of shape (out_features, in_features)

        Returns:
            QuantizationResult with packed 4-bit codebook indices and
            per-group absmax scales (zero_points is always None for NF4).
        """
        original_shape = weights.shape
        w_fp32 = weights.clone().to(torch.float32)
        out_features, in_features = w_fp32.shape

        group_size = self.config.group_size or 64

        # Pad the input dimension to a multiple of the group size
        if in_features % group_size != 0:
            pad_size = group_size - (in_features % group_size)
            w_fp32 = F.pad(w_fp32, (0, pad_size))
            in_features = w_fp32.shape[1]

        num_groups = in_features // group_size
        weights_grouped = w_fp32.reshape(out_features, num_groups, group_size)

        # Absmax scale per group normalizes each group into [-1, 1],
        # the codebook's domain
        scales = weights_grouped.abs().max(dim=-1).values
        scales = scales.clamp(min=1e-8)
        normalized = weights_grouped / scales.unsqueeze(-1)

        # Nearest-neighbor lookup against the 16 codebook entries
        # (broadcasts to an extra size-16 dimension)
        codebook = self.codebook.to(weights.device)
        distances = torch.abs(normalized.unsqueeze(-1) - codebook)
        indices = distances.argmin(dim=-1).to(torch.int8)

        # Undo grouping and drop padding columns
        indices = indices.reshape(out_features, in_features)[:, :original_shape[1]]

        # Pack two 4-bit indices per byte
        packed = self._pack_int4(indices)

        # Round-trip (pre-packing) to measure quantization error
        dequantized = self.dequantize_weights(indices, scales.reshape(out_features, num_groups), group_size)
        dequantized = dequantized[:, :original_shape[1]]

        max_error, mean_error = self._calculate_error(weights, dequantized)

        original_bytes = weights.numel() * weights.element_size()
        packed_bytes = packed.numel() * packed.element_size()
        scales_bytes = scales.numel() * scales.element_size()
        memory_savings = 100 * (1 - (packed_bytes + scales_bytes) / original_bytes)

        return QuantizationResult(
            quantized_weights=packed,
            scales=scales.reshape(out_features, num_groups),
            zero_points=None,
            original_shape=original_shape,
            config=self.config,
            max_error=max_error,
            mean_error=mean_error,
            memory_savings_percent=memory_savings
        )

    def _pack_int4(self, indices: torch.Tensor) -> torch.Tensor:
        """Pack two 4-bit codebook indices into each int8 byte."""
        out_features, in_features = indices.shape
        if in_features % 2 != 0:
            indices = F.pad(indices, (0, 1))
            in_features += 1

        reshaped = indices.reshape(out_features, in_features // 2, 2)
        packed = (reshaped[:, :, 0] & 0x0F) | ((reshaped[:, :, 1] & 0x0F) << 4)
        return packed.to(torch.int8)

    def _unpack_int4(self, packed: torch.Tensor, original_in_features: int) -> torch.Tensor:
        """Unpack bytes back into unsigned 4-bit indices (no sign extension:
        NF4 indices are always in [0, 15])."""
        out_features = packed.shape[0]
        low = packed & 0x0F
        high = (packed >> 4) & 0x0F
        unpacked = torch.stack([low, high], dim=-1).reshape(out_features, -1)
        return unpacked[:, :original_in_features]

    def dequantize_weights(self, indices: torch.Tensor, scales: torch.Tensor,
                           group_size: Optional[int] = None) -> torch.Tensor:
        """Dequantize NF4 indices back to floating point: codebook lookup
        followed by per-group rescaling."""
        codebook = self.codebook.to(indices.device)

        # Look up codebook values for every index
        dequantized = codebook[indices.long()]

        # Broadcast per-group scales across their group's columns
        out_features, in_features = indices.shape
        group_size = group_size or self.config.group_size or 64
        num_groups = scales.shape[1]

        scales_expanded = scales.unsqueeze(-1).expand(-1, -1, group_size)
        scales_expanded = scales_expanded.reshape(out_features, -1)[:, :in_features]

        return dequantized * scales_expanded

    def dequantize(self, result: QuantizationResult) -> torch.Tensor:
        """Dequantize from a QuantizationResult (handles packed indices)."""
        unpacked = self._unpack_int4(result.quantized_weights, result.original_shape[1])
        return self.dequantize_weights(
            unpacked,
            result.scales,
            result.config.group_size
        )
515
+
516
+
517
class QuantizedLinear(nn.Module):
    """
    Quantized Linear layer supporting multiple quantization methods.
    Compatible with W8A16, W4A16, NF4, and GPTQ quantization.

    Weights are stored in quantized form and dequantized on the fly in
    forward(), trading extra compute per call for reduced memory.
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 config: Optional[QuantizationConfig] = None):
        super().__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.config = config or QuantizationConfig()

        # Initialize quantizer based on config (raises for unsupported methods)
        self.quantizer = self._get_quantizer()

        # Buffers for quantized weights; registered as None placeholders and
        # filled in by quantize_weights(), so they move with the module.
        self.register_buffer("quantized_weights", None)
        self.register_buffer("scales", None)
        self.register_buffer("zero_points", None)

        # Bias (kept in full precision)
        if bias:
            self.register_buffer("bias", torch.zeros(out_features))
        else:
            self.bias = None

        # Guards forward(): flipped to True once quantize_weights() runs
        self._quantized = False

    def _get_quantizer(self) -> BaseQuantizer:
        """Get appropriate quantizer based on config.

        Raises:
            ValueError: for methods without an implementation here (e.g. GPTQ).
        """
        if self.config.method == QuantizationMethod.INT8:
            return INT8Quantizer(self.config)
        elif self.config.method == QuantizationMethod.INT4:
            return INT4Quantizer(self.config)
        elif self.config.method == QuantizationMethod.NF4:
            return NF4Quantizer(self.config)
        else:
            raise ValueError(f"Unsupported quantization method: {self.config.method}")

    def quantize_weights(self, weights: torch.Tensor, bias: Optional[torch.Tensor] = None) -> QuantizationResult:
        """Quantize ``weights`` and store the quantized tensors on this layer.

        Args:
            weights: full-precision weight matrix, shape (out_features, in_features).
            bias: optional full-precision bias to store alongside.

        Returns:
            The QuantizationResult (includes error and memory-savings metrics).
        """
        result = self.quantizer.quantize(weights)

        self.quantized_weights = result.quantized_weights
        self.scales = result.scales
        self.zero_points = result.zero_points

        if bias is not None:
            self.bias = bias.clone()

        self._quantized = True
        return result

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass with dequantization on-the-fly.

        Raises:
            RuntimeError: if quantize_weights() has not been called yet.
        """
        if not self._quantized:
            raise RuntimeError("Layer has not been quantized. Call quantize_weights first.")

        # Rebuild a QuantizationResult so the quantizer can dequantize;
        # the error/savings metric fields are irrelevant here and set to 0.
        weights = self.quantizer.dequantize(QuantizationResult(
            quantized_weights=self.quantized_weights,
            scales=self.scales,
            zero_points=self.zero_points,
            original_shape=(self.out_features, self.in_features),
            config=self.config,
            max_error=0, mean_error=0, memory_savings_percent=0
        ))

        # Linear operation (weights cast to the activation dtype)
        output = F.linear(x, weights.to(x.dtype))

        if self.bias is not None:
            output = output + self.bias.to(x.dtype)

        return output
594
+
595
+
596
def get_quantizer(config: QuantizationConfig) -> BaseQuantizer:
    """Factory: build the quantizer implementation matching config.method.

    Raises:
        ValueError: for methods with no implementation (e.g. GPTQ).
    """
    registry = {
        QuantizationMethod.INT8: INT8Quantizer,
        QuantizationMethod.INT4: INT4Quantizer,
        QuantizationMethod.NF4: NF4Quantizer,
    }
    quantizer_cls = registry.get(config.method)
    if quantizer_cls is None:
        raise ValueError(f"Unsupported method: {config.method}")
    return quantizer_cls(config)
backend/core/system_checker.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ System Requirements Checker
3
+ Detects GPU availability, memory, and provides hardware recommendations.
4
+ """
5
+
6
+ import torch
7
+ import psutil
8
+ import platform
9
+ from dataclasses import dataclass
10
+ from typing import Optional, List, Dict, Any
11
+ from enum import Enum
12
+
13
+
14
+ class HardwareCapability(Enum):
15
+ """Hardware capability levels"""
16
+ FULL_GPU = "full_gpu" # CUDA GPU with sufficient VRAM
17
+ LIMITED_GPU = "limited_gpu" # CUDA GPU with limited VRAM
18
+ CPU_ONLY = "cpu_only" # No GPU available
19
+ APPLE_SILICON = "apple_silicon" # M1/M2/M3 with MPS
20
+
21
+
22
+ @dataclass
23
+ class GPUInfo:
24
+ """Information about a GPU device"""
25
+ index: int
26
+ name: str
27
+ total_memory_gb: float
28
+ free_memory_gb: float
29
+ compute_capability: Optional[str] = None
30
+
31
+
32
+ @dataclass
33
+ class SystemInfo:
34
+ """Complete system information"""
35
+ platform: str
36
+ python_version: str
37
+ torch_version: str
38
+ cuda_available: bool
39
+ cuda_version: Optional[str]
40
+ mps_available: bool
41
+ cpu_cores: int
42
+ ram_total_gb: float
43
+ ram_available_gb: float
44
+ gpus: List[GPUInfo]
45
+ capability: HardwareCapability
46
+ recommended_batch_size: int
47
+ max_model_size: str
48
+ warnings: List[str]
49
+
50
+
51
+ class SystemChecker:
52
+ """Check system capabilities for quantization tasks"""
53
+
54
+ # Model size thresholds (in billions of parameters)
55
+ MODEL_SIZES = {
56
+ "tiny": 0.1, # ~100M params
57
+ "small": 0.5, # ~500M params
58
+ "medium": 1.0, # ~1B params
59
+ "large": 7.0, # ~7B params
60
+ "xlarge": 13.0, # ~13B params
61
+ "xxlarge": 70.0 # ~70B params
62
+ }
63
+
64
+ # Memory requirements per billion parameters (GB)
65
+ MEMORY_PER_BILLION_PARAMS = {
66
+ "fp32": 4.0,
67
+ "fp16": 2.0,
68
+ "int8": 1.0,
69
+ "int4": 0.5
70
+ }
71
+
72
+ def __init__(self):
73
+ self._system_info: Optional[SystemInfo] = None
74
+
75
+ def check(self, force_refresh: bool = False) -> SystemInfo:
76
+ """Perform full system check"""
77
+ if self._system_info is not None and not force_refresh:
78
+ return self._system_info
79
+
80
+ warnings = []
81
+ gpus = []
82
+
83
+ # Basic info
84
+ cuda_available = torch.cuda.is_available()
85
+ mps_available = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
86
+
87
+ # CUDA version
88
+ cuda_version = None
89
+ if cuda_available:
90
+ cuda_version = torch.version.cuda
91
+
92
+ # GPU detection
93
+ if cuda_available:
94
+ try:
95
+ for i in range(torch.cuda.device_count()):
96
+ props = torch.cuda.get_device_properties(i)
97
+ total_mem = props.total_memory / (1024**3)
98
+ free_mem = (props.total_memory - torch.cuda.memory_reserved(i)) / (1024**3)
99
+
100
+ gpus.append(GPUInfo(
101
+ index=i,
102
+ name=props.name,
103
+ total_memory_gb=round(total_mem, 2),
104
+ free_memory_gb=round(free_mem, 2),
105
+ compute_capability=f"{props.major}.{props.minor}"
106
+ ))
107
+ except Exception as e:
108
+ warnings.append(f"Error detecting GPU: {str(e)}")
109
+
110
+ # RAM info
111
+ ram = psutil.virtual_memory()
112
+ ram_total_gb = ram.total / (1024**3)
113
+ ram_available_gb = ram.available / (1024**3)
114
+
115
+ # Determine capability
116
+ capability = self._determine_capability(gpus, mps_available, ram_total_gb)
117
+
118
+ # Recommendations
119
+ recommended_batch_size = self._get_recommended_batch_size(capability, gpus)
120
+ max_model_size = self._get_max_model_size(capability, gpus, ram_total_gb)
121
+
122
+ # Add warnings
123
+ if not cuda_available and not mps_available:
124
+ warnings.append("No GPU detected. Quantization will run on CPU (slower).")
125
+
126
+ if ram_available_gb < 8:
127
+ warnings.append(f"Low RAM available ({ram_available_gb:.1f}GB). Large models may fail.")
128
+
129
+ if gpus and gpus[0].free_memory_gb < 4:
130
+ warnings.append(f"Low GPU memory ({gpus[0].free_memory_gb:.1f}GB free). Consider smaller models.")
131
+
132
+ self._system_info = SystemInfo(
133
+ platform=platform.system(),
134
+ python_version=platform.python_version(),
135
+ torch_version=torch.__version__,
136
+ cuda_available=cuda_available,
137
+ cuda_version=cuda_version,
138
+ mps_available=mps_available,
139
+ cpu_cores=psutil.cpu_count(logical=False) or 1,
140
+ ram_total_gb=round(ram_total_gb, 2),
141
+ ram_available_gb=round(ram_available_gb, 2),
142
+ gpus=gpus,
143
+ capability=capability,
144
+ recommended_batch_size=recommended_batch_size,
145
+ max_model_size=max_model_size,
146
+ warnings=warnings
147
+ )
148
+
149
+ return self._system_info
150
+
151
+ def _determine_capability(self, gpus: List[GPUInfo], mps_available: bool,
152
+ ram_total_gb: float) -> HardwareCapability:
153
+ """Determine hardware capability level"""
154
+ if mps_available:
155
+ return HardwareCapability.APPLE_SILICON
156
+
157
+ if not gpus:
158
+ return HardwareCapability.CPU_ONLY
159
+
160
+ # Check if any GPU has >= 8GB VRAM
161
+ max_vram = max(gpu.total_memory_gb for gpu in gpus)
162
+
163
+ if max_vram >= 8:
164
+ return HardwareCapability.FULL_GPU
165
+ else:
166
+ return HardwareCapability.LIMITED_GPU
167
+
168
+ def _get_recommended_batch_size(self, capability: HardwareCapability,
169
+ gpus: List[GPUInfo]) -> int:
170
+ """Get recommended batch size based on hardware"""
171
+ if capability == HardwareCapability.CPU_ONLY:
172
+ return 1
173
+ elif capability == HardwareCapability.LIMITED_GPU:
174
+ return 4
175
+ elif capability == HardwareCapability.APPLE_SILICON:
176
+ return 8
177
+ else:
178
+ # Full GPU - scale with VRAM
179
+ if gpus:
180
+ vram = gpus[0].total_memory_gb
181
+ if vram >= 24:
182
+ return 32
183
+ elif vram >= 16:
184
+ return 16
185
+ elif vram >= 8:
186
+ return 8
187
+ return 8
188
+
189
+ def _get_max_model_size(self, capability: HardwareCapability,
190
+ gpus: List[GPUInfo], ram_gb: float) -> str:
191
+ """Get maximum recommended model size"""
192
+ if capability == HardwareCapability.CPU_ONLY:
193
+ # CPU-only: limited by RAM, very slow for large models
194
+ if ram_gb >= 32:
195
+ return "medium (1B)"
196
+ elif ram_gb >= 16:
197
+ return "small (500M)"
198
+ else:
199
+ return "tiny (100M)"
200
+
201
+ elif capability == HardwareCapability.LIMITED_GPU:
202
+ return "small (500M)"
203
+
204
+ elif capability == HardwareCapability.APPLE_SILICON:
205
+ # Apple Silicon: depends on unified memory
206
+ if ram_gb >= 32:
207
+ return "large (7B)"
208
+ elif ram_gb >= 16:
209
+ return "medium (1B)"
210
+ else:
211
+ return "small (500M)"
212
+
213
+ else: # FULL_GPU
214
+ if gpus:
215
+ vram = gpus[0].total_memory_gb
216
+ if vram >= 48:
217
+ return "xxlarge (70B)"
218
+ elif vram >= 24:
219
+ return "xlarge (13B)"
220
+ elif vram >= 16:
221
+ return "large (7B)"
222
+ elif vram >= 8:
223
+ return "medium (1B)"
224
+ return "medium (1B)"
225
+
226
+ def can_load_model(self, model_params_billions: float,
227
+ dtype: str = "fp16") -> Dict[str, Any]:
228
+ """Check if a specific model can be loaded"""
229
+ info = self.check()
230
+
231
+ memory_required = model_params_billions * self.MEMORY_PER_BILLION_PARAMS.get(dtype, 2.0)
232
+ memory_required *= 1.3 # 30% overhead for activations, optimizer, etc.
233
+
234
+ # Check GPU memory
235
+ gpu_ok = False
236
+ gpu_memory = 0
237
+ if info.gpus:
238
+ gpu_memory = info.gpus[0].free_memory_gb
239
+ gpu_ok = gpu_memory >= memory_required
240
+
241
+ # Check RAM
242
+ ram_ok = info.ram_available_gb >= memory_required
243
+
244
+ can_load = gpu_ok or (info.capability == HardwareCapability.CPU_ONLY and ram_ok)
245
+
246
+ return {
247
+ "can_load": can_load,
248
+ "memory_required_gb": round(memory_required, 2),
249
+ "gpu_available_gb": round(gpu_memory, 2) if info.gpus else 0,
250
+ "ram_available_gb": round(info.ram_available_gb, 2),
251
+ "recommended_device": "cuda" if gpu_ok else ("mps" if info.mps_available else "cpu"),
252
+ "warnings": [] if can_load else [
253
+ f"Model requires ~{memory_required:.1f}GB memory. " +
254
+ f"Available: GPU={gpu_memory:.1f}GB, RAM={info.ram_available_gb:.1f}GB"
255
+ ]
256
+ }
257
+
258
+ def to_dict(self) -> Dict[str, Any]:
259
+ """Convert system info to dictionary"""
260
+ info = self.check()
261
+ return {
262
+ "platform": info.platform,
263
+ "python_version": info.python_version,
264
+ "torch_version": info.torch_version,
265
+ "cuda_available": info.cuda_available,
266
+ "cuda_version": info.cuda_version,
267
+ "mps_available": info.mps_available,
268
+ "cpu_cores": info.cpu_cores,
269
+ "ram_total_gb": info.ram_total_gb,
270
+ "ram_available_gb": info.ram_available_gb,
271
+ "gpus": [
272
+ {
273
+ "index": gpu.index,
274
+ "name": gpu.name,
275
+ "total_memory_gb": gpu.total_memory_gb,
276
+ "free_memory_gb": gpu.free_memory_gb,
277
+ "compute_capability": gpu.compute_capability
278
+ }
279
+ for gpu in info.gpus
280
+ ],
281
+ "capability": info.capability.value,
282
+ "recommended_batch_size": info.recommended_batch_size,
283
+ "max_model_size": info.max_model_size,
284
+ "warnings": info.warnings
285
+ }
286
+
287
+
288
+ # Global instance
289
+ system_checker = SystemChecker()
290
+
291
+
292
+ def get_system_info() -> Dict[str, Any]:
293
+ """Get system information as dictionary"""
294
+ return system_checker.to_dict()
295
+
296
+
297
+ def check_model_requirements(model_params_billions: float, dtype: str = "fp16") -> Dict[str, Any]:
298
+ """Check if system can handle a specific model"""
299
+ return system_checker.can_load_model(model_params_billions, dtype)
backend/core/visualization.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Visualization utilities for weight matrices and quantization analysis.
3
+ Generates chart data for frontend consumption.
4
+ """
5
+
6
+ import torch
7
+ import numpy as np
8
+ from typing import Dict, Any, List, Tuple, Optional
9
+ from dataclasses import dataclass
10
+ import base64
11
+ import io
12
+
13
+ # Import matplotlib with non-interactive backend
14
+ import matplotlib
15
+ matplotlib.use('Agg')
16
+ import matplotlib.pyplot as plt
17
+ from matplotlib.colors import TwoSlopeNorm
18
+
19
+
20
+ @dataclass
21
+ class ChartData:
22
+ """Data structure for chart rendering"""
23
+ chart_type: str
24
+ data: Dict[str, Any]
25
+ layout: Dict[str, Any]
26
+
27
+
28
+ class Visualizer:
29
+ """Generate visualization data for weight matrices and quantization analysis"""
30
+
31
+ def __init__(self, max_display_size: int = 128):
32
+ """
33
+ Args:
34
+ max_display_size: Maximum dimension for heatmap display (downsampled if larger)
35
+ """
36
+ self.max_display_size = max_display_size
37
+
38
+ def weight_heatmap(self, weights: torch.Tensor, title: str = "Weight Matrix",
39
+ downsample: bool = True) -> ChartData:
40
+ """
41
+ Generate heatmap data for weight matrix visualization.
42
+ Returns Plotly-compatible data structure.
43
+ """
44
+ w = weights.detach().cpu().float().numpy()
45
+
46
+ # Downsample if too large
47
+ if downsample and (w.shape[0] > self.max_display_size or w.shape[1] > self.max_display_size):
48
+ w = self._downsample_2d(w, self.max_display_size)
49
+
50
+ # Calculate symmetric colorscale bounds - convert to Python float for JSON
51
+ vmax = float(max(abs(w.min()), abs(w.max())))
52
+
53
+ return ChartData(
54
+ chart_type="heatmap",
55
+ data={
56
+ "z": w.tolist(),
57
+ "colorscale": "RdBu_r",
58
+ "zmin": -vmax,
59
+ "zmax": vmax,
60
+ "zmid": 0
61
+ },
62
+ layout={
63
+ "title": title,
64
+ "xaxis": {"title": "Input Features"},
65
+ "yaxis": {"title": "Output Features"}
66
+ }
67
+ )
68
+
69
+ def weight_histogram(self, weights: torch.Tensor, title: str = "Weight Distribution",
70
+ bins: int = 50) -> ChartData:
71
+ """Generate histogram data for weight distribution"""
72
+ w = weights.detach().cpu().float().numpy().flatten()
73
+
74
+ hist, bin_edges = np.histogram(w, bins=bins)
75
+ bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
76
+
77
+ return ChartData(
78
+ chart_type="bar",
79
+ data={
80
+ "x": bin_centers.tolist(),
81
+ "y": hist.tolist(),
82
+ "type": "bar"
83
+ },
84
+ layout={
85
+ "title": title,
86
+ "xaxis": {"title": "Weight Value"},
87
+ "yaxis": {"title": "Frequency"},
88
+ "bargap": 0.05
89
+ }
90
+ )
91
+
92
+ def error_heatmap(self, original: torch.Tensor, quantized: torch.Tensor,
93
+ scales: torch.Tensor, title: str = "Quantization Error") -> ChartData:
94
+ """Generate error heatmap between original and dequantized weights"""
95
+ orig = original.detach().cpu().float()
96
+ quant = quantized.detach().cpu().float()
97
+ sc = scales.detach().cpu().float()
98
+
99
+ # Dequantize
100
+ if sc.dim() == 1:
101
+ dequant = quant * sc.unsqueeze(1)
102
+ else:
103
+ # Group quantization - expand scales
104
+ dequant = quant * sc.unsqueeze(-1)
105
+ dequant = dequant.reshape(orig.shape)
106
+
107
+ error = (orig - dequant).abs().numpy()
108
+
109
+ # Downsample if needed
110
+ if error.shape[0] > self.max_display_size or error.shape[1] > self.max_display_size:
111
+ error = self._downsample_2d(error, self.max_display_size)
112
+
113
+ return ChartData(
114
+ chart_type="heatmap",
115
+ data={
116
+ "z": error.tolist(),
117
+ "colorscale": "Reds",
118
+ "zmin": 0
119
+ },
120
+ layout={
121
+ "title": title,
122
+ "xaxis": {"title": "Input Features"},
123
+ "yaxis": {"title": "Output Features"}
124
+ }
125
+ )
126
+
127
+ def comparison_overlay(self, original: torch.Tensor, dequantized: torch.Tensor,
128
+ sample_size: int = 1000) -> ChartData:
129
+ """Generate scatter plot comparing original vs dequantized values"""
130
+ orig = original.detach().cpu().float().numpy().flatten()
131
+ deq = dequantized.detach().cpu().float().numpy().flatten()
132
+
133
+ # Sample if too large
134
+ if len(orig) > sample_size:
135
+ indices = np.random.choice(len(orig), sample_size, replace=False)
136
+ orig = orig[indices]
137
+ deq = deq[indices]
138
+
139
+ return ChartData(
140
+ chart_type="scatter",
141
+ data={
142
+ "x": orig.tolist(),
143
+ "y": deq.tolist(),
144
+ "mode": "markers",
145
+ "marker": {"size": 3, "opacity": 0.5}
146
+ },
147
+ layout={
148
+ "title": "Original vs Dequantized Weights",
149
+ "xaxis": {"title": "Original Value"},
150
+ "yaxis": {"title": "Dequantized Value"},
151
+ "shapes": [{
152
+ "type": "line",
153
+ "x0": float(orig.min()),
154
+ "x1": float(orig.max()),
155
+ "y0": float(orig.min()),
156
+ "y1": float(orig.max()),
157
+ "line": {"color": "red", "dash": "dash"}
158
+ }]
159
+ }
160
+ )
161
+
162
+ def scales_histogram(self, scales: torch.Tensor,
163
+ title: str = "Quantization Scales Distribution") -> ChartData:
164
+ """Generate histogram of quantization scales"""
165
+ s = scales.detach().cpu().float().numpy().flatten()
166
+
167
+ hist, bin_edges = np.histogram(s, bins=30)
168
+ bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
169
+
170
+ return ChartData(
171
+ chart_type="bar",
172
+ data={
173
+ "x": bin_centers.tolist(),
174
+ "y": hist.tolist(),
175
+ "marker": {"color": "green"}
176
+ },
177
+ layout={
178
+ "title": title,
179
+ "xaxis": {"title": "Scale Value"},
180
+ "yaxis": {"title": "Frequency"}
181
+ }
182
+ )
183
+
184
+ def layer_error_bar(self, layer_errors: Dict[str, float],
185
+ title: str = "Quantization Error by Layer") -> ChartData:
186
+ """Generate bar chart of errors per layer"""
187
+ layers = list(layer_errors.keys())
188
+ errors = list(layer_errors.values())
189
+
190
+ return ChartData(
191
+ chart_type="bar",
192
+ data={
193
+ "x": layers,
194
+ "y": errors,
195
+ "marker": {"color": "coral"}
196
+ },
197
+ layout={
198
+ "title": title,
199
+ "xaxis": {"title": "Layer", "tickangle": 45},
200
+ "yaxis": {"title": "Mean Absolute Error"}
201
+ }
202
+ )
203
+
204
+ def memory_comparison(self, original_mb: float, quantized_mb: float,
205
+ overhead_mb: float = 0) -> ChartData:
206
+ """Generate memory comparison chart"""
207
+ return ChartData(
208
+ chart_type="bar",
209
+ data={
210
+ "x": ["Original (FP32)", "Quantized + Scales", "Savings"],
211
+ "y": [original_mb, quantized_mb + overhead_mb, original_mb - quantized_mb - overhead_mb],
212
+ "marker": {"color": ["#3498db", "#2ecc71", "#e74c3c"]}
213
+ },
214
+ layout={
215
+ "title": "Memory Usage Comparison",
216
+ "yaxis": {"title": "Memory (MB)"}
217
+ }
218
+ )
219
+
220
+ def _downsample_2d(self, arr: np.ndarray, max_size: int) -> np.ndarray:
221
+ """Downsample 2D array to max_size x max_size"""
222
+ h, w = arr.shape
223
+
224
+ if h > max_size:
225
+ step_h = h // max_size
226
+ arr = arr[::step_h, :][:max_size, :]
227
+
228
+ if w > max_size:
229
+ step_w = w // max_size
230
+ arr = arr[:, ::step_w][:, :max_size]
231
+
232
+ return arr
233
+
234
+ def generate_png(self, weights: torch.Tensor, title: str = "Weights") -> bytes:
235
+ """Generate PNG image bytes (for backward compatibility)"""
236
+ w = weights.detach().cpu().float().numpy()
237
+
238
+ if w.shape[0] > self.max_display_size or w.shape[1] > self.max_display_size:
239
+ w = self._downsample_2d(w, self.max_display_size)
240
+
241
+ fig, ax = plt.subplots(figsize=(10, 8))
242
+
243
+ vmax = max(abs(w.min()), abs(w.max()))
244
+ norm = TwoSlopeNorm(vmin=-vmax, vcenter=0, vmax=vmax)
245
+
246
+ im = ax.imshow(w, cmap='RdBu_r', norm=norm)
247
+ plt.colorbar(im, label='Weight Value')
248
+ ax.set_title(title)
249
+
250
+ buf = io.BytesIO()
251
+ plt.savefig(buf, format='png', bbox_inches='tight')
252
+ plt.close(fig)
253
+ buf.seek(0)
254
+
255
+ return buf.getvalue()
256
+
257
+ def to_dict(self, chart: ChartData) -> Dict[str, Any]:
258
+ """Convert ChartData to dictionary"""
259
+ return {
260
+ "type": chart.chart_type,
261
+ "data": chart.data,
262
+ "layout": chart.layout
263
+ }
264
+
265
+
266
+ # Global instance
267
+ visualizer = Visualizer()
268
+
269
+
270
+ def get_weight_heatmap(weights: torch.Tensor, title: str = "Weights") -> Dict[str, Any]:
271
+ """Generate weight heatmap data"""
272
+ return visualizer.to_dict(visualizer.weight_heatmap(weights, title))
273
+
274
+
275
+ def get_weight_histogram(weights: torch.Tensor, title: str = "Distribution") -> Dict[str, Any]:
276
+ """Generate weight histogram data"""
277
+ return visualizer.to_dict(visualizer.weight_histogram(weights, title))
backend/requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.100.0
2
+ uvicorn>=0.23.0
3
+ python-multipart>=0.0.6
4
+ torch>=2.0.0
5
+ transformers>=4.31.0
6
+ accelerate>=0.21.0
7
+ bitsandbytes>=0.40.0
8
+ scipy>=1.11.0
9
+ numpy>=1.24.0
10
+ pydantic>=2.0.0
11
+ jinja2>=3.1.2
docker-compose.yml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docker Compose for local development with GPU support
2
+
3
+ services:
4
+ # Full application (frontend + backend)
5
+ app:
6
+ build:
7
+ context: .
8
+ dockerfile: Dockerfile
9
+ ports:
10
+ - "7860:7860"
11
+ environment:
12
+ - CUDA_VISIBLE_DEVICES=0
13
+ volumes:
14
+ - ./models:/app/models
15
+ - ./cache:/app/cache
16
+ deploy:
17
+ resources:
18
+ reservations:
19
+ devices:
20
+ - driver: nvidia
21
+ count: 1
22
+ capabilities: [ gpu ]
23
+ restart: unless-stopped
24
+
25
+ # Development mode: separate frontend and backend
26
+ backend-dev:
27
+ build:
28
+ context: .
29
+ dockerfile: Dockerfile
30
+ target: python-base
31
+ command: python -m uvicorn backend.api.main:app --host 0.0.0.0 --port 8000 --reload
32
+ ports:
33
+ - "8000:8000"
34
+ volumes:
35
+ - ./backend:/app/backend
36
+ - ./models:/app/models
37
+ environment:
38
+ - CUDA_VISIBLE_DEVICES=0
39
+ profiles:
40
+ - dev
41
+ deploy:
42
+ resources:
43
+ reservations:
44
+ devices:
45
+ - driver: nvidia
46
+ count: 1
47
+ capabilities: [ gpu ]
48
+
49
+ frontend-dev:
50
+ image: node:20-alpine
51
+ working_dir: /app
52
+ command: sh -c "npm install && npm run dev -- --host"
53
+ ports:
54
+ - "5173:5173"
55
+ volumes:
56
+ - ./frontend:/app
57
+ - /app/node_modules
58
+ environment:
59
+ - VITE_API_URL=http://localhost:8000/api
60
+ profiles:
61
+ - dev
62
+
63
+ # CPU-only version (no GPU)
64
+ app-cpu:
65
+ build:
66
+ context: .
67
+ dockerfile: Dockerfile
68
+ ports:
69
+ - "7860:7860"
70
+ volumes:
71
+ - ./models:/app/models
72
+ profiles:
73
+ - cpu
74
+ restart: unless-stopped
75
+
76
+ networks:
77
+ default:
78
+ name: quantizer-network
frontend/.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules
11
+ dist
12
+ dist-ssr
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/*
17
+ !.vscode/extensions.json
18
+ .idea
19
+ .DS_Store
20
+ *.suo
21
+ *.ntvs*
22
+ *.njsproj
23
+ *.sln
24
+ *.sw?
frontend/README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # React + Vite
2
+
3
+ This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
4
+
5
+ Currently, two official plugins are available:
6
+
7
+ - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
8
+ - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
9
+
10
+ ## React Compiler
11
+
12
+ The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
13
+
14
+ ## Expanding the ESLint configuration
15
+
16
+ If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project.
frontend/eslint.config.js ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from '@eslint/js'
2
+ import globals from 'globals'
3
+ import reactHooks from 'eslint-plugin-react-hooks'
4
+ import reactRefresh from 'eslint-plugin-react-refresh'
5
+ import { defineConfig, globalIgnores } from 'eslint/config'
6
+
7
+ export default defineConfig([
8
+ globalIgnores(['dist']),
9
+ {
10
+ files: ['**/*.{js,jsx}'],
11
+ extends: [
12
+ js.configs.recommended,
13
+ reactHooks.configs.flat.recommended,
14
+ reactRefresh.configs.vite,
15
+ ],
16
+ languageOptions: {
17
+ ecmaVersion: 2020,
18
+ globals: globals.browser,
19
+ parserOptions: {
20
+ ecmaVersion: 'latest',
21
+ ecmaFeatures: { jsx: true },
22
+ sourceType: 'module',
23
+ },
24
+ },
25
+ rules: {
26
+ 'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
27
+ },
28
+ },
29
+ ])
frontend/index.html ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>frontend</title>
8
+ </head>
9
+ <body>
10
+ <div id="root"></div>
11
+ <script type="module" src="/src/main.jsx"></script>
12
+ </body>
13
+ </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "lint": "eslint .",
10
+ "preview": "vite preview"
11
+ },
12
+ "dependencies": {
13
+ "axios": "^1.13.2",
14
+ "framer-motion": "^12.26.1",
15
+ "lucide-react": "^0.562.0",
16
+ "react": "^19.2.0",
17
+ "react-dom": "^19.2.0",
18
+ "react-hot-toast": "^2.6.0",
19
+ "react-router-dom": "^7.12.0",
20
+ "recharts": "^3.6.0",
21
+ "zustand": "^5.0.10"
22
+ },
23
+ "devDependencies": {
24
+ "@eslint/js": "^9.39.1",
25
+ "@types/react": "^19.2.5",
26
+ "@types/react-dom": "^19.2.3",
27
+ "@vitejs/plugin-react": "^5.1.1",
28
+ "buffer": "^6.0.3",
29
+ "eslint": "^9.39.1",
30
+ "eslint-plugin-react-hooks": "^7.0.1",
31
+ "eslint-plugin-react-refresh": "^0.4.24",
32
+ "globals": "^16.5.0",
33
+ "vite": "^7.2.4"
34
+ }
35
+ }
frontend/public/vite.svg ADDED
frontend/src/App.css ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #root {
2
+ max-width: 1280px;
3
+ margin: 0 auto;
4
+ padding: 2rem;
5
+ text-align: center;
6
+ }
7
+
8
+ .logo {
9
+ height: 6em;
10
+ padding: 1.5em;
11
+ will-change: filter;
12
+ transition: filter 300ms;
13
+ }
14
+ .logo:hover {
15
+ filter: drop-shadow(0 0 2em #646cffaa);
16
+ }
17
+ .logo.react:hover {
18
+ filter: drop-shadow(0 0 2em #61dafbaa);
19
+ }
20
+
21
+ @keyframes logo-spin {
22
+ from {
23
+ transform: rotate(0deg);
24
+ }
25
+ to {
26
+ transform: rotate(360deg);
27
+ }
28
+ }
29
+
30
+ @media (prefers-reduced-motion: no-preference) {
31
+ a:nth-of-type(2) .logo {
32
+ animation: logo-spin infinite 20s linear;
33
+ }
34
+ }
35
+
36
+ .card {
37
+ padding: 2em;
38
+ }
39
+
40
+ .read-the-docs {
41
+ color: #888;
42
+ }
frontend/src/App.jsx ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom';
2
+ import { Toaster, toast } from 'react-hot-toast';
3
+ import { useEffect } from 'react';
4
+ import Layout from './components/Layout';
5
+ import Dashboard from './pages/Dashboard';
6
+ import Quantizer from './pages/Quantizer';
7
+ import Analysis from './pages/Analysis';
8
+ import ModelLoader from './pages/ModelLoader';
9
+ import { useSystemStore } from './store';
10
+ import './index.css';
11
+
12
+ function App() {
13
+ const fetchSystemInfo = useSystemStore((state) => state.fetchSystemInfo);
14
+
15
+ useEffect(() => {
16
+ // Fetch system info on app load
17
+ fetchSystemInfo();
18
+
19
+ const handleOffline = () => toast.error("Internet connection lost");
20
+ const handleOnline = () => toast.success("Internet connection restored");
21
+
22
+ window.addEventListener('offline', handleOffline);
23
+ window.addEventListener('online', handleOnline);
24
+
25
+ return () => {
26
+ window.removeEventListener('offline', handleOffline);
27
+ window.removeEventListener('online', handleOnline);
28
+ };
29
+ }, [fetchSystemInfo]);
30
+
31
+ return (
32
+ <BrowserRouter>
33
+ <Routes>
34
+ <Route path="/" element={<Layout />}>
35
+ <Route index element={<Navigate to="/dashboard" replace />} />
36
+ <Route path="dashboard" element={<Dashboard />} />
37
+ <Route path="quantize" element={<Quantizer />} />
38
+ <Route path="analysis" element={<Analysis />} />
39
+ <Route path="models" element={<ModelLoader />} />
40
+ </Route>
41
+ </Routes>
42
+ <Toaster
43
+ position="top-right"
44
+ toastOptions={{
45
+ duration: 4000,
46
+ style: {
47
+ background: 'rgba(15, 23, 42, 0.8)',
48
+ color: '#e2e8f0',
49
+ backdropFilter: 'blur(12px)',
50
+ border: '1px solid rgba(255, 255, 255, 0.1)',
51
+ padding: '12px 24px',
52
+ borderRadius: '12px',
53
+ boxShadow: '0 8px 32px rgba(0, 0, 0, 0.2)',
54
+ fontSize: '0.95rem'
55
+ },
56
+ success: {
57
+ iconTheme: {
58
+ primary: '#6366f1',
59
+ secondary: '#fff',
60
+ },
61
+ style: {
62
+ border: '1px solid rgba(99, 102, 241, 0.2)',
63
+ background: 'rgba(99, 102, 241, 0.1)',
64
+ }
65
+ },
66
+ error: {
67
+ iconTheme: {
68
+ primary: '#ef4444',
69
+ secondary: '#fff',
70
+ },
71
+ style: {
72
+ border: '1px solid rgba(239, 68, 68, 0.2)',
73
+ background: 'rgba(239, 68, 68, 0.1)',
74
+ }
75
+ }
76
+ }}
77
+ />
78
+ </BrowserRouter>
79
+ );
80
+ }
81
+
82
+ export default App;
frontend/src/assets/react.svg ADDED
frontend/src/components/Layout.jsx ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Outlet, NavLink, useLocation } from 'react-router-dom';
2
+ import { useEffect } from 'react';
3
+ import {
4
+ LayoutDashboard,
5
+ Layers,
6
+ BarChart3,
7
+ Settings,
8
+ Cpu,
9
+ HardDrive,
10
+ Zap,
11
+ Github,
12
+ Menu,
13
+ X
14
+ } from 'lucide-react';
15
+ import { useSystemStore, useUIStore, useModelStore } from '../store';
16
+ import { motion, AnimatePresence } from 'framer-motion';
17
+
18
+ /**
19
+ * Main application layout with sidebar navigation
20
+ */
21
+ export default function Layout() {
22
+ const { sidebarOpen, toggleSidebar } = useUIStore();
23
+ const systemInfo = useSystemStore((state) => state.systemInfo);
24
+ const checkLoadedModel = useModelStore((state) => state.checkLoadedModel);
25
+ const location = useLocation();
26
+
27
+ // Sync model state on mount
28
+ useEffect(() => {
29
+ checkLoadedModel();
30
+ }, []);
31
+
32
+ const navItems = [
33
+ { path: '/dashboard', label: 'Dashboard', icon: LayoutDashboard },
34
+ { path: '/quantize', label: 'Quantizer', icon: Layers },
35
+ { path: '/analysis', label: 'Analysis', icon: BarChart3 },
36
+ { path: '/models', label: 'Models', icon: HardDrive },
37
+ ];
38
+
39
+ return (
40
+ <div className="app-layout">
41
+ {/* Sidebar */}
42
+ <aside className={`sidebar ${sidebarOpen ? '' : 'closed'}`}>
43
+ {/* Logo */}
44
+ <div className="sidebar-header">
45
+ <div className="logo">
46
+ <div className="logo-icon">
47
+ <Zap size={24} />
48
+ </div>
49
+ <div className="logo-text">
50
+ <span className="logo-title">Quantizer</span>
51
+ <span className="logo-subtitle">Neural Network</span>
52
+ </div>
53
+ </div>
54
+ <button className="btn btn-ghost btn-icon mobile-menu" onClick={toggleSidebar}>
55
+ <X size={20} />
56
+ </button>
57
+ </div>
58
+
59
+ {/* Navigation */}
60
+ <nav className="sidebar-nav">
61
+ {navItems.map((item) => (
62
+ <NavLink
63
+ key={item.path}
64
+ to={item.path}
65
+ className={({ isActive }) => `nav-item ${isActive ? 'active' : ''}`}
66
+ >
67
+ <item.icon size={20} />
68
+ <span>{item.label}</span>
69
+ </NavLink>
70
+ ))}
71
+ </nav>
72
+
73
+ {/* System Status */}
74
+ <div className="sidebar-footer">
75
+ <div className="system-status glass-card no-hover">
76
+ <div className="status-header">
77
+ <Cpu size={16} />
78
+ <span>System Status</span>
79
+ </div>
80
+ {systemInfo ? (
81
+ <div className="status-details">
82
+ <div className="status-item">
83
+ <span className="status-label">GPU</span>
84
+ <span className={`badge ${systemInfo.cuda_available ? 'badge-success' : 'badge-warning'}`}>
85
+ {systemInfo.cuda_available ? 'CUDA' : systemInfo.mps_available ? 'MPS' : 'CPU'}
86
+ </span>
87
+ </div>
88
+ {systemInfo.gpus?.length > 0 && (
89
+ <div className="status-item">
90
+ <span className="status-label">{systemInfo.gpus[0].name}</span>
91
+ <span className="text-xs text-muted">{systemInfo.gpus[0].total_memory_gb}GB</span>
92
+ </div>
93
+ )}
94
+ <div className="status-item">
95
+ <span className="status-label">RAM</span>
96
+ <span className="text-xs text-muted">
97
+ {systemInfo.ram_available_gb?.toFixed(1)}GB / {systemInfo.ram_total_gb?.toFixed(1)}GB
98
+ </span>
99
+ </div>
100
+ </div>
101
+ ) : (
102
+ <div className="status-loading">
103
+ <div className="spinner"></div>
104
+ <span className="text-xs text-muted">Detecting...</span>
105
+ </div>
106
+ )}
107
+ </div>
108
+
109
+ <a
110
+ href="https://github.com"
111
+ target="_blank"
112
+ rel="noopener noreferrer"
113
+ className="nav-item github-link"
114
+ >
115
+ <Github size={20} />
116
+ <span>GitHub</span>
117
+ </a>
118
+ </div>
119
+ </aside>
120
+
121
+ {/* Mobile menu button */}
122
+ <button className="mobile-menu-btn btn btn-secondary btn-icon" onClick={toggleSidebar}>
123
+ <Menu size={20} />
124
+ </button>
125
+
126
+ {/* Main Content */}
127
+ <main className="main-content">
128
+ <AnimatePresence mode="wait">
129
+ <motion.div
130
+ key={location.pathname}
131
+ initial={{ opacity: 0, y: 10 }}
132
+ animate={{ opacity: 1, y: 0 }}
133
+ exit={{ opacity: 0, y: -10 }}
134
+ transition={{ duration: 0.2 }}
135
+ >
136
+ <Outlet />
137
+ </motion.div>
138
+ </AnimatePresence>
139
+ </main>
140
+
141
+ <style>{`
142
+ .sidebar {
143
+ display: flex;
144
+ flex-direction: column;
145
+ }
146
+
147
+ .sidebar-header {
148
+ display: flex;
149
+ align-items: center;
150
+ justify-content: space-between;
151
+ margin-bottom: var(--space-xl);
152
+ }
153
+
154
+ .logo {
155
+ display: flex;
156
+ align-items: center;
157
+ gap: var(--space-md);
158
+ }
159
+
160
+ .logo-icon {
161
+ width: 40px;
162
+ height: 40px;
163
+ display: flex;
164
+ align-items: center;
165
+ justify-content: center;
166
+ background: var(--gradient-primary);
167
+ border-radius: var(--radius-lg);
168
+ color: white;
169
+ }
170
+
171
+ .logo-text {
172
+ display: flex;
173
+ flex-direction: column;
174
+ }
175
+
176
+ .logo-title {
177
+ font-size: var(--text-lg);
178
+ font-weight: 700;
179
+ color: var(--text-primary);
180
+ line-height: 1.2;
181
+ }
182
+
183
+ .logo-subtitle {
184
+ font-size: var(--text-xs);
185
+ color: var(--text-tertiary);
186
+ }
187
+
188
+ .mobile-menu {
189
+ display: none;
190
+ }
191
+
192
+ .mobile-menu-btn {
193
+ display: none;
194
+ position: fixed;
195
+ top: var(--space-md);
196
+ left: var(--space-md);
197
+ z-index: 99;
198
+ }
199
+
200
+ .sidebar-nav {
201
+ flex: 1;
202
+ display: flex;
203
+ flex-direction: column;
204
+ gap: var(--space-xs);
205
+ }
206
+
207
+ .nav-item {
208
+ display: flex;
209
+ align-items: center;
210
+ gap: var(--space-md);
211
+ padding: var(--space-sm) var(--space-md);
212
+ border-radius: var(--radius-lg);
213
+ color: var(--text-secondary);
214
+ text-decoration: none;
215
+ transition: all var(--transition-fast);
216
+ }
217
+
218
+ .nav-item:hover {
219
+ background: var(--glass-bg);
220
+ color: var(--text-primary);
221
+ }
222
+
223
+ .nav-item.active {
224
+ background: var(--gradient-primary);
225
+ color: white;
226
+ box-shadow: var(--shadow-md);
227
+ }
228
+
229
+ .sidebar-footer {
230
+ margin-top: auto;
231
+ display: flex;
232
+ flex-direction: column;
233
+ gap: var(--space-md);
234
+ }
235
+
236
+ .system-status {
237
+ padding: var(--space-md);
238
+ }
239
+
240
+ .status-header {
241
+ display: flex;
242
+ align-items: center;
243
+ gap: var(--space-sm);
244
+ font-size: var(--text-sm);
245
+ font-weight: 500;
246
+ color: var(--text-primary);
247
+ margin-bottom: var(--space-sm);
248
+ }
249
+
250
+ .status-details {
251
+ display: flex;
252
+ flex-direction: column;
253
+ gap: var(--space-xs);
254
+ }
255
+
256
+ .status-item {
257
+ display: flex;
258
+ align-items: center;
259
+ justify-content: space-between;
260
+ font-size: var(--text-xs);
261
+ }
262
+
263
+ .status-label {
264
+ color: var(--text-secondary);
265
+ }
266
+
267
+ .status-loading {
268
+ display: flex;
269
+ align-items: center;
270
+ gap: var(--space-sm);
271
+ }
272
+
273
+ .github-link {
274
+ opacity: 0.7;
275
+ }
276
+
277
+ .github-link:hover {
278
+ opacity: 1;
279
+ }
280
+
281
+ @media (max-width: 768px) {
282
+ .mobile-menu {
283
+ display: flex;
284
+ }
285
+
286
+ .mobile-menu-btn {
287
+ display: flex;
288
+ }
289
+
290
+ .sidebar.closed {
291
+ transform: translateX(-100%);
292
+ }
293
+ }
294
+ `}</style>
295
+ </div>
296
+ );
297
+ }
frontend/src/index.css ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Neural Network Quantizer - Design System
3
+ * Premium glassmorphism dark theme with smooth animations
4
+ */
5
+
6
+ /* ============================================
7
+ CSS Variables - Design Tokens
8
+ ============================================ */
9
+ :root {
10
+ /* Colors - Dark Theme */
11
+ --color-bg-primary: #0a0a0f;
12
+ --color-bg-secondary: #12121a;
13
+ --color-bg-tertiary: #1a1a25;
14
+ --color-bg-elevated: #22222f;
15
+
16
+ /* Glass effect backgrounds */
17
+ --glass-bg: rgba(255, 255, 255, 0.03);
18
+ --glass-bg-hover: rgba(255, 255, 255, 0.06);
19
+ --glass-border: rgba(255, 255, 255, 0.08);
20
+ --glass-border-hover: rgba(255, 255, 255, 0.15);
21
+
22
+ /* Accent colors */
23
+ --color-accent-primary: #6366f1;
24
+ --color-accent-secondary: #8b5cf6;
25
+ --color-accent-tertiary: #a855f7;
26
+ --color-accent-glow: rgba(99, 102, 241, 0.3);
27
+
28
+ /* Status colors */
29
+ --color-success: #10b981;
30
+ --color-success-bg: rgba(16, 185, 129, 0.1);
31
+ --color-warning: #f59e0b;
32
+ --color-warning-bg: rgba(245, 158, 11, 0.1);
33
+ --color-error: #ef4444;
34
+ --color-error-bg: rgba(239, 68, 68, 0.1);
35
+ --color-info: #06b6d4;
36
+ --color-info-bg: rgba(6, 182, 212, 0.1);
37
+
38
+ /* Text colors */
39
+ --text-primary: #f8fafc;
40
+ --text-secondary: #94a3b8;
41
+ --text-tertiary: #64748b;
42
+ --text-muted: #475569;
43
+
44
+ /* Gradients */
45
+ --gradient-primary: linear-gradient(135deg, var(--color-accent-primary) 0%, var(--color-accent-secondary) 100%);
46
+ --gradient-secondary: linear-gradient(135deg, var(--color-accent-secondary) 0%, var(--color-accent-tertiary) 100%);
47
+ --gradient-glow: radial-gradient(ellipse at center, var(--color-accent-glow) 0%, transparent 70%);
48
+ --gradient-mesh: radial-gradient(at 40% 20%, hsla(228,100%,74%,0.15) 0px, transparent 50%),
49
+ radial-gradient(at 80% 0%, hsla(189,100%,56%,0.1) 0px, transparent 50%),
50
+ radial-gradient(at 0% 50%, hsla(355,100%,93%,0.05) 0px, transparent 50%),
51
+ radial-gradient(at 80% 50%, hsla(340,100%,76%,0.1) 0px, transparent 50%);
52
+
53
+ /* Spacing */
54
+ --space-xs: 0.25rem;
55
+ --space-sm: 0.5rem;
56
+ --space-md: 1rem;
57
+ --space-lg: 1.5rem;
58
+ --space-xl: 2rem;
59
+ --space-2xl: 3rem;
60
+ --space-3xl: 4rem;
61
+
62
+ /* Border radius */
63
+ --radius-sm: 0.375rem;
64
+ --radius-md: 0.5rem;
65
+ --radius-lg: 0.75rem;
66
+ --radius-xl: 1rem;
67
+ --radius-2xl: 1.5rem;
68
+ --radius-full: 9999px;
69
+
70
+ /* Shadows */
71
+ --shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.3);
72
+ --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.3), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
73
+ --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.4), 0 4px 6px -2px rgba(0, 0, 0, 0.3);
74
+ --shadow-xl: 0 20px 25px -5px rgba(0, 0, 0, 0.5), 0 10px 10px -5px rgba(0, 0, 0, 0.3);
75
+ --shadow-glow: 0 0 40px var(--color-accent-glow);
76
+
77
+ /* Typography */
78
+ --font-sans: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
79
+ --font-mono: 'JetBrains Mono', 'Fira Code', Consolas, monospace;
80
+
81
+ --text-xs: 0.75rem;
82
+ --text-sm: 0.875rem;
83
+ --text-base: 1rem;
84
+ --text-lg: 1.125rem;
85
+ --text-xl: 1.25rem;
86
+ --text-2xl: 1.5rem;
87
+ --text-3xl: 1.875rem;
88
+ --text-4xl: 2.25rem;
89
+
90
+ /* Transitions */
91
+ --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
92
+ --transition-base: 200ms cubic-bezier(0.4, 0, 0.2, 1);
93
+ --transition-slow: 300ms cubic-bezier(0.4, 0, 0.2, 1);
94
+ --transition-spring: 500ms cubic-bezier(0.34, 1.56, 0.64, 1);
95
+
96
+ /* Layout */
97
+ --sidebar-width: 280px;
98
+ --header-height: 64px;
99
+ --max-content-width: 1400px;
100
+ }
101
+
102
+ /* ============================================
103
+ Base Styles
104
+ ============================================ */
105
+ *, *::before, *::after {
106
+ box-sizing: border-box;
107
+ margin: 0;
108
+ padding: 0;
109
+ }
110
+
111
+ html {
112
+ font-size: 16px;
113
+ -webkit-font-smoothing: antialiased;
114
+ -moz-osx-font-smoothing: grayscale;
115
+ }
116
+
117
+ body {
118
+ font-family: var(--font-sans);
119
+ background: var(--color-bg-primary);
120
+ color: var(--text-primary);
121
+ line-height: 1.6;
122
+ min-height: 100vh;
123
+ overflow-x: hidden;
124
+ }
125
+
126
+ /* Background mesh gradient */
127
+ body::before {
128
+ content: '';
129
+ position: fixed;
130
+ top: 0;
131
+ left: 0;
132
+ right: 0;
133
+ bottom: 0;
134
+ background: var(--gradient-mesh);
135
+ pointer-events: none;
136
+ z-index: -1;
137
+ }
138
+
139
+ #root {
140
+ min-height: 100vh;
141
+ display: flex;
142
+ flex-direction: column;
143
+ }
144
+
145
+ /* ============================================
146
+ Typography
147
+ ============================================ */
148
+ h1, h2, h3, h4, h5, h6 {
149
+ font-weight: 600;
150
+ line-height: 1.3;
151
+ color: var(--text-primary);
152
+ }
153
+
154
+ h1 { font-size: var(--text-4xl); }
155
+ h2 { font-size: var(--text-3xl); }
156
+ h3 { font-size: var(--text-2xl); }
157
+ h4 { font-size: var(--text-xl); }
158
+ h5 { font-size: var(--text-lg); }
159
+ h6 { font-size: var(--text-base); }
160
+
161
+ p {
162
+ color: var(--text-secondary);
163
+ margin-bottom: var(--space-md);
164
+ }
165
+
166
+ a {
167
+ color: var(--color-accent-primary);
168
+ text-decoration: none;
169
+ transition: color var(--transition-fast);
170
+ }
171
+
172
+ a:hover {
173
+ color: var(--color-accent-secondary);
174
+ }
175
+
176
+ code {
177
+ font-family: var(--font-mono);
178
+ background: var(--glass-bg);
179
+ padding: 0.2em 0.4em;
180
+ border-radius: var(--radius-sm);
181
+ font-size: 0.9em;
182
+ }
183
+
184
+ /* ============================================
185
+ Glass Card Component
186
+ ============================================ */
187
+ .glass-card {
188
+ background: var(--glass-bg);
189
+ border: 1px solid var(--glass-border);
190
+ border-radius: var(--radius-xl);
191
+ padding: var(--space-lg);
192
+ backdrop-filter: blur(20px);
193
+ -webkit-backdrop-filter: blur(20px);
194
+ transition: all var(--transition-base);
195
+ }
196
+
197
+ .glass-card:hover {
198
+ background: var(--glass-bg-hover);
199
+ border-color: var(--glass-border-hover);
200
+ transform: translateY(-2px);
201
+ box-shadow: var(--shadow-lg);
202
+ }
203
+
204
+ .glass-card.no-hover:hover {
205
+ transform: none;
206
+ box-shadow: none;
207
+ }
208
+
209
+ /* ============================================
210
+ Button Styles
211
+ ============================================ */
212
+ .btn {
213
+ display: inline-flex;
214
+ align-items: center;
215
+ justify-content: center;
216
+ gap: var(--space-sm);
217
+ padding: var(--space-sm) var(--space-lg);
218
+ border: none;
219
+ border-radius: var(--radius-lg);
220
+ font-family: var(--font-sans);
221
+ font-size: var(--text-sm);
222
+ font-weight: 500;
223
+ cursor: pointer;
224
+ transition: all var(--transition-base);
225
+ white-space: nowrap;
226
+ }
227
+
228
+ .btn:disabled {
229
+ opacity: 0.5;
230
+ cursor: not-allowed;
231
+ }
232
+
233
+ .btn-primary {
234
+ background: var(--gradient-primary);
235
+ color: white;
236
+ box-shadow: var(--shadow-md), 0 0 20px var(--color-accent-glow);
237
+ }
238
+
239
+ .btn-primary:hover:not(:disabled) {
240
+ transform: translateY(-2px);
241
+ box-shadow: var(--shadow-lg), 0 0 30px var(--color-accent-glow);
242
+ }
243
+
244
+ .btn-secondary {
245
+ background: var(--glass-bg);
246
+ color: var(--text-primary);
247
+ border: 1px solid var(--glass-border);
248
+ backdrop-filter: blur(10px);
249
+ }
250
+
251
+ .btn-secondary:hover:not(:disabled) {
252
+ background: var(--glass-bg-hover);
253
+ border-color: var(--glass-border-hover);
254
+ }
255
+
256
+ .btn-ghost {
257
+ background: transparent;
258
+ color: var(--text-secondary);
259
+ }
260
+
261
+ .btn-ghost:hover:not(:disabled) {
262
+ background: var(--glass-bg);
263
+ color: var(--text-primary);
264
+ }
265
+
266
+ .btn-success {
267
+ background: var(--color-success);
268
+ color: white;
269
+ }
270
+
271
+ .btn-danger {
272
+ background: var(--color-error);
273
+ color: white;
274
+ }
275
+
276
+ .btn-lg {
277
+ padding: var(--space-md) var(--space-xl);
278
+ font-size: var(--text-base);
279
+ }
280
+
281
+ .btn-sm {
282
+ padding: var(--space-xs) var(--space-md);
283
+ font-size: var(--text-xs);
284
+ }
285
+
286
+ .btn-icon {
287
+ padding: var(--space-sm);
288
+ aspect-ratio: 1;
289
+ }
290
+
291
+ /* ============================================
292
+ Input Styles
293
+ ============================================ */
294
+ .input-group {
295
+ display: flex;
296
+ flex-direction: column;
297
+ gap: var(--space-xs);
298
+ }
299
+
300
+ .input-label {
301
+ font-size: var(--text-sm);
302
+ font-weight: 500;
303
+ color: var(--text-secondary);
304
+ }
305
+
306
+ .input {
307
+ width: 100%;
308
+ padding: var(--space-sm) var(--space-md);
309
+ background: var(--glass-bg);
310
+ border: 1px solid var(--glass-border);
311
+ border-radius: var(--radius-md);
312
+ color: var(--text-primary);
313
+ font-family: var(--font-sans);
314
+ font-size: var(--text-sm);
315
+ transition: all var(--transition-fast);
316
+ }
317
+
318
+ .input:focus {
319
+ outline: none;
320
+ border-color: var(--color-accent-primary);
321
+ box-shadow: 0 0 0 3px var(--color-accent-glow);
322
+ }
323
+
324
+ .input::placeholder {
325
+ color: var(--text-muted);
326
+ }
327
+
328
+ .input-error {
329
+ border-color: var(--color-error);
330
+ }
331
+
332
+ /* Select dropdown */
333
+ .select {
334
+ appearance: none;
335
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='%2394a3b8' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'%3E%3C/polyline%3E%3C/svg%3E");
336
+ background-repeat: no-repeat;
337
+ background-position: right var(--space-sm) center;
338
+ background-size: 16px;
339
+ padding-right: var(--space-xl);
340
+ }
341
+
342
+ /* Slider */
343
+ .slider {
344
+ width: 100%;
345
+ height: 6px;
346
+ background: var(--glass-bg);
347
+ border-radius: var(--radius-full);
348
+ appearance: none;
349
+ cursor: pointer;
350
+ }
351
+
352
+ .slider::-webkit-slider-thumb {
353
+ appearance: none;
354
+ width: 18px;
355
+ height: 18px;
356
+ background: var(--gradient-primary);
357
+ border-radius: 50%;
358
+ cursor: pointer;
359
+ box-shadow: var(--shadow-md);
360
+ transition: transform var(--transition-fast);
361
+ }
362
+
363
+ .slider::-webkit-slider-thumb:hover {
364
+ transform: scale(1.2);
365
+ }
366
+
367
+ /* ============================================
368
+ Status Badges
369
+ ============================================ */
370
+ .badge {
371
+ display: inline-flex;
372
+ align-items: center;
373
+ gap: var(--space-xs);
374
+ padding: var(--space-xs) var(--space-sm);
375
+ border-radius: var(--radius-full);
376
+ font-size: var(--text-xs);
377
+ font-weight: 500;
378
+ }
379
+
380
+ .badge-success {
381
+ background: var(--color-success-bg);
382
+ color: var(--color-success);
383
+ }
384
+
385
+ .badge-warning {
386
+ background: var(--color-warning-bg);
387
+ color: var(--color-warning);
388
+ }
389
+
390
+ .badge-error {
391
+ background: var(--color-error-bg);
392
+ color: var(--color-error);
393
+ }
394
+
395
+ .badge-info {
396
+ background: var(--color-info-bg);
397
+ color: var(--color-info);
398
+ }
399
+
400
+ /* ============================================
401
+ Layout Components
402
+ ============================================ */
403
+ .app-layout {
404
+ display: flex;
405
+ min-height: 100vh;
406
+ }
407
+
408
+ .sidebar {
409
+ width: var(--sidebar-width);
410
+ background: var(--color-bg-secondary);
411
+ border-right: 1px solid var(--glass-border);
412
+ padding: var(--space-lg);
413
+ display: flex;
414
+ flex-direction: column;
415
+ position: fixed;
416
+ top: 0;
417
+ left: 0;
418
+ bottom: 0;
419
+ z-index: 100;
420
+ }
421
+
422
+ .main-content {
423
+ flex: 1;
424
+ margin-left: var(--sidebar-width);
425
+ padding: var(--space-xl);
426
+ max-width: calc(100vw - var(--sidebar-width));
427
+ }
428
+
429
+ .page-header {
430
+ margin-bottom: var(--space-xl);
431
+ }
432
+
433
+ .page-title {
434
+ font-size: var(--text-3xl);
435
+ font-weight: 700;
436
+ margin-bottom: var(--space-xs);
437
+ }
438
+
439
+ .page-subtitle {
440
+ color: var(--text-secondary);
441
+ font-size: var(--text-base);
442
+ }
443
+
444
+ /* Grid layout */
445
+ .grid {
446
+ display: grid;
447
+ gap: var(--space-lg);
448
+ }
449
+
450
+ .grid-2 { grid-template-columns: repeat(2, 1fr); }
451
+ .grid-3 { grid-template-columns: repeat(3, 1fr); }
452
+ .grid-4 { grid-template-columns: repeat(4, 1fr); }
453
+
454
+ @media (max-width: 1024px) {
455
+ .grid-3, .grid-4 { grid-template-columns: repeat(2, 1fr); }
456
+ }
457
+
458
+ @media (max-width: 768px) {
459
+ .grid-2, .grid-3, .grid-4 { grid-template-columns: 1fr; }
460
+
461
+ .sidebar {
462
+ transform: translateX(-100%);
463
+ transition: transform var(--transition-base);
464
+ }
465
+
466
+ .sidebar.open {
467
+ transform: translateX(0);
468
+ }
469
+
470
+ .main-content {
471
+ margin-left: 0;
472
+ max-width: 100vw;
473
+ }
474
+ }
475
+
476
+ /* ============================================
477
+ Stats Card
478
+ ============================================ */
479
+ .stat-card {
480
+ display: flex;
481
+ flex-direction: column;
482
+ gap: var(--space-sm);
483
+ }
484
+
485
+ .stat-value {
486
+ font-size: var(--text-3xl);
487
+ font-weight: 700;
488
+ background: var(--gradient-primary);
489
+ -webkit-background-clip: text;
490
+ -webkit-text-fill-color: transparent;
491
+ background-clip: text;
492
+ }
493
+
494
+ .stat-label {
495
+ font-size: var(--text-sm);
496
+ color: var(--text-secondary);
497
+ }
498
+
499
+ .stat-change {
500
+ font-size: var(--text-xs);
501
+ display: flex;
502
+ align-items: center;
503
+ gap: var(--space-xs);
504
+ }
505
+
506
+ .stat-change.positive { color: var(--color-success); }
507
+ .stat-change.negative { color: var(--color-error); }
508
+
509
+ /* ============================================
510
+ Progress Bar
511
+ ============================================ */
512
+ .progress-bar {
513
+ width: 100%;
514
+ height: 8px;
515
+ background: var(--glass-bg);
516
+ border-radius: var(--radius-full);
517
+ overflow: hidden;
518
+ }
519
+
520
+ .progress-fill {
521
+ height: 100%;
522
+ background: var(--gradient-primary);
523
+ border-radius: var(--radius-full);
524
+ transition: width var(--transition-slow);
525
+ }
526
+
527
+ /* ============================================
528
+ Tabs
529
+ ============================================ */
530
+ .tabs {
531
+ display: flex;
532
+ gap: var(--space-xs);
533
+ padding: var(--space-xs);
534
+ background: var(--glass-bg);
535
+ border-radius: var(--radius-lg);
536
+ margin-bottom: var(--space-lg);
537
+ }
538
+
539
+ .tab {
540
+ flex: 1;
541
+ padding: var(--space-sm) var(--space-md);
542
+ background: transparent;
543
+ border: none;
544
+ border-radius: var(--radius-md);
545
+ color: var(--text-secondary);
546
+ font-size: var(--text-sm);
547
+ font-weight: 500;
548
+ cursor: pointer;
549
+ transition: all var(--transition-fast);
550
+ }
551
+
552
+ .tab:hover {
553
+ color: var(--text-primary);
554
+ background: var(--glass-bg-hover);
555
+ }
556
+
557
+ .tab.active {
558
+ background: var(--gradient-primary);
559
+ color: white;
560
+ }
561
+
562
+ /* ============================================
563
+ Chart Container
564
+ ============================================ */
565
+ .chart-container {
566
+ background: var(--glass-bg);
567
+ border: 1px solid var(--glass-border);
568
+ border-radius: var(--radius-xl);
569
+ padding: var(--space-md);
570
+ min-height: 300px;
571
+ }
572
+
573
+ .chart-title {
574
+ font-size: var(--text-sm);
575
+ font-weight: 600;
576
+ color: var(--text-primary);
577
+ margin-bottom: var(--space-md);
578
+ }
579
+
580
+ /* ============================================
581
+ Loading States
582
+ ============================================ */
583
+ .skeleton {
584
+ background: linear-gradient(
585
+ 90deg,
586
+ var(--glass-bg) 25%,
587
+ var(--glass-bg-hover) 50%,
588
+ var(--glass-bg) 75%
589
+ );
590
+ background-size: 200% 100%;
591
+ animation: shimmer 1.5s infinite;
592
+ border-radius: var(--radius-md);
593
+ }
594
+
595
+ @keyframes shimmer {
596
+ 0% { background-position: 200% 0; }
597
+ 100% { background-position: -200% 0; }
598
+ }
599
+
600
+ .spinner {
601
+ width: 24px;
602
+ height: 24px;
603
+ border: 2px solid var(--glass-border);
604
+ border-top-color: var(--color-accent-primary);
605
+ border-radius: 50%;
606
+ animation: spin 0.8s linear infinite;
607
+ }
608
+
609
+ @keyframes spin {
610
+ to { transform: rotate(360deg); }
611
+ }
612
+
613
+ /* ============================================
614
+ Tooltips
615
+ ============================================ */
616
+ .tooltip {
617
+ position: relative;
618
+ }
619
+
620
+ .tooltip::after {
621
+ content: attr(data-tooltip);
622
+ position: absolute;
623
+ bottom: 100%;
624
+ left: 50%;
625
+ transform: translateX(-50%);
626
+ padding: var(--space-xs) var(--space-sm);
627
+ background: var(--color-bg-elevated);
628
+ border: 1px solid var(--glass-border);
629
+ border-radius: var(--radius-md);
630
+ font-size: var(--text-xs);
631
+ white-space: nowrap;
632
+ opacity: 0;
633
+ visibility: hidden;
634
+ transition: all var(--transition-fast);
635
+ }
636
+
637
+ .tooltip:hover::after {
638
+ opacity: 1;
639
+ visibility: visible;
640
+ }
641
+
642
+ /* ============================================
643
+ Animations
644
+ ============================================ */
645
+ @keyframes fadeIn {
646
+ from { opacity: 0; }
647
+ to { opacity: 1; }
648
+ }
649
+
650
+ @keyframes slideUp {
651
+ from {
652
+ opacity: 0;
653
+ transform: translateY(20px);
654
+ }
655
+ to {
656
+ opacity: 1;
657
+ transform: translateY(0);
658
+ }
659
+ }
660
+
661
+ @keyframes scaleIn {
662
+ from {
663
+ opacity: 0;
664
+ transform: scale(0.95);
665
+ }
666
+ to {
667
+ opacity: 1;
668
+ transform: scale(1);
669
+ }
670
+ }
671
+
672
+ .animate-fade-in { animation: fadeIn var(--transition-slow) ease-out; }
673
+ .animate-slide-up { animation: slideUp var(--transition-slow) ease-out; }
674
+ .animate-scale-in { animation: scaleIn var(--transition-spring) ease-out; }
675
+
676
+ /* Staggered animations */
677
+ .stagger > * {
678
+ animation: slideUp var(--transition-slow) ease-out forwards;
679
+ opacity: 0;
680
+ }
681
+
682
+ .stagger > *:nth-child(1) { animation-delay: 0ms; }
683
+ .stagger > *:nth-child(2) { animation-delay: 50ms; }
684
+ .stagger > *:nth-child(3) { animation-delay: 100ms; }
685
+ .stagger > *:nth-child(4) { animation-delay: 150ms; }
686
+ .stagger > *:nth-child(5) { animation-delay: 200ms; }
687
+ .stagger > *:nth-child(6) { animation-delay: 250ms; }
688
+
689
+ /* ============================================
690
+ Scrollbar Styles
691
+ ============================================ */
692
+ ::-webkit-scrollbar {
693
+ width: 8px;
694
+ height: 8px;
695
+ }
696
+
697
+ ::-webkit-scrollbar-track {
698
+ background: var(--color-bg-secondary);
699
+ }
700
+
701
+ ::-webkit-scrollbar-thumb {
702
+ background: var(--glass-border);
703
+ border-radius: var(--radius-full);
704
+ }
705
+
706
+ ::-webkit-scrollbar-thumb:hover {
707
+ background: var(--glass-border-hover);
708
+ }
709
+
710
+ /* ============================================
711
+ Utility Classes
712
+ ============================================ */
713
+ .text-center { text-align: center; }
714
+ .text-right { text-align: right; }
715
+ .text-sm { font-size: var(--text-sm); }
716
+ .text-xs { font-size: var(--text-xs); }
717
+ .text-muted { color: var(--text-secondary); }
718
+ .text-accent { color: var(--color-accent-primary); }
719
+
720
+ .flex { display: flex; }
721
+ .flex-col { flex-direction: column; }
722
+ .items-center { align-items: center; }
723
+ .justify-between { justify-content: space-between; }
724
+ .justify-center { justify-content: center; }
725
+ .gap-sm { gap: var(--space-sm); }
726
+ .gap-md { gap: var(--space-md); }
727
+ .gap-lg { gap: var(--space-lg); }
728
+
729
+ .mt-sm { margin-top: var(--space-sm); }
730
+ .mt-md { margin-top: var(--space-md); }
731
+ .mt-lg { margin-top: var(--space-lg); }
732
+ .mb-sm { margin-bottom: var(--space-sm); }
733
+ .mb-md { margin-bottom: var(--space-md); }
734
+ .mb-lg { margin-bottom: var(--space-lg); }
735
+
736
+ .w-full { width: 100%; }
737
+ .h-full { height: 100%; }
738
+
739
+ .overflow-hidden { overflow: hidden; }
740
+ .overflow-auto { overflow: auto; }
741
+
742
+ .relative { position: relative; }
743
+ .absolute { position: absolute; }
744
+
745
+ .rounded { border-radius: var(--radius-md); }
746
+ .rounded-lg { border-radius: var(--radius-lg); }
747
+ .rounded-xl { border-radius: var(--radius-xl); }
748
+
749
+ .shadow { box-shadow: var(--shadow-md); }
750
+ .shadow-lg { box-shadow: var(--shadow-lg); }
751
+ .shadow-glow { box-shadow: var(--shadow-glow); }
frontend/src/main.jsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import { StrictMode } from 'react'
2
+ import { createRoot } from 'react-dom/client'
3
+ import './index.css'
4
+ import App from './App.jsx'
5
+
6
+ createRoot(document.getElementById('root')).render(
7
+ <StrictMode>
8
+ <App />
9
+ </StrictMode>,
10
+ )
frontend/src/pages/Analysis.jsx ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect } from 'react';
2
+ import {
3
+ BarChart3,
4
+ Layers,
5
+ TrendingUp,
6
+ RefreshCw,
7
+ AlertTriangle
8
+ } from 'lucide-react';
9
+ import { useQuantizationStore, useModelStore } from '../store';
10
+ import { motion } from 'framer-motion';
11
+ import {
12
+ BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip,
13
+ ResponsiveContainer, Cell, Legend
14
+ } from 'recharts';
15
+
16
+ /**
17
+ * Analysis page - compare quantization methods and analyze weights
18
+ */
19
+ export default function Analysis() {
20
+ const { compareMethod } = useQuantizationStore();
21
+ const { modelInfo, layers, fetchLayers } = useModelStore();
22
+
23
+ const [comparison, setComparison] = useState(null);
24
+ const [isLoading, setIsLoading] = useState(false);
25
+ const [selectedMethods, setSelectedMethods] = useState(['int8', 'int4', 'nf4']);
26
+ const [source, setSource] = useState('random'); // 'random' | 'layer'
27
+ const [selectedLayer, setSelectedLayer] = useState('');
28
+
29
+ // Switch to layer mode if model is loaded
30
+ // Switch to layer mode if model is loaded
31
+ useEffect(() => {
32
+ if (modelInfo) {
33
+ setSource('layer');
34
+ if (layers.length === 0) fetchLayers();
35
+ }
36
+ }, [modelInfo]);
37
+
38
+ const runComparison = async () => {
39
+ setIsLoading(true);
40
+ const layerToCompare = source === 'layer' ? selectedLayer : null;
41
+ const result = await compareMethod(selectedMethods, layerToCompare);
42
+ setComparison(result);
43
+ setIsLoading(false);
44
+ };
45
+
46
+ const toggleMethod = (method) => {
47
+ setSelectedMethods((prev) =>
48
+ prev.includes(method)
49
+ ? prev.filter(m => m !== method)
50
+ : [...prev, method]
51
+ );
52
+ };
53
+
54
+ // Prepare chart data
55
+ const getComparisonData = () => {
56
+ if (!comparison?.comparison) return [];
57
+ return comparison.comparison
58
+ .filter(c => !c.error)
59
+ .map(c => ({
60
+ method: c.method.toUpperCase(),
61
+ meanError: c.mean_error,
62
+ maxError: c.max_error,
63
+ memorySavings: c.memory_savings_percent
64
+ }));
65
+ };
66
+
67
+ const COLORS = ['#6366f1', '#8b5cf6', '#a855f7'];
68
+
69
+ return (
70
+ <div className="analysis">
71
+ {/* Header */}
72
+ <div className="page-header">
73
+ <h1 className="page-title">Analysis</h1>
74
+ <p className="page-subtitle">
75
+ Compare quantization methods and analyze weight distributions
76
+ </p>
77
+ {modelInfo && (
78
+ <div className="model-badge" style={{ marginTop: '0.5rem', display: 'inline-flex', alignItems: 'center', gap: '0.5rem', padding: '4px 12px', background: 'var(--glass-bg)', border: '1px solid var(--glass-border)', color: 'var(--color-accent-primary)', borderRadius: 'var(--radius-full)', fontSize: '0.875rem' }}>
79
+ <span style={{ opacity: 0.7 }}>Active Model:</span>
80
+ <strong>{modelInfo.name}</strong>
81
+ </div>
82
+ )}
83
+ </div>
84
+
85
+ {/* Method Comparison */}
86
+ <section className="section">
87
+ <div className="section-header">
88
+ <h2 className="section-title">
89
+ <BarChart3 size={20} />
90
+ Method Comparison
91
+ {comparison && (
92
+ <span className="source-badge">
93
+ Source: {comparison.source.startsWith('layer:') ? comparison.source.replace('layer:', '') : 'Random Weights'}
94
+ </span>
95
+ )}
96
+ </h2>
97
+ <button
98
+ className="btn btn-primary"
99
+ onClick={runComparison}
100
+ disabled={isLoading || selectedMethods.length === 0}
101
+ >
102
+ {isLoading ? (
103
+ <>
104
+ <RefreshCw size={16} className="spinning" />
105
+ Comparing...
106
+ </>
107
+ ) : (
108
+ <>
109
+ <TrendingUp size={16} />
110
+ Run Comparison
111
+ </>
112
+ )}
113
+ </button>
114
+ </div>
115
+
116
+ {/* Data Source Selection */}
117
+ <div className="glass-card mb-lg">
118
+ <p className="text-sm text-muted mb-md">Select data source:</p>
119
+
120
+ <div className="source-selection mb-md">
121
+ <div className="btn-group">
122
+ {modelInfo && (
123
+ <button
124
+ className={`btn ${source === 'layer' ? 'btn-primary' : 'btn-secondary'}`}
125
+ onClick={() => setSource('layer')}
126
+ >
127
+ Loaded Model Layer
128
+ </button>
129
+ )}
130
+ <button
131
+ className={`btn ${source === 'random' ? 'btn-primary' : 'btn-secondary'}`}
132
+ onClick={() => setSource('random')}
133
+ >
134
+ Random Weights
135
+ </button>
136
+ </div>
137
+ </div>
138
+
139
+ {source === 'layer' && (
140
+ <div className="layer-selection">
141
+ <select
142
+ className="input select"
143
+ value={selectedLayer}
144
+ onChange={(e) => setSelectedLayer(e.target.value)}
145
+ >
146
+ <option value="">Select a layer...</option>
147
+ {layers.map((layer) => (
148
+ <option key={layer} value={layer}>
149
+ {layer}
150
+ </option>
151
+ ))}
152
+ </select>
153
+ </div>
154
+ )}
155
+ </div>
156
+
157
+ {/* Method Selection */}
158
+ <div className="glass-card">
159
+ <p className="text-sm text-muted mb-md">Select methods to compare:</p>
160
+ <div className="method-selection">
161
+ {['int8', 'int4', 'nf4'].map((method) => (
162
+ <button
163
+ key={method}
164
+ className={`method-btn ${selectedMethods.includes(method) ? 'active' : ''}`}
165
+ onClick={() => toggleMethod(method)}
166
+ >
167
+ <div className="method-check">
168
+ {selectedMethods.includes(method) && '✓'}
169
+ </div>
170
+ <div className="method-info">
171
+ <span className="method-name">{method.toUpperCase()}</span>
172
+ <span className="method-desc">
173
+ {method === 'int8' && '8-bit integer quantization'}
174
+ {method === 'int4' && '4-bit integer with grouping'}
175
+ {method === 'nf4' && 'Normal Float 4-bit (QLoRA)'}
176
+ </span>
177
+ </div>
178
+ </button>
179
+ ))}
180
+ </div>
181
+ </div>
182
+
183
+ {/* Comparison Results */}
184
+ {comparison && (
185
+ <motion.div
186
+ className="comparison-results mt-lg"
187
+ initial={{ opacity: 0, y: 20 }}
188
+ animate={{ opacity: 1, y: 0 }}
189
+ >
190
+ <div className="grid grid-2">
191
+ {/* Error Chart */}
192
+ <div className="glass-card chart-card">
193
+ <h4 className="chart-title">Quantization Error by Method</h4>
194
+ <ResponsiveContainer width="100%" height={300}>
195
+ <BarChart data={getComparisonData()}>
196
+ <CartesianGrid strokeDasharray="3 3" stroke="rgba(255,255,255,0.1)" />
197
+ <XAxis dataKey="method" tick={{ fill: '#94a3b8' }} />
198
+ <YAxis tick={{ fill: '#94a3b8' }} />
199
+ <Tooltip
200
+ contentStyle={{
201
+ backgroundColor: '#1a1a25',
202
+ border: '1px solid rgba(255,255,255,0.1)',
203
+ borderRadius: '8px'
204
+ }}
205
+ />
206
+ <Bar dataKey="meanError" name="Mean Error" radius={[4, 4, 0, 0]}>
207
+ {getComparisonData().map((entry, index) => (
208
+ <Cell key={`cell-${index}`} fill={COLORS[index % COLORS.length]} />
209
+ ))}
210
+ </Bar>
211
+ </BarChart>
212
+ </ResponsiveContainer>
213
+ </div>
214
+
215
+ {/* Memory Savings Chart */}
216
+ <div className="glass-card chart-card">
217
+ <h4 className="chart-title">Memory Savings by Method</h4>
218
+ <ResponsiveContainer width="100%" height={300}>
219
+ <BarChart data={getComparisonData()}>
220
+ <CartesianGrid strokeDasharray="3 3" stroke="rgba(255,255,255,0.1)" />
221
+ <XAxis dataKey="method" tick={{ fill: '#94a3b8' }} />
222
+ <YAxis tick={{ fill: '#94a3b8' }} unit="%" />
223
+ <Tooltip
224
+ contentStyle={{
225
+ backgroundColor: '#1a1a25',
226
+ border: '1px solid rgba(255,255,255,0.1)',
227
+ borderRadius: '8px'
228
+ }}
229
+ formatter={(value) => [`${value.toFixed(1)}%`, 'Savings']}
230
+ />
231
+ <Bar dataKey="memorySavings" name="Memory Savings" radius={[4, 4, 0, 0]}>
232
+ {getComparisonData().map((entry, index) => (
233
+ <Cell key={`cell-${index}`} fill={COLORS[index % COLORS.length]} />
234
+ ))}
235
+ </Bar>
236
+ </BarChart>
237
+ </ResponsiveContainer>
238
+ </div>
239
+ </div>
240
+
241
+ {/* Results Table */}
242
+ <div className="glass-card mt-lg">
243
+ <table className="results-table">
244
+ <thead>
245
+ <tr>
246
+ <th>Method</th>
247
+ <th>Bits</th>
248
+ <th>Max Error</th>
249
+ <th>Mean Error</th>
250
+ <th>Memory Savings</th>
251
+ </tr>
252
+ </thead>
253
+ <tbody>
254
+ {comparison.comparison?.filter(c => !c.error).map((result) => (
255
+ <tr key={result.method}>
256
+ <td><strong>{result.method.toUpperCase()}</strong></td>
257
+ <td>{result.bits}</td>
258
+ <td>{result.max_error?.toFixed(6)}</td>
259
+ <td>{result.mean_error?.toFixed(6)}</td>
260
+ <td>
261
+ <span className="badge badge-success">
262
+ {result.memory_savings_percent?.toFixed(1)}%
263
+ </span>
264
+ </td>
265
+ </tr>
266
+ ))}
267
+ </tbody>
268
+ </table>
269
+ </div>
270
+ </motion.div>
271
+ )}
272
+ </section>
273
+
274
+ {/* Model Analysis (if model loaded) */}
275
+ {modelInfo && (
276
+ <section className="section">
277
+ <h2 className="section-title">
278
+ <Layers size={20} />
279
+ Model Analysis
280
+ </h2>
281
+
282
+ <div className="glass-card">
283
+ <p>
284
+ Model <strong>{modelInfo.name}</strong> is loaded with{' '}
285
+ <strong>{modelInfo.num_quantizable_layers}</strong> quantizable layers.
286
+ </p>
287
+ <p className="text-sm text-muted mt-md">
288
+ Use the Models page to analyze individual layer weights and detect outliers.
289
+ </p>
290
+ </div>
291
+ </section>
292
+ )}
293
+
294
+ {/* Info Section */}
295
+ <section className="section">
296
+ <div className="glass-card info-card">
297
+ <AlertTriangle size={24} className="text-warning" />
298
+ <div>
299
+ <h3>Understanding Quantization Trade-offs</h3>
300
+ <p>
301
+ Lower bit precision (4-bit) provides better memory savings but introduces more error.
302
+ 8-bit quantization offers a good balance between compression and accuracy for most models.
303
+ NF4 uses a codebook optimized for normally distributed weights, ideal for LLMs.
304
+ </p>
305
+ </div>
306
+ </div>
307
+ </section>
308
+
309
+ <style>{`
310
+ .section {
311
+ margin-top: var(--space-2xl);
312
+ }
313
+
314
+ .section-header {
315
+ display: flex;
316
+ align-items: center;
317
+ justify-content: space-between;
318
+ margin-bottom: var(--space-lg);
319
+ }
320
+
321
+ .section-title {
322
+ display: flex;
323
+ align-items: center;
324
+ gap: var(--space-sm);
325
+ font-size: var(--text-xl);
326
+ font-weight: 600;
327
+ margin: 0;
328
+ }
329
+
330
+ .method-selection {
331
+ display: grid;
332
+ grid-template-columns: repeat(3, 1fr);
333
+ gap: var(--space-md);
334
+ }
335
+
336
+ .method-btn {
337
+ display: flex;
338
+ align-items: flex-start;
339
+ gap: var(--space-md);
340
+ padding: var(--space-md);
341
+ background: var(--glass-bg);
342
+ border: 2px solid var(--glass-border);
343
+ border-radius: var(--radius-lg);
344
+ cursor: pointer;
345
+ transition: all var(--transition-fast);
346
+ text-align: left;
347
+ }
348
+
349
+ .method-btn:hover {
350
+ border-color: var(--glass-border-hover);
351
+ }
352
+
353
+ .method-btn.active {
354
+ border-color: var(--color-accent-primary);
355
+ background: rgba(99, 102, 241, 0.1);
356
+ }
357
+
358
+ .method-check {
359
+ width: 24px;
360
+ height: 24px;
361
+ display: flex;
362
+ align-items: center;
363
+ justify-content: center;
364
+ border: 2px solid var(--glass-border);
365
+ border-radius: var(--radius-md);
366
+ font-size: var(--text-sm);
367
+ color: var(--color-accent-primary);
368
+ flex-shrink: 0;
369
+ }
370
+
371
+ .method-btn.active .method-check {
372
+ background: var(--color-accent-primary);
373
+ border-color: var(--color-accent-primary);
374
+ color: white;
375
+ }
376
+
377
+ .method-info {
378
+ display: flex;
379
+ flex-direction: column;
380
+ }
381
+
382
+ .method-name {
383
+ font-weight: 600;
384
+ color: var(--text-primary);
385
+ }
386
+
387
+ .method-desc {
388
+ font-size: var(--text-xs);
389
+ color: var(--text-secondary);
390
+ }
391
+
392
+ .chart-card {
393
+ padding: var(--space-lg);
394
+ }
395
+
396
+ .chart-title {
397
+ font-size: var(--text-sm);
398
+ font-weight: 600;
399
+ color: var(--text-primary);
400
+ margin-bottom: var(--space-md);
401
+ }
402
+
403
+ .results-table {
404
+ width: 100%;
405
+ border-collapse: collapse;
406
+ }
407
+
408
+ .results-table th,
409
+ .results-table td {
410
+ padding: var(--space-sm) var(--space-md);
411
+ text-align: left;
412
+ border-bottom: 1px solid var(--glass-border);
413
+ }
414
+
415
+ .results-table th {
416
+ font-size: var(--text-xs);
417
+ font-weight: 600;
418
+ color: var(--text-secondary);
419
+ text-transform: uppercase;
420
+ }
421
+
422
+ .results-table td {
423
+ font-size: var(--text-sm);
424
+ color: var(--text-primary);
425
+ }
426
+
427
+ .info-card {
428
+ display: flex;
429
+ gap: var(--space-lg);
430
+ padding: var(--space-lg);
431
+ }
432
+
433
+ .info-card h3 {
434
+ font-size: var(--text-base);
435
+ margin-bottom: var(--space-sm);
436
+ }
437
+
438
+ .info-card p {
439
+ margin: 0;
440
+ font-size: var(--text-sm);
441
+ }
442
+
443
+ .text-warning {
444
+ color: var(--color-warning);
445
+ flex-shrink: 0;
446
+ }
447
+
448
+ .spinning {
449
+ animation: spin 1s linear infinite;
450
+ }
451
+
452
+ .method-selection {
453
+ grid-template-columns: 1fr;
454
+ }
455
+ }
456
+
457
+ .btn-group {
458
+ display: flex;
459
+ gap: var(--space-xs);
460
+ max-width: 400px;
461
+ }
462
+
463
+ .source-badge {
464
+ font-size: var(--text-xs);
465
+ font-weight: 500;
466
+ padding: 4px 8px;
467
+ background: var(--glass-bg);
468
+ border: 1px solid var(--glass-border);
469
+ border-radius: var(--radius-full);
470
+ color: var(--text-secondary);
471
+ margin-left: var(--space-md);
472
+ }
473
+
474
+ .btn-group .btn {
475
+ flex: 1;
476
+ }
477
+
478
+ .mb-lg { margin-bottom: var(--space-lg); }
479
+ .mb-md { margin-bottom: var(--space-md); }
480
+ `}</style>
481
+ </div>
482
+ );
483
+ }
frontend/src/pages/Dashboard.jsx ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from 'react';
2
+ import { Link } from 'react-router-dom';
3
+ import {
4
+ Zap,
5
+ Cpu,
6
+ HardDrive,
7
+ TrendingUp,
8
+ ArrowRight,
9
+ Layers,
10
+ Activity,
11
+ MemoryStick
12
+ } from 'lucide-react';
13
+ import { useSystemStore, useQuantizationStore, useModelStore } from '../store';
14
+ import { motion } from 'framer-motion';
15
+
16
+ /**
17
+ * Dashboard page - overview of system and recent activity
18
+ */
19
+ export default function Dashboard() {
20
+ const systemInfo = useSystemStore((state) => state.systemInfo);
21
+ const fetchSystemInfo = useSystemStore((state) => state.fetchSystemInfo);
22
+ const quantizationHistory = useQuantizationStore((state) => state.history);
23
+ const modelInfo = useModelStore((state) => state.modelInfo);
24
+
25
+ useEffect(() => {
26
+ if (!systemInfo) {
27
+ fetchSystemInfo();
28
+ }
29
+ }, [systemInfo, fetchSystemInfo]);
30
+
31
+ const stats = [
32
+ {
33
+ label: 'GPU Status',
34
+ value: systemInfo?.cuda_available ? 'CUDA Ready' : systemInfo?.mps_available ? 'MPS Ready' : 'CPU Only',
35
+ icon: Cpu,
36
+ color: systemInfo?.cuda_available ? 'success' : 'warning',
37
+ detail: systemInfo?.gpus?.[0]?.name || 'No GPU detected'
38
+ },
39
+ {
40
+ label: 'Available RAM',
41
+ value: `${systemInfo?.ram_available_gb?.toFixed(1) || '?'}GB`,
42
+ icon: MemoryStick,
43
+ color: 'info',
44
+ detail: `of ${systemInfo?.ram_total_gb?.toFixed(1) || '?'}GB total`
45
+ },
46
+ {
47
+ label: 'Max Model Size',
48
+ value: systemInfo?.max_model_size || 'Unknown',
49
+ icon: Layers,
50
+ color: 'accent',
51
+ detail: 'Recommended limit'
52
+ },
53
+ {
54
+ label: 'Quantizations',
55
+ value: quantizationHistory.length,
56
+ icon: Activity,
57
+ color: 'success',
58
+ detail: 'This session'
59
+ }
60
+ ];
61
+
62
+ const quickActions = [
63
+ {
64
+ title: 'Quick Quantize',
65
+ description: 'Test quantization on random weights',
66
+ path: '/quantize',
67
+ icon: Zap,
68
+ gradient: 'var(--gradient-primary)'
69
+ },
70
+ {
71
+ title: 'Load Model',
72
+ description: 'Load a HuggingFace model',
73
+ path: '/models',
74
+ icon: HardDrive,
75
+ gradient: 'var(--gradient-secondary)'
76
+ },
77
+ {
78
+ title: 'Analyze Weights',
79
+ description: 'Deep dive into weight distributions',
80
+ path: '/analysis',
81
+ icon: TrendingUp,
82
+ gradient: 'linear-gradient(135deg, #10b981 0%, #06b6d4 100%)'
83
+ }
84
+ ];
85
+
86
+ return (
87
+ <div className="dashboard">
88
+ {/* Header */}
89
+ <div className="page-header">
90
+ <h1 className="page-title">Dashboard</h1>
91
+ <p className="page-subtitle">
92
+ Neural Network Weight Quantization Tool
93
+ </p>
94
+ </div>
95
+
96
+ {/* Stats Grid */}
97
+ <div className="grid grid-4 stagger">
98
+ {stats.map((stat, index) => (
99
+ <motion.div
100
+ key={stat.label}
101
+ className="glass-card stat-card"
102
+ initial={{ opacity: 0, y: 20 }}
103
+ animate={{ opacity: 1, y: 0 }}
104
+ transition={{ delay: index * 0.1 }}
105
+ >
106
+ <div className={`stat-icon ${stat.color}`}>
107
+ <stat.icon size={20} />
108
+ </div>
109
+ <div className="stat-content">
110
+ <div className="stat-value">{stat.value}</div>
111
+ <div className="stat-label">{stat.label}</div>
112
+ <div className="stat-detail">{stat.detail}</div>
113
+ </div>
114
+ </motion.div>
115
+ ))}
116
+ </div>
117
+
118
+ {/* Quick Actions */}
119
+ <section className="section">
120
+ <h2 className="section-title">Quick Actions</h2>
121
+ <div className="grid grid-3">
122
+ {quickActions.map((action, index) => (
123
+ <motion.div
124
+ key={action.path}
125
+ initial={{ opacity: 0, y: 20 }}
126
+ animate={{ opacity: 1, y: 0 }}
127
+ transition={{ delay: 0.4 + index * 0.1 }}
128
+ >
129
+ <Link to={action.path} className="action-card glass-card">
130
+ <div className="action-icon" style={{ background: action.gradient }}>
131
+ <action.icon size={24} />
132
+ </div>
133
+ <div className="action-content">
134
+ <h3 className="action-title">{action.title}</h3>
135
+ <p className="action-description">{action.description}</p>
136
+ </div>
137
+ <ArrowRight size={20} className="action-arrow" />
138
+ </Link>
139
+ </motion.div>
140
+ ))}
141
+ </div>
142
+ </section>
143
+
144
+ {/* Current Model */}
145
+ {modelInfo && (
146
+ <section className="section">
147
+ <h2 className="section-title">Loaded Model</h2>
148
+ <div className="glass-card model-info">
149
+ <div className="model-header">
150
+ <HardDrive size={24} />
151
+ <div>
152
+ <h3 className="model-name">{modelInfo.name}</h3>
153
+ <p className="model-arch">{modelInfo.architecture}</p>
154
+ </div>
155
+ </div>
156
+ <div className="model-stats">
157
+ <div className="model-stat">
158
+ <span className="stat-value">{modelInfo.num_params_billions?.toFixed(2)}B</span>
159
+ <span className="stat-label">Parameters</span>
160
+ </div>
161
+ <div className="model-stat">
162
+ <span className="stat-value">{modelInfo.num_quantizable_layers}</span>
163
+ <span className="stat-label">Quantizable Layers</span>
164
+ </div>
165
+ <div className="model-stat">
166
+ <span className="stat-value">{modelInfo.memory_footprint_gb}GB</span>
167
+ <span className="stat-label">Memory</span>
168
+ </div>
169
+ </div>
170
+ </div>
171
+ </section>
172
+ )}
173
+
174
+ {/* Getting Started */}
175
+ {!modelInfo && quantizationHistory.length === 0 && (
176
+ <section className="section">
177
+ <div className="glass-card getting-started">
178
+ <div className="getting-started-content">
179
+ <Zap size={48} className="getting-started-icon" />
180
+ <h2>Get Started</h2>
181
+ <p>
182
+ Welcome to the Neural Network Quantizer! You can either test quantization
183
+ on random weights or load a real HuggingFace model for production use.
184
+ </p>
185
+ <div className="getting-started-actions">
186
+ <Link to="/quantize" className="btn btn-primary btn-lg">
187
+ <Layers size={20} />
188
+ Try Quantization
189
+ </Link>
190
+ <Link to="/models" className="btn btn-secondary btn-lg">
191
+ <HardDrive size={20} />
192
+ Load Model
193
+ </Link>
194
+ </div>
195
+ </div>
196
+ </div>
197
+ </section>
198
+ )}
199
+
200
+ {/* System Warnings */}
201
+ {systemInfo?.warnings?.length > 0 && (
202
+ <section className="section">
203
+ <h2 className="section-title">System Warnings</h2>
204
+ <div className="warnings-list">
205
+ {systemInfo.warnings.map((warning, index) => (
206
+ <div key={index} className="warning-item glass-card">
207
+ <span className="badge badge-warning">Warning</span>
208
+ <span>{warning}</span>
209
+ </div>
210
+ ))}
211
+ </div>
212
+ </section>
213
+ )}
214
+
215
+ <style>{`
216
+ .dashboard {
217
+ max-width: 1400px;
218
+ }
219
+
220
+ .section {
221
+ margin-top: var(--space-2xl);
222
+ }
223
+
224
+ .section-title {
225
+ font-size: var(--text-xl);
226
+ font-weight: 600;
227
+ margin-bottom: var(--space-lg);
228
+ color: var(--text-primary);
229
+ }
230
+
231
+ .stat-card {
232
+ display: flex;
233
+ align-items: flex-start;
234
+ gap: var(--space-md);
235
+ }
236
+
237
+ .stat-icon {
238
+ width: 44px;
239
+ height: 44px;
240
+ display: flex;
241
+ align-items: center;
242
+ justify-content: center;
243
+ border-radius: var(--radius-lg);
244
+ flex-shrink: 0;
245
+ }
246
+
247
+ .stat-icon.success {
248
+ background: var(--color-success-bg);
249
+ color: var(--color-success);
250
+ }
251
+
252
+ .stat-icon.warning {
253
+ background: var(--color-warning-bg);
254
+ color: var(--color-warning);
255
+ }
256
+
257
+ .stat-icon.info {
258
+ background: var(--color-info-bg);
259
+ color: var(--color-info);
260
+ }
261
+
262
+ .stat-icon.accent {
263
+ background: rgba(99, 102, 241, 0.1);
264
+ color: var(--color-accent-primary);
265
+ }
266
+
267
+ .stat-content {
268
+ flex: 1;
269
+ }
270
+
271
+ .stat-card .stat-value {
272
+ font-size: var(--text-xl);
273
+ font-weight: 700;
274
+ color: var(--text-primary);
275
+ line-height: 1.2;
276
+ }
277
+
278
+ .stat-card .stat-label {
279
+ font-size: var(--text-sm);
280
+ color: var(--text-secondary);
281
+ }
282
+
283
+ .stat-detail {
284
+ font-size: var(--text-xs);
285
+ color: var(--text-tertiary);
286
+ margin-top: var(--space-xs);
287
+ }
288
+
289
+ .action-card {
290
+ display: flex;
291
+ align-items: center;
292
+ gap: var(--space-md);
293
+ text-decoration: none;
294
+ transition: all var(--transition-base);
295
+ }
296
+
297
+ .action-card:hover {
298
+ transform: translateY(-4px);
299
+ }
300
+
301
+ .action-card:hover .action-arrow {
302
+ transform: translateX(4px);
303
+ }
304
+
305
+ .action-icon {
306
+ width: 48px;
307
+ height: 48px;
308
+ display: flex;
309
+ align-items: center;
310
+ justify-content: center;
311
+ border-radius: var(--radius-lg);
312
+ color: white;
313
+ flex-shrink: 0;
314
+ }
315
+
316
+ .action-content {
317
+ flex: 1;
318
+ }
319
+
320
+ .action-title {
321
+ font-size: var(--text-base);
322
+ font-weight: 600;
323
+ color: var(--text-primary);
324
+ margin-bottom: var(--space-xs);
325
+ }
326
+
327
+ .action-description {
328
+ font-size: var(--text-sm);
329
+ color: var(--text-secondary);
330
+ margin: 0;
331
+ }
332
+
333
+ .action-arrow {
334
+ color: var(--text-tertiary);
335
+ transition: transform var(--transition-fast);
336
+ }
337
+
338
+ .model-info {
339
+ padding: var(--space-xl);
340
+ }
341
+
342
+ .model-header {
343
+ display: flex;
344
+ align-items: center;
345
+ gap: var(--space-md);
346
+ margin-bottom: var(--space-lg);
347
+ color: var(--color-accent-primary);
348
+ }
349
+
350
+ .model-name {
351
+ font-size: var(--text-lg);
352
+ font-weight: 600;
353
+ color: var(--text-primary);
354
+ }
355
+
356
+ .model-arch {
357
+ font-size: var(--text-sm);
358
+ color: var(--text-secondary);
359
+ margin: 0;
360
+ }
361
+
362
+ .model-stats {
363
+ display: flex;
364
+ gap: var(--space-2xl);
365
+ }
366
+
367
+ .model-stat {
368
+ display: flex;
369
+ flex-direction: column;
370
+ }
371
+
372
+ .getting-started {
373
+ text-align: center;
374
+ padding: var(--space-3xl);
375
+ }
376
+
377
+ .getting-started-icon {
378
+ color: var(--color-accent-primary);
379
+ margin-bottom: var(--space-lg);
380
+ }
381
+
382
+ .getting-started h2 {
383
+ margin-bottom: var(--space-md);
384
+ }
385
+
386
+ .getting-started p {
387
+ max-width: 500px;
388
+ margin: 0 auto var(--space-xl);
389
+ }
390
+
391
+ .getting-started-actions {
392
+ display: flex;
393
+ gap: var(--space-md);
394
+ justify-content: center;
395
+ }
396
+
397
+ .warnings-list {
398
+ display: flex;
399
+ flex-direction: column;
400
+ gap: var(--space-sm);
401
+ }
402
+
403
+ .warning-item {
404
+ display: flex;
405
+ align-items: center;
406
+ gap: var(--space-md);
407
+ padding: var(--space-md);
408
+ }
409
+ `}</style>
410
+ </div>
411
+ );
412
+ }
frontend/src/pages/ModelLoader.jsx ADDED
@@ -0,0 +1,775 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useRef } from 'react';
2
+ import {
3
+ Upload,
4
+ Cpu,
5
+ HardDrive,
6
+ Database,
7
+ CheckCircle,
8
+ AlertCircle,
9
+ Loader2,
10
+ Package,
11
+ Trash2,
12
+ Sparkles,
13
+ Clock,
14
+ Download
15
+ } from 'lucide-react';
16
+ import { useSystemStore } from '../store';
17
+ import { motion, AnimatePresence } from 'framer-motion';
18
+
19
+ /**
20
+ * ModelLoader page - load HuggingFace models with progress tracking
21
+ */
22
+ export default function ModelLoader() {
23
+ const systemInfo = useSystemStore((state) => state.systemInfo);
24
+
25
+ const [modelName, setModelName] = useState('');
26
+ const [exampleModels, setExampleModels] = useState(null);
27
+ const [loadResult, setLoadResult] = useState(null);
28
+ const [isLoading, setIsLoading] = useState(false);
29
+ const [progress, setProgress] = useState(null);
30
+ const [cachedModels, setCachedModels] = useState([]);
31
+ const [modelInfo, setModelInfo] = useState(null);
32
+
33
+ const progressPollRef = useRef(null);
34
+
35
+ // Fetch example models and cache info on mount
36
+ useEffect(() => {
37
+ // Optimistic load from cache
38
+ const cachedExamples = localStorage.getItem('example_models');
39
+ if (cachedExamples) {
40
+ try {
41
+ setExampleModels(JSON.parse(cachedExamples));
42
+ } catch (e) { }
43
+ }
44
+
45
+ fetch('/api/models/examples')
46
+ .then(res => res.json())
47
+ .then(data => {
48
+ setExampleModels(data);
49
+ localStorage.setItem('example_models', JSON.stringify(data));
50
+ })
51
+ .catch(() => { });
52
+
53
+ fetchCacheInfo();
54
+ fetchModelInfo();
55
+ }, []);
56
+
57
+ const fetchCacheInfo = async () => {
58
+ try {
59
+ const res = await fetch('/api/models/cache');
60
+ const data = await res.json();
61
+ setCachedModels(data.models || []);
62
+ } catch (e) { }
63
+ };
64
+
65
+ const fetchModelInfo = async () => {
66
+ try {
67
+ const res = await fetch('/api/models/info');
68
+ const data = await res.json();
69
+ if (data.loaded) {
70
+ setModelInfo(data);
71
+ }
72
+ } catch (e) { }
73
+ };
74
+
75
+ const pollProgress = (name) => {
76
+ if (progressPollRef.current) {
77
+ clearInterval(progressPollRef.current);
78
+ }
79
+
80
+ progressPollRef.current = setInterval(async () => {
81
+ try {
82
+ const res = await fetch(`/api/models/progress/${encodeURIComponent(name)}`);
83
+ const data = await res.json();
84
+ if (data.downloading) {
85
+ setProgress(data);
86
+ }
87
+ } catch (e) { }
88
+ }, 500);
89
+ };
90
+
91
+ const stopPolling = () => {
92
+ if (progressPollRef.current) {
93
+ clearInterval(progressPollRef.current);
94
+ progressPollRef.current = null;
95
+ }
96
+ };
97
+
98
+ const handleLoadModel = async () => {
99
+ if (!modelName.trim() || isLoading) return;
100
+
101
+ setIsLoading(true);
102
+ setLoadResult(null);
103
+ setProgress({ status: 'starting', percent: 0, message: 'Starting download...' });
104
+
105
+ // Start polling for progress
106
+ pollProgress(modelName.trim());
107
+
108
+ try {
109
+ const response = await fetch('/api/models/load', {
110
+ method: 'POST',
111
+ headers: { 'Content-Type': 'application/json' },
112
+ body: JSON.stringify({
113
+ model_name: modelName.trim(),
114
+ dtype: 'auto',
115
+ device: 'auto',
116
+ trust_remote_code: true
117
+ })
118
+ });
119
+
120
+ const data = await response.json();
121
+ setLoadResult(data);
122
+
123
+ if (data.success) {
124
+ setModelInfo(data.model_info);
125
+ setProgress({ status: 'complete', percent: 100, message: 'Model loaded!' });
126
+ fetchCacheInfo();
127
+ } else {
128
+ setProgress(null);
129
+ }
130
+ } catch (err) {
131
+ setLoadResult({ success: false, error: err.message });
132
+ setProgress(null);
133
+ } finally {
134
+ setIsLoading(false);
135
+ stopPolling();
136
+ }
137
+ };
138
+
139
+ const handleQuickLoad = (modelId) => {
140
+ setModelName(modelId);
141
+ };
142
+
143
+ const handleUnload = async () => {
144
+ try {
145
+ await fetch('/api/models/unload', { method: 'POST' });
146
+ setModelInfo(null);
147
+ setLoadResult(null);
148
+ setProgress(null);
149
+ } catch (e) { }
150
+ };
151
+
152
+ const handleDeleteFromCache = async (name) => {
153
+ try {
154
+ await fetch(`/api/models/cache/${encodeURIComponent(name)}`, { method: 'DELETE' });
155
+ fetchCacheInfo();
156
+ } catch (e) { }
157
+ };
158
+
159
+ const handleCleanup = async () => {
160
+ try {
161
+ const res = await fetch('/api/models/cache/cleanup', { method: 'POST' });
162
+ const data = await res.json();
163
+ fetchCacheInfo();
164
+ alert(`Cleaned up ${data.deleted_count} models`);
165
+ } catch (e) { }
166
+ };
167
+
168
+ return (
169
+ <div className="model-loader">
170
+ {/* Header */}
171
+ <div className="page-header">
172
+ <h1 className="page-title">Load HuggingFace Model</h1>
173
+ <p className="page-subtitle">
174
+ Download and analyze models directly from HuggingFace Hub
175
+ </p>
176
+ </div>
177
+
178
+ {/* Main Content */}
179
+ <div className="loader-grid">
180
+ {/* Load Model Card */}
181
+ <motion.div
182
+ className="glass-card load-card"
183
+ initial={{ opacity: 0, y: 20 }}
184
+ animate={{ opacity: 1, y: 0 }}
185
+ >
186
+ <div className="card-header">
187
+ <Package size={24} />
188
+ <h2>Load Model</h2>
189
+ </div>
190
+
191
+ <div className="input-section">
192
+ <label className="input-label">Model ID</label>
193
+ <input
194
+ type="text"
195
+ className="input"
196
+ placeholder="e.g. gpt2, bert-base-uncased, prajjwal1/bert-tiny"
197
+ value={modelName}
198
+ onChange={(e) => setModelName(e.target.value)}
199
+ onKeyDown={(e) => e.key === 'Enter' && handleLoadModel()}
200
+ disabled={isLoading}
201
+ />
202
+ <p className="input-hint">
203
+ Enter the HuggingFace model identifier (organization/model-name)
204
+ </p>
205
+ </div>
206
+
207
+ <button
208
+ className="btn btn-primary btn-lg w-full"
209
+ onClick={handleLoadModel}
210
+ disabled={isLoading || !modelName.trim()}
211
+ >
212
+ {isLoading ? (
213
+ <>
214
+ <Loader2 size={20} className="spinning" />
215
+ Loading...
216
+ </>
217
+ ) : (
218
+ <>
219
+ <Download size={20} />
220
+ Download & Load Model
221
+ </>
222
+ )}
223
+ </button>
224
+
225
+ {/* Progress Bar */}
226
+ <AnimatePresence>
227
+ {progress && (
228
+ <motion.div
229
+ className="progress-container"
230
+ initial={{ opacity: 0, height: 0 }}
231
+ animate={{ opacity: 1, height: 'auto' }}
232
+ exit={{ opacity: 0, height: 0 }}
233
+ >
234
+ <div className="progress-header">
235
+ <span className="progress-status">{progress.message || progress.status}</span>
236
+ <span className="progress-percent">{progress.percent || 0}%</span>
237
+ </div>
238
+ <div className="progress-bar">
239
+ <motion.div
240
+ className="progress-fill"
241
+ initial={{ width: 0 }}
242
+ animate={{ width: `${progress.percent || 0}%` }}
243
+ transition={{ duration: 0.3 }}
244
+ />
245
+ </div>
246
+ {progress.speed_mbps && (
247
+ <div className="progress-details">
248
+ <span>{progress.speed_mbps} MB/s</span>
249
+ {progress.eta_seconds && <span>ETA: {progress.eta_seconds}s</span>}
250
+ </div>
251
+ )}
252
+ </motion.div>
253
+ )}
254
+ </AnimatePresence>
255
+
256
+ {/* Result Message */}
257
+ <AnimatePresence>
258
+ {loadResult && !isLoading && (
259
+ <motion.div
260
+ className={`result-message ${loadResult.success ? 'success' : 'error'}`}
261
+ initial={{ opacity: 0, height: 0 }}
262
+ animate={{ opacity: 1, height: 'auto' }}
263
+ exit={{ opacity: 0, height: 0 }}
264
+ >
265
+ {loadResult.success ? (
266
+ <>
267
+ <CheckCircle size={20} />
268
+ <div>
269
+ <strong>Model loaded successfully!</strong>
270
+ <p>{loadResult.model_info?.architecture} - {loadResult.model_info?.num_params_millions}M params</p>
271
+ </div>
272
+ </>
273
+ ) : (
274
+ <>
275
+ <AlertCircle size={20} />
276
+ <div>
277
+ <strong>Failed to load model</strong>
278
+ <p>{loadResult.error}</p>
279
+ {loadResult.suggestion && <p className="suggestion">{loadResult.suggestion}</p>}
280
+ </div>
281
+ </>
282
+ )}
283
+ </motion.div>
284
+ )}
285
+ </AnimatePresence>
286
+ </motion.div>
287
+
288
+ {/* Currently Loaded Model */}
289
+ {modelInfo && (
290
+ <motion.div
291
+ className="glass-card loaded-model-card"
292
+ initial={{ opacity: 0, scale: 0.95 }}
293
+ animate={{ opacity: 1, scale: 1 }}
294
+ >
295
+ <div className="card-header">
296
+ <CheckCircle size={24} className="text-success" />
297
+ <h2>Loaded Model</h2>
298
+ <button className="btn btn-ghost btn-sm ml-auto" onClick={handleUnload}>
299
+ <Trash2 size={16} />
300
+ Unload
301
+ </button>
302
+ </div>
303
+
304
+ <div className="model-details">
305
+ <div className="detail-item">
306
+ <span className="label">Name</span>
307
+ <span className="value">{modelInfo.name}</span>
308
+ </div>
309
+ <div className="detail-item">
310
+ <span className="label">Parameters</span>
311
+ <span className="value">{modelInfo.num_params_millions}M</span>
312
+ </div>
313
+ <div className="detail-item">
314
+ <span className="label">Memory</span>
315
+ <span className="value">{modelInfo.memory_mb?.toFixed(1)} MB</span>
316
+ </div>
317
+ <div className="detail-item">
318
+ <span className="label">Device</span>
319
+ <span className="value">{modelInfo.device}</span>
320
+ </div>
321
+ <div className="detail-item">
322
+ <span className="label">Quantizable Layers</span>
323
+ <span className="value highlight">{modelInfo.num_quantizable_layers}</span>
324
+ </div>
325
+ </div>
326
+ </motion.div>
327
+ )}
328
+
329
+ {/* Quick Start */}
330
+ <motion.div
331
+ className="glass-card"
332
+ initial={{ opacity: 0, y: 20 }}
333
+ animate={{ opacity: 1, y: 0 }}
334
+ transition={{ delay: 0.1 }}
335
+ >
336
+ <div className="card-header">
337
+ <Sparkles size={24} />
338
+ <h2>Quick Start</h2>
339
+ </div>
340
+
341
+ <p className="text-sm text-muted mb-md">Click to select a model:</p>
342
+
343
+ {exampleModels ? (
344
+ <>
345
+ {exampleModels.sample_models?.length > 0 && (
346
+ <div className="model-group">
347
+ <h4 className="group-title">⭐ Sample Models (Pre-cached)</h4>
348
+ <div className="model-list">
349
+ {exampleModels.sample_models.map((model) => (
350
+ <button
351
+ key={model.id}
352
+ className={`model-chip sample ${modelName === model.id ? 'selected' : ''}`}
353
+ onClick={() => handleQuickLoad(model.id)}
354
+ >
355
+ <span className="model-id">{model.id}</span>
356
+ <span className="model-desc">Instant load</span>
357
+ </button>
358
+ ))}
359
+ </div>
360
+ </div>
361
+ )}
362
+
363
+ <div className="model-group">
364
+ <h4 className="group-title">Small Models</h4>
365
+ <div className="model-list">
366
+ {exampleModels.small_models?.map((model) => (
367
+ <button
368
+ key={model.id}
369
+ className={`model-chip ${modelName === model.id ? 'selected' : ''}`}
370
+ onClick={() => handleQuickLoad(model.id)}
371
+ >
372
+ <span className="model-id">{model.id}</span>
373
+ <span className="model-size">{model.size}</span>
374
+ </button>
375
+ ))}
376
+ </div>
377
+ </div>
378
+ </>
379
+ ) : (
380
+ <div className="loading-placeholder">
381
+ <Loader2 size={20} className="spinning" />
382
+ <span>Loading examples...</span>
383
+ </div>
384
+ )}
385
+ </motion.div>
386
+
387
+ {/* System Status */}
388
+ <motion.div
389
+ className="glass-card"
390
+ initial={{ opacity: 0, y: 20 }}
391
+ animate={{ opacity: 1, y: 0 }}
392
+ transition={{ delay: 0.2 }}
393
+ >
394
+ <div className="card-header">
395
+ <Cpu size={24} />
396
+ <h2>System</h2>
397
+ </div>
398
+
399
+ {systemInfo ? (
400
+ <div className="status-list">
401
+ <div className="status-item">
402
+ <span className="status-label">Device</span>
403
+ <span className="status-value">
404
+ {systemInfo.cuda_available ? '🟢 CUDA GPU' :
405
+ systemInfo.mps_available ? '🟢 Apple MPS' : '🟡 CPU'}
406
+ </span>
407
+ </div>
408
+
409
+ {systemInfo.gpus?.length > 0 && (
410
+ <div className="status-item">
411
+ <span className="status-label">GPU</span>
412
+ <span className="status-value">{systemInfo.gpus[0].name}</span>
413
+ </div>
414
+ )}
415
+
416
+ <div className="status-item">
417
+ <span className="status-label">RAM</span>
418
+ <span className="status-value">{systemInfo.ram_available_gb?.toFixed(1)} GB</span>
419
+ </div>
420
+ </div>
421
+ ) : (
422
+ <p className="text-muted">Loading...</p>
423
+ )}
424
+ </motion.div>
425
+
426
+ {/* Cached Models */}
427
+ <motion.div
428
+ className="glass-card cache-card"
429
+ initial={{ opacity: 0, y: 20 }}
430
+ animate={{ opacity: 1, y: 0 }}
431
+ transition={{ delay: 0.3 }}
432
+ >
433
+ <div className="card-header">
434
+ <Database size={24} />
435
+ <h2>Model Cache</h2>
436
+ <button className="btn btn-ghost btn-sm ml-auto" onClick={handleCleanup}>
437
+ <Clock size={16} />
438
+ Cleanup
439
+ </button>
440
+ </div>
441
+
442
+ <p className="text-xs text-muted mb-sm">
443
+ Models auto-delete after 4 hours (except samples)
444
+ </p>
445
+
446
+ {cachedModels.length > 0 ? (
447
+ <div className="cache-list">
448
+ {cachedModels.map((model) => (
449
+ <div key={model.name} className={`cache-item ${model.is_sample ? 'sample' : ''}`}>
450
+ <div className="cache-info">
451
+ <span className="cache-name">
452
+ {model.is_sample && '⭐ '}
453
+ {model.name}
454
+ </span>
455
+ <span className="cache-size">{model.size_mb} MB</span>
456
+ </div>
457
+ {!model.is_sample && (
458
+ <button
459
+ className="btn btn-ghost btn-xs"
460
+ onClick={() => handleDeleteFromCache(model.name)}
461
+ >
462
+ <Trash2 size={14} />
463
+ </button>
464
+ )}
465
+ </div>
466
+ ))}
467
+ </div>
468
+ ) : (
469
+ <p className="text-muted text-sm">No models cached</p>
470
+ )}
471
+ </motion.div>
472
+ </div>
473
+
474
+ <style>{`
475
+ .loader-grid {
476
+ display: grid;
477
+ grid-template-columns: 1fr 1fr;
478
+ gap: var(--space-lg);
479
+ }
480
+
481
+ @media (max-width: 1024px) {
482
+ .loader-grid {
483
+ grid-template-columns: 1fr;
484
+ }
485
+ }
486
+
487
+ .load-card {
488
+ grid-column: span 2;
489
+ }
490
+
491
+ @media (max-width: 1024px) {
492
+ .load-card {
493
+ grid-column: span 1;
494
+ }
495
+ }
496
+
497
+ .loaded-model-card {
498
+ grid-column: span 2;
499
+ background: rgba(16, 185, 129, 0.05);
500
+ border-color: rgba(16, 185, 129, 0.3);
501
+ }
502
+
503
+ .cache-card {
504
+ grid-column: span 2;
505
+ }
506
+
507
+ .card-header {
508
+ display: flex;
509
+ align-items: center;
510
+ gap: var(--space-sm);
511
+ margin-bottom: var(--space-lg);
512
+ color: var(--text-primary);
513
+ }
514
+
515
+ .card-header h2 {
516
+ font-size: var(--text-lg);
517
+ font-weight: 600;
518
+ margin: 0;
519
+ }
520
+
521
+ .input-section {
522
+ margin-bottom: var(--space-lg);
523
+ }
524
+
525
+ .input-hint {
526
+ font-size: var(--text-xs);
527
+ color: var(--text-tertiary);
528
+ margin-top: var(--space-xs);
529
+ }
530
+
531
+ /* Progress Bar */
532
+ .progress-container {
533
+ margin-top: var(--space-lg);
534
+ padding: var(--space-md);
535
+ background: var(--glass-bg);
536
+ border-radius: var(--radius-md);
537
+ }
538
+
539
+ .progress-header {
540
+ display: flex;
541
+ justify-content: space-between;
542
+ margin-bottom: var(--space-sm);
543
+ font-size: var(--text-sm);
544
+ }
545
+
546
+ .progress-status {
547
+ color: var(--text-secondary);
548
+ }
549
+
550
+ .progress-percent {
551
+ color: var(--color-accent-primary);
552
+ font-weight: 600;
553
+ }
554
+
555
+ .progress-bar {
556
+ height: 8px;
557
+ background: rgba(255, 255, 255, 0.1);
558
+ border-radius: 4px;
559
+ overflow: hidden;
560
+ }
561
+
562
+ .progress-fill {
563
+ height: 100%;
564
+ background: linear-gradient(90deg, var(--color-accent-primary), var(--color-accent-secondary));
565
+ border-radius: 4px;
566
+ }
567
+
568
+ .progress-details {
569
+ display: flex;
570
+ justify-content: space-between;
571
+ margin-top: var(--space-xs);
572
+ font-size: var(--text-xs);
573
+ color: var(--text-tertiary);
574
+ }
575
+
576
+ .result-message {
577
+ display: flex;
578
+ align-items: flex-start;
579
+ gap: var(--space-md);
580
+ padding: var(--space-md);
581
+ border-radius: var(--radius-md);
582
+ margin-top: var(--space-md);
583
+ }
584
+
585
+ .result-message.success {
586
+ background: rgba(16, 185, 129, 0.1);
587
+ border: 1px solid rgba(16, 185, 129, 0.3);
588
+ color: var(--color-success);
589
+ }
590
+
591
+ .result-message.error {
592
+ background: rgba(239, 68, 68, 0.1);
593
+ border: 1px solid rgba(239, 68, 68, 0.3);
594
+ color: var(--color-error);
595
+ }
596
+
597
+ .result-message strong {
598
+ display: block;
599
+ }
600
+
601
+ .result-message p {
602
+ margin: var(--space-xs) 0 0 0;
603
+ font-size: var(--text-sm);
604
+ opacity: 0.9;
605
+ }
606
+
607
+ .model-details {
608
+ display: grid;
609
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
610
+ gap: var(--space-sm);
611
+ }
612
+
613
+ .detail-item {
614
+ display: flex;
615
+ flex-direction: column;
616
+ padding: var(--space-sm);
617
+ background: var(--glass-bg);
618
+ border-radius: var(--radius-md);
619
+ }
620
+
621
+ .detail-item .label {
622
+ font-size: var(--text-xs);
623
+ color: var(--text-tertiary);
624
+ }
625
+
626
+ .detail-item .value {
627
+ font-size: var(--text-base);
628
+ font-weight: 500;
629
+ color: var(--text-primary);
630
+ }
631
+
632
+ .detail-item .value.highlight {
633
+ color: var(--color-accent-primary);
634
+ }
635
+
636
+ .model-group {
637
+ margin-bottom: var(--space-lg);
638
+ }
639
+
640
+ .group-title {
641
+ font-size: var(--text-xs);
642
+ font-weight: 600;
643
+ color: var(--text-secondary);
644
+ text-transform: uppercase;
645
+ margin-bottom: var(--space-sm);
646
+ }
647
+
648
+ .model-list {
649
+ display: flex;
650
+ flex-wrap: wrap;
651
+ gap: var(--space-sm);
652
+ }
653
+
654
+ .model-chip {
655
+ display: flex;
656
+ flex-direction: column;
657
+ padding: var(--space-sm) var(--space-md);
658
+ background: var(--glass-bg);
659
+ border: 1px solid var(--glass-border);
660
+ border-radius: var(--radius-md);
661
+ cursor: pointer;
662
+ transition: all var(--transition-fast);
663
+ text-align: left;
664
+ }
665
+
666
+ .model-chip:hover {
667
+ border-color: var(--glass-border-hover);
668
+ transform: translateY(-1px);
669
+ }
670
+
671
+ .model-chip.selected {
672
+ border-color: var(--color-accent-primary);
673
+ background: rgba(99, 102, 241, 0.1);
674
+ }
675
+
676
+ .model-chip.sample {
677
+ border-color: rgba(16, 185, 129, 0.4);
678
+ background: rgba(16, 185, 129, 0.1);
679
+ }
680
+
681
+ .model-id {
682
+ font-size: var(--text-sm);
683
+ font-weight: 500;
684
+ color: var(--text-primary);
685
+ }
686
+
687
+ .model-size, .model-desc {
688
+ font-size: var(--text-xs);
689
+ color: var(--text-tertiary);
690
+ }
691
+
692
+ .status-list {
693
+ display: flex;
694
+ flex-direction: column;
695
+ gap: var(--space-xs);
696
+ }
697
+
698
+ .status-item {
699
+ display: flex;
700
+ justify-content: space-between;
701
+ padding: var(--space-xs) 0;
702
+ border-bottom: 1px solid var(--glass-border);
703
+ }
704
+
705
+ .status-item:last-child {
706
+ border-bottom: none;
707
+ }
708
+
709
+ .status-label {
710
+ font-size: var(--text-sm);
711
+ color: var(--text-secondary);
712
+ }
713
+
714
+ .status-value {
715
+ font-size: var(--text-sm);
716
+ font-weight: 500;
717
+ color: var(--text-primary);
718
+ }
719
+
720
+ .cache-list {
721
+ display: flex;
722
+ flex-direction: column;
723
+ gap: var(--space-xs);
724
+ }
725
+
726
+ .cache-item {
727
+ display: flex;
728
+ align-items: center;
729
+ justify-content: space-between;
730
+ padding: var(--space-sm);
731
+ background: var(--glass-bg);
732
+ border-radius: var(--radius-md);
733
+ }
734
+
735
+ .cache-item.sample {
736
+ background: rgba(16, 185, 129, 0.05);
737
+ }
738
+
739
+ .cache-info {
740
+ display: flex;
741
+ flex-direction: column;
742
+ }
743
+
744
+ .cache-name {
745
+ font-size: var(--text-sm);
746
+ color: var(--text-primary);
747
+ }
748
+
749
+ .cache-size {
750
+ font-size: var(--text-xs);
751
+ color: var(--text-tertiary);
752
+ }
753
+
754
+ .ml-auto {
755
+ margin-left: auto;
756
+ }
757
+
758
+ .text-success {
759
+ color: var(--color-success);
760
+ }
761
+
762
+ .spinning {
763
+ animation: spin 1s linear infinite;
764
+ }
765
+
766
+ .loading-placeholder {
767
+ display: flex;
768
+ align-items: center;
769
+ gap: var(--space-sm);
770
+ color: var(--text-tertiary);
771
+ }
772
+ `}</style>
773
+ </div>
774
+ );
775
+ }