Spaces:

zen-vton
/

product-classify

Sleeping

App Files Files Community

Abhishek7356 committed on Oct 29, 2025

Commit

d12790d

1 Parent(s): a260940

Create new project for product categorisation

Browse files

Files changed (13) hide show

.gitignore +85 -0
Dockerfile +24 -0
models/categories_processed.csv +0 -0
models/category_embeddings_mpnet.npy +3 -0
models/category_metadata.pkl +3 -0
models/config.json +12 -0
requirements.txt +22 -0
src/__init__.py +0 -0
src/api.py +324 -0
src/classifier.py +354 -0
src/config.py +133 -0
templates/index.html +615 -0
tests/test_api.py +289 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,85 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# Virtual environment
+venv/
+env/
+.venv/
+ENV/
+env.bak/
+venv.bak/
+# VS Code settings
+.vscode/
+# Distribution / packaging
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Jupyter Notebook checkpoints
+.ipynb_checkpoints
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype
+.pytype/
+# Cython debug symbols
+cython_debug/
+# Logs and local data
+*.log
+*.sqlite3
+# Environment files
+.env
+.env.*
+*.env
+# OS-specific
+.DS_Store
+Thumbs.db

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

# Use Python 3.10 (Hugging Face supports this)
FROM python:3.10

# Create a non-root user
RUN useradd -m -u 1000 user
USER user

# As a non-root user pip falls back to a per-user install, which places
# console scripts such as `uvicorn` in ~/.local/bin. That directory is not on
# PATH by default, so add it or the CMD below cannot find the executable.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set working directory
WORKDIR /app

# Copy and install dependencies first so this layer is reused when only the
# application code changes
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code together with the assets it needs at runtime:
# src/config.py looks for the model files under <project root>/models and
# src/api.py serves templates/index.html.
COPY --chown=user ./src ./src
COPY --chown=user ./models ./models
COPY --chown=user ./templates ./templates

# Expose the port (Hugging Face Spaces use 7860)
EXPOSE 7860

# Run the app with uvicorn
CMD ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860"]

models/categories_processed.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

models/category_embeddings_mpnet.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9d5292d260ce14beadb6f8f8a0f75f96e5cf355a384325a3ce24116c9b378b1
+size 102310016

models/category_metadata.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52f6eb174e166b0ddb618bf92ae9f0584366e8c60f97f86af3a8c275a7f2ffdd
+size 10085806

models/config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "model_name": "sentence-transformers/all-mpnet-base-v2",
+  "embedding_dimension": 768,
+  "total_categories": 33304,
+  "preprocessing_strategy": "rich",
+  "thresholds": {
+    "auto_approve": 0.75,
+    "quick_review": 0.6
+  },
+  "boost_factor": 0.15,
+  "created_date": "2025-01-15"
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,22 @@

+# Core ML Dependencies
+sentence-transformers==3.0.0
+torch>=2.0.0
+numpy>=1.24.0
+scikit-learn>=1.3.0
+# API Framework
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+pydantic==2.5.0
+python-multipart==0.0.6
+# Data Processing
+pandas>=2.0.0
+# Optional but Recommended
+python-dotenv==1.0.0
+# For Production (optional for now)
+# pymongo>=4.5.0  # If using MongoDB
+# redis>=5.0.0     # If using Redis caching
+# gunicorn>=21.2.0 # For production server

src/__init__.py ADDED Viewed

File without changes

src/api.py ADDED Viewed

	@@ -0,0 +1,324 @@

+# """
+# FastAPI REST API for Product Classification
+# """
import logging
import time
from collections import Counter
from typing import List, Optional

from fastapi import FastAPI, HTTPException, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel, Field
from starlette.requests import Request

# from classifier import ProductClassifier
# from config import API_TITLE, API_VERSION, API_DESCRIPTION, validate_files
from .classifier import ProductClassifier
from .config import (
    API_DESCRIPTION,
    API_TITLE,
    API_VERSION,
    AUTO_APPROVE_THRESHOLD,
    BASE_DIR,
    MODEL_NAME,
    QUICK_REVIEW_THRESHOLD,
    validate_files,
)
# Set up logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Validate files exist before starting: fail at import time with a clear
# message instead of later, inside the startup hook.
try:
    validate_files()
    logger.info("✅ All required model files found")
except FileNotFoundError as e:
    logger.error(f"❌ Missing files: {e}")
    raise

# Create FastAPI app
app = FastAPI(title=API_TITLE, version=API_VERSION, description=API_DESCRIPTION)

# Anchor the template directory to the project root so the UI is found no
# matter which working directory the server process is started from.
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))

# Add CORS middleware (allows frontend to access API)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual origins
    # Credentials must stay disabled while the origin list is a wildcard:
    # combining the two would let any website issue credentialed requests.
    # Switch this back on only together with an explicit origin list.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize classifier (loaded once at startup)
classifier = None
+# Pydantic models for request/response validation
class ProductInput(BaseModel):
    """Input model for single product classification"""

    # Pydantic v2 style configuration (class-based `Config` is deprecated in
    # v2). The example below is published in the generated OpenAPI schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "id": "prod_123",
                "title": "Apple iPhone 15 Pro",
                "product_type": "Smartphone",
                "vendor": "Apple Inc",
                "tags": ["electronics", "phone", "mobile"],
                "description": "Latest flagship smartphone",
            }
        }
    }

    # Only `title` is mandatory; every other field falls back to a default.
    id: Optional[str] = Field(default="unknown", description="Product ID")
    title: str = Field(..., description="Product title", min_length=1)
    product_type: Optional[str] = Field(default="", description="Product type/category")
    vendor: Optional[str] = Field(default="", description="Brand or vendor name")
    tags: Optional[List[str]] = Field(default=[], description="Product tags")
    description: Optional[str] = Field(default="", description="Product description")
class CategoryResult(BaseModel):
    """Result for a single category match"""

    # Mandatory fields describing the matched category.
    rank: int
    category_id: str
    category_path: str
    confidence_percentage: float

    # Optional scoring details; omitted values are reported as null.
    semantic_score: Optional[float] = Field(default=None)
    boost_applied: Optional[float] = Field(default=None)
class ClassificationResponse(BaseModel):
    """Response model for classification"""

    # Identity of the classified product and the recommended handling.
    product_id: str
    action: str
    reason: str

    # Best match and the text that was embedded to obtain it.
    top_category: str
    top_confidence: float
    product_text: str

    # Runner-up categories.
    alternatives: List[CategoryResult]

    # Filled in by the endpoint after timing the classification call.
    processing_time_ms: Optional[float] = Field(default=None)
class BatchProductInput(BaseModel):
    """Input model for batch classification"""

    # Required, non-defaulted list of products.
    products: List[ProductInput] = Field(..., description="List of products to classify")

    # Bounded to 1..20 inclusive; defaults to 5.
    top_k: int = Field(default=5, ge=1, le=20, description="Number of top matches to return")
class HealthResponse(BaseModel):
    """Health check response"""

    # Service state string reported by the /health endpoint.
    status: str

    # Name of the embedding model in use.
    model: str

    # Size of the loaded category embedding matrix (rows, then columns).
    categories_loaded: int
    embedding_dimension: int
+# Startup event - load classifier
@app.on_event("startup")
async def startup_event():
    """Load the classifier when API starts"""
    global classifier

    for banner in ("🚀 Starting API server...", "Loading Product Classifier..."):
        logger.info(banner)

    try:
        # Assigns the module-level instance that every endpoint reads.
        classifier = ProductClassifier()
        logger.info("✅ Classifier loaded successfully!")
    except Exception as e:
        # Log, then re-raise so the failure is not swallowed.
        logger.error("❌ Failed to load classifier: %s", e)
        raise
+# Root endpoint
+# @app.get("/", tags=["General"])
+# async def root():
+#     """Root endpoint - API information"""
+#     return {
+#         "message": "Insurance Product Classification API",
+#         "version": API_VERSION,
+#         "status": "running",
+#         "docs": "/docs",
+#         "health": "/health",
+#     }
@app.get("/", response_class=HTMLResponse, tags=["General"])
async def root(request: Request):
    """Serve the web UI"""
    # The incoming request is handed to the template as its only context value.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
+# Health check endpoint
@app.get("/health", response_model=HealthResponse, tags=["General"])
async def health_check():
    """
    Health check endpoint
    Returns system status and model information
    """
    # Responds with HTTP 503 until the startup hook has created the classifier.
    if classifier is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Classifier not initialized",
        )

    embeddings = classifier.embeddings
    return {
        "status": "healthy",
        # Derived from the configured model id (organisation prefix dropped)
        # so the report cannot drift from the model that is actually loaded.
        "model": MODEL_NAME.rsplit("/", 1)[-1],
        "categories_loaded": len(embeddings),
        "embedding_dimension": embeddings.shape[1],
    }
+# Single product classification
@app.post("/classify", response_model=ClassificationResponse, tags=["Classification"])
async def classify_product(product: ProductInput):
    """
    Classify a single product into insurance categories
    Returns:
    - action: AUTO_APPROVE, QUICK_REVIEW, or MANUAL_CATEGORIZATION
    - top_category: Best matching category
    - confidence: Confidence score (0-100%)
    - alternatives: Top alternative categories
    """
    # Responds with HTTP 503 until the startup hook has created the classifier,
    # and with HTTP 500 when classification itself raises.
    if classifier is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Classifier not initialized",
        )
    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        started = time.perf_counter()

        # model_dump() is the Pydantic v2 replacement for the deprecated .dict().
        result = classifier.classify(product.model_dump())

        elapsed_ms = (time.perf_counter() - started) * 1000
        result["processing_time_ms"] = round(elapsed_ms, 2)
        logger.info(
            f"Classified product '{product.title}' → "
            f"{result['action']} ({result['top_confidence']}%)"
        )
        return result
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Classification error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Classification failed: {str(e)}",
        ) from e
+# Batch product classification
@app.post("/classify-batch", tags=["Classification"])
async def classify_batch(batch: BatchProductInput):
    """
    Classify multiple products at once
    Useful for bulk processing of product catalogs
    """
    # Responds with HTTP 503 until the startup hook has created the classifier,
    # and with HTTP 500 when the batch call itself raises.
    if classifier is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Classifier not initialized",
        )
    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        started = time.perf_counter()

        # model_dump() is the Pydantic v2 replacement for the deprecated .dict().
        products_data = [p.model_dump() for p in batch.products]
        results = classifier.classify_batch(products_data, top_k=batch.top_k)

        processing_time = (time.perf_counter() - started) * 1000

        # Tally how many products ended up with each action; entries without
        # an "action" key are counted as UNKNOWN.
        action_counts = dict(
            Counter(result.get("action", "UNKNOWN") for result in results)
        )

        logger.info(
            f"Batch classified {len(products_data)} products in {processing_time:.0f}ms"
        )
        return {
            "total_products": len(products_data),
            "processing_time_ms": round(processing_time, 2),
            "action_counts": action_counts,
            "results": results,
        }
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Batch classification error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Batch classification failed: {str(e)}",
        ) from e
+# Get statistics
@app.get("/stats", tags=["General"])
async def get_statistics():
    """
    Get system statistics
    """
    # Responds with HTTP 503 until the startup hook has created the classifier.
    if classifier is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Classifier not initialized",
        )

    # Render the thresholds from configuration so the reported bands always
    # match the values the classifier actually applies. The "g" format drops
    # a trailing ".0" (0.75 -> "75") while keeping fractional thresholds.
    auto_pct = f"{AUTO_APPROVE_THRESHOLD * 100:g}"
    review_pct = f"{QUICK_REVIEW_THRESHOLD * 100:g}"

    embeddings = classifier.embeddings
    return {
        "total_categories": len(embeddings),
        "embedding_dimension": embeddings.shape[1],
        # Configured model id with the organisation prefix dropped.
        "model_name": MODEL_NAME.rsplit("/", 1)[-1],
        "thresholds": {
            "auto_approve": f"≥{auto_pct}%",
            "quick_review": f"{review_pct}-{auto_pct}%",
            "manual": f"<{review_pct}%",
        },
    }
+# Error handlers
+from fastapi.responses import JSONResponse
@app.exception_handler(404)
async def not_found_handler(request, exc):
    """Handle 404 errors"""
    # Fixed JSON body; neither the request nor the exception is inspected.
    payload = {
        "error": "Endpoint not found",
        "message": "Check /docs for available endpoints",
    }
    return JSONResponse(status_code=404, content=payload)
@app.exception_handler(500)
async def internal_error_handler(request, exc):
    """Handle 500 errors"""
    # The exception text goes to the log only; the client gets a generic body.
    logger.error("Internal server error: %s", exc)
    payload = {
        "error": "Internal server error",
        "message": "Something went wrong. Check logs for details.",
    }
    return JSONResponse(status_code=500, content=payload)
# Run with: uvicorn src.api:app --reload   (or: python -m src.api)
if __name__ == "__main__":
    import uvicorn

    # This module uses package-relative imports, so uvicorn has to import it
    # through its package-qualified path; a bare "api:app" cannot be resolved.
    uvicorn.run(
        "src.api:app", host="0.0.0.0", port=8000, reload=True, log_level="info"
    )

src/classifier.py ADDED Viewed

	@@ -0,0 +1,354 @@

+# # src/classifier.py
+# from sentence_transformers import SentenceTransformer
+# import numpy as np
+# import pickle
+# class ProductClassifier:
+#     def __init__(self, model_path="./models"):
+#         self.model = SentenceTransformer("all-mpnet-base-v2")
+#         self.embeddings = np.load(f"{model_path}/category_embeddings_mpnet.npy")
+#         with open(f"{model_path}/category_metadata.pkl", "rb") as f:
+#             self.metadata = pickle.load(f)
+#     def classify(self, product_data, top_k=5):
+#         # Implementation here
+#         pass
+# """
+# Product Classification Engine
+# Loads pre-trained embeddings and performs similarity-based classification
+# """
+import numpy as np
+import pickle
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+from typing import Dict, List, Optional
+import re
+import logging
+from .config import (
+    MODEL_NAME,
+    EMBEDDINGS_FILE,
+    METADATA_FILE,
+    AUTO_APPROVE_THRESHOLD,
+    QUICK_REVIEW_THRESHOLD,
+    BOOST_FACTOR,
+    MAX_BOOST,
+    DEFAULT_TOP_K,
+    PRODUCT_KEYWORDS,
+)
# Module-level logger only. Logging is deliberately NOT configured here:
# logging.basicConfig() is a no-op once the root logger has a handler, so
# calling it at import time in a library module would silently override the
# format chosen by whichever application imports this module.
logger = logging.getLogger(__name__)
class ProductClassifier:
    """
    ML-powered product classifier for insurance categorization

    Loads the sentence-embedding model, the pre-computed category embeddings
    and their metadata once, then ranks categories for a product by cosine
    similarity, optionally boosted by keyword overlap.
    """

    def __init__(self):
        """Initialize classifier by loading model and embeddings"""
        logger.info("Initializing Product Classifier...")

        # Load the embedding model
        logger.info(f"Loading model: {MODEL_NAME}")
        self.model = SentenceTransformer(MODEL_NAME)
        logger.info(
            f"✅ Model loaded (dimension: {self.model.get_sentence_embedding_dimension()})"
        )

        # Load pre-computed category embeddings
        logger.info(f"Loading category embeddings from {EMBEDDINGS_FILE}")
        self.embeddings = np.load(EMBEDDINGS_FILE)
        logger.info(f"✅ Loaded {self.embeddings.shape[0]:,} category embeddings")

        # Load category metadata.
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file; only load metadata produced by a trusted build.
        logger.info(f"Loading metadata from {METADATA_FILE}")
        with open(METADATA_FILE, "rb") as f:
            self.metadata = pickle.load(f)
        logger.info("✅ Metadata loaded")

        # Category texts used for keyword boosting, plus a lower-cased copy
        # computed once so it is not rebuilt on every classification.
        self.embedding_texts = self.metadata.get("embedding_texts", [])
        self._embedding_texts_lower = [text.lower() for text in self.embedding_texts]
        logger.info("🎉 Classifier ready!")

    def _lowered_category_texts(self) -> List[str]:
        """Return the lower-cased category texts, rebuilding the cache if needed."""
        cached = getattr(self, "_embedding_texts_lower", None)
        # Rebuild when the instance was created without __init__ or when
        # embedding_texts was replaced by a list of a different length.
        if cached is None or len(cached) != len(self.embedding_texts):
            cached = [text.lower() for text in self.embedding_texts]
            self._embedding_texts_lower = cached
        return cached

    def preprocess_product(self, product_data: Dict) -> str:
        """
        Preprocess product data into searchable text
        Args:
            product_data: Dictionary with product fields
                - title (str): Product title
                - product_type (str, optional): Product type/category
                - vendor (str, optional): Brand/vendor name
                - tags (list/str, optional): Product tags
                - description (str, optional): Product description
        Returns:
            Processed text string for embedding
        """
        parts = []

        # Extract fields in priority order; missing, None and empty values are
        # all skipped by the truthiness checks below.
        title = product_data.get("title", "")
        product_type = product_data.get("product_type", "")
        vendor = product_data.get("vendor", "")
        description = product_data.get("description", "")
        tags = product_data.get("tags", [])

        # 1. Title (most important)
        if title:
            parts.append(title)
        # 2. Product type (category hint)
        if product_type:
            parts.append(f"Product type: {product_type}")
        # 3. Brand/Vendor
        if vendor:
            parts.append(f"Brand: {vendor}")
        # 4. Tags (keywords)
        if tags:
            if isinstance(tags, (list, tuple)):
                # str() keeps the join from failing on non-string tags.
                tag_text = " ".join(str(tag) for tag in tags)
            else:
                tag_text = tags
            parts.append(f"Keywords: {tag_text}")
        # 5. Description (limited to 100 chars)
        if description:
            parts.append(description[:100].strip())

        return ". ".join(parts)

    def extract_keywords(self, text: str) -> List[str]:
        """
        Extract important keywords from product text
        Args:
            text: Product text
        Returns:
            List of detected keywords, sorted alphabetically
        """
        text_lower = text.lower()
        # NOTE(review): this is plain substring matching, so "book" is also
        # detected inside "macbook" and "oven" inside "woven". Confirm whether
        # word-boundary matching is wanted before tightening it.
        # Sorting makes the result independent of set iteration order.
        return sorted(kw for kw in PRODUCT_KEYWORDS if kw in text_lower)

    def classify(
        self, product_data: Dict, top_k: int = DEFAULT_TOP_K, use_boost: bool = True
    ) -> Dict:
        """
        Classify a product into insurance categories
        Args:
            product_data: Product information dictionary
            top_k: Number of top matches to return (must be at least 1)
            use_boost: Whether to apply keyword boosting
        Returns:
            Classification results with confidence scores and recommendations
        Raises:
            ValueError: If top_k is smaller than 1
        """
        # A slice of [-0:] would select every category, so reject it up front.
        if top_k < 1:
            raise ValueError(f"top_k must be at least 1, got {top_k}")

        # Preprocess product text
        product_text = self.preprocess_product(product_data)

        # Generate embedding for product
        product_embedding = self.model.encode([product_text], normalize_embeddings=True)

        # Calculate semantic similarities
        semantic_scores = cosine_similarity(product_embedding, self.embeddings)[0]

        # Apply keyword boosting if enabled
        if use_boost:
            product_keywords = self.extract_keywords(product_text)
            boosted_scores = self._apply_keyword_boost(
                semantic_scores, product_keywords
            )
        else:
            boosted_scores = semantic_scores

        # Get top K indices, best first
        top_indices = boosted_scores.argsort()[-top_k:][::-1]

        # Format results using native Python numbers so the payload is JSON
        # serializable without further conversion.
        results = []
        for rank, idx in enumerate(top_indices, 1):
            semantic = float(semantic_scores[idx])
            final = float(boosted_scores[idx])
            category_data = {
                "rank": rank,
                "category_id": self.metadata["category_ids"][idx],
                "category_path": self.metadata["category_paths"][idx],
                "semantic_score": semantic,
                "final_score": final,
                "confidence_percentage": round(final * 100, 2),
            }
            # Add boost information if used
            if use_boost:
                category_data["boost_applied"] = round((final - semantic) * 100, 2)
            results.append(category_data)

        # Determine action based on top score
        best = results[0]
        top_confidence = best["final_score"]
        if top_confidence >= AUTO_APPROVE_THRESHOLD:
            action = "AUTO_APPROVE"
            reason = f"High confidence ({best['confidence_percentage']}%)"
        elif top_confidence >= QUICK_REVIEW_THRESHOLD:
            action = "QUICK_REVIEW"
            reason = f"Medium confidence ({best['confidence_percentage']}%) - verify category"
        else:
            action = "MANUAL_CATEGORIZATION"
            reason = f"Low confidence ({best['confidence_percentage']}%) - needs expert review"

        return {
            # `or` also covers an explicit id of None, which .get() with a
            # default would pass through unchanged.
            "product_id": product_data.get("id") or "unknown",
            "product_text": product_text,
            "action": action,
            "reason": reason,
            "top_category": best["category_path"],
            "top_confidence": best["confidence_percentage"],
            "alternatives": results[1:3] if len(results) > 1 else [],
            "all_results": results,
        }

    def _apply_keyword_boost(
        self, scores: np.ndarray, product_keywords: List[str]
    ) -> np.ndarray:
        """
        Apply keyword-based score boosting
        Args:
            scores: Original semantic similarity scores
            product_keywords: List of keywords found in product
        Returns:
            Boosted scores (a copy; the input array is left untouched)
        """
        boosted_scores = scores.copy()
        if not product_keywords:
            return boosted_scores

        # Boost categories that contain product keywords
        for idx, cat_text_lower in enumerate(self._lowered_category_texts()):
            matches = sum(1 for kw in product_keywords if kw in cat_text_lower)
            if matches > 0:
                # Boost proportional to keyword matches, capped at MAX_BOOST;
                # the resulting score is capped at 1.0.
                boost = min(matches * BOOST_FACTOR, MAX_BOOST)
                boosted_scores[idx] = min(boosted_scores[idx] + boost, 1.0)
        return boosted_scores

    def classify_batch(
        self, products: List[Dict], top_k: int = DEFAULT_TOP_K
    ) -> List[Dict]:
        """
        Classify multiple products at once
        Args:
            products: List of product data dictionaries
            top_k: Number of top matches per product
        Returns:
            List of classification results; a product that fails to classify
            yields an entry with action "ERROR" instead of aborting the batch
        """
        logger.info(f"Classifying batch of {len(products)} products...")
        results = []
        for i, product in enumerate(products, 1):
            try:
                result = self.classify(product, top_k=top_k)
                # Convert all numpy types to Python native types for JSON serialization
                result = self._convert_to_json_serializable(result)
                results.append(result)
                if i % 100 == 0:
                    logger.info(f"  Processed {i}/{len(products)} products")
            except Exception as e:
                # Deliberately broad: one bad product must not fail the batch.
                logger.error(f"  Error classifying product {i}: {e}")
                results.append(
                    {
                        # `or` also covers an explicit id of None.
                        "product_id": product.get("id") or f"product_{i}",
                        "action": "ERROR",
                        "reason": str(e),
                        "top_category": None,
                        "top_confidence": 0.0,
                    }
                )
        logger.info("✅ Batch classification complete!")
        return results

    def _convert_to_json_serializable(self, obj):
        """
        Recursively convert numpy types to Python native types

        Dicts are converted value by value, lists and tuples become lists,
        numpy scalars become bool/int/float and arrays become nested lists.
        Anything else is returned unchanged.
        """
        if isinstance(obj, dict):
            return {
                key: self._convert_to_json_serializable(value)
                for key, value in obj.items()
            }
        if isinstance(obj, (list, tuple)):
            return [self._convert_to_json_serializable(item) for item in obj]
        if isinstance(obj, np.bool_):
            return bool(obj)
        # np.integer / np.floating are the base classes of all sized variants.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return obj
# Test the classifier if run directly.
# The module uses package-relative imports, so run it as a module from the
# project root: python -m src.classifier
if __name__ == "__main__":
    # Logging is configured by the entry point; this is a no-op if the root
    # logger already has a handler.
    logging.basicConfig(level=logging.INFO)

    print("Testing Product Classifier...")
    print("=" * 80)

    # Initialize classifier
    classifier = ProductClassifier()

    # Test product
    test_product = {
        "id": "test_001",
        "title": "Apple iPhone 15 Pro Max",
        "product_type": "Smartphone",
        "vendor": "Apple Inc",
        "tags": ["electronics", "mobile", "phone", "smartphone"],
        "description": "Latest flagship smartphone with titanium design",
    }
    print("\n📱 Test Product:")
    print(f"  {test_product['title']}")

    # Classify
    result = classifier.classify(test_product)
    print("\n🎯 Classification Result:")
    print(f"  Action: {result['action']}")
    print(f"  Top Category: {result['top_category']}")
    print(f"  Confidence: {result['top_confidence']}%")
    print(f"  Reason: {result['reason']}")

    # "alternatives" holds the runner-up matches returned by classify().
    print("\n📊 Top 3 Alternatives:")
    for alt in result["alternatives"][:3]:
        print(
            f"  {alt['rank']}. {alt['category_path']} ({alt['confidence_percentage']}%)"
        )
    print("\n" + "=" * 80)
    print("✅ Classifier test complete!")

src/config.py ADDED Viewed

	@@ -0,0 +1,133 @@

+# """
+# Configuration settings for the insurance product classifier
+# """
+import os
+from pathlib import Path
# --- Paths -----------------------------------------------------------------
# Project root: the directory two levels above this file.
BASE_DIR = Path(__file__).resolve().parents[1]

# Directory holding the model artefacts, and the individual files inside it.
MODEL_DIR = BASE_DIR / "models"
EMBEDDINGS_FILE = MODEL_DIR / "category_embeddings_mpnet.npy"
METADATA_FILE = MODEL_DIR / "category_metadata.pkl"
CONFIG_FILE = MODEL_DIR / "config.json"

# --- Model -----------------------------------------------------------------
MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
EMBEDDING_DIMENSION = 768

# --- Classification thresholds (fractions of 1.0) ---------------------------
AUTO_APPROVE_THRESHOLD = 0.75  # 75% confidence
QUICK_REVIEW_THRESHOLD = 0.60  # 60% confidence

# --- Keyword boosting --------------------------------------------------------
BOOST_FACTOR = 0.15  # 15% boost for keyword matches
MAX_BOOST = 0.30  # Maximum 30% total boost

# --- API metadata ------------------------------------------------------------
API_TITLE = "Insurance Product Classification API"
API_VERSION = "1.0.0"
API_DESCRIPTION = "ML-powered product categorization for insurance underwriting"

# --- Processing --------------------------------------------------------------
DEFAULT_TOP_K = 5  # Return top 5 matches
BATCH_SIZE = 32  # For batch processing

# --- Keywords for boosting, grouped by product family ------------------------
PRODUCT_KEYWORDS = {
    # Electronics
    "iphone", "ipad", "macbook", "smartphone", "laptop", "tablet", "computer",
    "electronics", "phone", "mobile", "samsung", "apple", "dell", "hp",
    # Appliances
    "refrigerator", "dishwasher", "washing machine", "dryer", "oven",
    "microwave", "coffee maker", "blender", "toaster", "appliance",
    # Clothing
    "shoes", "shirt", "pants", "dress", "jacket", "sneakers", "boots",
    "clothing", "apparel", "footwear",
    # Books
    "book", "novel", "textbook", "ebook", "reading", "literature",
    # Sports
    "sports", "fitness", "exercise", "gym", "athletic", "running", "yoga",
    # Home
    "furniture", "decor", "bedding", "kitchen", "home", "garden",
}
def validate_files(required_files=None):
    """Validate that all required model files exist

    Args:
        required_files: Optional iterable of paths (str or Path) to check.
            When omitted, the embeddings, metadata and config files defined
            in this module are checked.
    Returns:
        True when every path refers to an existing regular file.
    Raises:
        FileNotFoundError: If one or more paths are missing; the message
            lists every missing path, one per line.
    """
    if required_files is None:
        required_files = [EMBEDDINGS_FILE, METADATA_FILE, CONFIG_FILE]

    # is_file() rather than exists(): a directory with the expected name
    # would pass an existence check and only fail later at load time.
    missing_files = [
        str(path) for path in map(Path, required_files) if not path.is_file()
    ]
    if missing_files:
        raise FileNotFoundError(
            "Missing required files:\n" + "\n".join(f"  - {f}" for f in missing_files)
        )
    return True
if __name__ == "__main__":
    # Print a short summary of the effective settings, then check the files.
    summary = (
        "Configuration Settings:",
        f"  Model Directory: {MODEL_DIR}",
        f"  Embeddings File: {EMBEDDINGS_FILE.name}",
        f"  Metadata File: {METADATA_FILE.name}",
        f"  Auto-Approve Threshold: {AUTO_APPROVE_THRESHOLD * 100}%",
        f"  Quick Review Threshold: {QUICK_REVIEW_THRESHOLD * 100}%",
    )
    print("\n".join(summary))

    try:
        validate_files()
    except FileNotFoundError as e:
        print(f"\n❌ Error: {e}")
    else:
        print("\n✅ All required files found!")

templates/index.html ADDED Viewed

	@@ -0,0 +1,615 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Insurance Product Classification System</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+        body {
+            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            padding: 20px;
+        }
+        .container {
+            max-width: 1200px;
+            margin: 0 auto;
+        }
+        .header {
+            background: white;
+            border-radius: 15px;
+            padding: 30px;
+            margin-bottom: 30px;
+            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+            text-align: center;
+        }
+        .header h1 {
+            color: #667eea;
+            font-size: 2.5em;
+            margin-bottom: 10px;
+        }
+        .header p {
+            color: #666;
+            font-size: 1.1em;
+        }
+        .stats-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 20px;
+            margin-bottom: 30px;
+        }
+        .stat-card {
+            background: white;
+            border-radius: 15px;
+            padding: 25px;
+            box-shadow: 0 5px 15px rgba(0,0,0,0.1);
+            transition: transform 0.3s;
+        }
+        .stat-card:hover {
+            transform: translateY(-5px);
+        }
+        .stat-card h3 {
+            color: #667eea;
+            font-size: 2.5em;
+            margin-bottom: 10px;
+        }
+        .stat-card p {
+            color: #666;
+            font-size: 1em;
+        }
+        .main-content {
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 30px;
+        }
+        .card {
+            background: white;
+            border-radius: 15px;
+            padding: 30px;
+            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+            width: 100%;
+        }
+        .card h2 {
+            color: #667eea;
+            margin-bottom: 20px;
+            font-size: 1.8em;
+        }
+        .form-group {
+            margin-bottom: 20px;
+        }
+        label {
+            display: block;
+            color: #333;
+            font-weight: 600;
+            margin-bottom: 8px;
+        }
+        input, textarea, select {
+            width: 100%;
+            padding: 12px;
+            border: 2px solid #e0e0e0;
+            border-radius: 8px;
+            font-size: 1em;
+            transition: border-color 0.3s;
+        }
+        input:focus, textarea:focus, select:focus {
+            outline: none;
+            border-color: #667eea;
+        }
+        textarea {
+            resize: vertical;
+            min-height: 80px;
+        }
+        .btn {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 15px 30px;
+            border: none;
+            border-radius: 8px;
+            font-size: 1.1em;
+            font-weight: 600;
+            cursor: pointer;
+            width: 100%;
+            transition: transform 0.2s;
+        }
+        .btn:hover {
+            transform: scale(1.02);
+        }
+        .btn:disabled {
+            opacity: 0.6;
+            cursor: not-allowed;
+        }
+        .result {
+            display: none;
+            margin-top: 20px;
+            padding: 20px;
+            border-radius: 10px;
+            animation: slideIn 0.5s;
+        }
+        @keyframes slideIn {
+            from {
+                opacity: 0;
+                transform: translateY(20px);
+            }
+            to {
+                opacity: 1;
+                transform: translateY(0);
+            }
+        }
+        .result.success {
+            background: #d4edda;
+            border: 2px solid #28a745;
+        }
+        .result.warning {
+            background: #fff3cd;
+            border: 2px solid #ffc107;
+        }
+        .result.info {
+            background: #d1ecf1;
+            border: 2px solid #17a2b8;
+        }
+        .result-header {
+            display: flex;
+            align-items: center;
+            margin-bottom: 15px;
+        }
+        .result-icon {
+            font-size: 2em;
+            margin-right: 15px;
+        }
+        .result-title {
+            font-size: 1.5em;
+            font-weight: 700;
+        }
+        .result-content {
+            margin-top: 15px;
+        }
+        .result-item {
+            margin-bottom: 10px;
+            padding: 10px;
+            background: white;
+            border-radius: 5px;
+        }
+        .confidence-bar {
+            height: 25px;
+            background: #e0e0e0;
+            border-radius: 15px;
+            overflow: hidden;
+            margin-top: 10px;
+        }
+        .confidence-fill {
+            height: 100%;
+            background: linear-gradient(90deg, #667eea, #764ba2);
+            transition: width 1s ease;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            color: white;
+            font-weight: 600;
+        }
+        .alternatives {
+            margin-top: 15px;
+        }
+        .alternative-item {
+            padding: 10px;
+            margin-bottom: 8px;
+            background: #f8f9fa;
+            border-radius: 5px;
+            border-left: 4px solid #667eea;
+        }
+        .loading {
+            display: none;
+            text-align: center;
+            margin: 20px 0;
+        }
+        .spinner {
+            border: 4px solid #f3f3f3;
+            border-top: 4px solid #667eea;
+            border-radius: 50%;
+            width: 40px;
+            height: 40px;
+            animation: spin 1s linear infinite;
+            margin: 0 auto;
+        }
+        @keyframes spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        .footer {
+            text-align: center;
+            color: white;
+            margin-top: 30px;
+            padding: 20px;
+        }
+        @media (max-width: 768px) {
+            .main-content {
+                grid-template-columns: 1fr;
+            }
+            .header h1 {
+                font-size: 1.8em;
+            }
+        }
+        .badge {
+            display: inline-block;
+            padding: 5px 12px;
+            border-radius: 20px;
+            font-size: 0.9em;
+            font-weight: 600;
+            margin-left: 10px;
+        }
+        .badge-success {
+            background: #28a745;
+            color: white;
+        }
+        .badge-warning {
+            background: #ffc107;
+            color: #333;
+        }
+        .badge-info {
+            background: #17a2b8;
+            color: white;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <!-- Header -->
+        <div class="header">
+            <h1>🏥 Insurance Product Classification System</h1>
+            <p>AI-Powered Product Categorization for Insurance Underwriting</p>
+        </div>
+        <!-- Statistics -->
+        <div class="stats-grid">
+            <div class="stat-card">
+                <h3 id="totalCategories">-</h3>
+                <p>Insurance Categories</p>
+            </div>
+            <div class="stat-card">
+                <h3 id="automationRate">87.5%</h3>
+                <p>Automation Rate</p>
+            </div>
+            <div class="stat-card">
+                <h3 id="avgConfidence">86.1%</h3>
+                <p>Average Confidence</p>
+            </div>
+            <div class="stat-card">
+                <h3 id="processingSpeed">~100ms</h3>
+                <p>Processing Speed</p>
+            </div>
+        </div>
+        <!-- Main Content -->
+        <div class="main-content">
+            <!-- Classification Form -->
+            <div class="card" style="width: 79vw;">
+                <h2>🔍 Classify Product</h2>
+                <form id="classifyForm">
+                    <div class="form-group">
+                        <label for="productTitle">Product Title *</label>
+                        <input type="text" id="productTitle" placeholder="e.g., Apple iPhone 15 Pro Max" required>
+                    </div>
+                    <div class="form-group">
+                        <label for="productType">Product Type</label>
+                        <input type="text" id="productType" placeholder="e.g., Smartphone">
+                    </div>
+                    <div class="form-group">
+                        <label for="vendor">Brand/Vendor</label>
+                        <input type="text" id="vendor" placeholder="e.g., Apple Inc">
+                    </div>
+                    <div class="form-group">
+                        <label for="tags">Tags (comma-separated)</label>
+                        <input type="text" id="tags" placeholder="e.g., electronics, phone, mobile">
+                    </div>
+                    <div class="form-group">
+                        <label for="description">Description</label>
+                        <textarea id="description" placeholder="Product description..."></textarea>
+                    </div>
+                    <button type="submit" class="btn" id="classifyBtn">
+                        Classify Product
+                    </button>
+                </form>
+                <div class="loading" id="loading">
+                    <div class="spinner"></div>
+                    <p style="margin-top: 10px; color: #667eea;">Analyzing product...</p>
+                </div>
+                <div class="result" id="result"></div>
+            </div>
+            <!-- Quick Test Examples
+            <div class="card">
+                <h2>⚡ Quick Test Examples</h2>
+                <p style="margin-bottom: 20px; color: #666;">Click to test with pre-filled examples:</p>
+                <div class="alternative-item" style="cursor: pointer; margin-bottom: 15px;" onclick="testProduct('iphone')">
+                    <strong>📱 Apple iPhone 15 Pro</strong><br>
+                    <small>Smartphone • Expected: 65-70% confidence</small>
+                </div>
+                <div class="alternative-item" style="cursor: pointer; margin-bottom: 15px;" onclick="testProduct('shoes')">
+                    <strong>👟 Nike Running Shoes</strong><br>
+                    <small>Athletic Footwear • Expected: 75-80% confidence</small>
+                </div>
+                <div class="alternative-item" style="cursor: pointer; margin-bottom: 15px;" onclick="testProduct('coffee')">
+                    <strong>☕ Coffee Maker</strong><br>
+                    <small>Kitchen Appliance • Expected: 80-85% confidence</small>
+                </div>
+                <div class="alternative-item" style="cursor: pointer; margin-bottom: 15px;" onclick="testProduct('book')">
+                    <strong>📚 The Great Gatsby</strong><br>
+                    <small>Book • Expected: 85-90% confidence</small>
+                </div>
+                <div class="alternative-item" style="cursor: pointer;" onclick="testProduct('laptop')">
+                    <strong>💻 Gaming Laptop</strong><br>
+                    <small>Computer • Expected: 70-75% confidence</small>
+                </div>
+                <div style="margin-top: 30px; padding: 15px; background: #f8f9fa; border-radius: 8px;">
+                    <strong style="color: #667eea;">System Status:</strong>
+                    <p style="margin-top: 10px; color: #666;">
+                        <span id="systemStatus">Checking...</span>
+                    </p>
+                </div>
+            </div> -->
+        </div>
+        <!-- Footer -->
+        <div class="footer">
+            <p>Powered by Machine Learning • MPNet Model • 768-Dimensional Embeddings</p>
+            <p style="margin-top: 10px; opacity: 0.8;">API Documentation: <a href="/docs" style="color: white; text-decoration: underline;">/docs</a></p>
+        </div>
+    </div>
+    <script>
+        // Load statistics on page load
+        async function loadStats() {
+            try {
+                const response = await fetch('/stats');
+                const data = await response.json();
+                document.getElementById('totalCategories').textContent = data.total_categories.toLocaleString();
+            } catch (error) {
+                console.error('Error loading stats:', error);
+            }
+        }
+        // Check system health
+        async function checkHealth() {
+            try {
+                const response = await fetch('/health');
+                const data = await response.json();
+                if (data.status === 'healthy') {
+                    document.getElementById('systemStatus').innerHTML = '✅ <strong style="color: #28a745;">Online</strong> • ' +
+                        data.categories_loaded.toLocaleString() + ' categories loaded';
+                }
+            } catch (error) {
+                document.getElementById('systemStatus').innerHTML = '❌ <strong style="color: #dc3545;">Offline</strong>';
+            }
+        }
+        // Classify product
+        document.getElementById('classifyForm').addEventListener('submit', async (e) => {
+            e.preventDefault();
+            const title = document.getElementById('productTitle').value;
+            const productType = document.getElementById('productType').value;
+            const vendor = document.getElementById('vendor').value;
+            const tags = document.getElementById('tags').value.split(',').map(t => t.trim()).filter(t => t);
+            const description = document.getElementById('description').value;
+            const product = {
+                id: 'demo_' + Date.now(),
+                title,
+                product_type: productType,
+                vendor,
+                tags,
+                description
+            };
+            // Show loading
+            document.getElementById('loading').style.display = 'block';
+            document.getElementById('result').style.display = 'none';
+            document.getElementById('classifyBtn').disabled = true;
+            try {
+                const response = await fetch('/classify', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json'
+                    },
+                    body: JSON.stringify(product)
+                });
+                const result = await response.json();
+                displayResult(result);
+            } catch (error) {
+                alert('Error: ' + error.message);
+            } finally {
+                document.getElementById('loading').style.display = 'none';
+                document.getElementById('classifyBtn').disabled = false;
+            }
+        });
+        // Display classification result
+        function displayResult(result) {
+            const resultDiv = document.getElementById('result');
+            let resultClass = 'info';
+            let icon = 'ℹ️';
+            let badge = '';
+            if (result.action === 'AUTO_APPROVE') {
+                resultClass = 'success';
+                icon = '✅';
+                badge = '<span class="badge badge-success">AUTO APPROVED</span>';
+            } else if (result.action === 'QUICK_REVIEW') {
+                resultClass = 'warning';
+                icon = '⚠️';
+                badge = '<span class="badge badge-warning">NEEDS REVIEW</span>';
+            } else {
+                resultClass = 'info';
+                icon = '📋';
+                badge = '<span class="badge badge-info">MANUAL</span>';
+            }
+            const confidence = result.top_confidence;
+            let html = `
+                <div class="result-header">
+                    <div class="result-icon">${icon}</div>
+                    <div>
+                        <div class="result-title">${result.action.replace('_', ' ')}${badge}</div>
+                        <small style="color: #666;">${result.reason}</small>
+                    </div>
+                </div>
+                <div class="result-content">
+                    <div class="result-item">
+                        <strong style="color: #667eea;">Top Category:</strong><br>
+                        ${result.top_category}
+                    </div>
+                    <div class="result-item">
+                        <strong style="color: #667eea;">Confidence Score:</strong>
+                        <div class="confidence-bar">
+                            <div class="confidence-fill" style="width: ${confidence}%">
+                                ${confidence.toFixed(2)}%
+                            </div>
+                        </div>
+                    </div>
+                    <div class="result-item">
+                        <strong style="color: #667eea;">Processing Time:</strong> ${result.processing_time_ms.toFixed(2)}ms
+                    </div>
+                    <div class="alternatives">
+                        <strong style="color: #667eea;">Alternative Categories:</strong>
+                        ${result.alternatives.slice(0, 3).map((alt, i) => `
+                            <div class="alternative-item">
+                                <strong>${i + 2}. ${alt.category_path}</strong><br>
+                                <small>Confidence: ${alt.confidence_percentage}%</small>
+                            </div>
+                        `).join('')}
+                    </div>
+                </div>
+            `;
+            resultDiv.className = `result ${resultClass}`;
+            resultDiv.innerHTML = html;
+            resultDiv.style.display = 'block';
+        }
+        // Pre-fill test products
+        function testProduct(type) {
+            const products = {
+                iphone: {
+                    title: 'Apple iPhone 15 Pro Max',
+                    type: 'Smartphone',
+                    vendor: 'Apple Inc',
+                    tags: 'electronics, mobile, phone, smartphone, 5G',
+                    description: 'Latest flagship smartphone with titanium design and A17 Bionic chip'
+                },
+                shoes: {
+                    title: 'Nike Air Zoom Pegasus 40',
+                    type: 'Running Shoes',
+                    vendor: 'Nike',
+                    tags: 'shoes, athletic, running, sports, footwear',
+                    description: 'Premium running shoes with responsive cushioning'
+                },
+                coffee: {
+                    title: 'Cuisinart DCC-3200 Coffee Maker',
+                    type: 'Coffee Machine',
+                    vendor: 'Cuisinart',
+                    tags: 'appliances, kitchen, coffee, brewing',
+                    description: 'Programmable automatic drip coffee maker with 14-cup carafe'
+                },
+                book: {
+                    title: 'The Great Gatsby by F. Scott Fitzgerald',
+                    type: 'Book',
+                    vendor: 'Scribner',
+                    tags: 'books, fiction, literature, classic',
+                    description: 'Classic American novel set in the Jazz Age'
+                },
+                laptop: {
+                    title: 'ASUS ROG Strix Gaming Laptop',
+                    type: 'Laptop Computer',
+                    vendor: 'ASUS',
+                    tags: 'computers, gaming, laptop, electronics',
+                    description: 'High-performance gaming laptop with RTX 4070 graphics'
+                }
+            };
+            const product = products[type];
+            document.getElementById('productTitle').value = product.title;
+            document.getElementById('productType').value = product.type;
+            document.getElementById('vendor').value = product.vendor;
+            document.getElementById('tags').value = product.tags;
+            document.getElementById('description').value = product.description;
+            // Scroll to form
+            document.getElementById('classifyForm').scrollIntoView({ behavior: 'smooth' });
+        }
+        // Initialize
+        loadStats();
+        checkHealth();
+    </script>
+</body>
+</html>

tests/test_api.py ADDED Viewed

	@@ -0,0 +1,289 @@

+"""
+Test script for Product Classification API
+Run this to test your API endpoints
+"""
+import requests
+import json
+from typing import Dict, List
+# API base URL
+BASE_URL = "http://localhost:8000"
+def test_health():
+    """Test health check endpoint"""
+    print("\n" + "=" * 80)
+    print("TEST 1: Health Check")
+    print("=" * 80)
+    response = requests.get(f"{BASE_URL}/health")
+    if response.status_code == 200:
+        data = response.json()
+        print("✅ API is healthy!")
+        print(f"   Status: {data['status']}")
+        print(f"   Categories loaded: {data['categories_loaded']:,}")
+        print(f"   Embedding dimension: {data['embedding_dimension']}")
+    else:
+        print(f"❌ Health check failed: {response.status_code}")
+    return response.status_code == 200
+def test_single_classification():
+    """Test single product classification"""
+    print("\n" + "=" * 80)
+    print("TEST 2: Single Product Classification")
+    print("=" * 80)
+    # Test product
+    product = {
+        "id": "test_001",
+        "title": "Sony WH-1000XM5 Wireless Headphones",
+        "product_type": "Headphones",
+        "vendor": "Sony",
+        "tags": ["audio", "electronics", "wireless", "bluetooth"],
+        "description": "Premium noise-canceling over-ear headphones",
+    }
+    print(f"\n📱 Test Product: {product['title']}")
+    response = requests.post(f"{BASE_URL}/classify", json=product)
+    if response.status_code == 200:
+        result = response.json()
+        print(f"\n✅ Classification successful!")
+        print(f"   Action: {result['action']}")
+        print(f"   Top Category: {result['top_category']}")
+        print(f"   Confidence: {result['top_confidence']}%")
+        print(f"   Processing Time: {result['processing_time_ms']}ms")
+        print(f"\n📊 Top 3 Alternative Categories:")
+        for alt in result["alternatives"][:3]:
+            print(f"   {alt['rank']}. {alt['category_path']}")
+            print(f"      Confidence: {alt['confidence_percentage']}%")
+        return True
+    else:
+        print(f"❌ Classification failed: {response.status_code}")
+        print(f"   Error: {response.text}")
+        return False
+def test_batch_classification():
+    """Test batch product classification"""
+    print("\n" + "=" * 80)
+    print("TEST 3: Batch Classification")
+    print("=" * 80)
+    # Multiple test products
+    products = [
+        {
+            "id": "prod_001",
+            "title": "Samsung Galaxy S24 Ultra",
+            "product_type": "Smartphone",
+            "vendor": "Samsung",
+            "tags": ["electronics", "phone", "mobile", "android"],
+        },
+        {
+            "id": "prod_002",
+            "title": "KitchenAid Stand Mixer",
+            "product_type": "Kitchen Appliance",
+            "vendor": "KitchenAid",
+            "tags": ["appliance", "kitchen", "cooking"],
+        },
+        {
+            "id": "prod_003",
+            "title": "Nike Air Zoom Running Shoes",
+            "product_type": "Athletic Footwear",
+            "vendor": "Nike",
+            "tags": ["shoes", "sports", "running", "athletic"],
+        },
+    ]
+    batch_request = {"products": products, "top_k": 3}
+    print(f"\n📦 Testing batch of {len(products)} products...")
+    response = requests.post(f"{BASE_URL}/classify-batch", json=batch_request)
+    if response.status_code == 200:
+        result = response.json()
+        print(f"\n✅ Batch classification successful!")
+        print(f"   Total products: {result['total_products']}")
+        print(f"   Processing time: {result['processing_time_ms']:.2f}ms")
+        print(
+            f"   Time per product: {result['processing_time_ms']/result['total_products']:.2f}ms"
+        )
+        print(f"\n📊 Action Distribution:")
+        for action, count in result["action_counts"].items():
+            percentage = (count / result["total_products"]) * 100
+            print(f"   {action}: {count} ({percentage:.1f}%)")
+        print(f"\n🎯 Individual Results:")
+        for res in result["results"]:
+            print(f"\n   • {res.get('product_id', 'N/A')}")
+            print(f"     Action: {res['action']}")
+            print(f"     Confidence: {res.get('top_confidence', 0)}%")
+            if res.get("top_category"):
+                print(f"     Category: {res['top_category'][:60]}...")
+        return True
+    else:
+        print(f"❌ Batch classification failed: {response.status_code}")
+        print(f"   Error: {response.text}")
+        return False
+def test_various_products():
+    """Test with various product types"""
+    print("\n" + "=" * 80)
+    print("TEST 4: Various Product Types")
+    print("=" * 80)
+    test_cases = [
+        {
+            "name": "Electronics",
+            "product": {
+                "title": "MacBook Pro 16 inch M3",
+                "product_type": "Laptop Computer",
+                "vendor": "Apple",
+                "tags": ["computer", "laptop", "electronics"],
+            },
+        },
+        {
+            "name": "Books",
+            "product": {
+                "title": "The Great Gatsby by F. Scott Fitzgerald",
+                "product_type": "Book",
+                "vendor": "Scribner",
+                "tags": ["books", "fiction", "literature", "classic"],
+            },
+        },
+        {
+            "name": "Home Appliances",
+            "product": {
+                "title": "Dyson V15 Detect Vacuum Cleaner",
+                "product_type": "Vacuum Cleaner",
+                "vendor": "Dyson",
+                "tags": ["appliance", "cleaning", "home", "cordless"],
+            },
+        },
+        {
+            "name": "Toys",
+            "product": {
+                "title": "LEGO Star Wars Millennium Falcon",
+                "product_type": "Building Toy",
+                "vendor": "LEGO",
+                "tags": ["toys", "kids", "lego", "star wars", "building"],
+            },
+        },
+    ]
+    results_summary = []
+    for test_case in test_cases:
+        print(f"\n🧪 Testing: {test_case['name']}")
+        print(f"   Product: {test_case['product']['title']}")
+        response = requests.post(f"{BASE_URL}/classify", json=test_case["product"])
+        if response.status_code == 200:
+            result = response.json()
+            confidence = result["top_confidence"]
+            action = result["action"]
+            emoji = (
+                "✅"
+                if action == "AUTO_APPROVE"
+                else "⚠️" if action == "QUICK_REVIEW" else "❌"
+            )
+            print(f"   {emoji} {action}: {confidence}%")
+            results_summary.append(
+                {
+                    "category": test_case["name"],
+                    "confidence": confidence,
+                    "action": action,
+                }
+            )
+        else:
+            print(f"   ❌ Failed: {response.status_code}")
+            results_summary.append(
+                {"category": test_case["name"], "confidence": 0, "action": "ERROR"}
+            )
+    # Print summary
+    print(f"\n📈 SUMMARY:")
+    print("-" * 80)
+    avg_confidence = sum(r["confidence"] for r in results_summary) / len(
+        results_summary
+    )
+    auto_approve_count = sum(
+        1 for r in results_summary if r["action"] == "AUTO_APPROVE"
+    )
+    print(f"Average Confidence: {avg_confidence:.2f}%")
+    print(
+        f"Auto-Approve Rate: {auto_approve_count}/{len(results_summary)} ({auto_approve_count/len(results_summary)*100:.1f}%)"
+    )
+    return True
+def run_all_tests():
+    """Run all tests"""
+    print("\n" + "=" * 80)
+    print("🧪 RUNNING ALL API TESTS")
+    print("=" * 80)
+    print("\nMake sure API is running: uvicorn src.api:app --reload")
+    tests = [
+        ("Health Check", test_health),
+        ("Single Classification", test_single_classification),
+        ("Batch Classification", test_batch_classification),
+        ("Various Products", test_various_products),
+    ]
+    results = []
+    for test_name, test_func in tests:
+        try:
+            result = test_func()
+            results.append((test_name, result))
+        except requests.exceptions.ConnectionError:
+            print(f"\n❌ Connection Error: Is the API running?")
+            print("   Start it with: uvicorn src.api:app --reload")
+            return
+        except Exception as e:
+            print(f"\n❌ Error in {test_name}: {e}")
+            results.append((test_name, False))
+    # Final summary
+    print("\n" + "=" * 80)
+    print("📊 TEST RESULTS SUMMARY")
+    print("=" * 80)
+    for test_name, result in results:
+        status = "✅ PASS" if result else "❌ FAIL"
+        print(f"{status} - {test_name}")
+    passed = sum(1 for _, r in results if r)
+    total = len(results)
+    print(f"\n🎯 Overall: {passed}/{total} tests passed ({passed/total*100:.1f}%)")
+    if passed == total:
+        print("\n🎉 ALL TESTS PASSED! Your API is working perfectly!")
+    else:
+        print(f"\n⚠️  Some tests failed. Check the errors above.")
+# Run the whole suite only when this file is executed as a script, not when
+# it is imported.
+if __name__ == "__main__":
+    run_all_tests()