issaennab committed on
Commit d2a2955 · 1 Parent(s): 586dd8f

Deploy QuickDraw API with trained model and comprehensive logging
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.keras filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,33 @@
+# Hugging Face Space Dockerfile for QuickDraw API
+FROM python:3.10-slim
+
+# Create a non-root user
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR /app
+
+# Install system dependencies as root, then drop back to the user
+USER root
+RUN apt-get update && apt-get install -y \
+    libgomp1 \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+USER user
+
+# Copy requirements and install
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+# Copy application files
+COPY --chown=user . /app
+
+# Create directories for logs
+RUN mkdir -p api_logs/received_images
+
+# Expose port 7860 (required by HF Spaces)
+EXPOSE 7860
+
+# Start the API on port 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,84 @@
 ---
-title: Quickdraw Api
-emoji:
+title: QuickDraw Sketch Recognition API
+emoji: 🎨
 colorFrom: blue
-colorTo: gray
+colorTo: purple
 sdk: docker
 pinned: false
 license: mit
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# QuickDraw Sketch Recognition API
+
+Real-time sketch recognition API for VR/AR applications. Recognizes 46 different hand-drawn objects using a CNN trained on Google's QuickDraw dataset.
+
+## 🎯 Try It Out
+
+Once the Space is running, you can:
+
+### Test via Swagger UI
+Visit the API docs at: `https://issa-ennab-quickdraw-api.hf.space/docs`
+
+### Test via cURL
+```bash
+# Health check
+curl https://issa-ennab-quickdraw-api.hf.space/health
+
+# Get supported classes
+curl https://issa-ennab-quickdraw-api.hf.space/classes
+
+# Make a prediction (replace with your base64 image)
+curl -X POST https://issa-ennab-quickdraw-api.hf.space/predict/base64 \
+  -H "Content-Type: application/json" \
+  -d '{"image_base64": "YOUR_BASE64_IMAGE", "top_k": 3}'
+```
+
+### Unity/VR Integration
+```csharp
+private string apiUrl = "https://issa-ennab-quickdraw-api.hf.space/predict/base64";
+```
+
+## 📋 Supported Classes (46 total)
+
+**Animals:** cat, dog, bird, fish, bear, butterfly, spider
+**Buildings:** house, castle, barn, bridge, lighthouse, church
+**Transportation:** car, airplane, bicycle, truck, train
+**Nature:** tree, flower, sun, moon, cloud, mountain
+**Objects:** apple, banana, book, chair, table, cup, umbrella
+**Body Parts:** face, eye, hand, foot
+**Shapes:** circle, triangle, square, star
+**Tools:** sword, axe, hammer, key, crown
+**Music:** guitar, piano
+
+## 🔧 API Endpoints
+
+- `GET /` - API information
+- `GET /health` - Health check
+- `GET /classes` - List all supported classes
+- `POST /predict` - Upload an image file for prediction
+- `POST /predict/base64` - Send a base64-encoded image (recommended for VR)
+
+## 🎮 Perfect For
+
+- VR/AR drawing applications
+- Educational games
+- Real-time sketch recognition
+- Interactive art tools
+
+## 📊 Model Performance
+
+- **Accuracy:** 84.89% on the validation set
+- **Inference Time:** ~50-80ms on CPU
+- **Model Size:** 2.9 MB
+- **Input:** 28x28 grayscale images
+
+## 📖 Full Documentation
+
+[GitHub Repository](https://github.com/Beakal-23/Augmented-Reality--Image-Detector-Final-Project-)
+
+## 🚀 Built With
+
+- FastAPI for the REST API
+- TensorFlow/Keras for the CNN model
+- Google QuickDraw dataset
+- Docker for deployment
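
For callers scripting against the Space, the JSON body expected by `POST /predict/base64` can be assembled as follows. This is a minimal sketch: `build_predict_payload` is an illustrative helper, not part of the deployed API.

```python
import base64
import json

def build_predict_payload(image_bytes: bytes, top_k: int = 3) -> str:
    """Serialize the JSON body expected by POST /predict/base64."""
    return json.dumps({
        "image_base64": base64.b64encode(image_bytes).decode("ascii"),
        "top_k": top_k,
    })

# Example with dummy bytes; in practice read your PNG/JPG file instead.
payload = build_predict_payload(b"\x89PNG dummy bytes", top_k=3)
```

Send it with any HTTP client, e.g. `requests.post(url, data=payload, headers={"Content-Type": "application/json"})`.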
app.py ADDED
@@ -0,0 +1,317 @@
+"""
+FastAPI application for QuickDraw sketch recognition.
+Exposes API endpoints for VR/AR applications to classify drawings.
+"""
+from fastapi import FastAPI, File, UploadFile, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import List, Optional
+import uvicorn
+import logging
+import os
+import base64
+from datetime import datetime
+import json
+
+from model import SketchClassifier
+from utils import preprocess_image_from_bytes, preprocess_image_from_base64
+
+# Configure comprehensive logging
+LOG_DIR = "api_logs"
+IMAGES_LOG_DIR = os.path.join(LOG_DIR, "received_images")
+os.makedirs(LOG_DIR, exist_ok=True)
+os.makedirs(IMAGES_LOG_DIR, exist_ok=True)
+
+# Set up logging to both file and console
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler(os.path.join(LOG_DIR, 'api.log')),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+# Create a separate logger for request details
+request_logger = logging.getLogger("requests")
+request_handler = logging.FileHandler(os.path.join(LOG_DIR, 'requests_detailed.log'))
+request_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+request_logger.addHandler(request_handler)
+request_logger.setLevel(logging.INFO)
+
+# Initialize the FastAPI app
+app = FastAPI(
+    title="QuickDraw Sketch Recognition API",
+    description="API for recognizing hand-drawn sketches for VR/AR applications",
+    version="1.0.0"
+)
+
+# CORS middleware - adjust origins based on your VR application's needs
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, specify your VR app's origin
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Initialize the model (singleton)
+classifier = None
+
+
+class PredictionRequest(BaseModel):
+    """Request model for a base64-encoded image"""
+    image_base64: str
+    top_k: Optional[int] = 3
+
+
+class PredictionResponse(BaseModel):
+    """Response model for predictions"""
+    predictions: List[dict]
+    success: bool
+    message: Optional[str] = None
+
+
+@app.on_event("startup")
+async def startup_event():
+    """Load the model on startup"""
+    global classifier
+    try:
+        logger.info("Loading QuickDraw model...")
+        classifier = SketchClassifier()
+        logger.info("Model loaded successfully!")
+    except Exception as e:
+        logger.error(f"Failed to load model: {e}")
+        raise
+
+
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {
+        "message": "QuickDraw Sketch Recognition API",
+        "version": "1.0.0",
+        "endpoints": {
+            "/health": "Health check",
+            "/predict": "Predict from uploaded image file (POST)",
+            "/predict/base64": "Predict from base64 encoded image (POST)",
+            "/classes": "Get list of supported classes (GET)"
+        }
+    }
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    model_loaded = classifier is not None
+    return {
+        "status": "healthy" if model_loaded else "unhealthy",
+        "model_loaded": model_loaded
+    }
+
+
+@app.get("/classes")
+async def get_classes():
+    """Get the list of supported drawing classes"""
+    if classifier is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+
+    return {
+        "classes": classifier.class_names,
+        "num_classes": len(classifier.class_names)
+    }
+
+
+@app.post("/predict", response_model=PredictionResponse)
+async def predict_from_file(
+    file: UploadFile = File(...),
+    top_k: int = 3
+):
+    """
+    Predict the drawing class from an uploaded image file.
+
+    Args:
+        file: Image file (PNG, JPG, etc.)
+        top_k: Number of top predictions to return (default: 3)
+
+    Returns:
+        PredictionResponse with top predictions and confidence scores
+    """
+    if classifier is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+
+    # Generate a unique request ID
+    request_id = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+
+    logger.info("=" * 80)
+    logger.info(f"[FILE-REQUEST {request_id}] New file upload prediction")
+    logger.info(f"[FILE-REQUEST {request_id}] Filename: {file.filename}")
+    logger.info(f"[FILE-REQUEST {request_id}] Content-Type: {file.content_type}")
+    logger.info(f"[FILE-REQUEST {request_id}] Top K: {top_k}")
+
+    try:
+        # Read the image bytes
+        image_bytes = await file.read()
+        logger.info(f"[FILE-REQUEST {request_id}] File size: {len(image_bytes)} bytes")
+
+        # Save the uploaded file
+        uploaded_file = os.path.join(IMAGES_LOG_DIR, f"uploaded_{request_id}_{file.filename}")
+        with open(uploaded_file, 'wb') as f:
+            f.write(image_bytes)
+        logger.info(f"[FILE-REQUEST {request_id}] File saved to: {uploaded_file}")
+
+        # Preprocess the image
+        logger.info(f"[FILE-REQUEST {request_id}] Preprocessing image...")
+        processed_image = preprocess_image_from_bytes(image_bytes)
+        logger.info(f"[FILE-REQUEST {request_id}] Preprocessed shape: {processed_image.shape}")
+
+        # Make a prediction
+        logger.info(f"[FILE-REQUEST {request_id}] Running inference...")
+        predictions = classifier.predict(processed_image, top_k=top_k)
+
+        # Log the predictions
+        logger.info(f"[FILE-REQUEST {request_id}] PREDICTIONS:")
+        for i, pred in enumerate(predictions, 1):
+            logger.info(f"[FILE-REQUEST {request_id}]   {i}. {pred['class']}: {pred['confidence_percent']}")
+
+        logger.info(f"[FILE-REQUEST {request_id}] ✓ Success")
+        logger.info("=" * 80)
+
+        return PredictionResponse(
+            predictions=predictions,
+            success=True,
+            message=f"Prediction successful (Request ID: {request_id})"
+        )
+
+    except Exception as e:
+        logger.error(f"[FILE-REQUEST {request_id}] ✗ FAILED: {e}")
+        logger.info("=" * 80)
+        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
+
+
+@app.post("/predict/base64", response_model=PredictionResponse)
+async def predict_from_base64(request: PredictionRequest, http_request: Request):
+    """
+    Predict the drawing class from a base64-encoded image.
+    Ideal for VR/AR applications sending image data directly.
+
+    Args:
+        request: PredictionRequest containing the base64 image and optional top_k
+        http_request: FastAPI request object, used for logging
+
+    Returns:
+        PredictionResponse with top predictions and confidence scores
+    """
+    if classifier is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+
+    # Generate a unique request ID
+    request_id = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+
+    # Log incoming request details
+    logger.info("=" * 80)
+    logger.info(f"[REQUEST {request_id}] New prediction request from VR")
+    logger.info(f"[REQUEST {request_id}] Client: {http_request.client.host}:{http_request.client.port}")
+    logger.info(f"[REQUEST {request_id}] User-Agent: {http_request.headers.get('user-agent', 'Unknown')}")
+    logger.info(f"[REQUEST {request_id}] Top K: {request.top_k}")
+
+    # Log base64 image details
+    base64_length = len(request.image_base64)
+    logger.info(f"[REQUEST {request_id}] Base64 image length: {base64_length} characters")
+    logger.info(f"[REQUEST {request_id}] Base64 prefix (first 100 chars): {request.image_base64[:100]}...")
+
+    # Save the base64 string to a file for debugging
+    base64_log_file = os.path.join(LOG_DIR, f"request_{request_id}_base64.txt")
+    with open(base64_log_file, 'w') as f:
+        f.write(request.image_base64)
+    logger.info(f"[REQUEST {request_id}] Base64 saved to: {base64_log_file}")
+
+    try:
+        # Decode and save the actual image
+        try:
+            image_data = base64.b64decode(request.image_base64)
+            image_file = os.path.join(IMAGES_LOG_DIR, f"request_{request_id}.png")
+            with open(image_file, 'wb') as f:
+                f.write(image_data)
+            logger.info(f"[REQUEST {request_id}] Decoded image saved to: {image_file}")
+            logger.info(f"[REQUEST {request_id}] Decoded image size: {len(image_data)} bytes")
+        except Exception as decode_error:
+            logger.warning(f"[REQUEST {request_id}] Failed to decode/save image: {decode_error}")
+
+        # Preprocess the image from base64
+        logger.info(f"[REQUEST {request_id}] Preprocessing image...")
+        processed_image = preprocess_image_from_base64(request.image_base64)
+        logger.info(f"[REQUEST {request_id}] Preprocessed image shape: {processed_image.shape}")
+
+        # Make a prediction
+        logger.info(f"[REQUEST {request_id}] Running model inference...")
+        predictions = classifier.predict(processed_image, top_k=request.top_k)
+
+        # Log the predictions
+        logger.info(f"[REQUEST {request_id}] PREDICTIONS:")
+        for i, pred in enumerate(predictions, 1):
+            logger.info(f"[REQUEST {request_id}]   {i}. {pred['class']}: {pred['confidence_percent']} (confidence: {pred['confidence']:.4f})")
+
+        # Save a detailed request log as JSON
+        request_log = {
+            "request_id": request_id,
+            "timestamp": datetime.now().isoformat(),
+            "client_ip": http_request.client.host,
+            "client_port": http_request.client.port,
+            "user_agent": http_request.headers.get('user-agent', 'Unknown'),
+            "base64_length": base64_length,
+            "image_file": image_file if 'image_file' in locals() else None,
+            "top_k": request.top_k,
+            "predictions": predictions,
+            "success": True
+        }
+
+        json_log_file = os.path.join(LOG_DIR, f"request_{request_id}.json")
+        with open(json_log_file, 'w') as f:
+            json.dump(request_log, f, indent=2)
+        logger.info(f"[REQUEST {request_id}] Full request log saved to: {json_log_file}")
+
+        logger.info(f"[REQUEST {request_id}] ✓ Prediction completed successfully")
+        logger.info("=" * 80)
+
+        return PredictionResponse(
+            predictions=predictions,
+            success=True,
+            message=f"Prediction successful (Request ID: {request_id})"
+        )
+
+    except Exception as e:
+        logger.error(f"[REQUEST {request_id}] ✗ Prediction FAILED")
+        logger.error(f"[REQUEST {request_id}] Error: {str(e)}")
+        logger.error(f"[REQUEST {request_id}] Error type: {type(e).__name__}")
+        logger.info("=" * 80)
+
+        # Save an error log
+        error_log = {
+            "request_id": request_id,
+            "timestamp": datetime.now().isoformat(),
+            "error": str(e),
+            "error_type": type(e).__name__,
+            "base64_length": base64_length,
+            "success": False
+        }
+        error_log_file = os.path.join(LOG_DIR, f"request_{request_id}_ERROR.json")
+        with open(error_log_file, 'w') as f:
+            json.dump(error_log, f, indent=2)
+
+        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
+
+
+if __name__ == "__main__":
+    # Run the API server (this module is app.py, so the import string is "app:app")
+    uvicorn.run(
+        "app:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True,
+        log_level="info"
+    )
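
The per-request JSON logging pattern above (a timestamp-derived request ID plus a `request_{id}.json` sidecar file) can be sketched in isolation with the standard library. `write_request_log` is a hypothetical helper for illustration, not a function in app.py:

```python
import json
import os
import tempfile
from datetime import datetime

def write_request_log(log_dir: str, predictions: list) -> str:
    """Write a JSON sidecar file like app.py's request_{id}.json and return its path."""
    request_id = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    record = {
        "request_id": request_id,
        "timestamp": datetime.now().isoformat(),
        "predictions": predictions,
        "success": True,
    }
    path = os.path.join(log_dir, f"request_{request_id}.json")
    with open(path, "w") as f:
        json.dump(record, f, indent=2)
    return path

log_dir = tempfile.mkdtemp()
log_path = write_request_log(log_dir, [{"class": "cat", "confidence": 0.9}])
```

The microsecond component of the timestamp keeps concurrent requests from colliding on the same file name in practice.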
config.py ADDED
@@ -0,0 +1,65 @@
+"""
+Configuration settings for the QuickDraw API.
+Modify these settings based on your deployment needs.
+"""
+import os
+from typing import List
+
+
+class Settings:
+    """Application settings"""
+
+    # API settings
+    API_TITLE: str = "QuickDraw Sketch Recognition API"
+    API_VERSION: str = "1.0.0"
+    API_DESCRIPTION: str = "API for recognizing hand-drawn sketches for VR/AR applications"
+
+    # Server settings
+    HOST: str = "0.0.0.0"
+    PORT: int = 8000
+    RELOAD: bool = False  # Set to True for development
+
+    # CORS settings
+    CORS_ORIGINS: List[str] = ["*"]  # In production, specify allowed origins
+    CORS_ALLOW_CREDENTIALS: bool = True
+    CORS_ALLOW_METHODS: List[str] = ["*"]
+    CORS_ALLOW_HEADERS: List[str] = ["*"]
+
+    # Model settings
+    MODEL_PATH: str = os.path.join("saved_models", "quickdraw_house_cat_dog_car.keras")
+    CLASS_NAMES: List[str] = [
+        # Animals (7)
+        "cat", "dog", "bird", "fish", "bear", "butterfly", "spider",
+        # Buildings & structures (6)
+        "house", "castle", "barn", "bridge", "lighthouse", "church",
+        # Transportation (5)
+        "car", "airplane", "bicycle", "truck", "train",
+        # Nature (6)
+        "tree", "flower", "sun", "moon", "cloud", "mountain",
+        # Common objects (7)
+        "apple", "banana", "book", "chair", "table", "cup", "umbrella",
+        # People & body (4)
+        "face", "eye", "hand", "foot",
+        # Shapes (4)
+        "circle", "triangle", "square", "star",
+        # Tools & items (5)
+        "sword", "axe", "hammer", "key", "crown",
+        # Musical instruments (2)
+        "guitar", "piano"
+    ]
+
+    # Prediction settings
+    DEFAULT_TOP_K: int = 3
+    CONFIDENCE_THRESHOLD: float = 0.5  # Minimum confidence for a valid prediction
+
+    # Image processing settings
+    INPUT_IMAGE_SIZE: tuple = (28, 28)
+    GRAYSCALE: bool = True
+    NORMALIZE: bool = True  # Normalize pixel values to [0, 1]
+
+    # Logging
+    LOG_LEVEL: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
+
+
+# Create a singleton instance
+settings = Settings()
model.py ADDED
@@ -0,0 +1,132 @@
+"""
+Model inference module for QuickDraw sketch classification.
+Handles model loading and prediction logic.
+"""
+import os
+import numpy as np
+import tensorflow as tf
+from typing import Any, Dict, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class SketchClassifier:
+    """QuickDraw sketch classifier"""
+
+    def __init__(self, model_path: str = None):
+        """
+        Initialize the classifier with a trained model.
+
+        Args:
+            model_path: Path to the trained model file. If None, uses the default path.
+        """
+        # Extended class list matching Model-Training.py.
+        # NOTE: the order and length of this list must match the model's
+        # output layer (the class order used during training).
+        self.class_names = [
+            # Animals
+            "cat", "dog", "bird", "fish", "bear", "butterfly", "bee", "spider",
+            # Buildings & structures
+            "house", "castle", "barn", "bridge", "lighthouse", "church",
+            # Transportation
+            "car", "airplane", "bicycle", "boat", "train", "truck", "bus",
+            # Nature
+            "tree", "flower", "sun", "moon", "cloud", "mountain", "river",
+            # Common objects
+            "apple", "banana", "book", "chair", "table", "cup", "umbrella",
+            # People & body
+            "face", "eye", "hand", "foot",
+            # Shapes & symbols
+            "circle", "triangle", "square", "star", "heart",
+            # Tools & items
+            "sword", "axe", "hammer", "key", "crown"
+        ]
+
+        # Default model path
+        if model_path is None:
+            model_path = os.path.join("saved_models", "quickdraw_house_cat_dog_car.keras")
+
+        # Check whether the model exists
+        if not os.path.exists(model_path):
+            # Try the .h5 format as a fallback
+            h5_path = model_path.replace(".keras", ".h5")
+            if os.path.exists(h5_path):
+                model_path = h5_path
+                logger.info(f"Using H5 model format: {model_path}")
+            else:
+                raise FileNotFoundError(
+                    f"Model file not found at {model_path}. "
+                    "Please train the model first using Model-Training.py"
+                )
+
+        logger.info(f"Loading model from: {model_path}")
+        self.model = tf.keras.models.load_model(model_path)
+        logger.info("Model loaded successfully!")
+
+        # Record the expected input shape
+        self.input_shape = self.model.input_shape[1:]  # e.g. (28, 28, 1)
+        logger.info(f"Model input shape: {self.input_shape}")
+
+    def predict(self, image: np.ndarray, top_k: int = 3) -> List[Dict[str, Any]]:
+        """
+        Make a prediction on a preprocessed image.
+
+        Args:
+            image: Preprocessed image array of shape (1, 28, 28, 1)
+            top_k: Number of top predictions to return
+
+        Returns:
+            List of dictionaries containing class names and confidence scores
+        """
+        # Validate the input shape
+        if image.shape != (1, 28, 28, 1):
+            raise ValueError(
+                f"Expected input shape (1, 28, 28, 1), got {image.shape}. "
+                "Please preprocess the image first."
+            )
+
+        # Make a prediction
+        predictions = self.model.predict(image, verbose=0)
+
+        # Get the top k predictions
+        top_indices = np.argsort(predictions[0])[::-1][:top_k]
+
+        results = []
+        for idx in top_indices:
+            results.append({
+                "class": self.class_names[idx],
+                "confidence": float(predictions[0][idx]),
+                "confidence_percent": f"{predictions[0][idx] * 100:.2f}%"
+            })
+
+        return results
+
+    def predict_batch(self, images: np.ndarray, top_k: int = 3) -> List[List[Dict[str, Any]]]:
+        """
+        Make predictions on a batch of preprocessed images.
+
+        Args:
+            images: Batch of preprocessed images of shape (N, 28, 28, 1)
+            top_k: Number of top predictions to return per image
+
+        Returns:
+            List of prediction results for each image
+        """
+        predictions = self.model.predict(images, verbose=0)
+
+        results = []
+        for pred in predictions:
+            # Get the top k predictions for this image
+            top_indices = np.argsort(pred)[::-1][:top_k]
+
+            image_results = []
+            for idx in top_indices:
+                image_results.append({
+                    "class": self.class_names[idx],
+                    "confidence": float(pred[idx]),
+                    "confidence_percent": f"{pred[idx] * 100:.2f}%"
+                })
+
+            results.append(image_results)
+
+        return results
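
The top-k selection inside `predict` boils down to an `argsort` over the softmax output. A self-contained illustration with a synthetic probability vector and placeholder labels (none of these values come from the real model):

```python
import numpy as np

probs = np.array([0.05, 0.70, 0.10, 0.15])  # fake softmax output
names = ["cat", "dog", "bird", "fish"]      # placeholder labels

# Indices of the highest probabilities, descending (same idiom as model.py)
top_indices = np.argsort(probs)[::-1][:3]
top = [(names[i], float(probs[i])) for i in top_indices]
# top == [("dog", 0.7), ("fish", 0.15), ("bird", 0.1)]
```

Reversing the ascending `argsort` and slicing the first k entries is equivalent to sorting by probability in descending order.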
requirements.txt ADDED
@@ -0,0 +1,30 @@
+# QuickDraw Sketch Recognition API
+# Compatible with Python 3.10+ on Windows, macOS (Intel & Apple Silicon), and Linux
+
+# Core dependencies
+fastapi>=0.115.2
+uvicorn[standard]>=0.24.0
+pydantic>=2.7.4
+python-multipart>=0.0.18
+
+# ML/AI libraries
+tensorflow>=2.15.0
+numpy>=1.25.0,<2.0  # TensorFlow 2.15 requires numpy < 2.0
+scikit-learn>=1.3.2
+matplotlib>=3.8.2
+
+# Image processing
+Pillow>=10.1.0
+
+# ONNX support (optional, for model export)
+tf2onnx>=1.15.1
+onnx>=1.15.0
+onnxruntime>=1.16.3
+
+# Development and testing
+pytest>=7.4.3
+httpx>=0.25.2
+requests>=2.32.2
+
+# Hugging Face integration
+huggingface-hub>=0.20.0
saved_models/quickdraw_house_cat_dog_car.h5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24dd8c8b1b1e19b927d937f8fae3ba1507ce312ee35e4f3e015591a327e3edfe
+size 3000896
saved_models/quickdraw_house_cat_dog_car.keras ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa5ca71b085fb590fed2d5a550154f905b90516c98617e3e0c8f665ce2bd6590
+size 2999536
saved_models/quickdraw_house_cat_dog_car.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c339e3d8798df6c473f15cb052e98f5bff92cc711e2ee4058f695b27f185ac6
+size 989107
utils.py ADDED
@@ -0,0 +1,199 @@
+"""
+Utility functions for image preprocessing.
+Handles various input formats: bytes, base64, PIL images, etc.
+"""
+import io
+import base64
+import numpy as np
+from PIL import Image
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def preprocess_image_from_bytes(image_bytes: bytes) -> np.ndarray:
+    """
+    Preprocess an image from raw bytes.
+
+    Args:
+        image_bytes: Raw image bytes (PNG, JPG, etc.)
+
+    Returns:
+        Preprocessed numpy array of shape (1, 28, 28, 1) normalized to [0, 1]
+    """
+    try:
+        # Load the image from bytes
+        image = Image.open(io.BytesIO(image_bytes))
+
+        # Convert to grayscale
+        image = image.convert('L')
+
+        # Resize to 28x28
+        image = image.resize((28, 28), Image.Resampling.LANCZOS)
+
+        # Convert to a numpy array
+        image_array = np.array(image, dtype=np.float32)
+
+        # Normalize to [0, 1]
+        image_array = image_array / 255.0
+
+        # Reshape to (1, 28, 28, 1) for model input
+        image_array = image_array.reshape(1, 28, 28, 1)
+
+        return image_array
+
+    except Exception as e:
+        logger.error(f"Error preprocessing image from bytes: {e}")
+        raise ValueError(f"Failed to process image: {str(e)}")
+
+
+def preprocess_image_from_base64(base64_string: str) -> np.ndarray:
+    """
+    Preprocess an image from a base64-encoded string.
+
+    Args:
+        base64_string: Base64-encoded image string (with or without a data URI prefix)
+
+    Returns:
+        Preprocessed numpy array of shape (1, 28, 28, 1) normalized to [0, 1]
+    """
+    try:
+        # Remove the data URI prefix if present (e.g. "data:image/png;base64,")
+        if ',' in base64_string and base64_string.startswith('data:'):
+            base64_string = base64_string.split(',', 1)[1]
+
+        # Decode base64 to bytes
+        image_bytes = base64.b64decode(base64_string)
+
+        # Reuse the bytes preprocessing function
+        return preprocess_image_from_bytes(image_bytes)
+
+    except Exception as e:
+        logger.error(f"Error preprocessing image from base64: {e}")
+        raise ValueError(f"Failed to process base64 image: {str(e)}")
+
+
+def preprocess_image_from_array(image_array: np.ndarray) -> np.ndarray:
+    """
+    Preprocess an image from a numpy array.
+    Handles various input shapes and formats.
+
+    Args:
+        image_array: Numpy array representing an image
+
+    Returns:
+        Preprocessed numpy array of shape (1, 28, 28, 1) normalized to [0, 1]
+    """
+    try:
+        # Convert to float32
+        image_array = image_array.astype(np.float32)
+
+        # Handle different input shapes
+        if len(image_array.shape) == 4:  # (batch, height, width, channels)
+            # Take the first image of the batch
+            image_array = image_array[0]
+
+        if len(image_array.shape) == 3:  # (height, width, channels)
+            if image_array.shape[2] == 3:
+                # Simple RGB-to-grayscale conversion (ITU-R 601 luma weights)
+                image_array = 0.299 * image_array[:, :, 0] + \
+                              0.587 * image_array[:, :, 1] + \
+                              0.114 * image_array[:, :, 2]
+            elif image_array.shape[2] == 1:
+                image_array = image_array.squeeze(-1)
+
+        # At this point image_array should be 2D (height, width)
+        if len(image_array.shape) != 2:
+            raise ValueError(f"Cannot process image with shape {image_array.shape}")
+
+        # Resize if needed. Scale up first if the array is already in [0, 1];
+        # otherwise casting to uint8 would zero out the image.
+        if image_array.shape != (28, 28):
+            if image_array.max() <= 1.0:
+                image_pil = Image.fromarray((image_array * 255).astype(np.uint8))
+            else:
+                image_pil = Image.fromarray(image_array.astype(np.uint8))
+            image_pil = image_pil.resize((28, 28), Image.Resampling.LANCZOS)
+            image_array = np.array(image_pil, dtype=np.float32)
+
+        # Normalize to [0, 1] if not already
+        if image_array.max() > 1.0:
+            image_array = image_array / 255.0
+
+        # Reshape to (1, 28, 28, 1)
+        image_array = image_array.reshape(1, 28, 28, 1)
+
+        return image_array
+
+    except Exception as e:
+        logger.error(f"Error preprocessing image from array: {e}")
+        raise ValueError(f"Failed to process image array: {str(e)}")
+
+
+def preprocess_stroke_data(strokes: list, canvas_size: int = 256) -> np.ndarray:
+    """
+    Convert stroke data (lists of coordinates) to a 28x28 image.
+    Useful if the VR application sends raw drawing coordinates.
+
+    Args:
+        strokes: List of strokes, where each stroke is a list of (x, y) coordinates.
+                 Example: [[(x1, y1), (x2, y2), ...], [(x3, y3), ...]]
+        canvas_size: Size of the virtual canvas (default: 256x256)
+
+    Returns:
+        Preprocessed numpy array of shape (1, 28, 28, 1) normalized to [0, 1]
+    """
+    try:
+        # Create a blank canvas
+        canvas = np.zeros((canvas_size, canvas_size), dtype=np.uint8)
+
+        # Draw the strokes onto the canvas
+        for stroke in strokes:
+            if len(stroke) < 2:
+                continue
+
+            # Draw lines between consecutive points
+            for i in range(len(stroke) - 1):
+                x1, y1 = stroke[i]
+                x2, y2 = stroke[i + 1]
+
+                # Simple line drawing via linear interpolation
+                # (Bresenham's algorithm would be more precise)
+                points = _interpolate_points(x1, y1, x2, y2)
+                for x, y in points:
+                    if 0 <= x < canvas_size and 0 <= y < canvas_size:
+                        canvas[int(y), int(x)] = 255
+
+        # Convert the canvas to a PIL Image for resizing
+        image = Image.fromarray(canvas)
+        image = image.resize((28, 28), Image.Resampling.LANCZOS)
+
+        # Convert to a numpy array and normalize
+        image_array = np.array(image, dtype=np.float32) / 255.0
+
+        # Reshape to (1, 28, 28, 1)
+        image_array = image_array.reshape(1, 28, 28, 1)
+
+        return image_array
+
+    except Exception as e:
+        logger.error(f"Error preprocessing stroke data: {e}")
+        raise ValueError(f"Failed to process stroke data: {str(e)}")
+
+
+def _interpolate_points(x1: float, y1: float, x2: float, y2: float, num_points: int = 10) -> list:
+    """
+    Interpolate points between two coordinates for smooth line drawing.
+
+    Args:
+        x1, y1: Start coordinates
+        x2, y2: End coordinates
+        num_points: Number of segments to interpolate
+
+    Returns:
+        List of (x, y) coordinate tuples
+    """
+    points = []
+    for i in range(num_points + 1):
+        t = i / num_points
+        x = x1 + t * (x2 - x1)
+        y = y1 + t * (y2 - y1)
+        points.append((x, y))
+    return points
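
As a quick sanity check, the linear interpolation that `_interpolate_points` performs can be reproduced standalone (a sketch mirroring the helper above, not an import of it):

```python
def interpolate_points(x1, y1, x2, y2, num_points=10):
    """Evenly spaced points on the segment (x1, y1) -> (x2, y2), endpoints included."""
    return [
        (x1 + (i / num_points) * (x2 - x1), y1 + (i / num_points) * (y2 - y1))
        for i in range(num_points + 1)
    ]

pts = interpolate_points(0, 0, 10, 0, num_points=5)
# num_points counts segments, so 6 points come back and both endpoints are exact
```

Because the loop runs `num_points + 1` times, `num_points` is the number of segments, not points, which is why the stroke rasterizer always hits both endpoints of each line.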