Spaces:

Eli181927
/

Classification-doodle-RNN

Sleeping

App Files Files Community

Elliot Sones committed on Dec 14, 2025

Commit

5fcc2e6

1 Parent(s): 76aaddb

Switch to Gradio with custom canvas for HF Spaces

Browse files

Files changed (3) hide show

README.md +7 -7
app.py +208 -139
requirements.txt +1 -2

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Animal Doodle Classifier
 emoji: 🎨
 colorFrom: blue
 colorTo: purple
-sdk: streamlit
-sdk_version: "1.28.0"
 app_file: app.py
 pinned: false
 ---
@@ -14,8 +14,7 @@ pinned: false
 An RNN-based classifier that recognizes hand-drawn animal doodles in real-time!
 ## Supported Animals
-- butterfly, cow, elephant, giraffe, monkey
-- octopus, scorpion, shark, snake, spider
 ## Model
 - **Architecture:** Bidirectional GRU
@@ -24,5 +23,6 @@ An RNN-based classifier that recognizes hand-drawn animal doodles in real-time!
 ## How It Works
 1. Draw an animal on the canvas
-2. Your strokes are captured and preprocessed to match Quick Draw format
-3. The RNN model predicts which animal you drew

 ---
+title: Classification Doodle RNN
 emoji: 🎨
 colorFrom: blue
 colorTo: purple
+sdk: gradio
+sdk_version: "4.44.0"
 app_file: app.py
 pinned: false
 ---
 An RNN-based classifier that recognizes hand-drawn animal doodles in real-time!
 ## Supported Animals
+butterfly, cow, elephant, giraffe, monkey, octopus, scorpion, shark, snake, spider
 ## Model
 - **Architecture:** Bidirectional GRU
 ## How It Works
 1. Draw an animal on the canvas
+2. Click **Predict**
+3. Your strokes are captured and preprocessed to match the Quick Draw format
+4. The RNN model predicts which animal you drew

app.py CHANGED Viewed

@@ -1,18 +1,17 @@
 """
-RNN Animal Doodle Classifier - Hugging Face Spaces
-Self-contained Streamlit app with embedded model class
 """
 import ast
 import json
 from pathlib import Path
 import numpy as np
-import streamlit as st
-from streamlit_drawable_canvas import st_canvas
 import torch
 from torch import nn
 # ============================================================================
-# Model Definition (embedded from training-doodle.py)
 # ============================================================================
 class GRUClassifier(nn.Module):
@@ -22,11 +21,8 @@ class GRUClassifier(nn.Module):
         super().__init__()
         self.use_packing = use_packing
         self.gru = nn.GRU(
-            input_size=input_size,
-            hidden_size=hidden_size,
-            num_layers=num_layers,
-            batch_first=True,
-            bidirectional=bidirectional,
             dropout=dropout if num_layers > 1 else 0.0,
         )
         out_dim = hidden_size * (2 if bidirectional else 1)
@@ -35,27 +31,21 @@ class GRUClassifier(nn.Module):
     def forward(self, x: torch.Tensor, lengths: torch.Tensor):
         if self.use_packing:
-            packed = nn.utils.rnn.pack_padded_sequence(
-                x, lengths.cpu(), batch_first=True, enforce_sorted=False
-            )
             _, h_n = self.gru(packed)
         else:
             _, h_n = self.gru(x)
-        if self.gru.bidirectional:
-            h = torch.cat([h_n[-2], h_n[-1]], dim=1)
-        else:
-            h = h_n[-1]
-        h = self.norm(h)
-        return self.fc(h)
 def parse_drawing_to_seq(drawing_str: str) -> np.ndarray:
     """Convert drawing JSON to sequence of [dx, dy, pen_lift]."""
     try:
         strokes = json.loads(drawing_str)
-    except Exception:
         try:
             strokes = ast.literal_eval(drawing_str)
-        except Exception:
             return np.zeros((0, 3), dtype=np.float32)
     seq_parts = []
@@ -78,114 +68,45 @@ def parse_drawing_to_seq(drawing_str: str) -> np.ndarray:
     if not seq_parts:
         return np.zeros((0, 3), dtype=np.float32)
     seq = np.concatenate(seq_parts, axis=0)
     seq[:, :2] = np.clip(seq[:, :2], -1.0, 1.0)
     return seq.astype(np.float32)
 # ============================================================================
-# Constants
 # ============================================================================
-CANVAS_SIZE = 400
-STROKE_WIDTH = 3
 ANIMALS = ["butterfly", "cow", "elephant", "giraffe", "monkey",
            "octopus", "scorpion", "shark", "snake", "spider"]
-CALIB_TARGET_MEAN = 0.04
-CALIB_MAX_GAIN = 12.0
-CALIB_MIN_GAIN = 0.5
-def _calibrate_seq(seq: np.ndarray) -> np.ndarray:
-    """Scale (dx, dy) so the mean step magnitude matches training data."""
-    if seq is None or seq.ndim != 2 or seq.shape[1] < 2 or seq.shape[0] == 0:
         return seq
     steps = np.sqrt((seq[:, 0] ** 2) + (seq[:, 1] ** 2))
     curr = float(steps.mean()) if steps.size else 0.0
     if curr <= 1e-6:
         return seq
-    gain = float(np.clip(CALIB_TARGET_MEAN / curr, CALIB_MIN_GAIN, CALIB_MAX_GAIN))
     out = seq.astype(np.float32).copy()
     out[:, 0:2] = np.clip(out[:, 0:2] * gain, -1.0, 1.0)
     return out
-# ============================================================================
-# Model Loading
-# ============================================================================
-@st.cache_resource
-def load_model():
-    """Load the trained RNN model."""
-    model_path = Path(__file__).parent / "rnn_animals_best.pt"
-    if not model_path.exists():
-        st.error(f"Model file not found: {model_path}")
-        return None, None
-    checkpoint = torch.load(model_path, map_location="cpu", weights_only=False)
-    cfg = checkpoint.get("config", {})
-    model = GRUClassifier(
-        input_size=3,
-        hidden_size=cfg.get("hidden_size", 512),
-        num_layers=cfg.get("num_layers", 2),
-        bidirectional=cfg.get("bidirectional", True),
-        dropout=cfg.get("dropout", 0.3),
-        num_classes=len(ANIMALS),
-        use_packing=True
-    )
-    model.load_state_dict(checkpoint["model_state"])
-    model.eval()
-    class_to_idx = checkpoint.get("class_to_idx", {a: i for i, a in enumerate(ANIMALS)})
-    idx_to_class = {v: k for k, v in class_to_idx.items()}
-    return model, idx_to_class
-# ============================================================================
-# Stroke Processing
-# ============================================================================
-def canvas_strokes_to_quickdraw(canvas_json):
-    """Convert canvas to QuickDraw format with preprocessing."""
-    if canvas_json is None:
-        return []
-    objects = canvas_json.get("objects", [])
-    raw_strokes = []
-    for obj in objects:
-        if obj.get("type") != "path":
-            continue
-        path = obj.get("path", [])
-        xs, ys = [], []
-        for cmd in path:
-            if len(cmd) < 3:
-                continue
-            if cmd[0] == "M":
-                xs.append(float(cmd[1]))
-                ys.append(float(cmd[2]))
-            elif cmd[0] == "Q" and len(cmd) >= 5:
-                xs.append(float(cmd[3]))
-                ys.append(float(cmd[4]))
-            elif cmd[0] == "L":
-                xs.append(float(cmd[1]))
-                ys.append(float(cmd[2]))
-        if len(xs) >= 2:
-            raw_strokes.append((xs, ys))
     if not raw_strokes:
         return []
     # Downsample
-    downsampled = []
     for xs, ys in raw_strokes:
         if len(xs) > 25:
             step = max(1, len(xs) // 25)
             xs, ys = xs[::step], ys[::step]
-        downsampled.append((xs, ys))
     # Smooth
     smoothed = []
-    for xs, ys in downsampled:
         if len(xs) >= 3:
             xs_s = [xs[0]] + [(xs[i-1]+xs[i]+xs[i+1])/3 for i in range(1, len(xs)-1)] + [xs[-1]]
             ys_s = [ys[0]] + [(ys[i-1]+ys[i]+ys[i+1])/3 for i in range(1, len(ys)-1)] + [ys[-1]]
@@ -196,9 +117,11 @@ def canvas_strokes_to_quickdraw(canvas_json):
     # Center and scale
     all_x = [x for xs, _ in smoothed for x in xs]
     all_y = [y for _, ys in smoothed for y in ys]
     min_x, max_x = min(all_x), max(all_x)
     min_y, max_y = min(all_y), max(all_y)
     scale = 235 / max(max(1, max_x - min_x), max(1, max_y - min_y))
     cx, cy = (min_x + max_x) / 2, (min_y + max_y) / 2
     ox, oy = 127.5 - cx * scale, 127.5 - cy * scale
@@ -210,56 +133,202 @@ def canvas_strokes_to_quickdraw(canvas_json):
         result.append([xs_n, ys_n])
     return result
-def predict(model, idx_to_class, strokes):
-    """Make prediction from strokes."""
-    if not strokes or model is None:
-        return None
     try:
-        seq = parse_drawing_to_seq(json.dumps(strokes))
-        if seq is None or len(seq) < 6:
-            return None
         seq = _calibrate_seq(seq)
         seq_t = torch.tensor(seq, dtype=torch.float32).unsqueeze(0)
         lengths = torch.tensor([seq.shape[0]], dtype=torch.long)
         with torch.no_grad():
-            probs = torch.softmax(model(seq_t, lengths), dim=1)
-            top_p, top_i = torch.topk(probs, k=5, dim=1)
-        return [(idx_to_class.get(top_i[0,i].item()), top_p[0,i].item()) for i in range(5)]
     except Exception as e:
-        st.error(f"Error: {e}")
-        return None
 # ============================================================================
-# Main App
 # ============================================================================
-def main():
-    st.set_page_config(page_title="Animal Doodle Classifier", page_icon="🎨", layout="wide")
-    st.title("🎨 Animal Doodle Classifier")
-    st.caption("Draw: butterfly, cow, elephant, giraffe, monkey, octopus, scorpion, shark, snake, spider")
-    model, idx_to_class = load_model()
-    if model is None:
-        return
-    col1, col2 = st.columns([1, 1])
-    with col1:
-        canvas = st_canvas(
-            stroke_width=STROKE_WIDTH, stroke_color="#000000",
-            background_color="#FFFFFF", height=CANVAS_SIZE, width=CANVAS_SIZE,
-            drawing_mode="freedraw", key="canvas"
-        )
-    with col2:
-        st.subheader("Predictions")
-        if canvas.json_data:
-            strokes = canvas_strokes_to_quickdraw(canvas.json_data)
-            if strokes:
-                results = predict(model, idx_to_class, strokes)
-                if results:
-                    st.success(f"**{results[0][0].upper()}** ({results[0][1]*100:.1f}%)")
-                    for name, prob in results:
-                        st.progress(prob, text=f"{name}: {prob*100:.1f}%")
 if __name__ == "__main__":
-    main()

 """
+RNN Animal Doodle Classifier - Gradio App for HF Spaces
+Uses a custom HTML canvas to capture raw stroke coordinates (vector strokes, not a rasterized image)
 """
 import ast
 import json
 from pathlib import Path
 import numpy as np
+import gradio as gr
 import torch
 from torch import nn
 # ============================================================================
+# Model Definition
 # ============================================================================
 class GRUClassifier(nn.Module):
         super().__init__()
         self.use_packing = use_packing
         self.gru = nn.GRU(
+            input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
+            batch_first=True, bidirectional=bidirectional,
             dropout=dropout if num_layers > 1 else 0.0,
         )
         out_dim = hidden_size * (2 if bidirectional else 1)
     def forward(self, x: torch.Tensor, lengths: torch.Tensor):
         if self.use_packing:
+            packed = nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
             _, h_n = self.gru(packed)
         else:
             _, h_n = self.gru(x)
+        h = torch.cat([h_n[-2], h_n[-1]], dim=1) if self.gru.bidirectional else h_n[-1]
+        return self.fc(self.norm(h))
 def parse_drawing_to_seq(drawing_str: str) -> np.ndarray:
     """Convert drawing JSON to sequence of [dx, dy, pen_lift]."""
     try:
         strokes = json.loads(drawing_str)
+    except:
         try:
             strokes = ast.literal_eval(drawing_str)
+        except:
             return np.zeros((0, 3), dtype=np.float32)
     seq_parts = []
     if not seq_parts:
         return np.zeros((0, 3), dtype=np.float32)
     seq = np.concatenate(seq_parts, axis=0)
     seq[:, :2] = np.clip(seq[:, :2], -1.0, 1.0)
     return seq.astype(np.float32)
 # ============================================================================
+# Constants & Utils
 # ============================================================================
 ANIMALS = ["butterfly", "cow", "elephant", "giraffe", "monkey",
            "octopus", "scorpion", "shark", "snake", "spider"]
+def _calibrate_seq(seq, target=0.04, max_gain=12.0, min_gain=0.5):
+    if seq is None or len(seq) == 0:
         return seq
     steps = np.sqrt((seq[:, 0] ** 2) + (seq[:, 1] ** 2))
     curr = float(steps.mean()) if steps.size else 0.0
     if curr <= 1e-6:
         return seq
+    gain = float(np.clip(target / curr, min_gain, max_gain))
     out = seq.astype(np.float32).copy()
     out[:, 0:2] = np.clip(out[:, 0:2] * gain, -1.0, 1.0)
     return out
+def preprocess_strokes(raw_strokes):
+    """Downsample, smooth, center, and scale strokes."""
     if not raw_strokes:
         return []
     # Downsample
+    processed = []
     for xs, ys in raw_strokes:
         if len(xs) > 25:
             step = max(1, len(xs) // 25)
             xs, ys = xs[::step], ys[::step]
+        processed.append((list(xs), list(ys)))
     # Smooth
     smoothed = []
+    for xs, ys in processed:
         if len(xs) >= 3:
             xs_s = [xs[0]] + [(xs[i-1]+xs[i]+xs[i+1])/3 for i in range(1, len(xs)-1)] + [xs[-1]]
             ys_s = [ys[0]] + [(ys[i-1]+ys[i]+ys[i+1])/3 for i in range(1, len(ys)-1)] + [ys[-1]]
     # Center and scale
     all_x = [x for xs, _ in smoothed for x in xs]
     all_y = [y for _, ys in smoothed for y in ys]
+    if not all_x:
+        return []
     min_x, max_x = min(all_x), max(all_x)
     min_y, max_y = min(all_y), max(all_y)
     scale = 235 / max(max(1, max_x - min_x), max(1, max_y - min_y))
     cx, cy = (min_x + max_x) / 2, (min_y + max_y) / 2
     ox, oy = 127.5 - cx * scale, 127.5 - cy * scale
         result.append([xs_n, ys_n])
     return result
+# ============================================================================
+# Model Loading
+# ============================================================================
+def load_model():
+    model_path = Path(__file__).parent / "rnn_animals_best.pt"
+    if not model_path.exists():
+        return None, None
+    ckpt = torch.load(model_path, map_location="cpu", weights_only=False)
+    cfg = ckpt.get("config", {})
+    model = GRUClassifier(
+        input_size=3, hidden_size=cfg.get("hidden_size", 512),
+        num_layers=cfg.get("num_layers", 2), bidirectional=cfg.get("bidirectional", True),
+        dropout=cfg.get("dropout", 0.3), num_classes=len(ANIMALS), use_packing=True
+    )
+    model.load_state_dict(ckpt["model_state"])
+    model.eval()
+    class_to_idx = ckpt.get("class_to_idx", {a: i for i, a in enumerate(ANIMALS)})
+    idx_to_class = {v: k for k, v in class_to_idx.items()}
+    return model, idx_to_class
+MODEL, IDX_TO_CLASS = load_model()
+# ============================================================================
+# Prediction
+# ============================================================================
+def predict(strokes_json):
+    """Predict from JSON stroke data."""
+    if MODEL is None:
+        return {"error": "Model not loaded"}
     try:
+        raw_strokes = json.loads(strokes_json) if isinstance(strokes_json, str) else strokes_json
+        if not raw_strokes:
+            return {a: 0.0 for a in ANIMALS}
+        # Convert to list of (xs, ys) tuples
+        stroke_tuples = [(s[0], s[1]) for s in raw_strokes if len(s) == 2]
+        processed = preprocess_strokes(stroke_tuples)
+        if not processed:
+            return {a: 0.0 for a in ANIMALS}
+        seq = parse_drawing_to_seq(json.dumps(processed))
+        if seq is None or len(seq) < 3:
+            return {a: 0.0 for a in ANIMALS}
         seq = _calibrate_seq(seq)
         seq_t = torch.tensor(seq, dtype=torch.float32).unsqueeze(0)
         lengths = torch.tensor([seq.shape[0]], dtype=torch.long)
         with torch.no_grad():
+            probs = torch.softmax(MODEL(seq_t, lengths), dim=1)[0]
+        return {IDX_TO_CLASS.get(i, f"class_{i}"): float(probs[i]) for i in range(len(ANIMALS))}
     except Exception as e:
+        return {"error": str(e)}
 # ============================================================================
+# Custom Canvas HTML
 # ============================================================================
+CANVAS_HTML = """
+<div id="canvas-container" style="display: flex; flex-direction: column; align-items: center;">
+    <canvas id="drawing-canvas" width="400" height="400"
+            style="border: 2px solid #333; border-radius: 8px; background: white; cursor: crosshair;"></canvas>
+    <div style="margin-top: 10px;">
+        <button onclick="clearCanvas()" style="padding: 8px 16px; margin-right: 10px; cursor: pointer;">Clear</button>
+        <button onclick="sendStrokes()" style="padding: 8px 16px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer;">Predict</button>
+    </div>
+    <p style="color: #666; font-size: 12px; margin-top: 5px;">Draw an animal, then click Predict</p>
+</div>
+<script>
+const canvas = document.getElementById('drawing-canvas');
+const ctx = canvas.getContext('2d');
+let isDrawing = false;
+let strokes = [];
+let currentStroke = {x: [], y: []};
+ctx.strokeStyle = '#000';
+ctx.lineWidth = 3;
+ctx.lineCap = 'round';
+ctx.lineJoin = 'round';
+canvas.addEventListener('mousedown', (e) => {
+    isDrawing = true;
+    const rect = canvas.getBoundingClientRect();
+    const x = e.clientX - rect.left;
+    const y = e.clientY - rect.top;
+    currentStroke = {x: [x], y: [y]};
+    ctx.beginPath();
+    ctx.moveTo(x, y);
+});
+canvas.addEventListener('mousemove', (e) => {
+    if (!isDrawing) return;
+    const rect = canvas.getBoundingClientRect();
+    const x = e.clientX - rect.left;
+    const y = e.clientY - rect.top;
+    currentStroke.x.push(x);
+    currentStroke.y.push(y);
+    ctx.lineTo(x, y);
+    ctx.stroke();
+});
+canvas.addEventListener('mouseup', () => {
+    if (isDrawing && currentStroke.x.length > 1) {
+        strokes.push([currentStroke.x, currentStroke.y]);
+    }
+    isDrawing = false;
+});
+canvas.addEventListener('mouseleave', () => {
+    if (isDrawing && currentStroke.x.length > 1) {
+        strokes.push([currentStroke.x, currentStroke.y]);
+    }
+    isDrawing = false;
+});
+// Touch support
+canvas.addEventListener('touchstart', (e) => {
+    e.preventDefault();
+    const touch = e.touches[0];
+    const rect = canvas.getBoundingClientRect();
+    const x = touch.clientX - rect.left;
+    const y = touch.clientY - rect.top;
+    isDrawing = true;
+    currentStroke = {x: [x], y: [y]};
+    ctx.beginPath();
+    ctx.moveTo(x, y);
+});
+canvas.addEventListener('touchmove', (e) => {
+    e.preventDefault();
+    if (!isDrawing) return;
+    const touch = e.touches[0];
+    const rect = canvas.getBoundingClientRect();
+    const x = touch.clientX - rect.left;
+    const y = touch.clientY - rect.top;
+    currentStroke.x.push(x);
+    currentStroke.y.push(y);
+    ctx.lineTo(x, y);
+    ctx.stroke();
+});
+canvas.addEventListener('touchend', () => {
+    if (isDrawing && currentStroke.x.length > 1) {
+        strokes.push([currentStroke.x, currentStroke.y]);
+    }
+    isDrawing = false;
+});
+function clearCanvas() {
+    ctx.clearRect(0, 0, canvas.width, canvas.height);
+    strokes = [];
+}
+function sendStrokes() {
+    const strokesJson = JSON.stringify(strokes);
+    // Update the hidden textbox with strokes data
+    const textbox = document.querySelector('#strokes-input textarea');
+    if (textbox) {
+        textbox.value = strokesJson;
+        textbox.dispatchEvent(new Event('input', { bubbles: true }));
+    }
+    // Also trigger the button
+    const btn = document.querySelector('#predict-btn');
+    if (btn) btn.click();
+}
+</script>
+"""
+# ============================================================================
+# Gradio App
+# ============================================================================
+with gr.Blocks(title="Animal Doodle Classifier", theme=gr.themes.Soft()) as app:
+    gr.Markdown("# 🎨 Animal Doodle Classifier")
+    gr.Markdown("Draw an animal and click **Predict**! Supported: butterfly, cow, elephant, giraffe, monkey, octopus, scorpion, shark, snake, spider")
+    with gr.Row():
+        with gr.Column(scale=1):
+            canvas = gr.HTML(CANVAS_HTML)
+            strokes_input = gr.Textbox(label="Strokes", elem_id="strokes-input", visible=False)
+            predict_btn = gr.Button("Predict", elem_id="predict-btn", visible=False)
+        with gr.Column(scale=1):
+            output = gr.Label(num_top_classes=5, label="Predictions")
+    predict_btn.click(fn=predict, inputs=strokes_input, outputs=output)
+    strokes_input.change(fn=predict, inputs=strokes_input, outputs=output)
 if __name__ == "__main__":
+    app.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-streamlit>=1.28.0
-streamlit-drawable-canvas>=0.9.3
 torch>=2.0.0
 numpy>=1.24.0

+gradio>=4.0.0
 torch>=2.0.0
 numpy>=1.24.0