Eli181927 committed on
Commit
6d8cc91
·
verified ·
1 Parent(s): 4ba948e

Upload app.py

Browse files
Files changed (1) hide show
  1. 2.CNN/app.py +247 -52
2.CNN/app.py CHANGED
@@ -187,42 +187,244 @@ def _auto_balance_stroke(arr: np.ndarray, *, target_mass_fraction: float, clamp:
187
  return adjusted, scale, new_mass_fraction
188
 
189
 
190
- def compose_dual_canvas(left_input, right_input):
191
- left_img = extract_canvas_array(left_input)
192
- right_img = extract_canvas_array(right_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
- if left_img is None and right_img is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  return None
 
 
 
196
 
197
- if left_img is None:
198
- if right_img is None:
199
- return None
200
- base_size = right_img.size
201
- left_img = Image.new("L", base_size, color=255)
202
- if right_img is None:
203
- base_size = left_img.size
204
- right_img = Image.new("L", base_size, color=255)
205
-
206
- left_img = left_img.convert("L")
207
- right_img = right_img.convert("L")
208
-
209
- if left_img.height != right_img.height:
210
- target_height = min(left_img.height, right_img.height)
211
- left_img = left_img.resize(
212
- (left_img.width, target_height), Image.Resampling.LANCZOS
213
- )
214
- right_img = right_img.resize(
215
- (right_img.width, target_height), Image.Resampling.LANCZOS
216
- )
217
 
218
- combined = Image.new(
219
- "L",
220
- (left_img.width + right_img.width, left_img.height),
221
- color=255,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  )
223
- combined.paste(left_img, (0, 0))
224
- combined.paste(right_img, (left_img.width, 0))
225
- return combined
 
 
 
 
 
 
 
226
 
227
 
228
  def preprocess_image(img_input, stroke_scale: float = 1.0):
@@ -523,25 +725,26 @@ def enrich_diagnostics(stats, probs):
523
  return stats
524
 
525
 
526
- def predict_number(left_canvas, right_canvas, stroke_scale):
527
  ensure_model_loaded()
528
- combined_canvas = compose_dual_canvas(left_canvas, right_canvas)
529
- if combined_canvas is None:
530
  blank_probs = {f"{i:02d}": 0.0 for i in range(OUTPUT_CLASSES)}
531
  empty_preview = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
532
  empty_diff = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
533
- diagnostics = {"warnings": ["Draw both digits to see diagnostics."]}
534
  return None, blank_probs, empty_preview, empty_diff, json.dumps(diagnostics, indent=2)
535
 
536
- result = preprocess_image(
537
- combined_canvas,
538
  stroke_scale=stroke_scale,
 
539
  )
540
  if result is None:
541
  blank_probs = {f"{i:02d}": 0.0 for i in range(OUTPUT_CLASSES)}
542
  empty_preview = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
543
  empty_diff = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
544
- diagnostics = {"warnings": ["Draw a number to see diagnostics."]}
545
  return None, blank_probs, empty_preview, empty_diff, json.dumps(diagnostics, indent=2)
546
 
547
  standardized_variants, preview, mean_diff, diagnostics = result
@@ -567,15 +770,13 @@ with gr.Blocks() as demo:
567
  gr.Markdown(
568
  """
569
  # Elliot's MNIST-100 Classifier
570
- Draw a two-digit number (00-99). Use the left canvas for the tens digit and the right canvas for the ones digit. The model will predict the number, show the top class probabilities, and display diagnostics for the processed input.
571
  """
572
  )
573
 
574
  with gr.Row():
575
  with gr.Column(scale=1):
576
- with gr.Row():
577
- left_canvas = gr.Sketchpad(label="Tens Digit")
578
- right_canvas = gr.Sketchpad(label="Ones Digit")
579
  stroke_slider = gr.Slider(
580
  minimum=0.3,
581
  maximum=1.2,
@@ -598,8 +799,7 @@ with gr.Blocks() as demo:
598
  predict_btn = gr.Button("Predict", variant="primary")
599
  clear_btn = gr.ClearButton(
600
  [
601
- left_canvas,
602
- right_canvas,
603
  stroke_slider,
604
  pred_box,
605
  prob_table,
@@ -611,19 +811,14 @@ with gr.Blocks() as demo:
611
 
612
  predict_btn.click(
613
  fn=predict_number,
614
- inputs=[left_canvas, right_canvas, stroke_slider],
615
  outputs=[pred_box, prob_table, preview, mean_diff_view, diagnostics_box],
616
  )
617
  # On Spaces, avoid per-stroke inference to prevent event floods
618
  if not IS_SPACE:
619
- left_canvas.change(
620
- fn=predict_number,
621
- inputs=[left_canvas, right_canvas, stroke_slider],
622
- outputs=[pred_box, prob_table, preview, mean_diff_view, diagnostics_box],
623
- )
624
- right_canvas.change(
625
  fn=predict_number,
626
- inputs=[left_canvas, right_canvas, stroke_slider],
627
  outputs=[pred_box, prob_table, preview, mean_diff_view, diagnostics_box],
628
  )
629
 
 
187
  return adjusted, scale, new_mass_fraction
188
 
189
 
190
+ def _valley_split(mask: np.ndarray) -> int | None:
191
+ # Find a vertical seam (column) with minimal foreground to split two digits
192
+ H, W = mask.shape
193
+ if W < 8:
194
+ return None
195
+ col_sums = mask.sum(axis=0)
196
+ start = max(1, int(W * 0.25))
197
+ end = min(W - 1, int(W * 0.75))
198
+ if end <= start:
199
+ start, end = 1, W - 1
200
+ idx = int(np.argmin(col_sums[start:end])) + start
201
+ left_mass = int(col_sums[:idx].sum())
202
+ right_mass = int(col_sums[idx:].sum())
203
+ if left_mass > 50 and right_mass > 50:
204
+ return idx
205
+ return None
206
+
207
+
208
+ def _connected_components(mask: np.ndarray):
209
+ H, W = mask.shape
210
+ visited = np.zeros_like(mask, dtype=bool)
211
+ comps = []
212
+ for y in range(H):
213
+ row = mask[y]
214
+ for x in range(W):
215
+ if row[x] and not visited[y, x]:
216
+ stack = [(y, x)]
217
+ visited[y, x] = True
218
+ ys, xs = [], []
219
+ while stack:
220
+ cy, cx = stack.pop()
221
+ ys.append(cy)
222
+ xs.append(cx)
223
+ # 4-connectivity
224
+ if cy > 0 and mask[cy - 1, cx] and not visited[cy - 1, cx]:
225
+ visited[cy - 1, cx] = True
226
+ stack.append((cy - 1, cx))
227
+ if cy + 1 < H and mask[cy + 1, cx] and not visited[cy + 1, cx]:
228
+ visited[cy + 1, cx] = True
229
+ stack.append((cy + 1, cx))
230
+ if cx > 0 and mask[cy, cx - 1] and not visited[cy, cx - 1]:
231
+ visited[cy, cx - 1] = True
232
+ stack.append((cy, cx - 1))
233
+ if cx + 1 < W and mask[cy, cx + 1] and not visited[cy, cx + 1]:
234
+ visited[cy, cx + 1] = True
235
+ stack.append((cy, cx + 1))
236
+ y1, y2 = min(ys), max(ys) + 1
237
+ x1, x2 = min(xs), max(xs) + 1
238
+ comps.append({"bbox": (y1, y2, x1, x2), "size": len(ys)})
239
+ return comps
240
+
241
+
242
+ def canonicalize_digit_28x28(arr: np.ndarray) -> np.ndarray:
243
+ # Input arr: float32 in [0,1], arbitrary HxW; output: 28x28 centered tile
244
+ if arr.size == 0:
245
+ return np.zeros((TARGET_HEIGHT, TARGET_HEIGHT), dtype=np.float32)
246
+ thr = arr > 0.05
247
+ if not thr.any():
248
+ return np.zeros((TARGET_HEIGHT, TARGET_HEIGHT), dtype=np.float32)
249
+ ys, xs = np.where(thr)
250
+ y1, y2 = ys.min(), ys.max() + 1
251
+ x1, x2 = xs.min(), xs.max() + 1
252
+ # small padding
253
+ pad = 2
254
+ y1 = max(0, y1 - pad)
255
+ x1 = max(0, x1 - pad)
256
+ y2 = min(arr.shape[0], y2 + pad)
257
+ x2 = min(arr.shape[1], x2 + pad)
258
+ crop = arr[y1:y2, x1:x2]
259
+ h, w = crop.shape
260
+ if h == 0 or w == 0:
261
+ return np.zeros((TARGET_HEIGHT, TARGET_HEIGHT), dtype=np.float32)
262
+ # resize shorter side to 20
263
+ if h >= w:
264
+ new_h = 20
265
+ new_w = max(1, int(round(w * (20.0 / h))))
266
+ else:
267
+ new_w = 20
268
+ new_h = max(1, int(round(h * (20.0 / w))))
269
+ small = Image.fromarray((crop * 255.0).astype(np.uint8)).resize(
270
+ (new_w, new_h), Image.Resampling.LANCZOS
271
+ )
272
+ tile = Image.new("L", (TARGET_HEIGHT, TARGET_HEIGHT), color=0)
273
+ # paste centered
274
+ top = (TARGET_HEIGHT - new_h) // 2
275
+ left = (TARGET_HEIGHT - new_w) // 2
276
+ tile.paste(small, (left, top))
277
+ tile_arr = np.array(tile, dtype=np.float32) / 255.0
278
+ # center-of-mass shift to exact center
279
+ mass = tile_arr
280
+ tot = float(mass.sum())
281
+ if tot > 1e-6:
282
+ gy, gx = np.indices(mass.shape)
283
+ cy = float((gy * mass).sum() / tot)
284
+ cx = float((gx * mass).sum() / tot)
285
+ ideal = (TARGET_HEIGHT - 1) / 2.0
286
+ dy = int(np.clip(round(ideal - cy), -2, 2))
287
+ dx = int(np.clip(round(ideal - cx), -2, 2))
288
+ if dy != 0 or dx != 0:
289
+ tile_arr = shift_with_zero_pad(tile_arr, dy, dx)
290
+ return tile_arr.astype(np.float32, copy=False)
291
+
292
 
293
+ def compose_from_single_canvas(img_input):
294
+ img = extract_canvas_array(img_input)
295
+ if img is None:
296
+ return None, {"warnings": ["No image provided."]}
297
+ try:
298
+ bands = img.getbands()
299
+ except Exception:
300
+ bands = ()
301
+ if "A" in bands:
302
+ rgba = img.convert("RGBA")
303
+ white_bg = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
304
+ img = Image.alpha_composite(white_bg, rgba).convert("RGB")
305
+ gray = img.convert("L")
306
+ inv = ImageOps.invert(gray)
307
+ arr_u8 = np.array(inv, dtype=np.uint8)
308
+ mask = arr_u8 > 10
309
+ if not mask.any():
310
+ return None, {"warnings": ["Empty drawing detected."]}
311
+
312
+ # Global bbox trim for speed
313
+ ys, xs = np.where(mask)
314
+ y1, y2 = ys.min(), ys.max() + 1
315
+ x1, x2 = xs.min(), xs.max() + 1
316
+ pad = 4
317
+ y1 = max(0, y1 - pad)
318
+ x1 = max(0, x1 - pad)
319
+ y2 = min(arr_u8.shape[0], y2 + pad)
320
+ x2 = min(arr_u8.shape[1], x2 + pad)
321
+ arr_u8 = arr_u8[y1:y2, x1:x2]
322
+ mask = mask[y1:y2, x1:x2]
323
+
324
+ method = "valley"
325
+ split = _valley_split(mask)
326
+ left_arr = right_arr = None
327
+ if split is not None:
328
+ left_area = arr_u8[:, :split]
329
+ right_area = arr_u8[:, split:]
330
+ if (left_area > 10).any():
331
+ l_ys, l_xs = np.where(left_area > 10)
332
+ ly1, ly2 = l_ys.min(), l_ys.max() + 1
333
+ lx1, lx2 = l_xs.min(), l_xs.max() + 1
334
+ left_arr = left_area[ly1:ly2, lx1:lx2]
335
+ if (right_area > 10).any():
336
+ r_ys, r_xs = np.where(right_area > 10)
337
+ ry1, ry2 = r_ys.min(), r_ys.max() + 1
338
+ rx1, rx2 = r_xs.min(), r_xs.max() + 1
339
+ right_arr = right_area[ry1:ry2, rx1:rx2]
340
+ else:
341
+ method = "components"
342
+ comps = _connected_components(mask)
343
+ if len(comps) >= 2:
344
+ comps.sort(key=lambda c: c["size"], reverse=True)
345
+ a, b = comps[0], comps[1]
346
+ # sort left/right by x1
347
+ if a["bbox"][2] <= b["bbox"][2]:
348
+ left_bbox, right_bbox = a["bbox"], b["bbox"]
349
+ else:
350
+ left_bbox, right_bbox = b["bbox"], a["bbox"]
351
+ ly1, ly2, lx1, lx2 = left_bbox
352
+ ry1, ry2, rx1, rx2 = right_bbox
353
+ left_arr = arr_u8[ly1:ly2, lx1:lx2]
354
+ right_arr = arr_u8[ry1:ry2, rx1:rx2]
355
+ else:
356
+ # Fallback: split the single bbox in half
357
+ method = "fallback_center_split"
358
+ W = arr_u8.shape[1]
359
+ split = W // 2
360
+ left_arr = arr_u8[:, :split]
361
+ right_arr = arr_u8[:, split:]
362
+
363
+ # Convert to float and canonicalize per digit
364
+ left_tile = canonicalize_digit_28x28((left_arr.astype(np.float32) / 255.0) if left_arr is not None else np.zeros((1, 1), dtype=np.float32))
365
+ right_tile = canonicalize_digit_28x28((right_arr.astype(np.float32) / 255.0) if right_arr is not None else np.zeros((1, 1), dtype=np.float32))
366
+ composed = np.concatenate([left_tile, right_tile], axis=1)
367
+ diag = {
368
+ "segmentation": {
369
+ "method": method,
370
+ "canvas_crop": {"top": int(y1), "bottom": int(y2), "left": int(x1), "right": int(x2)},
371
+ }
372
+ }
373
+ return composed.astype(np.float32, copy=False), diag
374
+
375
+
376
+ def preprocess_composed_28x56(arr_28x56: np.ndarray, stroke_scale: float = 1.0, *, extra_diag: dict | None = None):
377
+ ensure_model_loaded()
378
+ if arr_28x56 is None:
379
  return None
380
+ arr_resized = np.clip(arr_28x56.astype(np.float32), 0.0, 1.0)
381
+ mean_image = mean.reshape(TARGET_HEIGHT, TARGET_WIDTH)
382
+ std_safe = np.maximum(std, STD_FLOOR)
383
 
384
+ stroke_scale = float(stroke_scale)
385
+ stroke_scale = max(0.3, min(stroke_scale, 1.5))
386
+ arr_resized = np.clip(arr_resized * stroke_scale, 0.0, 1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ auto_balance_scale = 1.0
389
+ pre_balance_mass_fraction = float(arr_resized.mean())
390
+ target_mass = float(mean.mean())
391
+ arr_resized, auto_balance_scale, balanced_mass_fraction = _auto_balance_stroke(
392
+ arr_resized,
393
+ target_mass_fraction=target_mass,
394
+ clamp=(0.6, 1.6),
395
+ )
396
+
397
+ # We already centered per 28x28 tile; skip whole-image recentering here
398
+ arr_centered = arr_resized
399
+
400
+ augmented_arrays = [arr_centered, *generate_inference_variants(arr_centered, fast=IS_SPACE)]
401
+ augmented_standardized = []
402
+ for arr in augmented_arrays:
403
+ z = (arr.reshape(TARGET_HEIGHT * TARGET_WIDTH, 1) - mean) / std_safe
404
+ z = np.clip(z, -8.0, 8.0)
405
+ augmented_standardized.append(z.astype(np.float32, copy=False))
406
+
407
+ mean_diff = np.abs(arr_centered - mean_image)
408
+ mean_diff_uint8 = (mean_diff / (mean_diff.max() + 1e-8) * 255.0).astype(np.uint8)
409
+
410
+ diagnostics = compute_diagnostics(
411
+ arr_centered,
412
+ None,
413
+ arr_centered.shape,
414
+ mean_image,
415
+ augmented_standardized[0],
416
+ std_safe,
417
  )
418
+ diagnostics["applied_auto_balance"] = {
419
+ "enabled": True,
420
+ "scale": float(auto_balance_scale),
421
+ "mass_fraction_after": float(balanced_mass_fraction),
422
+ "mass_fraction_before": float(pre_balance_mass_fraction),
423
+ "target_mass_fraction": float(target_mass),
424
+ }
425
+ if extra_diag:
426
+ diagnostics.update(extra_diag)
427
+ return augmented_standardized, arr_centered, mean_diff_uint8, diagnostics
428
 
429
 
430
  def preprocess_image(img_input, stroke_scale: float = 1.0):
 
725
  return stats
726
 
727
 
728
+ def predict_number(main_canvas, stroke_scale):
729
  ensure_model_loaded()
730
+ composed, seg_diag = compose_from_single_canvas(main_canvas)
731
+ if composed is None:
732
  blank_probs = {f"{i:02d}": 0.0 for i in range(OUTPUT_CLASSES)}
733
  empty_preview = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
734
  empty_diff = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
735
+ diagnostics = {"warnings": ["Draw two digits to see diagnostics."]}
736
  return None, blank_probs, empty_preview, empty_diff, json.dumps(diagnostics, indent=2)
737
 
738
+ result = preprocess_composed_28x56(
739
+ composed,
740
  stroke_scale=stroke_scale,
741
+ extra_diag=seg_diag,
742
  )
743
  if result is None:
744
  blank_probs = {f"{i:02d}": 0.0 for i in range(OUTPUT_CLASSES)}
745
  empty_preview = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
746
  empty_diff = np.zeros((TARGET_HEIGHT, TARGET_WIDTH), dtype=np.uint8)
747
+ diagnostics = {"warnings": ["Draw two digits to see diagnostics."]}
748
  return None, blank_probs, empty_preview, empty_diff, json.dumps(diagnostics, indent=2)
749
 
750
  standardized_variants, preview, mean_diff, diagnostics = result
 
770
  gr.Markdown(
771
  """
772
  # Elliot's MNIST-100 Classifier
773
+ Draw a two-digit number (00-99) on the single canvas. The app automatically segments, centers, and scales each digit to match the training layout (28×28 per digit), then predicts and shows diagnostics.
774
  """
775
  )
776
 
777
  with gr.Row():
778
  with gr.Column(scale=1):
779
+ main_canvas = gr.Sketchpad(label="Draw Two Digits (00–99)")
 
 
780
  stroke_slider = gr.Slider(
781
  minimum=0.3,
782
  maximum=1.2,
 
799
  predict_btn = gr.Button("Predict", variant="primary")
800
  clear_btn = gr.ClearButton(
801
  [
802
+ main_canvas,
 
803
  stroke_slider,
804
  pred_box,
805
  prob_table,
 
811
 
812
  predict_btn.click(
813
  fn=predict_number,
814
+ inputs=[main_canvas, stroke_slider],
815
  outputs=[pred_box, prob_table, preview, mean_diff_view, diagnostics_box],
816
  )
817
  # On Spaces, avoid per-stroke inference to prevent event floods
818
  if not IS_SPACE:
819
+ main_canvas.change(
 
 
 
 
 
820
  fn=predict_number,
821
+ inputs=[main_canvas, stroke_slider],
822
  outputs=[pred_box, prob_table, preview, mean_diff_view, diagnostics_box],
823
  )
824