Spaces:

ASomeoneWhoInterestedWithAI
/

LookThem_V8-MNIST_Classifier

Paused

App Files Files Community

ASomeoneWhoInterestedWithAI committed on 4 days ago

Commit

077e064

verified ·

1 Parent(s): 51849ff

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -13

app.py CHANGED Viewed

@@ -129,7 +129,7 @@ def predict_digit(input_image):
         return default_output
     try:
-        # Extract the background or composite layer from the Gradio Sketchpad dictionary
         if isinstance(input_image, dict):
             img_array = input_image.get("composite", None)
             if img_array is None:
@@ -140,28 +140,34 @@ def predict_digit(input_image):
         if img_array is None:
             return default_output
-        # Extract channels safely
         if isinstance(img_array, np.ndarray) and img_array.ndim == 3:
-            if img_array.shape[-1] == 4:      # RGBA -> alpha channel
-                grayscale = img_array[..., 3]
-            else:                             # RGB -> luminance
                 grayscale = np.dot(img_array[..., :3], [0.2989, 0.5870, 0.1140])
         else:
-            grayscale = img_array
-        # Check if canvas is essentially empty
-        if np.max(grayscale) < 5:
             return default_output
-        # Ensure the background is black and the text is white (standard MNIST setup)
-        # If your brush was black and canvas was white, invert it here:
-        # grayscale = 255 - grayscale
-        # Resize & normalize
         img = Image.fromarray(grayscale.astype(np.uint8), mode="L")
-        img = img.resize((28, 28), Image.Image.Resampling.BILINEAR if hasattr(Image, 'Image') else Image.BILINEAR)
         tensor_img = transform_fn(img).unsqueeze(0).to(device)
         with torch.no_grad():
             outputs = model(tensor_img)
             probabilities = F.softmax(outputs, dim=1)[0]
@@ -172,6 +178,7 @@ def predict_digit(input_image):
         print(f"Prediction error: {e}")
         return default_output
 # --- GRADIO INTERFACE ---
 with gr.Blocks() as demo:
     gr.Markdown(

         return default_output
     try:
+        # 1. Handle Gradio Sketchpad dictionary output
         if isinstance(input_image, dict):
             img_array = input_image.get("composite", None)
             if img_array is None:
         if img_array is None:
             return default_output
+        # 2. Convert to Grayscale safely
         if isinstance(img_array, np.ndarray) and img_array.ndim == 3:
+            if img_array.shape[-1] == 4:      # RGBA (Canvas often uses alpha)
+                # If background is transparent/white, alpha channel might be inverted
+                grayscale = img_array[..., 3]
+            else:                             # RGB -> Grayscale
                 grayscale = np.dot(img_array[..., :3], [0.2989, 0.5870, 0.1140])
         else:
+            grayscale = img_array.copy()
+        # 3. AUTO-INVERT: Ensure white digit on black background
+        # If the average pixel value is bright (> 127), the user drew dark text on light background.
+        if np.mean(grayscale) > 127:
+            grayscale = 255.0 - grayscale
+        # 4. Check if the canvas is empty
+        if np.max(grayscale) < 15:
             return default_output
+        # Debugging print to check what your model is actually receiving
+        print(f"Processed image shape: {grayscale.shape} | Max Val: {np.max(grayscale)} | Mean Val: {np.mean(grayscale):.2f}")
+        # 5. Convert to PIL, Resize, and Transform
         img = Image.fromarray(grayscale.astype(np.uint8), mode="L")
+        img = img.resize((28, 28), Image.Resampling.BILINEAR)
         tensor_img = transform_fn(img).unsqueeze(0).to(device)
+        # 6. Model Inference
         with torch.no_grad():
             outputs = model(tensor_img)
             probabilities = F.softmax(outputs, dim=1)[0]
         print(f"Prediction error: {e}")
         return default_output
 # --- GRADIO INTERFACE ---
 with gr.Blocks() as demo:
     gr.Markdown(