Spaces:

ASomeoneWhoInterestedWithAI
/

LookThem_V8-MNIST_Classifier

Paused

App Files Files Community

ASomeoneWhoInterestedWithAI committed 4 days ago

Commit

51849ff

verified ·

1 Parent(s): ceb8453

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -21

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ if not os.path.exists(MODEL_PATH):
     urllib.request.urlretrieve(HF_URL, MODEL_PATH)
     print("Download complete!")
-# --- DEFINE YOUR MODEL ARCHITECTURE (TETAP SAMA) ---
 class LookThemLayer(nn.Module):
     def __init__(self, num_tokens, in_features, hidden_dim):
         super().__init__()
@@ -107,7 +107,6 @@ class LookThemV8MNIST(nn.Module):
         x = self.compressor(x).flatten(1)
         x = self.res_blocks(self.input_proj(x))
         return self.head(x)
-# ... (Salin definisi kelas LookThemLayer, LiteResidualBlock, dan LookThemV8MNIST Anda di sini) ...
 # --- LOAD WEIGHTS ON CPU/GPU ---
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -124,33 +123,43 @@ transform_fn = transforms.Compose([
 ])
 def predict_digit(input_image):
-    # Default output jika kanvas kosong
     default_output = {str(i): 0.1 for i in range(10)}
     if input_image is None:
         return default_output
     try:
-        # gr.Image(source="canvas") mengembalikan numpy array secara langsung
-        img_array = input_image
-        # Konversi ke grayscale jika perlu (hasil kanvas biasanya sudah grayscale)
         if isinstance(img_array, np.ndarray) and img_array.ndim == 3:
-            # Ambil channel pertama jika multichannel, atau konversi ke luminance
-            if img_array.shape[-1] == 4:      # RGBA -> alpha
                 grayscale = img_array[..., 3]
             else:                             # RGB -> luminance
                 grayscale = np.dot(img_array[..., :3], [0.2989, 0.5870, 0.1140])
         else:
             grayscale = img_array
-        # Cek apakah kanvas kosong (semua piksel bernilai 0 atau mendekati)
         if np.max(grayscale) < 5:
             return default_output
-        # Resize & normalisasi
         img = Image.fromarray(grayscale.astype(np.uint8), mode="L")
-        img = img.resize((28, 28), Image.Resampling.BILINEAR)
         tensor_img = transform_fn(img).unsqueeze(0).to(device)
         with torch.no_grad():
@@ -174,14 +183,11 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            # Gunakan gr.Image dengan source="canvas"
-            input_canvas = gr.Image(
-                image_mode="L",
-                height=280,
-                width=280,
-                sources="canvas",            # Mengaktifkan mode kanvas untuk menggambar
-                invert_colors=True,         # Membalik warna: latar hitam, coretan putih
-                brush=gr.Brush(default_color="rgb(0,0,0)", color_mode="fixed") # Kuas hitam (akan dibalik jadi putih)
             )
             submit_btn = gr.Button("Classify Digit 🏎️", variant="primary")
@@ -191,4 +197,4 @@ with gr.Blocks() as demo:
     submit_btn.click(fn=predict_digit, inputs=input_canvas, outputs=output_label)
 if __name__ == "__main__":
-    demo.launch()

     urllib.request.urlretrieve(HF_URL, MODEL_PATH)
     print("Download complete!")
+# --- DEFINE YOUR MODEL ARCHITECTURE ---
 class LookThemLayer(nn.Module):
     def __init__(self, num_tokens, in_features, hidden_dim):
         super().__init__()
         x = self.compressor(x).flatten(1)
         x = self.res_blocks(self.input_proj(x))
         return self.head(x)
 # --- LOAD WEIGHTS ON CPU/GPU ---
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 ])
 def predict_digit(input_image):
     default_output = {str(i): 0.1 for i in range(10)}
     if input_image is None:
         return default_output
     try:
+        # Extract the background or composite layer from the Gradio Sketchpad dictionary
+        if isinstance(input_image, dict):
+            img_array = input_image.get("composite", None)
+            if img_array is None:
+                img_array = input_image.get("background", None)
+        else:
+            img_array = input_image
+        if img_array is None:
+            return default_output
+        # Extract channels safely
         if isinstance(img_array, np.ndarray) and img_array.ndim == 3:
+            if img_array.shape[-1] == 4:      # RGBA -> alpha channel
                 grayscale = img_array[..., 3]
             else:                             # RGB -> luminance
                 grayscale = np.dot(img_array[..., :3], [0.2989, 0.5870, 0.1140])
         else:
             grayscale = img_array
+        # Check if canvas is essentially empty
         if np.max(grayscale) < 5:
             return default_output
+        # Ensure the background is black and the text is white (standard MNIST setup)
+        # If your brush was black and canvas was white, invert it here:
+        # grayscale = 255 - grayscale
+        # Resize & normalize
         img = Image.fromarray(grayscale.astype(np.uint8), mode="L")
+        img = img.resize((28, 28), Image.Image.Resampling.BILINEAR if hasattr(Image, 'Image') else Image.BILINEAR)
         tensor_img = transform_fn(img).unsqueeze(0).to(device)
         with torch.no_grad():
     with gr.Row():
         with gr.Column():
+            # Standardized setup for canvas sketching in modern Gradio versions
+            input_canvas = gr.Sketchpad(
+                type="numpy",
+                layers=False,
+                canvas_size=(280, 280)
             )
             submit_btn = gr.Button("Classify Digit 🏎️", variant="primary")
     submit_btn.click(fn=predict_digit, inputs=input_canvas, outputs=output_label)
 if __name__ == "__main__":
+    demo.launch()