Spaces:

Jazz1508
/

Infrawatch

Sleeping

App Files Community

Jazz1508 committed on Feb 11

Commit

e451a47

verified ·

1 Parent(s): dc909e1

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -6

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from albumentations.pytorch import ToTensorV2
 MODEL_PATH = "s2ds_deeplabv3plus.pth"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 NUM_CLASSES = 7
 CLASS_NAMES = {
     0: "Background",
@@ -74,13 +75,16 @@ def colorize_mask(mask):
     return color_mask
 # ================================
-# INFERENCE
 # ================================
 def segment_image(image):
     if image is None:
         return None, ""
-    # Gradio provides RGB already
     padded, orig_h, orig_w = pad_to_16(image)
     img = normalize(image=padded)["image"]
@@ -88,13 +92,21 @@ def segment_image(image):
     img = img.unsqueeze(0).to(DEVICE)
     with torch.no_grad():
-        pred = model(img)
         pred_mask = torch.argmax(pred, dim=1)[0].cpu().numpy()
     pred_mask = pred_mask[:orig_h, :orig_w]
     color_mask = colorize_mask(pred_mask)
-    overlay = cv2.addWeighted(image, 0.6, color_mask, 0.4, 0)
     # Image-level classification
     vals, counts = np.unique(pred_mask, return_counts=True)
@@ -121,8 +133,13 @@ with gr.Blocks() as demo:
         btn = gr.Button("Run Segmentation")
         btn.click(segment_image, inputs=input_img, outputs=[output_img, output_text])
-    with gr.Tab("Live Camera (Real-Time)"):
-        cam = gr.Image(sources=["webcam"], streaming=True, type="numpy")
         cam_out = gr.Image()
         cam.stream(lambda x: segment_image(x)[0], inputs=cam, outputs=cam_out)

 MODEL_PATH = "s2ds_deeplabv3plus.pth"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 NUM_CLASSES = 7
+INFER_SIZE = 512  # 🔥 reduce for speed (important for live feed)
 CLASS_NAMES = {
     0: "Background",
     return color_mask
 # ================================
+# FAST INFERENCE FUNCTION
 # ================================
 def segment_image(image):
     if image is None:
         return None, ""
+    # 🔥 Downscale for speed
+    original = image.copy()
+    image = cv2.resize(image, (INFER_SIZE, INFER_SIZE))
     padded, orig_h, orig_w = pad_to_16(image)
     img = normalize(image=padded)["image"]
     img = img.unsqueeze(0).to(DEVICE)
     with torch.no_grad():
+        if DEVICE == "cuda":
+            with torch.cuda.amp.autocast():
+                pred = model(img)
+        else:
+            pred = model(img)
         pred_mask = torch.argmax(pred, dim=1)[0].cpu().numpy()
     pred_mask = pred_mask[:orig_h, :orig_w]
     color_mask = colorize_mask(pred_mask)
+    overlay_small = cv2.addWeighted(image, 0.6, color_mask, 0.4, 0)
+    # 🔥 Resize back to original size
+    overlay = cv2.resize(overlay_small, (original.shape[1], original.shape[0]))
     # Image-level classification
     vals, counts = np.unique(pred_mask, return_counts=True)
         btn = gr.Button("Run Segmentation")
         btn.click(segment_image, inputs=input_img, outputs=[output_img, output_text])
+    with gr.Tab("Live Camera (Fast Mode)"):
+        cam = gr.Image(
+            sources=["webcam"],
+            streaming=True,
+            type="numpy",
+            webcam_options={"facingMode": "environment"}  # 🔥 force back camera
+        )
         cam_out = gr.Image()
         cam.stream(lambda x: segment_image(x)[0], inputs=cam, outputs=cam_out)