Spaces:

BiasLab2025
/

perception

Paused

App Files Files Community

Zhen Ye committed on Jan 7

Commit

65dd451

1 Parent(s): cac69cc

git commit -m "Handle grounding dino post-process API differences"

Browse files

Files changed (2) hide show

demo.html +42 -43
models/detectors/grounding_dino.py +23 -7

demo.html CHANGED Viewed

@@ -4,7 +4,7 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Video Object Detection</title>
     <style>
         * {
             margin: 0;
@@ -13,8 +13,9 @@
         }
         body {
-            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica', 'Arial', sans-serif;
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
             min-height: 100vh;
             padding: 20px;
         }
@@ -25,17 +26,17 @@
         }
         h1 {
-            color: white;
             text-align: center;
             margin-bottom: 30px;
             font-size: 2.5rem;
-            text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
         }
         .main-card {
-            background: white;
             border-radius: 16px;
-            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
             padding: 40px;
         }
@@ -60,22 +61,23 @@
         .mode-card {
             position: relative;
             padding: 20px;
-            border: 2px solid #e0e0e0;
             border-radius: 12px;
             cursor: pointer;
             transition: all 0.3s ease;
             text-align: center;
         }
         .mode-card:hover {
-            border-color: #667eea;
             transform: translateY(-2px);
-            box-shadow: 0 4px 12px rgba(102, 126, 234, 0.2);
         }
         .mode-card.selected {
-            border-color: #667eea;
-            background: #f0f4ff;
         }
         .mode-card.disabled {
@@ -89,8 +91,7 @@
         }
         .mode-icon {
-            font-size: 2rem;
-            margin-bottom: 10px;
         }
         .mode-title {
@@ -102,8 +103,8 @@
         .mode-badge {
             display: inline-block;
             padding: 4px 8px;
-            background: #ffc107;
-            color: white;
             font-size: 0.7rem;
             border-radius: 4px;
             font-weight: 600;
@@ -126,16 +127,17 @@
         .input-group select {
             width: 100%;
             padding: 12px;
-            border: 2px solid #e0e0e0;
             border-radius: 8px;
             font-size: 1rem;
             transition: border-color 0.3s;
         }
         .input-group input[type="text"]:focus,
         .input-group select:focus {
             outline: none;
-            border-color: #667eea;
         }
         .file-input-wrapper {
@@ -147,8 +149,8 @@
         .file-input-label {
             display: block;
             padding: 15px;
-            background: #f8f9fa;
-            border: 2px dashed #ccc;
             border-radius: 8px;
             text-align: center;
             cursor: pointer;
@@ -156,13 +158,13 @@
         }
         .file-input-label:hover {
-            border-color: #667eea;
-            background: #f0f4ff;
         }
         .file-input-label.has-file {
-            border-color: #28a745;
-            background: #d4edda;
         }
         input[type="file"] {
@@ -185,13 +187,13 @@
         }
         .btn-primary {
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-            color: white;
         }
         .btn-primary:hover:not(:disabled) {
             transform: translateY(-2px);
-            box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
         }
         .btn:disabled {
@@ -232,8 +234,8 @@
         .download-btn {
             margin-top: 12px;
             padding: 10px 16px;
-            background: #28a745;
-            color: white;
             text-decoration: none;
             border-radius: 6px;
             display: inline-block;
@@ -241,7 +243,7 @@
         }
         .download-btn:hover {
-            background: #218838;
         }
         /* Loading spinner */
@@ -256,8 +258,8 @@
         }
         .spinner {
-            border: 4px solid #f3f3f3;
-            border-top: 4px solid #667eea;
             border-radius: 50%;
             width: 40px;
             height: 40px;
@@ -283,7 +285,7 @@
             top: 0;
             width: 100%;
             height: 100%;
-            background: rgba(0,0,0,0.5);
             align-items: center;
             justify-content: center;
         }
@@ -312,8 +314,8 @@
         .modal-btn {
             padding: 10px 24px;
-            background: #667eea;
-            color: white;
             border: none;
             border-radius: 6px;
             cursor: pointer;
@@ -321,13 +323,13 @@
         }
         .modal-btn:hover {
-            background: #5568d3;
         }
     </style>
 </head>
 <body>
     <div class="container">
-        <h1>🎥 Video Object Detection</h1>
         <div class="main-card">
             <!-- Mode Selection -->
@@ -336,19 +338,16 @@
                 <div class="mode-selector">
                     <label class="mode-card selected">
                         <input type="radio" name="mode" value="object_detection" checked>
-                        <div class="mode-icon">🎯</div>
                         <div class="mode-title">Object Detection</div>
                     </label>
                     <label class="mode-card">
                         <input type="radio" name="mode" value="segmentation">
-                        <div class="mode-icon">🎨</div>
                         <div class="mode-title">Segmentation</div>
                     </label>
                     <label class="mode-card disabled">
                         <input type="radio" name="mode" value="drone_detection">
-                        <div class="mode-icon">🚁</div>
                         <div class="mode-title">Drone Detection</div>
                         <span class="mode-badge">COMING SOON</span>
                     </label>
@@ -399,7 +398,7 @@
                     <label>3. Upload Video</label>
                     <div class="file-input-wrapper">
                         <label class="file-input-label" id="fileLabel" for="videoFile">
-                            📁 Click to select video file (MP4)
                         </label>
                         <input type="file" id="videoFile" accept="video/*">
                     </div>
@@ -409,7 +408,7 @@
             <!-- Process Button -->
             <div class="section">
                 <button class="btn btn-primary" id="processBtn" disabled>
-                    🚀 Process Video
                 </button>
             </div>
@@ -434,7 +433,7 @@
                         <div class="video-card-body">
                             <video id="processedVideo" controls autoplay loop></video>
                             <a id="downloadBtn" class="download-btn" download="processed.mp4">
-                                ⬇️ Download Processed Video
                             </a>
                         </div>
                     </div>

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Perception System</title>
     <style>
         * {
             margin: 0;
         }
         body {
+            font-family: "IBM Plex Sans", "Avenir Next", "Helvetica Neue", sans-serif;
+            background: linear-gradient(180deg, #f6f7f9 0%, #eef1f4 100%);
+            color: #1f2933;
             min-height: 100vh;
             padding: 20px;
         }
         }
         h1 {
+            color: #1f2933;
             text-align: center;
             margin-bottom: 30px;
             font-size: 2.5rem;
+            letter-spacing: 0.5px;
         }
         .main-card {
+            background: #ffffff;
             border-radius: 16px;
+            box-shadow: 0 18px 40px rgba(16, 24, 40, 0.12);
             padding: 40px;
         }
         .mode-card {
             position: relative;
             padding: 20px;
+            border: 1px solid #d6dbe0;
             border-radius: 12px;
             cursor: pointer;
             transition: all 0.3s ease;
             text-align: center;
+            background: #f9fafb;
         }
         .mode-card:hover {
+            border-color: #4b5563;
             transform: translateY(-2px);
+            box-shadow: 0 6px 16px rgba(16, 24, 40, 0.12);
         }
         .mode-card.selected {
+            border-color: #1f2933;
+            background: #eef2f6;
         }
         .mode-card.disabled {
         }
         .mode-icon {
+            display: none;
         }
         .mode-title {
         .mode-badge {
             display: inline-block;
             padding: 4px 8px;
+            background: #6b7280;
+            color: #f9fafb;
             font-size: 0.7rem;
             border-radius: 4px;
             font-weight: 600;
         .input-group select {
             width: 100%;
             padding: 12px;
+            border: 1px solid #d6dbe0;
             border-radius: 8px;
             font-size: 1rem;
             transition: border-color 0.3s;
+            background: #ffffff;
         }
         .input-group input[type="text"]:focus,
         .input-group select:focus {
             outline: none;
+            border-color: #4b5563;
         }
         .file-input-wrapper {
         .file-input-label {
             display: block;
             padding: 15px;
+            background: #f3f4f6;
+            border: 1px dashed #bfc5cc;
             border-radius: 8px;
             text-align: center;
             cursor: pointer;
         }
         .file-input-label:hover {
+            border-color: #4b5563;
+            background: #eceff3;
         }
         .file-input-label.has-file {
+            border-color: #1f2933;
+            background: #e8edf2;
         }
         input[type="file"] {
         }
         .btn-primary {
+            background: #1f2933;
+            color: #f9fafb;
         }
         .btn-primary:hover:not(:disabled) {
             transform: translateY(-2px);
+            box-shadow: 0 6px 16px rgba(16, 24, 40, 0.2);
         }
         .btn:disabled {
         .download-btn {
             margin-top: 12px;
             padding: 10px 16px;
+            background: #374151;
+            color: #f9fafb;
             text-decoration: none;
             border-radius: 6px;
             display: inline-block;
         }
         .download-btn:hover {
+            background: #1f2933;
         }
         /* Loading spinner */
         }
         .spinner {
+            border: 4px solid #e5e7eb;
+            border-top: 4px solid #1f2933;
             border-radius: 50%;
             width: 40px;
             height: 40px;
             top: 0;
             width: 100%;
             height: 100%;
+            background: rgba(15, 23, 42, 0.5);
             align-items: center;
             justify-content: center;
         }
         .modal-btn {
             padding: 10px 24px;
+            background: #1f2933;
+            color: #f9fafb;
             border: none;
             border-radius: 6px;
             cursor: pointer;
         }
         .modal-btn:hover {
+            background: #111827;
         }
     </style>
 </head>
 <body>
     <div class="container">
+        <h1>Perception System</h1>
         <div class="main-card">
             <!-- Mode Selection -->
                 <div class="mode-selector">
                     <label class="mode-card selected">
                         <input type="radio" name="mode" value="object_detection" checked>
                         <div class="mode-title">Object Detection</div>
                     </label>
                     <label class="mode-card">
                         <input type="radio" name="mode" value="segmentation">
                         <div class="mode-title">Segmentation</div>
                     </label>
                     <label class="mode-card disabled">
                         <input type="radio" name="mode" value="drone_detection">
                         <div class="mode-title">Drone Detection</div>
                         <span class="mode-badge">COMING SOON</span>
                     </label>
                     <label>3. Upload Video</label>
                     <div class="file-input-wrapper">
                         <label class="file-input-label" id="fileLabel" for="videoFile">
+                            Click to select video file (MP4)
                         </label>
                         <input type="file" id="videoFile" accept="video/*">
                     </div>
             <!-- Process Button -->
             <div class="section">
                 <button class="btn btn-primary" id="processBtn" disabled>
+                    Process Video
                 </button>
             </div>
                         <div class="video-card-body">
                             <video id="processedVideo" controls autoplay loop></video>
                             <a id="downloadBtn" class="download-btn" download="processed.mp4">
+                                Download Processed Video
                             </a>
                         </div>
                     </div>

models/detectors/grounding_dino.py CHANGED Viewed

@@ -37,13 +37,29 @@ class GroundingDinoDetector(ObjectDetector):
         with torch.no_grad():
             outputs = self.model(**inputs)
         target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
-        processed = self.processor.post_process_grounded_object_detection(
-            outputs,
-            inputs["input_ids"],
-            box_threshold=self.box_threshold,
-            text_threshold=self.text_threshold,
-            target_sizes=target_sizes,
-        )[0]
         boxes = processed["boxes"].cpu().numpy()
         scores = processed["scores"].cpu().tolist()
         label_names = list(processed.get("labels") or [])

         with torch.no_grad():
             outputs = self.model(**inputs)
         target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
+        try:
+            processed = self.processor.post_process_grounded_object_detection(
+                outputs,
+                inputs["input_ids"],
+                box_threshold=self.box_threshold,
+                text_threshold=self.text_threshold,
+                target_sizes=target_sizes,
+            )[0]
+        except TypeError:
+            try:
+                processed = self.processor.post_process_grounded_object_detection(
+                    outputs,
+                    inputs["input_ids"],
+                    threshold=self.box_threshold,
+                    text_threshold=self.text_threshold,
+                    target_sizes=target_sizes,
+                )[0]
+            except TypeError:
+                processed = self.processor.post_process_grounded_object_detection(
+                    outputs,
+                    inputs["input_ids"],
+                    target_sizes=target_sizes,
+                )[0]
         boxes = processed["boxes"].cpu().numpy()
         scores = processed["scores"].cpu().tolist()
         label_names = list(processed.get("labels") or [])