Spaces:

Dorn4449
/

yolodo

Runtime error

App Files Community

Dorn4449 committed on Oct 12, 2024

Commit

8d6fec1

verified ·

1 Parent(s): ee1b855

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -7

app.py CHANGED Viewed

@@ -33,8 +33,8 @@ def detect_and_draw(frame):
         cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
     return frame
-# Function to generate detailed captions using a prompt
-def generate_detailed_caption(image_frame):
     transform = transforms.Compose([
         transforms.ToTensor()
     ])
@@ -46,10 +46,9 @@ def generate_detailed_caption(image_frame):
     captions = []
     for box, label, score in zip(outputs[0]['boxes'], outputs[0]['labels'], outputs[0]['scores']):
         if score > confidence_threshold:
-            captions.append(f"Detected a {label.item()} with {score.item():.2f} confidence.")
-    prompt = "Describe the scene with details about objects and their confidence levels: "
-    return prompt + " ".join(captions)
 # Define the stream URL for live video
 stream_url = "https://edge01.london.nginx.hdontap.com/hosb5/ng_showcase-coke_bottle-street_fixed.stream/chunklist_w464099566.m3u8"
@@ -67,7 +66,7 @@ def process_stream():
         frame_count += 1
         if frame_count % 10 == 0:  # Process every 10th frame for efficiency
             result = detect_and_draw(frame)
-            caption = generate_detailed_caption(frame)
             result_rgb = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
             print(f"Caption: {caption}")
             yield result_rgb
@@ -80,7 +79,7 @@ def predict_image(image):
     object_count = len(results[0].boxes)
     # Generate caption for the uploaded image
-    caption = generate_detailed_caption(image)
     return annotated_image, f"Objects detected: {object_count}, Caption: {caption}"

         cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
     return frame
+# Function to generate captions using Faster R-CNN
+def generate_caption(image_frame):
     transform = transforms.Compose([
         transforms.ToTensor()
     ])
     captions = []
     for box, label, score in zip(outputs[0]['boxes'], outputs[0]['labels'], outputs[0]['scores']):
         if score > confidence_threshold:
+            captions.append(f"Object {label} detected with confidence {score:.2f}")
+    return " ".join(captions)
 # Define the stream URL for live video
 stream_url = "https://edge01.london.nginx.hdontap.com/hosb5/ng_showcase-coke_bottle-street_fixed.stream/chunklist_w464099566.m3u8"
         frame_count += 1
         if frame_count % 10 == 0:  # Process every 10th frame for efficiency
             result = detect_and_draw(frame)
+            caption = generate_caption(frame)
             result_rgb = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
             print(f"Caption: {caption}")
             yield result_rgb
     object_count = len(results[0].boxes)
     # Generate caption for the uploaded image
+    caption = generate_caption(image)
     return annotated_image, f"Objects detected: {object_count}, Caption: {caption}"