Update app.py
Browse files
app.py
CHANGED
|
@@ -33,8 +33,8 @@ def detect_and_draw(frame):
|
|
| 33 |
cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 34 |
return frame
|
| 35 |
|
| 36 |
-
# Function to generate
|
| 37 |
-
def generate_detailed_caption(image_frame):
|
| 38 |
transform = transforms.Compose([
|
| 39 |
transforms.ToTensor()
|
| 40 |
])
|
|
@@ -46,10 +46,9 @@ def generate_detailed_caption(image_frame):
|
|
| 46 |
captions = []
|
| 47 |
for box, label, score in zip(outputs[0]['boxes'], outputs[0]['labels'], outputs[0]['scores']):
|
| 48 |
if score > confidence_threshold:
|
| 49 |
-
captions.append(f"
|
| 50 |
|
| 51 |
-
|
| 52 |
-
return prompt + " ".join(captions)
|
| 53 |
|
| 54 |
# Define the stream URL for live video
|
| 55 |
stream_url = "https://edge01.london.nginx.hdontap.com/hosb5/ng_showcase-coke_bottle-street_fixed.stream/chunklist_w464099566.m3u8"
|
|
@@ -67,7 +66,7 @@ def process_stream():
|
|
| 67 |
frame_count += 1
|
| 68 |
if frame_count % 10 == 0: # Process every 10th frame for efficiency
|
| 69 |
result = detect_and_draw(frame)
|
| 70 |
-
caption = generate_detailed_caption(frame)
|
| 71 |
result_rgb = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
|
| 72 |
print(f"Caption: {caption}")
|
| 73 |
yield result_rgb
|
|
@@ -80,7 +79,7 @@ def predict_image(image):
|
|
| 80 |
object_count = len(results[0].boxes)
|
| 81 |
|
| 82 |
# Generate caption for the uploaded image
|
| 83 |
-
caption = generate_detailed_caption(image)
|
| 84 |
|
| 85 |
return annotated_image, f"Objects detected: {object_count}, Caption: {caption}"
|
| 86 |
|
|
|
|
| 33 |
cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 34 |
return frame
|
| 35 |
|
| 36 |
+
# Function to generate captions using Faster R-CNN
|
| 37 |
+
def generate_caption(image_frame):
|
| 38 |
transform = transforms.Compose([
|
| 39 |
transforms.ToTensor()
|
| 40 |
])
|
|
|
|
| 46 |
captions = []
|
| 47 |
for box, label, score in zip(outputs[0]['boxes'], outputs[0]['labels'], outputs[0]['scores']):
|
| 48 |
if score > confidence_threshold:
|
| 49 |
+
captions.append(f"Object {label} detected with confidence {score:.2f}")
|
| 50 |
|
| 51 |
+
return " ".join(captions)
|
|
|
|
| 52 |
|
| 53 |
# Define the stream URL for live video
|
| 54 |
stream_url = "https://edge01.london.nginx.hdontap.com/hosb5/ng_showcase-coke_bottle-street_fixed.stream/chunklist_w464099566.m3u8"
|
|
|
|
| 66 |
frame_count += 1
|
| 67 |
if frame_count % 10 == 0: # Process every 10th frame for efficiency
|
| 68 |
result = detect_and_draw(frame)
|
| 69 |
+
caption = generate_caption(frame)
|
| 70 |
result_rgb = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
|
| 71 |
print(f"Caption: {caption}")
|
| 72 |
yield result_rgb
|
|
|
|
| 79 |
object_count = len(results[0].boxes)
|
| 80 |
|
| 81 |
# Generate caption for the uploaded image
|
| 82 |
+
caption = generate_caption(image)
|
| 83 |
|
| 84 |
return annotated_image, f"Objects detected: {object_count}, Caption: {caption}"
|
| 85 |
|