RacingDemo / src /app.py
Vlad Bastina
merge
5d65cd5
import streamlit as st
import os
import re
from models.frame_extractor import extract_frame_timestamp
from models.bounding_box_extractor import extract_bounding_box
from models.catchphrase_extractor import extract_catchphrase
from models.bet_extractor import extract_bet_amount
from utils import (
extract_frame,
center_bbox_in_circle,
overlay_image_bottom_right,
overlay_image_top_left,
annotate_image_with_phrase_and_label
)
import numpy as np
def compute_iou(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are indexable as ``(x1, y1, x2, y2)``, where indices 0/1 are
    the lower corner and indices 2/3 the upper corner on each axis.

    Returns:
        ``0.0`` when the boxes do not overlap (boxes that merely touch have
        an empty intersection), otherwise the intersection area divided by
        the area of the union, as a float.
    """
    # Corners of the overlapping rectangle (if any).
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # Clamp each side length at zero so disjoint boxes yield an empty overlap.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h
    if overlap == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    # Union = sum of both areas minus the part counted twice.
    union = float(area_a + area_b - overlap)
    return overlap / union
def extract_voted_bounding_box(image_path, description, num_attempts=5, iou_threshold=0.5):
    """Query the bounding-box extractor repeatedly and return a consensus box.

    ``extract_bounding_box(image_path, description)`` is called
    ``num_attempts`` times. Each response that contains a
    ``(x1, y1, x2, y2)`` tuple of unsigned integers contributes one
    candidate box; responses without such a tuple are ignored.

    Candidates are grouped greedily: every box that is not yet assigned
    seeds a group and absorbs each later unassigned box whose IoU with the
    seed is strictly greater than ``iou_threshold``. Only groups with at
    least three members count as agreement.

    Args:
        image_path: Forwarded unchanged to ``extract_bounding_box``.
        description: Forwarded unchanged to ``extract_bounding_box``.
        num_attempts: Number of extractor calls to make.
        iou_threshold: Minimum (exclusive) IoU with a group's seed box for
            a candidate to join that group.

    Returns:
        A tuple of four Python ints:
        * the element-wise mean (truncated to int) of the largest agreeing
          group, or
        * the first parsed candidate when no group has three members.
        ``None`` when no response contained a parsable box.
    """
    # Compiled once; the same pattern is applied to every response.
    bbox_pattern = re.compile(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)')

    # Extract multiple bounding boxes
    bboxes = []
    for _ in range(num_attempts):
        response = extract_bounding_box(image_path, description)
        match = bbox_pattern.search(response)
        if match:
            bboxes.append(tuple(map(int, match.groups())))
    if not bboxes:
        return None

    # Group boxes by IoU
    groups = []
    used = set()
    for i, seed in enumerate(bboxes):
        if i in used:
            continue
        used.add(i)
        group = [seed]
        for j in range(i + 1, len(bboxes)):
            if j not in used and compute_iou(seed, bboxes[j]) > iou_threshold:
                group.append(bboxes[j])
                used.add(j)
        # Fewer than three agreeing boxes is not treated as a consensus.
        if len(group) >= 3:
            groups.append(group)
    if not groups:
        return bboxes[0]  # Fallback to the first bounding box if no groups found

    # Use the group with the most members (first one wins on ties).
    best_group = max(groups, key=len)
    voted_bbox = np.mean(np.array(best_group), axis=0).astype(int)
    # Convert NumPy scalars to plain ints so both return paths yield the
    # same element type.
    return tuple(int(coord) for coord in voted_bbox)
# Set your API key: a value already present in the environment wins,
# otherwise fall back to the Streamlit secrets store.
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY") or st.secrets["GOOGLE_API_KEY"]
os.makedirs("src/tmp", exist_ok=True)

# Paths
video_path = "src/race_vid.mp4"
image_path = "/tmp/extracted_frame.png"
# image_path is written and then rewritten in place by each pipeline stage;
# make sure its directory exists (it is not the directory created above).
os.makedirs(os.path.dirname(image_path), exist_ok=True)

# Streamlit UI
st.set_page_config(page_title="Autonomous Thumbnail Maker", layout="centered")
st.title("🏁 Autonomous Thumbnail Maker")
st.markdown("Generate a social-media-ready racing thumbnail from a video clip using AI.")
st.video(video_path)

if st.button("Generate Thumbnail"):
    with st.spinner("Running AI pipeline..."):
        # FRAME TIMESTAMP
        # The response is expected to carry two markdown-bold fields:
        # "**Winning Timestamp:**" (HH:MM:SS.mmm) and
        # "**Short description of the winner:**" (free text up to the next
        # bold marker or the end of the response).
        response, uploaded_obj = extract_frame_timestamp(video_path)
        timestamp_match = re.search(r'\*\*Winning Timestamp:\*\*\s*(\d{2}:\d{2}:\d{2}\.\d{3})', response)
        timestamp = timestamp_match.group(1) if timestamp_match else None
        description_match = re.search(r'\*\*Short description of the winner:\*\*\s*(.*?)\s*(?=\*\*|$)', response, re.DOTALL)
        description = description_match.group(1).strip() if description_match else None

        # Without both fields the remaining stages have nothing to work
        # with; stop with a readable message instead of handing None to
        # the frame/bounding-box helpers.
        # NOTE(review): assumes the helpers do not accept None - confirm.
        if timestamp is None or description is None:
            st.error("Could not find the winning timestamp and winner description in the model response. Please try again.")
            st.stop()

        extract_frame(video_path, timestamp, image_path)

        # BOUNDING BOX
        bbox = extract_voted_bounding_box(image_path, description)
        # None means no attempt produced a parsable bounding box.
        if bbox is None:
            st.error("Could not locate the winner in the extracted frame. Please try again.")
            st.stop()

        center_bbox_in_circle(image_path, bbox, image_path)
        overlay_image_bottom_right(image_path, "src/assets/corner.png", image_path)
        overlay_image_top_left(image_path, "src/assets/GetOn_Logo.png", image_path)

        # PHRASES: every double-quoted span in the response is a candidate.
        catchphrases = extract_catchphrase(video_path)
        phrases = re.findall(r'"(.*?)"', catchphrases)

        # BET LABEL
        bet_amounts = extract_bet_amount(video_path)
        labels = re.findall(r'\*\*Exact Label Text:\*\* "([^"]+)"', bet_amounts)

        # Use the first hit of each, or a placeholder when nothing matched.
        phrase = phrases[0] if phrases else "No catchphrase found"
        label = labels[0] if labels else "No bet label found"
        annotate_image_with_phrase_and_label(image_path, image_path, phrase, label)

    st.success("Thumbnail generated!")
    st.image(image_path, caption="Generated Thumbnail", use_container_width=True)
    with open(image_path, "rb") as f:
        st.download_button("📥 Download Thumbnail", f, file_name="thumbnail.png", mime="image/png")