"""Streamlit app that turns a racing video clip into a thumbnail image.

The pipeline asks project-local model helpers for a winning timestamp, a
bounding box, a catchphrase and a bet label, then composes them onto the
extracted frame and offers the result for download.
"""

import os
import re
from typing import Optional, Sequence, Tuple

import numpy as np
import streamlit as st

from models.bet_extractor import extract_bet_amount
from models.bounding_box_extractor import extract_bounding_box
from models.catchphrase_extractor import extract_catchphrase
from models.frame_extractor import extract_frame_timestamp
from utils import (
    annotate_image_with_phrase_and_label,
    center_bbox_in_circle,
    extract_frame,
    overlay_image_bottom_right,
    overlay_image_top_left,
)

BBox = Tuple[int, int, int, int]

# Patterns used to pull structured values out of the free-text model responses.
_BBOX_PATTERN = re.compile(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)')
_TIMESTAMP_PATTERN = re.compile(
    r'\*\*Winning Timestamp:\*\*\s*(\d{2}:\d{2}:\d{2}\.\d{3})'
)
_DESCRIPTION_PATTERN = re.compile(
    r'\*\*Short description of the winner:\*\*\s*(.*?)\s*(?=\*\*|$)', re.DOTALL
)
_PHRASE_PATTERN = re.compile(r'"(.*?)"')
_BET_LABEL_PATTERN = re.compile(r'\*\*Exact Label Text:\*\* "([^"]+)"')


def compute_iou(boxA: Sequence[float], boxB: Sequence[float]) -> float:
    """Return the intersection-over-union of two boxes.

    Each box is indexed as ``[0], [1], [2], [3]``; indices 0/1 are treated as
    the minimum corner and 2/3 as the maximum corner along the two axes.

    Returns:
        ``0.0`` when the boxes do not overlap (or only touch), otherwise the
        overlap area divided by the area of the union.
    """
    x_left = max(boxA[0], boxB[0])
    y_top = max(boxA[1], boxB[1])
    x_right = min(boxA[2], boxB[2])
    y_bottom = min(boxA[3], boxB[3])

    inter_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
    if inter_area == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return inter_area / float(area_a + area_b - inter_area)


def extract_voted_bounding_box(
    image_path,
    description,
    num_attempts=5,
    iou_threshold=0.5,
    min_votes=3,
) -> Optional[BBox]:
    """Query the bounding-box extractor several times and return a consensus box.

    Args:
        image_path: Passed through to ``extract_bounding_box``.
        description: Passed through to ``extract_bounding_box``.
        num_attempts: How many times the extractor is called.
        iou_threshold: Two boxes are grouped together when their IoU is
            strictly greater than this value.
        min_votes: Minimum number of boxes a group needs to count as a
            consensus.

    Returns:
        ``None`` when no response contained a ``(a, b, c, d)`` integer tuple.
        Otherwise the element-wise mean (truncated to int) of the largest
        qualifying group, or the first parsed box when no group reaches
        ``min_votes``. The result is always a tuple of plain Python ints.
    """
    bboxes = []

    # Extract multiple bounding boxes
    for _ in range(num_attempts):
        response = extract_bounding_box(image_path, description)
        match = _BBOX_PATTERN.search(response)
        if match:
            bboxes.append(tuple(map(int, match.groups())))

    if not bboxes:
        return None

    # Group boxes by IoU: each not-yet-used box seeds a group and collects
    # every later unused box that overlaps the seed enough.
    groups = []
    used = set()
    for i, seed in enumerate(bboxes):
        if i in used:
            continue
        group = [seed]
        used.add(i)
        for j in range(i + 1, len(bboxes)):
            if j in used:
                continue
            if compute_iou(seed, bboxes[j]) > iou_threshold:
                group.append(bboxes[j])
                used.add(j)
        if len(group) >= min_votes:
            groups.append(group)

    if not groups:
        return bboxes[0]  # Fallback to the first bounding box if no groups found

    # Use the group with the most members
    best_group = max(groups, key=len)
    voted_bbox = np.mean(np.array(best_group), axis=0).astype(int)
    # .tolist() converts NumPy scalars to Python ints so both return paths
    # yield the same element type.
    return tuple(voted_bbox.tolist())


# Set your API key
os.environ["GOOGLE_API_KEY"] = (
    os.getenv("GOOGLE_API_KEY") or st.secrets["GOOGLE_API_KEY"]
)

# NOTE(review): this creates "src/tmp" but the frame is written under "/tmp"
# below -- confirm which directory is intended.
os.makedirs("src/tmp", exist_ok=True)

# Paths
video_path = "src/race_vid.mp4"
image_path = "/tmp/extracted_frame.png"

# Streamlit UI
st.set_page_config(page_title="Autonomous Thumbnail Maker", layout="centered")
st.title("🏁 Autonomous Thumbnail Maker")
st.markdown("Generate a social-media-ready racing thumbnail from a video clip using AI.")

st.video(video_path)

if st.button("Generate Thumbnail"):
    with st.spinner("Running AI pipeline..."):
        # FRAME TIMESTAMP
        response, _uploaded_obj = extract_frame_timestamp(video_path)

        timestamp_match = _TIMESTAMP_PATTERN.search(response)
        timestamp = timestamp_match.group(1) if timestamp_match else None

        description_match = _DESCRIPTION_PATTERN.search(response)
        description = (
            description_match.group(1).strip() if description_match else None
        )

        # Without these two values the later steps would receive None;
        # stop with a clear message instead.
        if timestamp is None:
            st.error("Could not find a winning timestamp in the model response.")
            st.stop()
        if description is None:
            st.error("Could not find a winner description in the model response.")
            st.stop()

        extract_frame(video_path, timestamp, image_path)

        # BOUNDING BOX
        bbox = extract_voted_bounding_box(image_path, description)
        if bbox is None:
            st.error("Could not determine a bounding box for the winner.")
            st.stop()

        center_bbox_in_circle(image_path, bbox, image_path)
        overlay_image_bottom_right(image_path, "src/assets/corner.png", image_path)
        overlay_image_top_left(image_path, "src/assets/GetOn_Logo.png", image_path)

        # PHRASES
        catchphrases = extract_catchphrase(video_path)
        phrases = _PHRASE_PATTERN.findall(catchphrases)

        # BET LABEL
        bet_amounts = extract_bet_amount(video_path)
        labels = _BET_LABEL_PATTERN.findall(bet_amounts)

        phrase = phrases[0] if phrases else "No catchphrase found"
        label = labels[0] if labels else "No bet label found"

        annotate_image_with_phrase_and_label(image_path, image_path, phrase, label)

    st.success("Thumbnail generated!")
    st.image(image_path, caption="Generated Thumbnail", use_container_width=True)

    with open(image_path, "rb") as f:
        st.download_button(
            "📥 Download Thumbnail", f, file_name="thumbnail.png", mime="image/png"
        )