Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| from ultralytics import YOLO | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageDraw | |
| import os | |
# Artifact locations, named so they can be repointed in one place.
MODEL_PATH = "best.pt"  # YOLO weights file
DETECTIONS_PATH = "detections.parquet"  # detection index read by retrieve_clips

# Video path (you may need to download this at runtime or use URL)
VIDEO_PATH = "data/videoplayback.mp4"
VIDEO_URL = "YOUR_VIDEO_URL_HERE"  # Replace with actual video URL or YouTube link

# Load model and detection index once at import time; the handlers in this
# file read `model` and `detection_df` as module-level globals.
print("Loading model and detection index...")
model = YOLO(MODEL_PATH)
detection_df = pd.read_parquet(DETECTIONS_PATH)
def download_video_if_needed():
    """Report whether the source video exists at VIDEO_PATH.

    Despite the name, no download is attempted here: when the file is
    missing, two guidance messages are printed and False is returned.

    Returns:
        True if VIDEO_PATH exists on disk, otherwise False.
    """
    video_present = os.path.exists(VIDEO_PATH)
    if video_present:
        return True

    # You can add yt-dlp here to download from YouTube
    print(f"Video not found at {VIDEO_PATH}")
    print("Please upload video or provide YouTube URL")
    return False
def merge_intervals(timestamps, gap_threshold=3.0):
    """Merge nearby timestamps into contiguous clips.

    Timestamps are de-duplicated and sorted; consecutive values whose
    difference is at most ``gap_threshold`` are grouped into one clip.

    Args:
        timestamps: Any iterable of numeric timestamps (list, tuple,
            generator, NumPy array, ...), or None. Order and duplicates
            do not matter.
        gap_threshold: Largest gap between neighbouring timestamps that still
            counts as the same clip; a strictly larger gap starts a new clip.

    Returns:
        A list of ``(start, end)`` tuples in ascending order. A clip built
        from a single timestamp has ``start == end``. Empty input yields
        an empty list.
    """
    # Normalise first and test emptiness on the resulting list: truth-testing
    # the raw argument misses an exhausted iterator (always truthy) and raises
    # for array types whose truth value is ambiguous.
    ordered = sorted(set(timestamps)) if timestamps is not None else []
    if not ordered:
        return []

    clips = []
    start = prev = ordered[0]
    for current in ordered[1:]:
        if current - prev > gap_threshold:
            # Gap too large: close the running clip and open a new one.
            clips.append((start, prev))
            start = current
        prev = current
    clips.append((start, prev))
    return clips
def retrieve_clips(query_image):
    """Detect car parts in a query image and look up matching video clips.

    Runs the module-level ``model`` on the image, outlines every detection at
    or above the confidence threshold, then searches ``detection_df`` once per
    detected class and merges the matching timestamps into clips with
    ``merge_intervals``.

    Args:
        query_image: A PIL image or NumPy array (arrays are converted to PIL),
            or None when nothing was uploaded.

    Returns:
        A 3-tuple ``(info_text, image, clips_table)``:

        * ``info_text``: newline-joined per-class summary, or an explanatory
          message when there is nothing to report.
        * ``image``: None when no image was given, the unmodified query when
          the model returns no boxes, otherwise a copy with boxes drawn.
        * ``clips_table``: DataFrame with ``component``, ``start``, ``end``
          and ``duration`` columns, or None when no clips were found.
    """
    if query_image is None:
        return "Please upload an image", None, None

    # Convert to PIL if needed
    if isinstance(query_image, np.ndarray):
        query_image = Image.fromarray(query_image)

    min_conf = 0.5  # applied to both query detections and indexed detections
    gap_threshold = 3.0
    max_clips_per_component = 3

    # Detect components in query image
    boxes = model(query_image, verbose=False)[0].boxes
    if len(boxes) == 0:
        return "No car parts detected in the image", query_image, None

    # Draw every confident box, but remember each class only once (with its
    # best confidence) so repeated parts such as several wheels do not trigger
    # repeated index searches and duplicate table rows.
    query_draw = query_image.copy()
    draw = ImageDraw.Draw(query_draw)
    best_conf = {}
    for cls_value, conf_value, xyxy in zip(boxes.cls, boxes.conf, boxes.xyxy):
        conf = float(conf_value)
        if conf < min_conf:
            continue
        cls_name = model.names[int(cls_value)]
        x1, y1, x2, y2 = xyxy.tolist()
        draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
        # Clamp so the label stays on the canvas for boxes near the top edge.
        draw.text((x1, max(y1 - 20, 0)), f"{cls_name} ({conf:.2f})", fill='red')
        best_conf[cls_name] = max(conf, best_conf.get(cls_name, 0.0))

    if not best_conf:
        return f"No car parts detected with confidence >= {min_conf}", query_draw, None

    retrieval_info = []
    all_clips = []
    # NOTE(review): the status glyphs below were mojibake in the source;
    # confirm these are the intended symbols.
    for cls_name, conf in best_conf.items():
        # Search detection index
        matches = detection_df[
            (detection_df['class_label'] == cls_name)
            & (detection_df['confidence_score'] >= min_conf)
        ]
        if matches.empty:
            retrieval_info.append(f"❌ {cls_name}: No matches found")
            continue

        # Merge into clips
        clips = merge_intervals(
            matches['timestamp'].tolist(), gap_threshold=gap_threshold
        )
        retrieval_info.append(
            f"✅ {cls_name} (conf: {conf:.2%}): {len(clips)} clips, {len(matches)} frames"
        )
        # Limit to first few clips per component
        all_clips.extend(
            {
                'component': cls_name,
                'start': start,
                'end': end,
                'duration': end - start,
            }
            for start, end in clips[:max_clips_per_component]
        )

    info_text = "\n".join(retrieval_info)
    clips_table = pd.DataFrame(all_clips) if all_clips else None
    return info_text, query_draw, clips_table
def extract_frame(component, start_time):
    """Extract a frame from the video at the given timestamp.

    Args:
        component: Component label for the clip; accepted for interface
            compatibility and not used when decoding.
        start_time: Position in seconds from the start of the video.
            Negative values are treated as 0.

    Returns:
        The frame as an RGB PIL image, or None if the video file is missing,
        cannot be opened, or no frame can be read at that position.
    """
    if not download_video_if_needed():
        return None

    cap = cv2.VideoCapture(VIDEO_PATH)
    try:
        if not cap.isOpened():
            return None

        seconds = max(float(start_time), 0.0)
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps > 0:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(seconds * fps))
        else:
            # Frame rate unavailable: seeking by frame index would always land
            # on frame 0, so seek by time instead.
            cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000.0)
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if seeking or decoding raises.
        cap.release()

    if not ret:
        return None
    # OpenCV decodes to BGR; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# Markdown shown in the UI. Kept at column 0 so no leading indentation can
# turn the text into a Markdown code block.
# NOTE(review): the emoji were mojibake in the source; confirm the intended
# glyphs.
INTRO_MARKDOWN = """
# 🚗 Car Parts Image-to-Video Retrieval System

Upload an image of a car part, and this system will find matching video clips!

**How it works:**

1. Upload a car image (doors, wheels, headlights, etc.)
2. YOLOv26s detects all car parts in your image
3. System retrieves matching video clips from the indexed video
4. View timestamps and sample frames

**Supported Components:** Doors, wheels, headlights, mirrors, bumpers, and more!
"""

DETAILS_MARKDOWN = """
---

### 📊 Technical Details

- **Model:** YOLOv26s fine-tuned on car parts dataset
- **Video Sampling:** Every 5th frame
- **Matching:** Semantic component matching with confidence ≥ 0.5
- **Clip Formation:** 3.0s gap threshold for temporal merging

**Assignment 2 - CS-UY 4613 Artificial Intelligence**
Hanze (James) Qiu | Spring 2026
"""

# Example images (optional - add paths to example images)
EXAMPLE_IMAGES = [
    # Add paths to example images if you have them
    # ["examples/car1.jpg"],
    # ["examples/car2.jpg"],
]

# Create Gradio interface
with gr.Blocks(title="Image-to-Video Retrieval Demo") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Query Image")
            search_btn = gr.Button("🔍 Search Video", variant="primary")
        with gr.Column(scale=1):
            output_image = gr.Image(type="pil", label="Detected Components")
            output_text = gr.Textbox(label="Retrieval Results", lines=8)

    with gr.Row():
        output_table = gr.Dataframe(
            label="Matching Video Clips",
            headers=["component", "start", "end", "duration"],
        )

    gr.Markdown(DETAILS_MARKDOWN)

    # Connect button: retrieve_clips returns (text, image, table) in this order.
    search_btn.click(
        fn=retrieve_clips,
        inputs=[input_image],
        outputs=[output_text, output_image, output_table],
    )

    # Only build the examples widget when there is something to show, so the
    # page does not carry an empty "Example Query Images" section.
    if EXAMPLE_IMAGES:
        gr.Examples(
            examples=EXAMPLE_IMAGES,
            inputs=input_image,
            label="Example Query Images",
        )
if __name__ == "__main__":
    # Script entry point: serve the demo on 0.0.0.0:7860 with share=False.
    print("Starting Gradio app...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)