"""
ShortSmith v2 - Gradio Application

Hugging Face Space interface for video highlight extraction.

Features:
- Multi-modal analysis (visual + audio + motion)
- Domain-optimized presets
- Person-specific filtering (optional)
- Scene-aware clip cutting
"""

import os
import sys
import tempfile
import shutil
from pathlib import Path
import time
import traceback

import gradio as gr

# Add project root to path so `utils` / `pipeline` resolve regardless of CWD.
sys.path.insert(0, str(Path(__file__).parent))

# Initialize logging; fall back to stdlib logging if the project logger is unavailable.
try:
    from utils.logger import setup_logging, get_logger

    setup_logging(log_level="INFO", log_to_console=True)
    logger = get_logger("app")
except Exception:
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("app")


def process_video(
    video_file,
    domain,
    num_clips,
    clip_duration,
    reference_image,
    custom_prompt,
    progress=gr.Progress(),
):
    """
    Main video processing function.

    Args:
        video_file: Uploaded video file path
        domain: Content domain for scoring weights (UI label, e.g. "Sports")
        num_clips: Number of clips to extract
        clip_duration: Duration of each clip in seconds
        reference_image: Optional reference image path for person filtering
        custom_prompt: Optional custom instructions
        progress: Gradio progress tracker

    Returns:
        Tuple of (status_message, clip1, clip2, clip3, log_text) where the
        clip slots are file paths or None.
    """
    if video_file is None:
        return "Please upload a video first.", None, None, None, ""

    log_messages = []

    def log(msg):
        # Collect timestamped lines for the UI log panel and mirror to the app logger.
        log_messages.append(f"[{time.strftime('%H:%M:%S')}] {msg}")
        logger.info(msg)

    try:
        video_path = Path(video_file)
        log(f"Processing video: {video_path.name}")
        progress(0.05, desc="Validating video...")

        # Import pipeline components lazily so the UI can come up before heavy
        # model code is loaded.
        from utils.helpers import validate_video_file, validate_image_file, format_duration
        from pipeline.orchestrator import PipelineOrchestrator

        # Validate video
        validation = validate_video_file(video_file)
        if not validation.is_valid:
            return f"Error: {validation.error_message}", None, None, None, "\n".join(log_messages)
        log(f"Video size: {validation.file_size / (1024*1024):.1f} MB")

        # Validate reference image if provided; an invalid image is a warning,
        # not a fatal error — processing continues without person filtering.
        ref_path = None
        if reference_image is not None:
            ref_validation = validate_image_file(reference_image)
            if ref_validation.is_valid:
                ref_path = reference_image
                log(f"Reference image: {Path(reference_image).name}")
            else:
                log(f"Warning: Invalid reference image - {ref_validation.error_message}")

        # Map UI domain label to internal value
        domain_map = {
            "Sports": "sports",
            "Vlogs": "vlogs",
            "Music Videos": "music",
            "Podcasts": "podcasts",
            "Gaming": "gaming",
            "General": "general",
        }
        domain_value = domain_map.get(domain, "general")
        log(f"Domain: {domain_value}")

        # Create output directory. Intentionally NOT deleted here: Gradio serves
        # the returned clip files from this directory after we return.
        output_dir = Path(tempfile.mkdtemp(prefix="shortsmith_output_"))
        log(f"Output directory: {output_dir}")

        # Progress callback to update the UI while the pipeline runs
        def on_progress(pipeline_progress):
            stage = pipeline_progress.stage.value
            pct = pipeline_progress.progress
            msg = pipeline_progress.message
            log(f"[{stage}] {msg}")
            # Map pipeline progress (0-1) into our reserved range (0.1-0.9)
            mapped_progress = 0.1 + (pct * 0.8)
            progress(mapped_progress, desc=f"{stage}: {msg}")

        # Initialize pipeline
        progress(0.1, desc="Initializing AI models...")
        log("Initializing pipeline...")
        pipeline = PipelineOrchestrator(progress_callback=on_progress)

        try:
            # Process video
            progress(0.15, desc="Starting analysis...")
            log(f"Processing: {int(num_clips)} clips @ {int(clip_duration)}s each")
            result = pipeline.process(
                video_path=video_path,
                num_clips=int(num_clips),
                clip_duration=float(clip_duration),
                domain=domain_value,
                reference_image=ref_path,
                # Normalize whitespace-only prompts to None so downstream code
                # sees "no prompt" rather than an empty string.
                custom_prompt=(custom_prompt.strip() or None) if custom_prompt else None,
            )
            progress(0.9, desc="Extracting clips...")

            # Handle result
            if result.success:
                log(f"Processing complete in {result.processing_time:.1f}s")
                clip_paths = []
                for i, clip in enumerate(result.clips):
                    if clip.clip_path.exists():
                        # Copy out of the pipeline workspace so cleanup() cannot
                        # delete the files Gradio is about to serve.
                        output_path = output_dir / f"highlight_{i+1}.mp4"
                        shutil.copy2(clip.clip_path, output_path)
                        clip_paths.append(str(output_path))
                        log(
                            f"Clip {i+1}: {format_duration(clip.start_time)} - "
                            f"{format_duration(clip.end_time)} (score: {clip.hype_score:.2f})"
                        )

                status = (
                    f"Successfully extracted {len(clip_paths)} highlight clips!\n"
                    f"Processing time: {result.processing_time:.1f}s"
                )
                progress(1.0, desc="Done!")

                # Return up to 3 clips — the UI exposes exactly three video slots
                clip1 = clip_paths[0] if len(clip_paths) > 0 else None
                clip2 = clip_paths[1] if len(clip_paths) > 1 else None
                clip3 = clip_paths[2] if len(clip_paths) > 2 else None
                return status, clip1, clip2, clip3, "\n".join(log_messages)
            else:
                log(f"Processing failed: {result.error_message}")
                return f"Error: {result.error_message}", None, None, None, "\n".join(log_messages)
        finally:
            # Always release pipeline resources, including when process() raises.
            # (Previously cleanup() ran only on the explicit return paths, so an
            # unexpected exception leaked loaded models / temp files.)
            pipeline.cleanup()

    except Exception as e:
        error_msg = f"Unexpected error: {str(e)}"
        log(error_msg)
        log(traceback.format_exc())
        logger.exception("Pipeline error")
        return error_msg, None, None, None, "\n".join(log_messages)


# Build Gradio interface
with gr.Blocks(
    title="ShortSmith v2",
    theme=gr.themes.Soft(),
    css="""
    .container { max-width: 1200px; margin: auto; }
    .output-video { min-height: 200px; }
    """
) as demo:
    gr.Markdown("""
    # 🎬 ShortSmith v2
    ### AI-Powered Video Highlight Extractor

    Upload a video and automatically extract the most engaging highlight clips using AI analysis.
    """)

    with gr.Row():
        # Left column - Inputs
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")

            video_input = gr.Video(
                label="Upload Video",
                sources=["upload"],
            )

            with gr.Accordion("⚙️ Settings", open=True):
                domain_dropdown = gr.Dropdown(
                    choices=["Sports", "Vlogs", "Music Videos", "Podcasts", "Gaming", "General"],
                    value="General",
                    label="Content Domain",
                    info="Select the type of content for optimized scoring"
                )

                with gr.Row():
                    num_clips_slider = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=3,
                        step=1,
                        label="Number of Clips",
                        info="How many highlight clips to extract"
                    )
                    duration_slider = gr.Slider(
                        minimum=5,
                        maximum=30,
                        value=15,
                        step=1,
                        label="Clip Duration (seconds)",
                        info="Target duration for each clip"
                    )

            with gr.Accordion("👤 Person Filtering (Optional)", open=False):
                reference_image = gr.Image(
                    label="Reference Image",
                    type="filepath",
                    sources=["upload"],
                )
                gr.Markdown("*Upload a photo of a person to prioritize clips featuring them.*")

            with gr.Accordion("📝 Custom Instructions (Optional)", open=False):
                custom_prompt = gr.Textbox(
                    label="Additional Instructions",
                    placeholder="E.g., 'Focus on crowd reactions' or 'Prioritize action scenes'",
                    lines=2,
                )

            process_btn = gr.Button(
                "🚀 Extract Highlights",
                variant="primary",
                size="lg"
            )

        # Right column - Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Output")

            status_output = gr.Textbox(
                label="Status",
                lines=2,
                interactive=False
            )

            gr.Markdown("#### Extracted Clips")
            clip1_output = gr.Video(label="Clip 1", elem_classes=["output-video"])
            clip2_output = gr.Video(label="Clip 2", elem_classes=["output-video"])
            clip3_output = gr.Video(label="Clip 3", elem_classes=["output-video"])

            with gr.Accordion("📋 Processing Log", open=True):
                log_output = gr.Textbox(
                    label="Log",
                    lines=10,
                    interactive=False,
                    show_copy_button=True
                )

    gr.Markdown("""
    ---
    **ShortSmith v2** | Powered by Qwen2-VL, InsightFace, and Librosa | [GitHub](https://github.com) | Built with Gradio
    """)

    # Connect the button to the processing function
    process_btn.click(
        fn=process_video,
        inputs=[
            video_input,
            domain_dropdown,
            num_clips_slider,
            duration_slider,
            reference_image,
            custom_prompt,
        ],
        outputs=[
            status_output,
            clip1_output,
            clip2_output,
            clip3_output,
            log_output,
        ],
        show_progress="full",
    )

# Launch the app
if __name__ == "__main__":
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
else:
    # For HuggingFace Spaces
    demo.queue()
    demo.launch()